diff options
36 files changed, 1568 insertions, 727 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0896008f7509..57ccdcec1469 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -184,6 +184,9 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING | |||
184 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC | 184 | config ARCH_SUPPORTS_DEBUG_PAGEALLOC |
185 | def_bool y | 185 | def_bool y |
186 | 186 | ||
187 | config HAVE_EARLY_RES | ||
188 | def_bool y | ||
189 | |||
187 | config HAVE_INTEL_TXT | 190 | config HAVE_INTEL_TXT |
188 | def_bool y | 191 | def_bool y |
189 | depends on EXPERIMENTAL && DMAR && ACPI | 192 | depends on EXPERIMENTAL && DMAR && ACPI |
@@ -569,6 +572,18 @@ config PARAVIRT_DEBUG | |||
569 | Enable to debug paravirt_ops internals. Specifically, BUG if | 572 | Enable to debug paravirt_ops internals. Specifically, BUG if |
570 | a paravirt_op is missing when it is called. | 573 | a paravirt_op is missing when it is called. |
571 | 574 | ||
575 | config NO_BOOTMEM | ||
576 | default y | ||
577 | bool "Disable Bootmem code" | ||
578 | ---help--- | ||
579 | Use early_res directly instead of bootmem before slab is ready. | ||
580 | - allocator (buddy) [generic] | ||
581 | - early allocator (bootmem) [generic] | ||
582 | - very early allocator (reserve_early*()) [x86] | ||
583 | - very very early allocator (early brk model) [x86] | ||
584 | This removes one layer between the early allocator and the final allocator. | ||
585 | |||
586 | |||
572 | config MEMTEST | 587 | config MEMTEST |
573 | bool "Memtest" | 588 | bool "Memtest" |
574 | ---help--- | 589 | ---help--- |
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index 761249e396fe..0e22296790d3 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h | |||
@@ -111,11 +111,8 @@ extern unsigned long end_user_pfn; | |||
111 | 111 | ||
112 | extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); | 112 | extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); |
113 | extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); | 113 | extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); |
114 | extern void reserve_early(u64 start, u64 end, char *name); | ||
115 | extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); | ||
116 | extern void free_early(u64 start, u64 end); | ||
117 | extern void early_res_to_bootmem(u64 start, u64 end); | ||
118 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); | 114 | extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); |
115 | #include <linux/early_res.h> | ||
119 | 116 | ||
120 | extern unsigned long e820_end_of_ram_pfn(void); | 117 | extern unsigned long e820_end_of_ram_pfn(void); |
121 | extern unsigned long e820_end_of_low_ram_pfn(void); | 118 | extern unsigned long e820_end_of_low_ram_pfn(void); |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index ada8c201d513..b4a00dd4eed5 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -124,6 +124,8 @@ extern void pci_iommu_alloc(void); | |||
124 | #include "pci_64.h" | 124 | #include "pci_64.h" |
125 | #endif | 125 | #endif |
126 | 126 | ||
127 | void dma32_reserve_bootmem(void); | ||
128 | |||
127 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ | 129 | /* implement the pci_ DMA API in terms of the generic device dma_ one */ |
128 | #include <asm-generic/pci-dma-compat.h> | 130 | #include <asm-generic/pci-dma-compat.h> |
129 | 131 | ||
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index ae5e40f67daf..fe15cfb21b9b 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h | |||
@@ -22,8 +22,6 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn, | |||
22 | extern int (*pci_config_write)(int seg, int bus, int dev, int fn, | 22 | extern int (*pci_config_write)(int seg, int bus, int dev, int fn, |
23 | int reg, int len, u32 value); | 23 | int reg, int len, u32 value); |
24 | 24 | ||
25 | extern void dma32_reserve_bootmem(void); | ||
26 | |||
27 | #endif /* __KERNEL__ */ | 25 | #endif /* __KERNEL__ */ |
28 | 26 | ||
29 | #endif /* _ASM_X86_PCI_64_H */ | 27 | #endif /* _ASM_X86_PCI_64_H */ |
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4009f6534f52..6f414ed88620 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h | |||
@@ -23,14 +23,4 @@ extern int reboot_force; | |||
23 | 23 | ||
24 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); | 24 | long do_arch_prctl(struct task_struct *task, int code, unsigned long addr); |
25 | 25 | ||
26 | /* | ||
27 | * This looks more complex than it should be. But we need to | ||
28 | * get the type for the ~ right in round_down (it needs to be | ||
29 | * as wide as the result!), and we want to evaluate the macro | ||
30 | * arguments just once each. | ||
31 | */ | ||
32 | #define __round_mask(x,y) ((__typeof__(x))((y)-1)) | ||
33 | #define round_up(x,y) ((((x)-1) | __round_mask(x,y))+1) | ||
34 | #define round_down(x,y) ((x) & ~__round_mask(x,y)) | ||
35 | |||
36 | #endif /* _ASM_X86_PROTO_H */ | 26 | #endif /* _ASM_X86_PROTO_H */ |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 09b1698e0466..06130b52f012 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -22,10 +22,10 @@ | |||
22 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/sort.h> | ||
26 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
27 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
28 | #include <linux/kvm_para.h> | 27 | #include <linux/kvm_para.h> |
28 | #include <linux/range.h> | ||
29 | 29 | ||
30 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
31 | #include <asm/e820.h> | 31 | #include <asm/e820.h> |
@@ -34,11 +34,6 @@ | |||
34 | 34 | ||
35 | #include "mtrr.h" | 35 | #include "mtrr.h" |
36 | 36 | ||
37 | struct res_range { | ||
38 | unsigned long start; | ||
39 | unsigned long end; | ||
40 | }; | ||
41 | |||
42 | struct var_mtrr_range_state { | 37 | struct var_mtrr_range_state { |
43 | unsigned long base_pfn; | 38 | unsigned long base_pfn; |
44 | unsigned long size_pfn; | 39 | unsigned long size_pfn; |
@@ -56,7 +51,7 @@ struct var_mtrr_state { | |||
56 | /* Should be related to MTRR_VAR_RANGES nums */ | 51 | /* Should be related to MTRR_VAR_RANGES nums */ |
57 | #define RANGE_NUM 256 | 52 | #define RANGE_NUM 256 |
58 | 53 | ||
59 | static struct res_range __initdata range[RANGE_NUM]; | 54 | static struct range __initdata range[RANGE_NUM]; |
60 | static int __initdata nr_range; | 55 | static int __initdata nr_range; |
61 | 56 | ||
62 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | 57 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; |
@@ -64,152 +59,11 @@ static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | |||
64 | static int __initdata debug_print; | 59 | static int __initdata debug_print; |
65 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) | 60 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) |
66 | 61 | ||
67 | |||
68 | static int __init | ||
69 | add_range(struct res_range *range, int nr_range, | ||
70 | unsigned long start, unsigned long end) | ||
71 | { | ||
72 | /* Out of slots: */ | ||
73 | if (nr_range >= RANGE_NUM) | ||
74 | return nr_range; | ||
75 | |||
76 | range[nr_range].start = start; | ||
77 | range[nr_range].end = end; | ||
78 | |||
79 | nr_range++; | ||
80 | |||
81 | return nr_range; | ||
82 | } | ||
83 | |||
84 | static int __init | ||
85 | add_range_with_merge(struct res_range *range, int nr_range, | ||
86 | unsigned long start, unsigned long end) | ||
87 | { | ||
88 | int i; | ||
89 | |||
90 | /* Try to merge it with old one: */ | ||
91 | for (i = 0; i < nr_range; i++) { | ||
92 | unsigned long final_start, final_end; | ||
93 | unsigned long common_start, common_end; | ||
94 | |||
95 | if (!range[i].end) | ||
96 | continue; | ||
97 | |||
98 | common_start = max(range[i].start, start); | ||
99 | common_end = min(range[i].end, end); | ||
100 | if (common_start > common_end + 1) | ||
101 | continue; | ||
102 | |||
103 | final_start = min(range[i].start, start); | ||
104 | final_end = max(range[i].end, end); | ||
105 | |||
106 | range[i].start = final_start; | ||
107 | range[i].end = final_end; | ||
108 | return nr_range; | ||
109 | } | ||
110 | |||
111 | /* Need to add it: */ | ||
112 | return add_range(range, nr_range, start, end); | ||
113 | } | ||
114 | |||
115 | static void __init | ||
116 | subtract_range(struct res_range *range, unsigned long start, unsigned long end) | ||
117 | { | ||
118 | int i, j; | ||
119 | |||
120 | for (j = 0; j < RANGE_NUM; j++) { | ||
121 | if (!range[j].end) | ||
122 | continue; | ||
123 | |||
124 | if (start <= range[j].start && end >= range[j].end) { | ||
125 | range[j].start = 0; | ||
126 | range[j].end = 0; | ||
127 | continue; | ||
128 | } | ||
129 | |||
130 | if (start <= range[j].start && end < range[j].end && | ||
131 | range[j].start < end + 1) { | ||
132 | range[j].start = end + 1; | ||
133 | continue; | ||
134 | } | ||
135 | |||
136 | |||
137 | if (start > range[j].start && end >= range[j].end && | ||
138 | range[j].end > start - 1) { | ||
139 | range[j].end = start - 1; | ||
140 | continue; | ||
141 | } | ||
142 | |||
143 | if (start > range[j].start && end < range[j].end) { | ||
144 | /* Find the new spare: */ | ||
145 | for (i = 0; i < RANGE_NUM; i++) { | ||
146 | if (range[i].end == 0) | ||
147 | break; | ||
148 | } | ||
149 | if (i < RANGE_NUM) { | ||
150 | range[i].end = range[j].end; | ||
151 | range[i].start = end + 1; | ||
152 | } else { | ||
153 | printk(KERN_ERR "run of slot in ranges\n"); | ||
154 | } | ||
155 | range[j].end = start - 1; | ||
156 | continue; | ||
157 | } | ||
158 | } | ||
159 | } | ||
160 | |||
161 | static int __init cmp_range(const void *x1, const void *x2) | ||
162 | { | ||
163 | const struct res_range *r1 = x1; | ||
164 | const struct res_range *r2 = x2; | ||
165 | long start1, start2; | ||
166 | |||
167 | start1 = r1->start; | ||
168 | start2 = r2->start; | ||
169 | |||
170 | return start1 - start2; | ||
171 | } | ||
172 | |||
173 | static int __init clean_sort_range(struct res_range *range, int az) | ||
174 | { | ||
175 | int i, j, k = az - 1, nr_range = 0; | ||
176 | |||
177 | for (i = 0; i < k; i++) { | ||
178 | if (range[i].end) | ||
179 | continue; | ||
180 | for (j = k; j > i; j--) { | ||
181 | if (range[j].end) { | ||
182 | k = j; | ||
183 | break; | ||
184 | } | ||
185 | } | ||
186 | if (j == i) | ||
187 | break; | ||
188 | range[i].start = range[k].start; | ||
189 | range[i].end = range[k].end; | ||
190 | range[k].start = 0; | ||
191 | range[k].end = 0; | ||
192 | k--; | ||
193 | } | ||
194 | /* count it */ | ||
195 | for (i = 0; i < az; i++) { | ||
196 | if (!range[i].end) { | ||
197 | nr_range = i; | ||
198 | break; | ||
199 | } | ||
200 | } | ||
201 | |||
202 | /* sort them */ | ||
203 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | ||
204 | |||
205 | return nr_range; | ||
206 | } | ||
207 | |||
208 | #define BIOS_BUG_MSG KERN_WARNING \ | 62 | #define BIOS_BUG_MSG KERN_WARNING \ |
209 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" | 63 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
210 | 64 | ||
211 | static int __init | 65 | static int __init |
212 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | 66 | x86_get_mtrr_mem_range(struct range *range, int nr_range, |
213 | unsigned long extra_remove_base, | 67 | unsigned long extra_remove_base, |
214 | unsigned long extra_remove_size) | 68 | unsigned long extra_remove_size) |
215 | { | 69 | { |
@@ -223,14 +77,14 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
223 | continue; | 77 | continue; |
224 | base = range_state[i].base_pfn; | 78 | base = range_state[i].base_pfn; |
225 | size = range_state[i].size_pfn; | 79 | size = range_state[i].size_pfn; |
226 | nr_range = add_range_with_merge(range, nr_range, base, | 80 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, |
227 | base + size - 1); | 81 | base, base + size); |
228 | } | 82 | } |
229 | if (debug_print) { | 83 | if (debug_print) { |
230 | printk(KERN_DEBUG "After WB checking\n"); | 84 | printk(KERN_DEBUG "After WB checking\n"); |
231 | for (i = 0; i < nr_range; i++) | 85 | for (i = 0; i < nr_range; i++) |
232 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 86 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
233 | range[i].start, range[i].end + 1); | 87 | range[i].start, range[i].end); |
234 | } | 88 | } |
235 | 89 | ||
236 | /* Take out UC ranges: */ | 90 | /* Take out UC ranges: */ |
@@ -252,19 +106,19 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
252 | size -= (1<<(20-PAGE_SHIFT)) - base; | 106 | size -= (1<<(20-PAGE_SHIFT)) - base; |
253 | base = 1<<(20-PAGE_SHIFT); | 107 | base = 1<<(20-PAGE_SHIFT); |
254 | } | 108 | } |
255 | subtract_range(range, base, base + size - 1); | 109 | subtract_range(range, RANGE_NUM, base, base + size); |
256 | } | 110 | } |
257 | if (extra_remove_size) | 111 | if (extra_remove_size) |
258 | subtract_range(range, extra_remove_base, | 112 | subtract_range(range, RANGE_NUM, extra_remove_base, |
259 | extra_remove_base + extra_remove_size - 1); | 113 | extra_remove_base + extra_remove_size); |
260 | 114 | ||
261 | if (debug_print) { | 115 | if (debug_print) { |
262 | printk(KERN_DEBUG "After UC checking\n"); | 116 | printk(KERN_DEBUG "After UC checking\n"); |
263 | for (i = 0; i < RANGE_NUM; i++) { | 117 | for (i = 0; i < RANGE_NUM; i++) { |
264 | if (!range[i].end) | 118 | if (!range[i].end) |
265 | continue; | 119 | continue; |
266 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 120 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
267 | range[i].start, range[i].end + 1); | 121 | range[i].start, range[i].end); |
268 | } | 122 | } |
269 | } | 123 | } |
270 | 124 | ||
@@ -273,26 +127,22 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
273 | if (debug_print) { | 127 | if (debug_print) { |
274 | printk(KERN_DEBUG "After sorting\n"); | 128 | printk(KERN_DEBUG "After sorting\n"); |
275 | for (i = 0; i < nr_range; i++) | 129 | for (i = 0; i < nr_range; i++) |
276 | printk(KERN_DEBUG "MTRR MAP PFN: %016lx - %016lx\n", | 130 | printk(KERN_DEBUG "MTRR MAP PFN: %016llx - %016llx\n", |
277 | range[i].start, range[i].end + 1); | 131 | range[i].start, range[i].end); |
278 | } | 132 | } |
279 | 133 | ||
280 | /* clear those is not used */ | ||
281 | for (i = nr_range; i < RANGE_NUM; i++) | ||
282 | memset(&range[i], 0, sizeof(range[i])); | ||
283 | |||
284 | return nr_range; | 134 | return nr_range; |
285 | } | 135 | } |
286 | 136 | ||
287 | #ifdef CONFIG_MTRR_SANITIZER | 137 | #ifdef CONFIG_MTRR_SANITIZER |
288 | 138 | ||
289 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | 139 | static unsigned long __init sum_ranges(struct range *range, int nr_range) |
290 | { | 140 | { |
291 | unsigned long sum = 0; | 141 | unsigned long sum = 0; |
292 | int i; | 142 | int i; |
293 | 143 | ||
294 | for (i = 0; i < nr_range; i++) | 144 | for (i = 0; i < nr_range; i++) |
295 | sum += range[i].end + 1 - range[i].start; | 145 | sum += range[i].end - range[i].start; |
296 | 146 | ||
297 | return sum; | 147 | return sum; |
298 | } | 148 | } |
@@ -621,7 +471,7 @@ static int __init parse_mtrr_spare_reg(char *arg) | |||
621 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | 471 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); |
622 | 472 | ||
623 | static int __init | 473 | static int __init |
624 | x86_setup_var_mtrrs(struct res_range *range, int nr_range, | 474 | x86_setup_var_mtrrs(struct range *range, int nr_range, |
625 | u64 chunk_size, u64 gran_size) | 475 | u64 chunk_size, u64 gran_size) |
626 | { | 476 | { |
627 | struct var_mtrr_state var_state; | 477 | struct var_mtrr_state var_state; |
@@ -639,7 +489,7 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
639 | /* Write the range: */ | 489 | /* Write the range: */ |
640 | for (i = 0; i < nr_range; i++) { | 490 | for (i = 0; i < nr_range; i++) { |
641 | set_var_mtrr_range(&var_state, range[i].start, | 491 | set_var_mtrr_range(&var_state, range[i].start, |
642 | range[i].end - range[i].start + 1); | 492 | range[i].end - range[i].start); |
643 | } | 493 | } |
644 | 494 | ||
645 | /* Write the last range: */ | 495 | /* Write the last range: */ |
@@ -742,7 +592,7 @@ mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | |||
742 | unsigned long x_remove_base, | 592 | unsigned long x_remove_base, |
743 | unsigned long x_remove_size, int i) | 593 | unsigned long x_remove_size, int i) |
744 | { | 594 | { |
745 | static struct res_range range_new[RANGE_NUM]; | 595 | static struct range range_new[RANGE_NUM]; |
746 | unsigned long range_sums_new; | 596 | unsigned long range_sums_new; |
747 | static int nr_range_new; | 597 | static int nr_range_new; |
748 | int num_reg; | 598 | int num_reg; |
@@ -869,10 +719,10 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
869 | * [0, 1M) should always be covered by var mtrr with WB | 719 | * [0, 1M) should always be covered by var mtrr with WB |
870 | * and fixed mtrrs should take effect before var mtrr for it: | 720 | * and fixed mtrrs should take effect before var mtrr for it: |
871 | */ | 721 | */ |
872 | nr_range = add_range_with_merge(range, nr_range, 0, | 722 | nr_range = add_range_with_merge(range, RANGE_NUM, nr_range, 0, |
873 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | 723 | 1ULL<<(20 - PAGE_SHIFT)); |
874 | /* Sort the ranges: */ | 724 | /* Sort the ranges: */ |
875 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 725 | sort_range(range, nr_range); |
876 | 726 | ||
877 | range_sums = sum_ranges(range, nr_range); | 727 | range_sums = sum_ranges(range, nr_range); |
878 | printk(KERN_INFO "total RAM covered: %ldM\n", | 728 | printk(KERN_INFO "total RAM covered: %ldM\n", |
@@ -1089,9 +939,9 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1089 | nr_range = 0; | 939 | nr_range = 0; |
1090 | if (mtrr_tom2) { | 940 | if (mtrr_tom2) { |
1091 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); | 941 | range[nr_range].start = (1ULL<<(32 - PAGE_SHIFT)); |
1092 | range[nr_range].end = (mtrr_tom2 >> PAGE_SHIFT) - 1; | 942 | range[nr_range].end = mtrr_tom2 >> PAGE_SHIFT; |
1093 | if (highest_pfn < range[nr_range].end + 1) | 943 | if (highest_pfn < range[nr_range].end) |
1094 | highest_pfn = range[nr_range].end + 1; | 944 | highest_pfn = range[nr_range].end; |
1095 | nr_range++; | 945 | nr_range++; |
1096 | } | 946 | } |
1097 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | 947 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); |
@@ -1103,15 +953,15 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1103 | 953 | ||
1104 | /* Check the holes: */ | 954 | /* Check the holes: */ |
1105 | for (i = 0; i < nr_range - 1; i++) { | 955 | for (i = 0; i < nr_range - 1; i++) { |
1106 | if (range[i].end + 1 < range[i+1].start) | 956 | if (range[i].end < range[i+1].start) |
1107 | total_trim_size += real_trim_memory(range[i].end + 1, | 957 | total_trim_size += real_trim_memory(range[i].end, |
1108 | range[i+1].start); | 958 | range[i+1].start); |
1109 | } | 959 | } |
1110 | 960 | ||
1111 | /* Check the top: */ | 961 | /* Check the top: */ |
1112 | i = nr_range - 1; | 962 | i = nr_range - 1; |
1113 | if (range[i].end + 1 < end_pfn) | 963 | if (range[i].end < end_pfn) |
1114 | total_trim_size += real_trim_memory(range[i].end + 1, | 964 | total_trim_size += real_trim_memory(range[i].end, |
1115 | end_pfn); | 965 | end_pfn); |
1116 | 966 | ||
1117 | if (total_trim_size) { | 967 | if (total_trim_size) { |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index a966b753e496..740b440fbd73 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -12,21 +12,13 @@ | |||
12 | #include <linux/types.h> | 12 | #include <linux/types.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/ioport.h> | ||
16 | #include <linux/string.h> | ||
17 | #include <linux/kexec.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/mm.h> | ||
20 | #include <linux/pfn.h> | 15 | #include <linux/pfn.h> |
21 | #include <linux/suspend.h> | 16 | #include <linux/suspend.h> |
22 | #include <linux/firmware-map.h> | 17 | #include <linux/firmware-map.h> |
23 | 18 | ||
24 | #include <asm/pgtable.h> | ||
25 | #include <asm/page.h> | ||
26 | #include <asm/e820.h> | 19 | #include <asm/e820.h> |
27 | #include <asm/proto.h> | 20 | #include <asm/proto.h> |
28 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
29 | #include <asm/trampoline.h> | ||
30 | 22 | ||
31 | /* | 23 | /* |
32 | * The e820 map is the map that gets modified e.g. with command line parameters | 24 | * The e820 map is the map that gets modified e.g. with command line parameters |
@@ -730,319 +722,44 @@ core_initcall(e820_mark_nvs_memory); | |||
730 | #endif | 722 | #endif |
731 | 723 | ||
732 | /* | 724 | /* |
733 | * Early reserved memory areas. | 725 | * Find a free area with specified alignment in a specific range. |
734 | */ | ||
735 | #define MAX_EARLY_RES 32 | ||
736 | |||
737 | struct early_res { | ||
738 | u64 start, end; | ||
739 | char name[16]; | ||
740 | char overlap_ok; | ||
741 | }; | ||
742 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | ||
743 | { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ | ||
744 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE) | ||
745 | /* | ||
746 | * But first pinch a few for the stack/trampoline stuff | ||
747 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
748 | * trampoline before removing it. (see the GDT stuff) | ||
749 | */ | ||
750 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 }, | ||
751 | #endif | ||
752 | |||
753 | {} | ||
754 | }; | ||
755 | |||
756 | static int __init find_overlapped_early(u64 start, u64 end) | ||
757 | { | ||
758 | int i; | ||
759 | struct early_res *r; | ||
760 | |||
761 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
762 | r = &early_res[i]; | ||
763 | if (end > r->start && start < r->end) | ||
764 | break; | ||
765 | } | ||
766 | |||
767 | return i; | ||
768 | } | ||
769 | |||
770 | /* | ||
771 | * Drop the i-th range from the early reservation map, | ||
772 | * by copying any higher ranges down one over it, and | ||
773 | * clearing what had been the last slot. | ||
774 | */ | ||
775 | static void __init drop_range(int i) | ||
776 | { | ||
777 | int j; | ||
778 | |||
779 | for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) | ||
780 | ; | ||
781 | |||
782 | memmove(&early_res[i], &early_res[i + 1], | ||
783 | (j - 1 - i) * sizeof(struct early_res)); | ||
784 | |||
785 | early_res[j - 1].end = 0; | ||
786 | } | ||
787 | |||
788 | /* | ||
789 | * Split any existing ranges that: | ||
790 | * 1) are marked 'overlap_ok', and | ||
791 | * 2) overlap with the stated range [start, end) | ||
792 | * into whatever portion (if any) of the existing range is entirely | ||
793 | * below or entirely above the stated range. Drop the portion | ||
794 | * of the existing range that overlaps with the stated range, | ||
795 | * which will allow the caller of this routine to then add that | ||
796 | * stated range without conflicting with any existing range. | ||
797 | */ | 726 | */ |
798 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | 727 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) |
799 | { | 728 | { |
800 | int i; | 729 | int i; |
801 | struct early_res *r; | ||
802 | u64 lower_start, lower_end; | ||
803 | u64 upper_start, upper_end; | ||
804 | char name[16]; | ||
805 | 730 | ||
806 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | 731 | for (i = 0; i < e820.nr_map; i++) { |
807 | r = &early_res[i]; | 732 | struct e820entry *ei = &e820.map[i]; |
733 | u64 addr; | ||
734 | u64 ei_start, ei_last; | ||
808 | 735 | ||
809 | /* Continue past non-overlapping ranges */ | 736 | if (ei->type != E820_RAM) |
810 | if (end <= r->start || start >= r->end) | ||
811 | continue; | 737 | continue; |
812 | 738 | ||
813 | /* | 739 | ei_last = ei->addr + ei->size; |
814 | * Leave non-ok overlaps as is; let caller | 740 | ei_start = ei->addr; |
815 | * panic "Overlapping early reservations" | 741 | addr = find_early_area(ei_start, ei_last, start, end, |
816 | * when it hits this overlap. | 742 | size, align); |
817 | */ | ||
818 | if (!r->overlap_ok) | ||
819 | return; | ||
820 | |||
821 | /* | ||
822 | * We have an ok overlap. We will drop it from the early | ||
823 | * reservation map, and add back in any non-overlapping | ||
824 | * portions (lower or upper) as separate, overlap_ok, | ||
825 | * non-overlapping ranges. | ||
826 | */ | ||
827 | |||
828 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | ||
829 | strncpy(name, r->name, sizeof(name) - 1); | ||
830 | |||
831 | lower_start = lower_end = 0; | ||
832 | upper_start = upper_end = 0; | ||
833 | if (r->start < start) { | ||
834 | lower_start = r->start; | ||
835 | lower_end = start; | ||
836 | } | ||
837 | if (r->end > end) { | ||
838 | upper_start = end; | ||
839 | upper_end = r->end; | ||
840 | } | ||
841 | |||
842 | /* 2. Drop the original ok overlapping range */ | ||
843 | drop_range(i); | ||
844 | |||
845 | i--; /* resume for-loop on copied down entry */ | ||
846 | |||
847 | /* 3. Add back in any non-overlapping ranges. */ | ||
848 | if (lower_end) | ||
849 | reserve_early_overlap_ok(lower_start, lower_end, name); | ||
850 | if (upper_end) | ||
851 | reserve_early_overlap_ok(upper_start, upper_end, name); | ||
852 | } | ||
853 | } | ||
854 | |||
855 | static void __init __reserve_early(u64 start, u64 end, char *name, | ||
856 | int overlap_ok) | ||
857 | { | ||
858 | int i; | ||
859 | struct early_res *r; | ||
860 | |||
861 | i = find_overlapped_early(start, end); | ||
862 | if (i >= MAX_EARLY_RES) | ||
863 | panic("Too many early reservations"); | ||
864 | r = &early_res[i]; | ||
865 | if (r->end) | ||
866 | panic("Overlapping early reservations " | ||
867 | "%llx-%llx %s to %llx-%llx %s\n", | ||
868 | start, end - 1, name?name:"", r->start, | ||
869 | r->end - 1, r->name); | ||
870 | r->start = start; | ||
871 | r->end = end; | ||
872 | r->overlap_ok = overlap_ok; | ||
873 | if (name) | ||
874 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * A few early reservtations come here. | ||
879 | * | ||
880 | * The 'overlap_ok' in the name of this routine does -not- mean it | ||
881 | * is ok for these reservations to overlap an earlier reservation. | ||
882 | * Rather it means that it is ok for subsequent reservations to | ||
883 | * overlap this one. | ||
884 | * | ||
885 | * Use this entry point to reserve early ranges when you are doing | ||
886 | * so out of "Paranoia", reserving perhaps more memory than you need, | ||
887 | * just in case, and don't mind a subsequent overlapping reservation | ||
888 | * that is known to be needed. | ||
889 | * | ||
890 | * The drop_overlaps_that_are_ok() call here isn't really needed. | ||
891 | * It would be needed if we had two colliding 'overlap_ok' | ||
892 | * reservations, so that the second such would not panic on the | ||
893 | * overlap with the first. We don't have any such as of this | ||
894 | * writing, but might as well tolerate such if it happens in | ||
895 | * the future. | ||
896 | */ | ||
897 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | ||
898 | { | ||
899 | drop_overlaps_that_are_ok(start, end); | ||
900 | __reserve_early(start, end, name, 1); | ||
901 | } | ||
902 | |||
903 | /* | ||
904 | * Most early reservations come here. | ||
905 | * | ||
906 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | ||
907 | * 'overlap_ok' ranges, so that we can then reserve this memory | ||
908 | * range without risk of panic'ing on an overlapping overlap_ok | ||
909 | * early reservation. | ||
910 | */ | ||
911 | void __init reserve_early(u64 start, u64 end, char *name) | ||
912 | { | ||
913 | if (start >= end) | ||
914 | return; | ||
915 | |||
916 | drop_overlaps_that_are_ok(start, end); | ||
917 | __reserve_early(start, end, name, 0); | ||
918 | } | ||
919 | |||
920 | void __init free_early(u64 start, u64 end) | ||
921 | { | ||
922 | struct early_res *r; | ||
923 | int i; | ||
924 | |||
925 | i = find_overlapped_early(start, end); | ||
926 | r = &early_res[i]; | ||
927 | if (i >= MAX_EARLY_RES || r->end != end || r->start != start) | ||
928 | panic("free_early on not reserved area: %llx-%llx!", | ||
929 | start, end - 1); | ||
930 | |||
931 | drop_range(i); | ||
932 | } | ||
933 | |||
934 | void __init early_res_to_bootmem(u64 start, u64 end) | ||
935 | { | ||
936 | int i, count; | ||
937 | u64 final_start, final_end; | ||
938 | |||
939 | count = 0; | ||
940 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | ||
941 | count++; | ||
942 | |||
943 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", | ||
944 | count, start, end); | ||
945 | for (i = 0; i < count; i++) { | ||
946 | struct early_res *r = &early_res[i]; | ||
947 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | ||
948 | r->start, r->end, r->name); | ||
949 | final_start = max(start, r->start); | ||
950 | final_end = min(end, r->end); | ||
951 | if (final_start >= final_end) { | ||
952 | printk(KERN_CONT "\n"); | ||
953 | continue; | ||
954 | } | ||
955 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", | ||
956 | final_start, final_end); | ||
957 | reserve_bootmem_generic(final_start, final_end - final_start, | ||
958 | BOOTMEM_DEFAULT); | ||
959 | } | ||
960 | } | ||
961 | 743 | ||
962 | /* Check for already reserved areas */ | 744 | if (addr != -1ULL) |
963 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | 745 | return addr; |
964 | { | ||
965 | int i; | ||
966 | u64 addr = *addrp; | ||
967 | int changed = 0; | ||
968 | struct early_res *r; | ||
969 | again: | ||
970 | i = find_overlapped_early(addr, addr + size); | ||
971 | r = &early_res[i]; | ||
972 | if (i < MAX_EARLY_RES && r->end) { | ||
973 | *addrp = addr = round_up(r->end, align); | ||
974 | changed = 1; | ||
975 | goto again; | ||
976 | } | 746 | } |
977 | return changed; | 747 | return -1ULL; |
978 | } | 748 | } |
979 | 749 | ||
980 | /* Check for already reserved areas */ | 750 | u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) |
981 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
982 | { | 751 | { |
983 | int i; | 752 | return find_e820_area(start, end, size, align); |
984 | u64 addr = *addrp, last; | ||
985 | u64 size = *sizep; | ||
986 | int changed = 0; | ||
987 | again: | ||
988 | last = addr + size; | ||
989 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { | ||
990 | struct early_res *r = &early_res[i]; | ||
991 | if (last > r->start && addr < r->start) { | ||
992 | size = r->start - addr; | ||
993 | changed = 1; | ||
994 | goto again; | ||
995 | } | ||
996 | if (last > r->end && addr < r->end) { | ||
997 | addr = round_up(r->end, align); | ||
998 | size = last - addr; | ||
999 | changed = 1; | ||
1000 | goto again; | ||
1001 | } | ||
1002 | if (last <= r->end && addr >= r->start) { | ||
1003 | (*sizep)++; | ||
1004 | return 0; | ||
1005 | } | ||
1006 | } | ||
1007 | if (changed) { | ||
1008 | *addrp = addr; | ||
1009 | *sizep = size; | ||
1010 | } | ||
1011 | return changed; | ||
1012 | } | 753 | } |
1013 | 754 | ||
1014 | /* | 755 | u64 __init get_max_mapped(void) |
1015 | * Find a free area with specified alignment in a specific range. | ||
1016 | */ | ||
1017 | u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) | ||
1018 | { | 756 | { |
1019 | int i; | 757 | u64 end = max_pfn_mapped; |
1020 | 758 | ||
1021 | for (i = 0; i < e820.nr_map; i++) { | 759 | end <<= PAGE_SHIFT; |
1022 | struct e820entry *ei = &e820.map[i]; | ||
1023 | u64 addr, last; | ||
1024 | u64 ei_last; | ||
1025 | 760 | ||
1026 | if (ei->type != E820_RAM) | 761 | return end; |
1027 | continue; | ||
1028 | addr = round_up(ei->addr, align); | ||
1029 | ei_last = ei->addr + ei->size; | ||
1030 | if (addr < start) | ||
1031 | addr = round_up(start, align); | ||
1032 | if (addr >= ei_last) | ||
1033 | continue; | ||
1034 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
1035 | ; | ||
1036 | last = addr + size; | ||
1037 | if (last > ei_last) | ||
1038 | continue; | ||
1039 | if (last > end) | ||
1040 | continue; | ||
1041 | return addr; | ||
1042 | } | ||
1043 | return -1ULL; | ||
1044 | } | 762 | } |
1045 | |||
1046 | /* | 763 | /* |
1047 | * Find next free range after *start | 764 | * Find next free range after *start |
1048 | */ | 765 | */ |
@@ -1052,25 +769,19 @@ u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) | |||
1052 | 769 | ||
1053 | for (i = 0; i < e820.nr_map; i++) { | 770 | for (i = 0; i < e820.nr_map; i++) { |
1054 | struct e820entry *ei = &e820.map[i]; | 771 | struct e820entry *ei = &e820.map[i]; |
1055 | u64 addr, last; | 772 | u64 addr; |
1056 | u64 ei_last; | 773 | u64 ei_start, ei_last; |
1057 | 774 | ||
1058 | if (ei->type != E820_RAM) | 775 | if (ei->type != E820_RAM) |
1059 | continue; | 776 | continue; |
1060 | addr = round_up(ei->addr, align); | 777 | |
1061 | ei_last = ei->addr + ei->size; | 778 | ei_last = ei->addr + ei->size; |
1062 | if (addr < start) | 779 | ei_start = ei->addr; |
1063 | addr = round_up(start, align); | 780 | addr = find_early_area_size(ei_start, ei_last, start, |
1064 | if (addr >= ei_last) | 781 | sizep, align); |
1065 | continue; | 782 | |
1066 | *sizep = ei_last - addr; | 783 | if (addr != -1ULL) |
1067 | while (bad_addr_size(&addr, sizep, align) && | 784 | return addr; |
1068 | addr + *sizep <= ei_last) | ||
1069 | ; | ||
1070 | last = addr + *sizep; | ||
1071 | if (last > ei_last) | ||
1072 | continue; | ||
1073 | return addr; | ||
1074 | } | 785 | } |
1075 | 786 | ||
1076 | return -1ULL; | 787 | return -1ULL; |
@@ -1429,6 +1140,8 @@ void __init e820_reserve_resources_late(void) | |||
1429 | end = MAX_RESOURCE_SIZE; | 1140 | end = MAX_RESOURCE_SIZE; |
1430 | if (start >= end) | 1141 | if (start >= end) |
1431 | continue; | 1142 | continue; |
1143 | printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", | ||
1144 | start, end); | ||
1432 | reserve_region_with_split(&iomem_resource, start, end, | 1145 | reserve_region_with_split(&iomem_resource, start, end, |
1433 | "RAM buffer"); | 1146 | "RAM buffer"); |
1434 | } | 1147 | } |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 5051b94c9069..adedeef1dedc 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -29,6 +29,16 @@ static void __init i386_default_early_setup(void) | |||
29 | 29 | ||
30 | void __init i386_start_kernel(void) | 30 | void __init i386_start_kernel(void) |
31 | { | 31 | { |
32 | #ifdef CONFIG_X86_TRAMPOLINE | ||
33 | /* | ||
34 | * But first pinch a few for the stack/trampoline stuff | ||
35 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
36 | * trampoline before removing it. (see the GDT stuff) | ||
37 | */ | ||
38 | reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, | ||
39 | "EX TRAMPOLINE"); | ||
40 | #endif | ||
41 | |||
32 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); | 42 | reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); |
33 | 43 | ||
34 | #ifdef CONFIG_BLK_DEV_INITRD | 44 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index 712d15fdc416..71825806cd44 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/string.h> | 7 | #include <linux/string.h> |
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/dmi.h> | 9 | #include <linux/dmi.h> |
10 | #include <linux/range.h> | ||
11 | |||
10 | #include <asm/pci-direct.h> | 12 | #include <asm/pci-direct.h> |
11 | #include <linux/sort.h> | 13 | #include <linux/sort.h> |
12 | #include <asm/io.h> | 14 | #include <asm/io.h> |
@@ -30,11 +32,6 @@ static struct pci_hostbridge_probe pci_probes[] __cpuinitdata = { | |||
30 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, | 32 | { 0xff, 0, PCI_VENDOR_ID_AMD, 0x1200 }, |
31 | }; | 33 | }; |
32 | 34 | ||
33 | struct range { | ||
34 | u64 start; | ||
35 | u64 end; | ||
36 | }; | ||
37 | |||
38 | static int __cpuinit cmp_range(const void *x1, const void *x2) | 35 | static int __cpuinit cmp_range(const void *x1, const void *x2) |
39 | { | 36 | { |
40 | const struct range *r1 = x1; | 37 | const struct range *r1 = x1; |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 75e14e21f61a..1aa966c565f9 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -65,7 +65,7 @@ int dma_set_mask(struct device *dev, u64 mask) | |||
65 | } | 65 | } |
66 | EXPORT_SYMBOL(dma_set_mask); | 66 | EXPORT_SYMBOL(dma_set_mask); |
67 | 67 | ||
68 | #ifdef CONFIG_X86_64 | 68 | #if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) |
69 | static __initdata void *dma32_bootmem_ptr; | 69 | static __initdata void *dma32_bootmem_ptr; |
70 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); | 70 | static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); |
71 | 71 | ||
@@ -116,14 +116,21 @@ static void __init dma32_free_bootmem(void) | |||
116 | dma32_bootmem_ptr = NULL; | 116 | dma32_bootmem_ptr = NULL; |
117 | dma32_bootmem_size = 0; | 117 | dma32_bootmem_size = 0; |
118 | } | 118 | } |
119 | #else | ||
120 | void __init dma32_reserve_bootmem(void) | ||
121 | { | ||
122 | } | ||
123 | static void __init dma32_free_bootmem(void) | ||
124 | { | ||
125 | } | ||
126 | |||
119 | #endif | 127 | #endif |
120 | 128 | ||
121 | void __init pci_iommu_alloc(void) | 129 | void __init pci_iommu_alloc(void) |
122 | { | 130 | { |
123 | #ifdef CONFIG_X86_64 | ||
124 | /* free the range so iommu could get some range less than 4G */ | 131 | /* free the range so iommu could get some range less than 4G */ |
125 | dma32_free_bootmem(); | 132 | dma32_free_bootmem(); |
126 | #endif | 133 | |
127 | if (pci_swiotlb_detect()) | 134 | if (pci_swiotlb_detect()) |
128 | goto out; | 135 | goto out; |
129 | 136 | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cb42109a55b4..5d7ba1a449bd 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -969,15 +969,11 @@ void __init setup_arch(char **cmdline_p) | |||
969 | #endif | 969 | #endif |
970 | 970 | ||
971 | initmem_init(0, max_pfn, acpi, k8); | 971 | initmem_init(0, max_pfn, acpi, k8); |
972 | #ifndef CONFIG_NO_BOOTMEM | ||
973 | early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT); | ||
974 | #endif | ||
972 | 975 | ||
973 | #ifdef CONFIG_X86_64 | ||
974 | /* | ||
975 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
976 | * with the crashkernel command line, so do that after | ||
977 | * reserve_crashkernel() | ||
978 | */ | ||
979 | dma32_reserve_bootmem(); | 976 | dma32_reserve_bootmem(); |
980 | #endif | ||
981 | 977 | ||
982 | reserve_ibft_region(); | 978 | reserve_ibft_region(); |
983 | 979 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 35abcb8b00e9..ef6370b00e70 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -137,7 +137,13 @@ static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) | |||
137 | 137 | ||
138 | static void __init pcpu_fc_free(void *ptr, size_t size) | 138 | static void __init pcpu_fc_free(void *ptr, size_t size) |
139 | { | 139 | { |
140 | #ifdef CONFIG_NO_BOOTMEM | ||
141 | u64 start = __pa(ptr); | ||
142 | u64 end = start + size; | ||
143 | free_early_partial(start, end); | ||
144 | #else | ||
140 | free_bootmem(__pa(ptr), size); | 145 | free_bootmem(__pa(ptr), size); |
146 | #endif | ||
141 | } | 147 | } |
142 | 148 | ||
143 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) | 149 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 2226f2c70ea3..5cb3f0f54f47 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -750,6 +750,7 @@ static void __init zone_sizes_init(void) | |||
750 | free_area_init_nodes(max_zone_pfns); | 750 | free_area_init_nodes(max_zone_pfns); |
751 | } | 751 | } |
752 | 752 | ||
753 | #ifndef CONFIG_NO_BOOTMEM | ||
753 | static unsigned long __init setup_node_bootmem(int nodeid, | 754 | static unsigned long __init setup_node_bootmem(int nodeid, |
754 | unsigned long start_pfn, | 755 | unsigned long start_pfn, |
755 | unsigned long end_pfn, | 756 | unsigned long end_pfn, |
@@ -766,13 +767,14 @@ static unsigned long __init setup_node_bootmem(int nodeid, | |||
766 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", | 767 | printk(KERN_INFO " node %d bootmap %08lx - %08lx\n", |
767 | nodeid, bootmap, bootmap + bootmap_size); | 768 | nodeid, bootmap, bootmap + bootmap_size); |
768 | free_bootmem_with_active_regions(nodeid, end_pfn); | 769 | free_bootmem_with_active_regions(nodeid, end_pfn); |
769 | early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
770 | 770 | ||
771 | return bootmap + bootmap_size; | 771 | return bootmap + bootmap_size; |
772 | } | 772 | } |
773 | #endif | ||
773 | 774 | ||
774 | void __init setup_bootmem_allocator(void) | 775 | void __init setup_bootmem_allocator(void) |
775 | { | 776 | { |
777 | #ifndef CONFIG_NO_BOOTMEM | ||
776 | int nodeid; | 778 | int nodeid; |
777 | unsigned long bootmap_size, bootmap; | 779 | unsigned long bootmap_size, bootmap; |
778 | /* | 780 | /* |
@@ -784,11 +786,13 @@ void __init setup_bootmem_allocator(void) | |||
784 | if (bootmap == -1L) | 786 | if (bootmap == -1L) |
785 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 787 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
786 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | 788 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); |
789 | #endif | ||
787 | 790 | ||
788 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", | 791 | printk(KERN_INFO " mapped low ram: 0 - %08lx\n", |
789 | max_pfn_mapped<<PAGE_SHIFT); | 792 | max_pfn_mapped<<PAGE_SHIFT); |
790 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); | 793 | printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT); |
791 | 794 | ||
795 | #ifndef CONFIG_NO_BOOTMEM | ||
792 | for_each_online_node(nodeid) { | 796 | for_each_online_node(nodeid) { |
793 | unsigned long start_pfn, end_pfn; | 797 | unsigned long start_pfn, end_pfn; |
794 | 798 | ||
@@ -806,6 +810,7 @@ void __init setup_bootmem_allocator(void) | |||
806 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, | 810 | bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn, |
807 | bootmap); | 811 | bootmap); |
808 | } | 812 | } |
813 | #endif | ||
809 | 814 | ||
810 | after_bootmem = 1; | 815 | after_bootmem = 1; |
811 | } | 816 | } |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 69ddfbd91135..e9b040e1cde5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -572,6 +572,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
572 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | 572 | void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, |
573 | int acpi, int k8) | 573 | int acpi, int k8) |
574 | { | 574 | { |
575 | #ifndef CONFIG_NO_BOOTMEM | ||
575 | unsigned long bootmap_size, bootmap; | 576 | unsigned long bootmap_size, bootmap; |
576 | 577 | ||
577 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 578 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
@@ -579,13 +580,15 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
579 | PAGE_SIZE); | 580 | PAGE_SIZE); |
580 | if (bootmap == -1L) | 581 | if (bootmap == -1L) |
581 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); | 582 | panic("Cannot find bootmem map of size %ld\n", bootmap_size); |
583 | reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); | ||
582 | /* don't touch min_low_pfn */ | 584 | /* don't touch min_low_pfn */ |
583 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, | 585 | bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, |
584 | 0, end_pfn); | 586 | 0, end_pfn); |
585 | e820_register_active_regions(0, start_pfn, end_pfn); | 587 | e820_register_active_regions(0, start_pfn, end_pfn); |
586 | free_bootmem_with_active_regions(0, end_pfn); | 588 | free_bootmem_with_active_regions(0, end_pfn); |
587 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 589 | #else |
588 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 590 | e820_register_active_regions(0, start_pfn, end_pfn); |
591 | #endif | ||
589 | } | 592 | } |
590 | #endif | 593 | #endif |
591 | 594 | ||
@@ -974,7 +977,7 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
974 | if (pmd_none(*pmd)) { | 977 | if (pmd_none(*pmd)) { |
975 | pte_t entry; | 978 | pte_t entry; |
976 | 979 | ||
977 | p = vmemmap_alloc_block(PMD_SIZE, node); | 980 | p = vmemmap_alloc_block_buf(PMD_SIZE, node); |
978 | if (!p) | 981 | if (!p) |
979 | return -ENOMEM; | 982 | return -ENOMEM; |
980 | 983 | ||
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index b20760ca7244..809baaaf48b1 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -418,7 +418,10 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, | |||
418 | 418 | ||
419 | for_each_online_node(nid) { | 419 | for_each_online_node(nid) { |
420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 420 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
421 | NODE_DATA(nid)->node_id = nid; | ||
422 | #ifndef CONFIG_NO_BOOTMEM | ||
421 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; | 423 | NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; |
424 | #endif | ||
422 | } | 425 | } |
423 | 426 | ||
424 | setup_bootmem_allocator(); | 427 | setup_bootmem_allocator(); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 3307ea8bd43a..8948f47fde05 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -163,30 +163,48 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
163 | unsigned long end, unsigned long size, | 163 | unsigned long end, unsigned long size, |
164 | unsigned long align) | 164 | unsigned long align) |
165 | { | 165 | { |
166 | unsigned long mem = find_e820_area(start, end, size, align); | 166 | unsigned long mem; |
167 | void *ptr; | ||
168 | 167 | ||
168 | /* | ||
169 | * put it on high as possible | ||
170 | * something will go with NODE_DATA | ||
171 | */ | ||
172 | if (start < (MAX_DMA_PFN<<PAGE_SHIFT)) | ||
173 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
174 | if (start < (MAX_DMA32_PFN<<PAGE_SHIFT) && | ||
175 | end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
176 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
177 | mem = find_e820_area(start, end, size, align); | ||
178 | if (mem != -1L) | ||
179 | return __va(mem); | ||
180 | |||
181 | /* extend the search scope */ | ||
182 | end = max_pfn_mapped << PAGE_SHIFT; | ||
183 | if (end > (MAX_DMA32_PFN<<PAGE_SHIFT)) | ||
184 | start = MAX_DMA32_PFN<<PAGE_SHIFT; | ||
185 | else | ||
186 | start = MAX_DMA_PFN<<PAGE_SHIFT; | ||
187 | mem = find_e820_area(start, end, size, align); | ||
169 | if (mem != -1L) | 188 | if (mem != -1L) |
170 | return __va(mem); | 189 | return __va(mem); |
171 | 190 | ||
172 | ptr = __alloc_bootmem_nopanic(size, align, __pa(MAX_DMA_ADDRESS)); | 191 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", |
173 | if (ptr == NULL) { | ||
174 | printk(KERN_ERR "Cannot find %lu bytes in node %d\n", | ||
175 | size, nodeid); | 192 | size, nodeid); |
176 | return NULL; | 193 | |
177 | } | 194 | return NULL; |
178 | return ptr; | ||
179 | } | 195 | } |
180 | 196 | ||
181 | /* Initialize bootmem allocator for a node */ | 197 | /* Initialize bootmem allocator for a node */ |
182 | void __init | 198 | void __init |
183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | 199 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 200 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 201 | unsigned long start_pfn, last_pfn, nodedata_phys; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | 202 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); |
187 | unsigned long bootmap_start, nodedata_phys; | ||
188 | void *bootmap; | ||
189 | int nid; | 203 | int nid; |
204 | #ifndef CONFIG_NO_BOOTMEM | ||
205 | unsigned long bootmap_start, bootmap_pages, bootmap_size; | ||
206 | void *bootmap; | ||
207 | #endif | ||
190 | 208 | ||
191 | if (!end) | 209 | if (!end) |
192 | return; | 210 | return; |
@@ -200,7 +218,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
200 | 218 | ||
201 | start = roundup(start, ZONE_ALIGN); | 219 | start = roundup(start, ZONE_ALIGN); |
202 | 220 | ||
203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 221 | printk(KERN_INFO "Initmem setup node %d %016lx-%016lx\n", nodeid, |
204 | start, end); | 222 | start, end); |
205 | 223 | ||
206 | start_pfn = start >> PAGE_SHIFT; | 224 | start_pfn = start >> PAGE_SHIFT; |
@@ -211,14 +229,21 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
211 | if (node_data[nodeid] == NULL) | 229 | if (node_data[nodeid] == NULL) |
212 | return; | 230 | return; |
213 | nodedata_phys = __pa(node_data[nodeid]); | 231 | nodedata_phys = __pa(node_data[nodeid]); |
232 | reserve_early(nodedata_phys, nodedata_phys + pgdat_size, "NODE_DATA"); | ||
214 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, | 233 | printk(KERN_INFO " NODE_DATA [%016lx - %016lx]\n", nodedata_phys, |
215 | nodedata_phys + pgdat_size - 1); | 234 | nodedata_phys + pgdat_size - 1); |
235 | nid = phys_to_nid(nodedata_phys); | ||
236 | if (nid != nodeid) | ||
237 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
216 | 238 | ||
217 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 239 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
218 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | 240 | NODE_DATA(nodeid)->node_id = nodeid; |
219 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 241 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
220 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 242 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
221 | 243 | ||
244 | #ifndef CONFIG_NO_BOOTMEM | ||
245 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; | ||
246 | |||
222 | /* | 247 | /* |
223 | * Find a place for the bootmem map | 248 | * Find a place for the bootmem map |
224 | * nodedata_phys could be on other nodes by alloc_bootmem, | 249 | * nodedata_phys could be on other nodes by alloc_bootmem, |
@@ -227,11 +252,7 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
227 | * of alloc_bootmem, that could clash with reserved range | 252 | * of alloc_bootmem, that could clash with reserved range |
228 | */ | 253 | */ |
229 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); | 254 | bootmap_pages = bootmem_bootmap_pages(last_pfn - start_pfn); |
230 | nid = phys_to_nid(nodedata_phys); | 255 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); |
231 | if (nid == nodeid) | ||
232 | bootmap_start = roundup(nodedata_phys + pgdat_size, PAGE_SIZE); | ||
233 | else | ||
234 | bootmap_start = roundup(start, PAGE_SIZE); | ||
235 | /* | 256 | /* |
236 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like | 257 | * SMP_CACHE_BYTES could be enough, but init_bootmem_node like |
237 | * to use that to align to PAGE_SIZE | 258 | * to use that to align to PAGE_SIZE |
@@ -239,18 +260,13 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
239 | bootmap = early_node_mem(nodeid, bootmap_start, end, | 260 | bootmap = early_node_mem(nodeid, bootmap_start, end, |
240 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); | 261 | bootmap_pages<<PAGE_SHIFT, PAGE_SIZE); |
241 | if (bootmap == NULL) { | 262 | if (bootmap == NULL) { |
242 | if (nodedata_phys < start || nodedata_phys >= end) { | 263 | free_early(nodedata_phys, nodedata_phys + pgdat_size); |
243 | /* | ||
244 | * only need to free it if it is from other node | ||
245 | * bootmem | ||
246 | */ | ||
247 | if (nid != nodeid) | ||
248 | free_bootmem(nodedata_phys, pgdat_size); | ||
249 | } | ||
250 | node_data[nodeid] = NULL; | 264 | node_data[nodeid] = NULL; |
251 | return; | 265 | return; |
252 | } | 266 | } |
253 | bootmap_start = __pa(bootmap); | 267 | bootmap_start = __pa(bootmap); |
268 | reserve_early(bootmap_start, bootmap_start+(bootmap_pages<<PAGE_SHIFT), | ||
269 | "BOOTMAP"); | ||
254 | 270 | ||
255 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), | 271 | bootmap_size = init_bootmem_node(NODE_DATA(nodeid), |
256 | bootmap_start >> PAGE_SHIFT, | 272 | bootmap_start >> PAGE_SHIFT, |
@@ -259,31 +275,12 @@ setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) | |||
259 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", | 275 | printk(KERN_INFO " bootmap [%016lx - %016lx] pages %lx\n", |
260 | bootmap_start, bootmap_start + bootmap_size - 1, | 276 | bootmap_start, bootmap_start + bootmap_size - 1, |
261 | bootmap_pages); | 277 | bootmap_pages); |
262 | |||
263 | free_bootmem_with_active_regions(nodeid, end); | ||
264 | |||
265 | /* | ||
266 | * convert early reserve to bootmem reserve earlier | ||
267 | * otherwise early_node_mem could use early reserved mem | ||
268 | * on previous node | ||
269 | */ | ||
270 | early_res_to_bootmem(start, end); | ||
271 | |||
272 | /* | ||
273 | * in some case early_node_mem could use alloc_bootmem | ||
274 | * to get range on other node, don't reserve that again | ||
275 | */ | ||
276 | if (nid != nodeid) | ||
277 | printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid); | ||
278 | else | ||
279 | reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, | ||
280 | pgdat_size, BOOTMEM_DEFAULT); | ||
281 | nid = phys_to_nid(bootmap_start); | 278 | nid = phys_to_nid(bootmap_start); |
282 | if (nid != nodeid) | 279 | if (nid != nodeid) |
283 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); | 280 | printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid); |
284 | else | 281 | |
285 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 282 | free_bootmem_with_active_regions(nodeid, end); |
286 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 283 | #endif |
287 | 284 | ||
288 | node_set_online(nodeid); | 285 | node_set_online(nodeid); |
289 | } | 286 | } |
@@ -709,6 +706,10 @@ unsigned long __init numa_free_all_bootmem(void) | |||
709 | for_each_online_node(i) | 706 | for_each_online_node(i) |
710 | pages += free_all_bootmem_node(NODE_DATA(i)); | 707 | pages += free_all_bootmem_node(NODE_DATA(i)); |
711 | 708 | ||
709 | #ifdef CONFIG_NO_BOOTMEM | ||
710 | pages += free_all_memory_core_early(MAX_NUMNODES); | ||
711 | #endif | ||
712 | |||
712 | return pages; | 713 | return pages; |
713 | } | 714 | } |
714 | 715 | ||
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index 39fba37f702f..0b7d3e9593e1 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -14,8 +14,7 @@ obj-$(CONFIG_X86_VISWS) += visws.o | |||
14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | 14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
15 | 15 | ||
16 | obj-y += common.o early.o | 16 | obj-y += common.o early.o |
17 | obj-y += amd_bus.o | 17 | obj-y += amd_bus.o bus_numa.o |
18 | obj-$(CONFIG_X86_64) += bus_numa.o | ||
19 | 18 | ||
20 | ifeq ($(CONFIG_PCI_DEBUG),y) | 19 | ifeq ($(CONFIG_PCI_DEBUG),y) |
21 | EXTRA_CFLAGS += -DDEBUG | 20 | EXTRA_CFLAGS += -DDEBUG |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 95ecbd495955..fc1e8fe07e5c 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -2,11 +2,11 @@ | |||
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/topology.h> | 3 | #include <linux/topology.h> |
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include <linux/range.h> | ||
6 | |||
5 | #include <asm/pci_x86.h> | 7 | #include <asm/pci_x86.h> |
6 | 8 | ||
7 | #ifdef CONFIG_X86_64 | ||
8 | #include <asm/pci-direct.h> | 9 | #include <asm/pci-direct.h> |
9 | #endif | ||
10 | 10 | ||
11 | #include "bus_numa.h" | 11 | #include "bus_numa.h" |
12 | 12 | ||
@@ -15,60 +15,6 @@ | |||
15 | * also get peer root bus resource for io,mmio | 15 | * also get peer root bus resource for io,mmio |
16 | */ | 16 | */ |
17 | 17 | ||
18 | #ifdef CONFIG_X86_64 | ||
19 | |||
20 | #define RANGE_NUM 16 | ||
21 | |||
22 | struct res_range { | ||
23 | size_t start; | ||
24 | size_t end; | ||
25 | }; | ||
26 | |||
27 | static void __init update_range(struct res_range *range, size_t start, | ||
28 | size_t end) | ||
29 | { | ||
30 | int i; | ||
31 | int j; | ||
32 | |||
33 | for (j = 0; j < RANGE_NUM; j++) { | ||
34 | if (!range[j].end) | ||
35 | continue; | ||
36 | |||
37 | if (start <= range[j].start && end >= range[j].end) { | ||
38 | range[j].start = 0; | ||
39 | range[j].end = 0; | ||
40 | continue; | ||
41 | } | ||
42 | |||
43 | if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) { | ||
44 | range[j].start = end + 1; | ||
45 | continue; | ||
46 | } | ||
47 | |||
48 | |||
49 | if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) { | ||
50 | range[j].end = start - 1; | ||
51 | continue; | ||
52 | } | ||
53 | |||
54 | if (start > range[j].start && end < range[j].end) { | ||
55 | /* find the new spare */ | ||
56 | for (i = 0; i < RANGE_NUM; i++) { | ||
57 | if (range[i].end == 0) | ||
58 | break; | ||
59 | } | ||
60 | if (i < RANGE_NUM) { | ||
61 | range[i].end = range[j].end; | ||
62 | range[i].start = end + 1; | ||
63 | } else { | ||
64 | printk(KERN_ERR "run of slot in ranges\n"); | ||
65 | } | ||
66 | range[j].end = start - 1; | ||
67 | continue; | ||
68 | } | ||
69 | } | ||
70 | } | ||
71 | |||
72 | struct pci_hostbridge_probe { | 18 | struct pci_hostbridge_probe { |
73 | u32 bus; | 19 | u32 bus; |
74 | u32 slot; | 20 | u32 slot; |
@@ -111,6 +57,8 @@ static void __init get_pci_mmcfg_amd_fam10h_range(void) | |||
111 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; | 57 | fam10h_mmconf_end = base + (1ULL<<(segn_busn_bits + 20)) - 1; |
112 | } | 58 | } |
113 | 59 | ||
60 | #define RANGE_NUM 16 | ||
61 | |||
114 | /** | 62 | /** |
115 | * early_fill_mp_bus_to_node() | 63 | * early_fill_mp_bus_to_node() |
116 | * called before pcibios_scan_root and pci_scan_bus | 64 | * called before pcibios_scan_root and pci_scan_bus |
@@ -130,16 +78,17 @@ static int __init early_fill_mp_bus_info(void) | |||
130 | struct pci_root_info *info; | 78 | struct pci_root_info *info; |
131 | u32 reg; | 79 | u32 reg; |
132 | struct resource *res; | 80 | struct resource *res; |
133 | size_t start; | 81 | u64 start; |
134 | size_t end; | 82 | u64 end; |
135 | struct res_range range[RANGE_NUM]; | 83 | struct range range[RANGE_NUM]; |
136 | u64 val; | 84 | u64 val; |
137 | u32 address; | 85 | u32 address; |
86 | bool found; | ||
138 | 87 | ||
139 | if (!early_pci_allowed()) | 88 | if (!early_pci_allowed()) |
140 | return -1; | 89 | return -1; |
141 | 90 | ||
142 | found_all_numa_early = 0; | 91 | found = false; |
143 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { | 92 | for (i = 0; i < ARRAY_SIZE(pci_probes); i++) { |
144 | u32 id; | 93 | u32 id; |
145 | u16 device; | 94 | u16 device; |
@@ -153,12 +102,12 @@ static int __init early_fill_mp_bus_info(void) | |||
153 | device = (id>>16) & 0xffff; | 102 | device = (id>>16) & 0xffff; |
154 | if (pci_probes[i].vendor == vendor && | 103 | if (pci_probes[i].vendor == vendor && |
155 | pci_probes[i].device == device) { | 104 | pci_probes[i].device == device) { |
156 | found_all_numa_early = 1; | 105 | found = true; |
157 | break; | 106 | break; |
158 | } | 107 | } |
159 | } | 108 | } |
160 | 109 | ||
161 | if (!found_all_numa_early) | 110 | if (!found) |
162 | return 0; | 111 | return 0; |
163 | 112 | ||
164 | pci_root_num = 0; | 113 | pci_root_num = 0; |
@@ -196,7 +145,7 @@ static int __init early_fill_mp_bus_info(void) | |||
196 | def_link = (reg >> 8) & 0x03; | 145 | def_link = (reg >> 8) & 0x03; |
197 | 146 | ||
198 | memset(range, 0, sizeof(range)); | 147 | memset(range, 0, sizeof(range)); |
199 | range[0].end = 0xffff; | 148 | add_range(range, RANGE_NUM, 0, 0, 0xffff + 1); |
200 | /* io port resource */ | 149 | /* io port resource */ |
201 | for (i = 0; i < 4; i++) { | 150 | for (i = 0; i < 4; i++) { |
202 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); | 151 | reg = read_pci_config(bus, slot, 1, 0xc0 + (i << 3)); |
@@ -220,13 +169,13 @@ static int __init early_fill_mp_bus_info(void) | |||
220 | 169 | ||
221 | info = &pci_root_info[j]; | 170 | info = &pci_root_info[j]; |
222 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", | 171 | printk(KERN_DEBUG "node %d link %d: io port [%llx, %llx]\n", |
223 | node, link, (u64)start, (u64)end); | 172 | node, link, start, end); |
224 | 173 | ||
225 | /* kernel only handle 16 bit only */ | 174 | /* kernel only handle 16 bit only */ |
226 | if (end > 0xffff) | 175 | if (end > 0xffff) |
227 | end = 0xffff; | 176 | end = 0xffff; |
228 | update_res(info, start, end, IORESOURCE_IO, 1); | 177 | update_res(info, start, end, IORESOURCE_IO, 1); |
229 | update_range(range, start, end); | 178 | subtract_range(range, RANGE_NUM, start, end + 1); |
230 | } | 179 | } |
231 | /* add left over io port range to def node/link, [0, 0xffff] */ | 180 | /* add left over io port range to def node/link, [0, 0xffff] */ |
232 | /* find the position */ | 181 | /* find the position */ |
@@ -241,29 +190,32 @@ static int __init early_fill_mp_bus_info(void) | |||
241 | if (!range[i].end) | 190 | if (!range[i].end) |
242 | continue; | 191 | continue; |
243 | 192 | ||
244 | update_res(info, range[i].start, range[i].end, | 193 | update_res(info, range[i].start, range[i].end - 1, |
245 | IORESOURCE_IO, 1); | 194 | IORESOURCE_IO, 1); |
246 | } | 195 | } |
247 | } | 196 | } |
248 | 197 | ||
249 | memset(range, 0, sizeof(range)); | 198 | memset(range, 0, sizeof(range)); |
250 | /* 0xfd00000000-0xffffffffff for HT */ | 199 | /* 0xfd00000000-0xffffffffff for HT */ |
251 | range[0].end = (0xfdULL<<32) - 1; | 200 | end = cap_resource((0xfdULL<<32) - 1); |
201 | end++; | ||
202 | add_range(range, RANGE_NUM, 0, 0, end); | ||
252 | 203 | ||
253 | /* need to take out [0, TOM) for RAM*/ | 204 | /* need to take out [0, TOM) for RAM*/ |
254 | address = MSR_K8_TOP_MEM1; | 205 | address = MSR_K8_TOP_MEM1; |
255 | rdmsrl(address, val); | 206 | rdmsrl(address, val); |
256 | end = (val & 0xffffff800000ULL); | 207 | end = (val & 0xffffff800000ULL); |
257 | printk(KERN_INFO "TOM: %016lx aka %ldM\n", end, end>>20); | 208 | printk(KERN_INFO "TOM: %016llx aka %lldM\n", end, end>>20); |
258 | if (end < (1ULL<<32)) | 209 | if (end < (1ULL<<32)) |
259 | update_range(range, 0, end - 1); | 210 | subtract_range(range, RANGE_NUM, 0, end); |
260 | 211 | ||
261 | /* get mmconfig */ | 212 | /* get mmconfig */ |
262 | get_pci_mmcfg_amd_fam10h_range(); | 213 | get_pci_mmcfg_amd_fam10h_range(); |
263 | /* need to take out mmconf range */ | 214 | /* need to take out mmconf range */ |
264 | if (fam10h_mmconf_end) { | 215 | if (fam10h_mmconf_end) { |
265 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); | 216 | printk(KERN_DEBUG "Fam 10h mmconf [%llx, %llx]\n", fam10h_mmconf_start, fam10h_mmconf_end); |
266 | update_range(range, fam10h_mmconf_start, fam10h_mmconf_end); | 217 | subtract_range(range, RANGE_NUM, fam10h_mmconf_start, |
218 | fam10h_mmconf_end + 1); | ||
267 | } | 219 | } |
268 | 220 | ||
269 | /* mmio resource */ | 221 | /* mmio resource */ |
@@ -293,7 +245,7 @@ static int __init early_fill_mp_bus_info(void) | |||
293 | info = &pci_root_info[j]; | 245 | info = &pci_root_info[j]; |
294 | 246 | ||
295 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", | 247 | printk(KERN_DEBUG "node %d link %d: mmio [%llx, %llx]", |
296 | node, link, (u64)start, (u64)end); | 248 | node, link, start, end); |
297 | /* | 249 | /* |
298 | * some sick allocation would have range overlap with fam10h | 250 | * some sick allocation would have range overlap with fam10h |
299 | * mmconf range, so need to update start and end. | 251 | * mmconf range, so need to update start and end. |
@@ -318,14 +270,15 @@ static int __init early_fill_mp_bus_info(void) | |||
318 | /* we got a hole */ | 270 | /* we got a hole */ |
319 | endx = fam10h_mmconf_start - 1; | 271 | endx = fam10h_mmconf_start - 1; |
320 | update_res(info, start, endx, IORESOURCE_MEM, 0); | 272 | update_res(info, start, endx, IORESOURCE_MEM, 0); |
321 | update_range(range, start, endx); | 273 | subtract_range(range, RANGE_NUM, start, |
322 | printk(KERN_CONT " ==> [%llx, %llx]", (u64)start, endx); | 274 | endx + 1); |
275 | printk(KERN_CONT " ==> [%llx, %llx]", start, endx); | ||
323 | start = fam10h_mmconf_end + 1; | 276 | start = fam10h_mmconf_end + 1; |
324 | changed = 1; | 277 | changed = 1; |
325 | } | 278 | } |
326 | if (changed) { | 279 | if (changed) { |
327 | if (start <= end) { | 280 | if (start <= end) { |
328 | printk(KERN_CONT " %s [%llx, %llx]", endx?"and":"==>", (u64)start, (u64)end); | 281 | printk(KERN_CONT " %s [%llx, %llx]", endx ? "and" : "==>", start, end); |
329 | } else { | 282 | } else { |
330 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); | 283 | printk(KERN_CONT "%s\n", endx?"":" ==> none"); |
331 | continue; | 284 | continue; |
@@ -333,8 +286,9 @@ static int __init early_fill_mp_bus_info(void) | |||
333 | } | 286 | } |
334 | } | 287 | } |
335 | 288 | ||
336 | update_res(info, start, end, IORESOURCE_MEM, 1); | 289 | update_res(info, cap_resource(start), cap_resource(end), |
337 | update_range(range, start, end); | 290 | IORESOURCE_MEM, 1); |
291 | subtract_range(range, RANGE_NUM, start, end + 1); | ||
338 | printk(KERN_CONT "\n"); | 292 | printk(KERN_CONT "\n"); |
339 | } | 293 | } |
340 | 294 | ||
@@ -348,8 +302,8 @@ static int __init early_fill_mp_bus_info(void) | |||
348 | address = MSR_K8_TOP_MEM2; | 302 | address = MSR_K8_TOP_MEM2; |
349 | rdmsrl(address, val); | 303 | rdmsrl(address, val); |
350 | end = (val & 0xffffff800000ULL); | 304 | end = (val & 0xffffff800000ULL); |
351 | printk(KERN_INFO "TOM2: %016lx aka %ldM\n", end, end>>20); | 305 | printk(KERN_INFO "TOM2: %016llx aka %lldM\n", end, end>>20); |
352 | update_range(range, 1ULL<<32, end - 1); | 306 | subtract_range(range, RANGE_NUM, 1ULL<<32, end); |
353 | } | 307 | } |
354 | 308 | ||
355 | /* | 309 | /* |
@@ -368,7 +322,8 @@ static int __init early_fill_mp_bus_info(void) | |||
368 | if (!range[i].end) | 322 | if (!range[i].end) |
369 | continue; | 323 | continue; |
370 | 324 | ||
371 | update_res(info, range[i].start, range[i].end, | 325 | update_res(info, cap_resource(range[i].start), |
326 | cap_resource(range[i].end - 1), | ||
372 | IORESOURCE_MEM, 1); | 327 | IORESOURCE_MEM, 1); |
373 | } | 328 | } |
374 | } | 329 | } |
@@ -384,24 +339,14 @@ static int __init early_fill_mp_bus_info(void) | |||
384 | info->bus_min, info->bus_max, info->node, info->link); | 339 | info->bus_min, info->bus_max, info->node, info->link); |
385 | for (j = 0; j < res_num; j++) { | 340 | for (j = 0; j < res_num; j++) { |
386 | res = &info->res[j]; | 341 | res = &info->res[j]; |
387 | printk(KERN_DEBUG "bus: %02x index %x %s: [%llx, %llx]\n", | 342 | printk(KERN_DEBUG "bus: %02x index %x %pR\n", |
388 | busnum, j, | 343 | busnum, j, res); |
389 | (res->flags & IORESOURCE_IO)?"io port":"mmio", | ||
390 | res->start, res->end); | ||
391 | } | 344 | } |
392 | } | 345 | } |
393 | 346 | ||
394 | return 0; | 347 | return 0; |
395 | } | 348 | } |
396 | 349 | ||
397 | #else /* !CONFIG_X86_64 */ | ||
398 | |||
399 | static int __init early_fill_mp_bus_info(void) { return 0; } | ||
400 | |||
401 | #endif /* !CONFIG_X86_64 */ | ||
402 | |||
403 | /* common 32/64 bit code */ | ||
404 | |||
405 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) | 350 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) |
406 | 351 | ||
407 | static void enable_pci_io_ecs(void *unused) | 352 | static void enable_pci_io_ecs(void *unused) |
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index 12d54ff3654d..64a122883896 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -1,11 +1,11 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/range.h> | ||
3 | 4 | ||
4 | #include "bus_numa.h" | 5 | #include "bus_numa.h" |
5 | 6 | ||
6 | int pci_root_num; | 7 | int pci_root_num; |
7 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 8 | struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
8 | int found_all_numa_early; | ||
9 | 9 | ||
10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) | 10 | void x86_pci_root_bus_res_quirks(struct pci_bus *b) |
11 | { | 11 | { |
@@ -21,10 +21,6 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
21 | if (!pci_root_num) | 21 | if (!pci_root_num) |
22 | return; | 22 | return; |
23 | 23 | ||
24 | /* for amd, if only one root bus, don't need to do anything */ | ||
25 | if (pci_root_num < 2 && found_all_numa_early) | ||
26 | return; | ||
27 | |||
28 | for (i = 0; i < pci_root_num; i++) { | 24 | for (i = 0; i < pci_root_num; i++) { |
29 | if (pci_root_info[i].bus_min == b->number) | 25 | if (pci_root_info[i].bus_min == b->number) |
30 | break; | 26 | break; |
@@ -52,8 +48,8 @@ void x86_pci_root_bus_res_quirks(struct pci_bus *b) | |||
52 | } | 48 | } |
53 | } | 49 | } |
54 | 50 | ||
55 | void __devinit update_res(struct pci_root_info *info, size_t start, | 51 | void __devinit update_res(struct pci_root_info *info, resource_size_t start, |
56 | size_t end, unsigned long flags, int merge) | 52 | resource_size_t end, unsigned long flags, int merge) |
57 | { | 53 | { |
58 | int i; | 54 | int i; |
59 | struct resource *res; | 55 | struct resource *res; |
@@ -61,25 +57,28 @@ void __devinit update_res(struct pci_root_info *info, size_t start, | |||
61 | if (start > end) | 57 | if (start > end) |
62 | return; | 58 | return; |
63 | 59 | ||
60 | if (start == MAX_RESOURCE) | ||
61 | return; | ||
62 | |||
64 | if (!merge) | 63 | if (!merge) |
65 | goto addit; | 64 | goto addit; |
66 | 65 | ||
67 | /* try to merge it with old one */ | 66 | /* try to merge it with old one */ |
68 | for (i = 0; i < info->res_num; i++) { | 67 | for (i = 0; i < info->res_num; i++) { |
69 | size_t final_start, final_end; | 68 | resource_size_t final_start, final_end; |
70 | size_t common_start, common_end; | 69 | resource_size_t common_start, common_end; |
71 | 70 | ||
72 | res = &info->res[i]; | 71 | res = &info->res[i]; |
73 | if (res->flags != flags) | 72 | if (res->flags != flags) |
74 | continue; | 73 | continue; |
75 | 74 | ||
76 | common_start = max((size_t)res->start, start); | 75 | common_start = max(res->start, start); |
77 | common_end = min((size_t)res->end, end); | 76 | common_end = min(res->end, end); |
78 | if (common_start > common_end + 1) | 77 | if (common_start > common_end + 1) |
79 | continue; | 78 | continue; |
80 | 79 | ||
81 | final_start = min((size_t)res->start, start); | 80 | final_start = min(res->start, start); |
82 | final_end = max((size_t)res->end, end); | 81 | final_end = max(res->end, end); |
83 | 82 | ||
84 | res->start = final_start; | 83 | res->start = final_start; |
85 | res->end = final_end; | 84 | res->end = final_end; |
diff --git a/arch/x86/pci/bus_numa.h b/arch/x86/pci/bus_numa.h index 731b64ee8d84..804a4b40c31a 100644 --- a/arch/x86/pci/bus_numa.h +++ b/arch/x86/pci/bus_numa.h | |||
@@ -1,5 +1,5 @@ | |||
1 | #ifdef CONFIG_X86_64 | 1 | #ifndef __BUS_NUMA_H |
2 | 2 | #define __BUS_NUMA_H | |
3 | /* | 3 | /* |
4 | * sub bus (transparent) will use entres from 3 to store extra from | 4 | * sub bus (transparent) will use entres from 3 to store extra from |
5 | * root, so need to make sure we have enough slot there. | 5 | * root, so need to make sure we have enough slot there. |
@@ -19,8 +19,7 @@ struct pci_root_info { | |||
19 | #define PCI_ROOT_NR 4 | 19 | #define PCI_ROOT_NR 4 |
20 | extern int pci_root_num; | 20 | extern int pci_root_num; |
21 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; | 21 | extern struct pci_root_info pci_root_info[PCI_ROOT_NR]; |
22 | extern int found_all_numa_early; | ||
23 | 22 | ||
24 | extern void update_res(struct pci_root_info *info, size_t start, | 23 | extern void update_res(struct pci_root_info *info, resource_size_t start, |
25 | size_t end, unsigned long flags, int merge); | 24 | resource_size_t end, unsigned long flags, int merge); |
26 | #endif | 25 | #endif |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 5a8fbf8d4cac..dece3eb9c906 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -255,10 +255,6 @@ void __init pcibios_resource_survey(void) | |||
255 | */ | 255 | */ |
256 | fs_initcall(pcibios_assign_resources); | 256 | fs_initcall(pcibios_assign_resources); |
257 | 257 | ||
258 | void __weak x86_pci_root_bus_res_quirks(struct pci_bus *b) | ||
259 | { | ||
260 | } | ||
261 | |||
262 | /* | 258 | /* |
263 | * If we set up a device for bus mastering, we need to check the latency | 259 | * If we set up a device for bus mastering, we need to check the latency |
264 | * timer as certain crappy BIOSes forget to set it properly. | 260 | * timer as certain crappy BIOSes forget to set it properly. |
diff --git a/drivers/pci/hotplug/ibmphp_res.c b/drivers/pci/hotplug/ibmphp_res.c index ec73294d1fa6..e2dc289f767c 100644 --- a/drivers/pci/hotplug/ibmphp_res.c +++ b/drivers/pci/hotplug/ibmphp_res.c | |||
@@ -40,7 +40,7 @@ static void update_resources (struct bus_node *bus_cur, int type, int rangeno); | |||
40 | static int once_over (void); | 40 | static int once_over (void); |
41 | static int remove_ranges (struct bus_node *, struct bus_node *); | 41 | static int remove_ranges (struct bus_node *, struct bus_node *); |
42 | static int update_bridge_ranges (struct bus_node **); | 42 | static int update_bridge_ranges (struct bus_node **); |
43 | static int add_range (int type, struct range_node *, struct bus_node *); | 43 | static int add_bus_range (int type, struct range_node *, struct bus_node *); |
44 | static void fix_resources (struct bus_node *); | 44 | static void fix_resources (struct bus_node *); |
45 | static struct bus_node *find_bus_wprev (u8, struct bus_node **, u8); | 45 | static struct bus_node *find_bus_wprev (u8, struct bus_node **, u8); |
46 | 46 | ||
@@ -133,7 +133,7 @@ static int __init alloc_bus_range (struct bus_node **new_bus, struct range_node | |||
133 | newrange->rangeno = 1; | 133 | newrange->rangeno = 1; |
134 | else { | 134 | else { |
135 | /* need to insert our range */ | 135 | /* need to insert our range */ |
136 | add_range (flag, newrange, newbus); | 136 | add_bus_range (flag, newrange, newbus); |
137 | debug ("%d resource Primary Bus inserted on bus %x [%x - %x]\n", flag, newbus->busno, newrange->start, newrange->end); | 137 | debug ("%d resource Primary Bus inserted on bus %x [%x - %x]\n", flag, newbus->busno, newrange->start, newrange->end); |
138 | } | 138 | } |
139 | 139 | ||
@@ -384,7 +384,7 @@ int __init ibmphp_rsrc_init (void) | |||
384 | * Input: type of the resource, range to add, current bus | 384 | * Input: type of the resource, range to add, current bus |
385 | * Output: 0 or -1, bus and range ptrs | 385 | * Output: 0 or -1, bus and range ptrs |
386 | ********************************************************************************/ | 386 | ********************************************************************************/ |
387 | static int add_range (int type, struct range_node *range, struct bus_node *bus_cur) | 387 | static int add_bus_range (int type, struct range_node *range, struct bus_node *bus_cur) |
388 | { | 388 | { |
389 | struct range_node *range_cur = NULL; | 389 | struct range_node *range_cur = NULL; |
390 | struct range_node *range_prev; | 390 | struct range_node *range_prev; |
@@ -455,7 +455,7 @@ static int add_range (int type, struct range_node *range, struct bus_node *bus_c | |||
455 | 455 | ||
456 | /******************************************************************************* | 456 | /******************************************************************************* |
457 | * This routine goes through the list of resources of type 'type' and updates | 457 | * This routine goes through the list of resources of type 'type' and updates |
458 | * the range numbers that they correspond to. It was called from add_range fnc | 458 | * the range numbers that they correspond to. It was called from add_bus_range fnc |
459 | * | 459 | * |
460 | * Input: bus, type of the resource, the rangeno starting from which to update | 460 | * Input: bus, type of the resource, the rangeno starting from which to update |
461 | ******************************************************************************/ | 461 | ******************************************************************************/ |
@@ -1999,7 +1999,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) | |||
1999 | 1999 | ||
2000 | if (bus_sec->noIORanges > 0) { | 2000 | if (bus_sec->noIORanges > 0) { |
2001 | if (!range_exists_already (range, bus_sec, IO)) { | 2001 | if (!range_exists_already (range, bus_sec, IO)) { |
2002 | add_range (IO, range, bus_sec); | 2002 | add_bus_range (IO, range, bus_sec); |
2003 | ++bus_sec->noIORanges; | 2003 | ++bus_sec->noIORanges; |
2004 | } else { | 2004 | } else { |
2005 | kfree (range); | 2005 | kfree (range); |
@@ -2048,7 +2048,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) | |||
2048 | 2048 | ||
2049 | if (bus_sec->noMemRanges > 0) { | 2049 | if (bus_sec->noMemRanges > 0) { |
2050 | if (!range_exists_already (range, bus_sec, MEM)) { | 2050 | if (!range_exists_already (range, bus_sec, MEM)) { |
2051 | add_range (MEM, range, bus_sec); | 2051 | add_bus_range (MEM, range, bus_sec); |
2052 | ++bus_sec->noMemRanges; | 2052 | ++bus_sec->noMemRanges; |
2053 | } else { | 2053 | } else { |
2054 | kfree (range); | 2054 | kfree (range); |
@@ -2102,7 +2102,7 @@ static int __init update_bridge_ranges (struct bus_node **bus) | |||
2102 | 2102 | ||
2103 | if (bus_sec->noPFMemRanges > 0) { | 2103 | if (bus_sec->noPFMemRanges > 0) { |
2104 | if (!range_exists_already (range, bus_sec, PFMEM)) { | 2104 | if (!range_exists_already (range, bus_sec, PFMEM)) { |
2105 | add_range (PFMEM, range, bus_sec); | 2105 | add_bus_range (PFMEM, range, bus_sec); |
2106 | ++bus_sec->noPFMemRanges; | 2106 | ++bus_sec->noPFMemRanges; |
2107 | } else { | 2107 | } else { |
2108 | kfree (range); | 2108 | kfree (range); |
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index b10ec49ee2dd..266ab9291232 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h | |||
@@ -23,6 +23,7 @@ extern unsigned long max_pfn; | |||
23 | extern unsigned long saved_max_pfn; | 23 | extern unsigned long saved_max_pfn; |
24 | #endif | 24 | #endif |
25 | 25 | ||
26 | #ifndef CONFIG_NO_BOOTMEM | ||
26 | /* | 27 | /* |
27 | * node_bootmem_map is a map pointer - the bits represent all physical | 28 | * node_bootmem_map is a map pointer - the bits represent all physical |
28 | * memory pages (including holes) on the node. | 29 | * memory pages (including holes) on the node. |
@@ -37,6 +38,7 @@ typedef struct bootmem_data { | |||
37 | } bootmem_data_t; | 38 | } bootmem_data_t; |
38 | 39 | ||
39 | extern bootmem_data_t bootmem_node_data[]; | 40 | extern bootmem_data_t bootmem_node_data[]; |
41 | #endif | ||
40 | 42 | ||
41 | extern unsigned long bootmem_bootmap_pages(unsigned long); | 43 | extern unsigned long bootmem_bootmap_pages(unsigned long); |
42 | 44 | ||
@@ -46,6 +48,7 @@ extern unsigned long init_bootmem_node(pg_data_t *pgdat, | |||
46 | unsigned long endpfn); | 48 | unsigned long endpfn); |
47 | extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); | 49 | extern unsigned long init_bootmem(unsigned long addr, unsigned long memend); |
48 | 50 | ||
51 | unsigned long free_all_memory_core_early(int nodeid); | ||
49 | extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); | 52 | extern unsigned long free_all_bootmem_node(pg_data_t *pgdat); |
50 | extern unsigned long free_all_bootmem(void); | 53 | extern unsigned long free_all_bootmem(void); |
51 | 54 | ||
@@ -84,6 +87,10 @@ extern void *__alloc_bootmem_node(pg_data_t *pgdat, | |||
84 | unsigned long size, | 87 | unsigned long size, |
85 | unsigned long align, | 88 | unsigned long align, |
86 | unsigned long goal); | 89 | unsigned long goal); |
90 | void *__alloc_bootmem_node_high(pg_data_t *pgdat, | ||
91 | unsigned long size, | ||
92 | unsigned long align, | ||
93 | unsigned long goal); | ||
87 | extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, | 94 | extern void *__alloc_bootmem_node_nopanic(pg_data_t *pgdat, |
88 | unsigned long size, | 95 | unsigned long size, |
89 | unsigned long align, | 96 | unsigned long align, |
diff --git a/include/linux/early_res.h b/include/linux/early_res.h new file mode 100644 index 000000000000..29c09f57a13c --- /dev/null +++ b/include/linux/early_res.h | |||
@@ -0,0 +1,23 @@ | |||
1 | #ifndef _LINUX_EARLY_RES_H | ||
2 | #define _LINUX_EARLY_RES_H | ||
3 | #ifdef __KERNEL__ | ||
4 | |||
5 | extern void reserve_early(u64 start, u64 end, char *name); | ||
6 | extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); | ||
7 | extern void free_early(u64 start, u64 end); | ||
8 | void free_early_partial(u64 start, u64 end); | ||
9 | extern void early_res_to_bootmem(u64 start, u64 end); | ||
10 | |||
11 | void reserve_early_without_check(u64 start, u64 end, char *name); | ||
12 | u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | ||
13 | u64 size, u64 align); | ||
14 | u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, | ||
15 | u64 *sizep, u64 align); | ||
16 | u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); | ||
17 | u64 get_max_mapped(void); | ||
18 | #include <linux/range.h> | ||
19 | int get_free_all_memory_range(struct range **rangep, int nodeid); | ||
20 | |||
21 | #endif /* __KERNEL__ */ | ||
22 | |||
23 | #endif /* _LINUX_EARLY_RES_H */ | ||
diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 1221d2331a6d..7f0707463360 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h | |||
@@ -44,6 +44,16 @@ extern const char linux_proc_banner[]; | |||
44 | 44 | ||
45 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) | 45 | #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) |
46 | 46 | ||
47 | /* | ||
48 | * This looks more complex than it should be. But we need to | ||
49 | * get the type for the ~ right in round_down (it needs to be | ||
50 | * as wide as the result!), and we want to evaluate the macro | ||
51 | * arguments just once each. | ||
52 | */ | ||
53 | #define __round_mask(x, y) ((__typeof__(x))((y)-1)) | ||
54 | #define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) | ||
55 | #define round_down(x, y) ((x) & ~__round_mask(x, y)) | ||
56 | |||
47 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) | 57 | #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) |
48 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) | 58 | #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) |
49 | #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) | 59 | #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) |
diff --git a/include/linux/mm.h b/include/linux/mm.h index 2e724c877ec1..90957f14195c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/prio_tree.h> | 12 | #include <linux/prio_tree.h> |
13 | #include <linux/debug_locks.h> | 13 | #include <linux/debug_locks.h> |
14 | #include <linux/mm_types.h> | 14 | #include <linux/mm_types.h> |
15 | #include <linux/range.h> | ||
15 | 16 | ||
16 | struct mempolicy; | 17 | struct mempolicy; |
17 | struct anon_vma; | 18 | struct anon_vma; |
@@ -1049,6 +1050,10 @@ extern void get_pfn_range_for_nid(unsigned int nid, | |||
1049 | extern unsigned long find_min_pfn_with_active_regions(void); | 1050 | extern unsigned long find_min_pfn_with_active_regions(void); |
1050 | extern void free_bootmem_with_active_regions(int nid, | 1051 | extern void free_bootmem_with_active_regions(int nid, |
1051 | unsigned long max_low_pfn); | 1052 | unsigned long max_low_pfn); |
1053 | int add_from_early_node_map(struct range *range, int az, | ||
1054 | int nr_range, int nid); | ||
1055 | void *__alloc_memory_core_early(int nodeid, u64 size, u64 align, | ||
1056 | u64 goal, u64 limit); | ||
1052 | typedef int (*work_fn_t)(unsigned long, unsigned long, void *); | 1057 | typedef int (*work_fn_t)(unsigned long, unsigned long, void *); |
1053 | extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); | 1058 | extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data); |
1054 | extern void sparse_memory_present_with_active_regions(int nid); | 1059 | extern void sparse_memory_present_with_active_regions(int nid); |
@@ -1317,12 +1322,19 @@ extern int randomize_va_space; | |||
1317 | const char * arch_vma_name(struct vm_area_struct *vma); | 1322 | const char * arch_vma_name(struct vm_area_struct *vma); |
1318 | void print_vma_addr(char *prefix, unsigned long rip); | 1323 | void print_vma_addr(char *prefix, unsigned long rip); |
1319 | 1324 | ||
1325 | void sparse_mem_maps_populate_node(struct page **map_map, | ||
1326 | unsigned long pnum_begin, | ||
1327 | unsigned long pnum_end, | ||
1328 | unsigned long map_count, | ||
1329 | int nodeid); | ||
1330 | |||
1320 | struct page *sparse_mem_map_populate(unsigned long pnum, int nid); | 1331 | struct page *sparse_mem_map_populate(unsigned long pnum, int nid); |
1321 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); | 1332 | pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); |
1322 | pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); | 1333 | pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node); |
1323 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); | 1334 | pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node); |
1324 | pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); | 1335 | pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node); |
1325 | void *vmemmap_alloc_block(unsigned long size, int node); | 1336 | void *vmemmap_alloc_block(unsigned long size, int node); |
1337 | void *vmemmap_alloc_block_buf(unsigned long size, int node); | ||
1326 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); | 1338 | void vmemmap_verify(pte_t *, int, unsigned long, unsigned long); |
1327 | int vmemmap_populate_basepages(struct page *start_page, | 1339 | int vmemmap_populate_basepages(struct page *start_page, |
1328 | unsigned long pages, int node); | 1340 | unsigned long pages, int node); |
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41acd4bf7664..a01a103341bd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h | |||
@@ -612,7 +612,9 @@ typedef struct pglist_data { | |||
612 | struct page_cgroup *node_page_cgroup; | 612 | struct page_cgroup *node_page_cgroup; |
613 | #endif | 613 | #endif |
614 | #endif | 614 | #endif |
615 | #ifndef CONFIG_NO_BOOTMEM | ||
615 | struct bootmem_data *bdata; | 616 | struct bootmem_data *bdata; |
617 | #endif | ||
616 | #ifdef CONFIG_MEMORY_HOTPLUG | 618 | #ifdef CONFIG_MEMORY_HOTPLUG |
617 | /* | 619 | /* |
618 | * Must be held any time you expect node_start_pfn, node_present_pages | 620 | * Must be held any time you expect node_start_pfn, node_present_pages |
diff --git a/include/linux/range.h b/include/linux/range.h new file mode 100644 index 000000000000..bd184a5db791 --- /dev/null +++ b/include/linux/range.h | |||
@@ -0,0 +1,30 @@ | |||
1 | #ifndef _LINUX_RANGE_H | ||
2 | #define _LINUX_RANGE_H | ||
3 | |||
4 | struct range { | ||
5 | u64 start; | ||
6 | u64 end; | ||
7 | }; | ||
8 | |||
9 | int add_range(struct range *range, int az, int nr_range, | ||
10 | u64 start, u64 end); | ||
11 | |||
12 | |||
13 | int add_range_with_merge(struct range *range, int az, int nr_range, | ||
14 | u64 start, u64 end); | ||
15 | |||
16 | void subtract_range(struct range *range, int az, u64 start, u64 end); | ||
17 | |||
18 | int clean_sort_range(struct range *range, int az); | ||
19 | |||
20 | void sort_range(struct range *range, int nr_range); | ||
21 | |||
22 | #define MAX_RESOURCE ((resource_size_t)~0) | ||
23 | static inline resource_size_t cap_resource(u64 val) | ||
24 | { | ||
25 | if (val > MAX_RESOURCE) | ||
26 | return MAX_RESOURCE; | ||
27 | |||
28 | return val; | ||
29 | } | ||
30 | #endif | ||
diff --git a/kernel/Makefile b/kernel/Makefile index 6aebdeb2aa34..7b974699f8c2 100644 --- a/kernel/Makefile +++ b/kernel/Makefile | |||
@@ -10,7 +10,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ | |||
10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ | 10 | kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ |
11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ | 11 | hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ |
12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ | 12 | notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ |
13 | async.o | 13 | async.o range.o |
14 | obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o | ||
14 | obj-y += groups.o | 15 | obj-y += groups.o |
15 | 16 | ||
16 | ifdef CONFIG_FUNCTION_TRACER | 17 | ifdef CONFIG_FUNCTION_TRACER |
diff --git a/kernel/early_res.c b/kernel/early_res.c new file mode 100644 index 000000000000..3cb2c661bb78 --- /dev/null +++ b/kernel/early_res.c | |||
@@ -0,0 +1,578 @@ | |||
1 | /* | ||
2 | * early_res, could be used to replace bootmem | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/init.h> | ||
7 | #include <linux/bootmem.h> | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/early_res.h> | ||
10 | |||
11 | /* | ||
12 | * Early reserved memory areas. | ||
13 | */ | ||
14 | /* | ||
15 | * need to make sure this one is big enough before | ||
16 | * find_fw_memmap_area could be used | ||
17 | */ | ||
18 | #define MAX_EARLY_RES_X 32 | ||
19 | |||
20 | struct early_res { | ||
21 | u64 start, end; | ||
22 | char name[15]; | ||
23 | char overlap_ok; | ||
24 | }; | ||
25 | static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; | ||
26 | |||
27 | static int max_early_res __initdata = MAX_EARLY_RES_X; | ||
28 | static struct early_res *early_res __initdata = &early_res_x[0]; | ||
29 | static int early_res_count __initdata; | ||
30 | |||
31 | static int __init find_overlapped_early(u64 start, u64 end) | ||
32 | { | ||
33 | int i; | ||
34 | struct early_res *r; | ||
35 | |||
36 | for (i = 0; i < max_early_res && early_res[i].end; i++) { | ||
37 | r = &early_res[i]; | ||
38 | if (end > r->start && start < r->end) | ||
39 | break; | ||
40 | } | ||
41 | |||
42 | return i; | ||
43 | } | ||
44 | |||
45 | /* | ||
46 | * Drop the i-th range from the early reservation map, | ||
47 | * by copying any higher ranges down one over it, and | ||
48 | * clearing what had been the last slot. | ||
49 | */ | ||
50 | static void __init drop_range(int i) | ||
51 | { | ||
52 | int j; | ||
53 | |||
54 | for (j = i + 1; j < max_early_res && early_res[j].end; j++) | ||
55 | ; | ||
56 | |||
57 | memmove(&early_res[i], &early_res[i + 1], | ||
58 | (j - 1 - i) * sizeof(struct early_res)); | ||
59 | |||
60 | early_res[j - 1].end = 0; | ||
61 | early_res_count--; | ||
62 | } | ||
63 | |||
64 | static void __init drop_range_partial(int i, u64 start, u64 end) | ||
65 | { | ||
66 | u64 common_start, common_end; | ||
67 | u64 old_start, old_end; | ||
68 | |||
69 | old_start = early_res[i].start; | ||
70 | old_end = early_res[i].end; | ||
71 | common_start = max(old_start, start); | ||
72 | common_end = min(old_end, end); | ||
73 | |||
74 | /* no overlap ? */ | ||
75 | if (common_start >= common_end) | ||
76 | return; | ||
77 | |||
78 | if (old_start < common_start) { | ||
79 | /* make head segment */ | ||
80 | early_res[i].end = common_start; | ||
81 | if (old_end > common_end) { | ||
82 | char name[15]; | ||
83 | |||
84 | /* | ||
85 | * Save a local copy of the name, since the | ||
86 | * early_res array could get resized inside | ||
87 | * reserve_early_without_check() -> | ||
88 | * __check_and_double_early_res(), which would | ||
89 | * make the current name pointer invalid. | ||
90 | */ | ||
91 | strncpy(name, early_res[i].name, | ||
92 | sizeof(early_res[i].name) - 1); | ||
93 | /* add another for left over on tail */ | ||
94 | reserve_early_without_check(common_end, old_end, name); | ||
95 | } | ||
96 | return; | ||
97 | } else { | ||
98 | if (old_end > common_end) { | ||
99 | /* reuse the entry for tail left */ | ||
100 | early_res[i].start = common_end; | ||
101 | return; | ||
102 | } | ||
103 | /* all covered */ | ||
104 | drop_range(i); | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /* | ||
109 | * Split any existing ranges that: | ||
110 | * 1) are marked 'overlap_ok', and | ||
111 | * 2) overlap with the stated range [start, end) | ||
112 | * into whatever portion (if any) of the existing range is entirely | ||
113 | * below or entirely above the stated range. Drop the portion | ||
114 | * of the existing range that overlaps with the stated range, | ||
115 | * which will allow the caller of this routine to then add that | ||
116 | * stated range without conflicting with any existing range. | ||
117 | */ | ||
118 | static void __init drop_overlaps_that_are_ok(u64 start, u64 end) | ||
119 | { | ||
120 | int i; | ||
121 | struct early_res *r; | ||
122 | u64 lower_start, lower_end; | ||
123 | u64 upper_start, upper_end; | ||
124 | char name[15]; | ||
125 | |||
126 | for (i = 0; i < max_early_res && early_res[i].end; i++) { | ||
127 | r = &early_res[i]; | ||
128 | |||
129 | /* Continue past non-overlapping ranges */ | ||
130 | if (end <= r->start || start >= r->end) | ||
131 | continue; | ||
132 | |||
133 | /* | ||
134 | * Leave non-ok overlaps as is; let caller | ||
135 | * panic "Overlapping early reservations" | ||
136 | * when it hits this overlap. | ||
137 | */ | ||
138 | if (!r->overlap_ok) | ||
139 | return; | ||
140 | |||
141 | /* | ||
142 | * We have an ok overlap. We will drop it from the early | ||
143 | * reservation map, and add back in any non-overlapping | ||
144 | * portions (lower or upper) as separate, overlap_ok, | ||
145 | * non-overlapping ranges. | ||
146 | */ | ||
147 | |||
148 | /* 1. Note any non-overlapping (lower or upper) ranges. */ | ||
149 | strncpy(name, r->name, sizeof(name) - 1); | ||
150 | |||
151 | lower_start = lower_end = 0; | ||
152 | upper_start = upper_end = 0; | ||
153 | if (r->start < start) { | ||
154 | lower_start = r->start; | ||
155 | lower_end = start; | ||
156 | } | ||
157 | if (r->end > end) { | ||
158 | upper_start = end; | ||
159 | upper_end = r->end; | ||
160 | } | ||
161 | |||
162 | /* 2. Drop the original ok overlapping range */ | ||
163 | drop_range(i); | ||
164 | |||
165 | i--; /* resume for-loop on copied down entry */ | ||
166 | |||
167 | /* 3. Add back in any non-overlapping ranges. */ | ||
168 | if (lower_end) | ||
169 | reserve_early_overlap_ok(lower_start, lower_end, name); | ||
170 | if (upper_end) | ||
171 | reserve_early_overlap_ok(upper_start, upper_end, name); | ||
172 | } | ||
173 | } | ||
174 | |||
175 | static void __init __reserve_early(u64 start, u64 end, char *name, | ||
176 | int overlap_ok) | ||
177 | { | ||
178 | int i; | ||
179 | struct early_res *r; | ||
180 | |||
181 | i = find_overlapped_early(start, end); | ||
182 | if (i >= max_early_res) | ||
183 | panic("Too many early reservations"); | ||
184 | r = &early_res[i]; | ||
185 | if (r->end) | ||
186 | panic("Overlapping early reservations " | ||
187 | "%llx-%llx %s to %llx-%llx %s\n", | ||
188 | start, end - 1, name ? name : "", r->start, | ||
189 | r->end - 1, r->name); | ||
190 | r->start = start; | ||
191 | r->end = end; | ||
192 | r->overlap_ok = overlap_ok; | ||
193 | if (name) | ||
194 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
195 | early_res_count++; | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * A few early reservtations come here. | ||
200 | * | ||
201 | * The 'overlap_ok' in the name of this routine does -not- mean it | ||
202 | * is ok for these reservations to overlap an earlier reservation. | ||
203 | * Rather it means that it is ok for subsequent reservations to | ||
204 | * overlap this one. | ||
205 | * | ||
206 | * Use this entry point to reserve early ranges when you are doing | ||
207 | * so out of "Paranoia", reserving perhaps more memory than you need, | ||
208 | * just in case, and don't mind a subsequent overlapping reservation | ||
209 | * that is known to be needed. | ||
210 | * | ||
211 | * The drop_overlaps_that_are_ok() call here isn't really needed. | ||
212 | * It would be needed if we had two colliding 'overlap_ok' | ||
213 | * reservations, so that the second such would not panic on the | ||
214 | * overlap with the first. We don't have any such as of this | ||
215 | * writing, but might as well tolerate such if it happens in | ||
216 | * the future. | ||
217 | */ | ||
218 | void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) | ||
219 | { | ||
220 | drop_overlaps_that_are_ok(start, end); | ||
221 | __reserve_early(start, end, name, 1); | ||
222 | } | ||
223 | |||
224 | static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) | ||
225 | { | ||
226 | u64 start, end, size, mem; | ||
227 | struct early_res *new; | ||
228 | |||
229 | /* do we have enough slots left ? */ | ||
230 | if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) | ||
231 | return; | ||
232 | |||
233 | /* double it */ | ||
234 | mem = -1ULL; | ||
235 | size = sizeof(struct early_res) * max_early_res * 2; | ||
236 | if (early_res == early_res_x) | ||
237 | start = 0; | ||
238 | else | ||
239 | start = early_res[0].end; | ||
240 | end = ex_start; | ||
241 | if (start + size < end) | ||
242 | mem = find_fw_memmap_area(start, end, size, | ||
243 | sizeof(struct early_res)); | ||
244 | if (mem == -1ULL) { | ||
245 | start = ex_end; | ||
246 | end = get_max_mapped(); | ||
247 | if (start + size < end) | ||
248 | mem = find_fw_memmap_area(start, end, size, | ||
249 | sizeof(struct early_res)); | ||
250 | } | ||
251 | if (mem == -1ULL) | ||
252 | panic("can not find more space for early_res array"); | ||
253 | |||
254 | new = __va(mem); | ||
255 | /* save the first one for own */ | ||
256 | new[0].start = mem; | ||
257 | new[0].end = mem + size; | ||
258 | new[0].overlap_ok = 0; | ||
259 | /* copy old to new */ | ||
260 | if (early_res == early_res_x) { | ||
261 | memcpy(&new[1], &early_res[0], | ||
262 | sizeof(struct early_res) * max_early_res); | ||
263 | memset(&new[max_early_res+1], 0, | ||
264 | sizeof(struct early_res) * (max_early_res - 1)); | ||
265 | early_res_count++; | ||
266 | } else { | ||
267 | memcpy(&new[1], &early_res[1], | ||
268 | sizeof(struct early_res) * (max_early_res - 1)); | ||
269 | memset(&new[max_early_res], 0, | ||
270 | sizeof(struct early_res) * max_early_res); | ||
271 | } | ||
272 | memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); | ||
273 | early_res = new; | ||
274 | max_early_res *= 2; | ||
275 | printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", | ||
276 | max_early_res, mem, mem + size - 1); | ||
277 | } | ||
278 | |||
279 | /* | ||
280 | * Most early reservations come here. | ||
281 | * | ||
282 | * We first have drop_overlaps_that_are_ok() drop any pre-existing | ||
283 | * 'overlap_ok' ranges, so that we can then reserve this memory | ||
284 | * range without risk of panic'ing on an overlapping overlap_ok | ||
285 | * early reservation. | ||
286 | */ | ||
287 | void __init reserve_early(u64 start, u64 end, char *name) | ||
288 | { | ||
289 | if (start >= end) | ||
290 | return; | ||
291 | |||
292 | __check_and_double_early_res(start, end); | ||
293 | |||
294 | drop_overlaps_that_are_ok(start, end); | ||
295 | __reserve_early(start, end, name, 0); | ||
296 | } | ||
297 | |||
298 | void __init reserve_early_without_check(u64 start, u64 end, char *name) | ||
299 | { | ||
300 | struct early_res *r; | ||
301 | |||
302 | if (start >= end) | ||
303 | return; | ||
304 | |||
305 | __check_and_double_early_res(start, end); | ||
306 | |||
307 | r = &early_res[early_res_count]; | ||
308 | |||
309 | r->start = start; | ||
310 | r->end = end; | ||
311 | r->overlap_ok = 0; | ||
312 | if (name) | ||
313 | strncpy(r->name, name, sizeof(r->name) - 1); | ||
314 | early_res_count++; | ||
315 | } | ||
316 | |||
317 | void __init free_early(u64 start, u64 end) | ||
318 | { | ||
319 | struct early_res *r; | ||
320 | int i; | ||
321 | |||
322 | i = find_overlapped_early(start, end); | ||
323 | r = &early_res[i]; | ||
324 | if (i >= max_early_res || r->end != end || r->start != start) | ||
325 | panic("free_early on not reserved area: %llx-%llx!", | ||
326 | start, end - 1); | ||
327 | |||
328 | drop_range(i); | ||
329 | } | ||
330 | |||
331 | void __init free_early_partial(u64 start, u64 end) | ||
332 | { | ||
333 | struct early_res *r; | ||
334 | int i; | ||
335 | |||
336 | try_next: | ||
337 | i = find_overlapped_early(start, end); | ||
338 | if (i >= max_early_res) | ||
339 | return; | ||
340 | |||
341 | r = &early_res[i]; | ||
342 | /* hole ? */ | ||
343 | if (r->end >= end && r->start <= start) { | ||
344 | drop_range_partial(i, start, end); | ||
345 | return; | ||
346 | } | ||
347 | |||
348 | drop_range_partial(i, start, end); | ||
349 | goto try_next; | ||
350 | } | ||
351 | |||
352 | #ifdef CONFIG_NO_BOOTMEM | ||
353 | static void __init subtract_early_res(struct range *range, int az) | ||
354 | { | ||
355 | int i, count; | ||
356 | u64 final_start, final_end; | ||
357 | int idx = 0; | ||
358 | |||
359 | count = 0; | ||
360 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
361 | count++; | ||
362 | |||
363 | /* need to skip first one ?*/ | ||
364 | if (early_res != early_res_x) | ||
365 | idx = 1; | ||
366 | |||
367 | #define DEBUG_PRINT_EARLY_RES 1 | ||
368 | |||
369 | #if DEBUG_PRINT_EARLY_RES | ||
370 | printk(KERN_INFO "Subtract (%d early reservations)\n", count); | ||
371 | #endif | ||
372 | for (i = idx; i < count; i++) { | ||
373 | struct early_res *r = &early_res[i]; | ||
374 | #if DEBUG_PRINT_EARLY_RES | ||
375 | printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, | ||
376 | r->start, r->end, r->name); | ||
377 | #endif | ||
378 | final_start = PFN_DOWN(r->start); | ||
379 | final_end = PFN_UP(r->end); | ||
380 | if (final_start >= final_end) | ||
381 | continue; | ||
382 | subtract_range(range, az, final_start, final_end); | ||
383 | } | ||
384 | |||
385 | } | ||
386 | |||
387 | int __init get_free_all_memory_range(struct range **rangep, int nodeid) | ||
388 | { | ||
389 | int i, count; | ||
390 | u64 start = 0, end; | ||
391 | u64 size; | ||
392 | u64 mem; | ||
393 | struct range *range; | ||
394 | int nr_range; | ||
395 | |||
396 | count = 0; | ||
397 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
398 | count++; | ||
399 | |||
400 | count *= 2; | ||
401 | |||
402 | size = sizeof(struct range) * count; | ||
403 | end = get_max_mapped(); | ||
404 | #ifdef MAX_DMA32_PFN | ||
405 | if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) | ||
406 | start = MAX_DMA32_PFN << PAGE_SHIFT; | ||
407 | #endif | ||
408 | mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); | ||
409 | if (mem == -1ULL) | ||
410 | panic("can not find more space for range free"); | ||
411 | |||
412 | range = __va(mem); | ||
413 | /* use early_node_map[] and early_res to get range array at first */ | ||
414 | memset(range, 0, size); | ||
415 | nr_range = 0; | ||
416 | |||
417 | /* need to go over early_node_map to find out good range for node */ | ||
418 | nr_range = add_from_early_node_map(range, count, nr_range, nodeid); | ||
419 | #ifdef CONFIG_X86_32 | ||
420 | subtract_range(range, count, max_low_pfn, -1ULL); | ||
421 | #endif | ||
422 | subtract_early_res(range, count); | ||
423 | nr_range = clean_sort_range(range, count); | ||
424 | |||
425 | /* need to clear it ? */ | ||
426 | if (nodeid == MAX_NUMNODES) { | ||
427 | memset(&early_res[0], 0, | ||
428 | sizeof(struct early_res) * max_early_res); | ||
429 | early_res = NULL; | ||
430 | max_early_res = 0; | ||
431 | } | ||
432 | |||
433 | *rangep = range; | ||
434 | return nr_range; | ||
435 | } | ||
436 | #else | ||
437 | void __init early_res_to_bootmem(u64 start, u64 end) | ||
438 | { | ||
439 | int i, count; | ||
440 | u64 final_start, final_end; | ||
441 | int idx = 0; | ||
442 | |||
443 | count = 0; | ||
444 | for (i = 0; i < max_early_res && early_res[i].end; i++) | ||
445 | count++; | ||
446 | |||
447 | /* need to skip first one ?*/ | ||
448 | if (early_res != early_res_x) | ||
449 | idx = 1; | ||
450 | |||
451 | printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", | ||
452 | count - idx, max_early_res, start, end); | ||
453 | for (i = idx; i < count; i++) { | ||
454 | struct early_res *r = &early_res[i]; | ||
455 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | ||
456 | r->start, r->end, r->name); | ||
457 | final_start = max(start, r->start); | ||
458 | final_end = min(end, r->end); | ||
459 | if (final_start >= final_end) { | ||
460 | printk(KERN_CONT "\n"); | ||
461 | continue; | ||
462 | } | ||
463 | printk(KERN_CONT " ==> [%010llx - %010llx]\n", | ||
464 | final_start, final_end); | ||
465 | reserve_bootmem_generic(final_start, final_end - final_start, | ||
466 | BOOTMEM_DEFAULT); | ||
467 | } | ||
468 | /* clear them */ | ||
469 | memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); | ||
470 | early_res = NULL; | ||
471 | max_early_res = 0; | ||
472 | early_res_count = 0; | ||
473 | } | ||
474 | #endif | ||
475 | |||
476 | /* Check for already reserved areas */ | ||
477 | static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) | ||
478 | { | ||
479 | int i; | ||
480 | u64 addr = *addrp; | ||
481 | int changed = 0; | ||
482 | struct early_res *r; | ||
483 | again: | ||
484 | i = find_overlapped_early(addr, addr + size); | ||
485 | r = &early_res[i]; | ||
486 | if (i < max_early_res && r->end) { | ||
487 | *addrp = addr = round_up(r->end, align); | ||
488 | changed = 1; | ||
489 | goto again; | ||
490 | } | ||
491 | return changed; | ||
492 | } | ||
493 | |||
494 | /* Check for already reserved areas */ | ||
495 | static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) | ||
496 | { | ||
497 | int i; | ||
498 | u64 addr = *addrp, last; | ||
499 | u64 size = *sizep; | ||
500 | int changed = 0; | ||
501 | again: | ||
502 | last = addr + size; | ||
503 | for (i = 0; i < max_early_res && early_res[i].end; i++) { | ||
504 | struct early_res *r = &early_res[i]; | ||
505 | if (last > r->start && addr < r->start) { | ||
506 | size = r->start - addr; | ||
507 | changed = 1; | ||
508 | goto again; | ||
509 | } | ||
510 | if (last > r->end && addr < r->end) { | ||
511 | addr = round_up(r->end, align); | ||
512 | size = last - addr; | ||
513 | changed = 1; | ||
514 | goto again; | ||
515 | } | ||
516 | if (last <= r->end && addr >= r->start) { | ||
517 | (*sizep)++; | ||
518 | return 0; | ||
519 | } | ||
520 | } | ||
521 | if (changed) { | ||
522 | *addrp = addr; | ||
523 | *sizep = size; | ||
524 | } | ||
525 | return changed; | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * Find a free area with specified alignment in a specific range. | ||
530 | * only with the area.between start to end is active range from early_node_map | ||
531 | * so they are good as RAM | ||
532 | */ | ||
533 | u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, | ||
534 | u64 size, u64 align) | ||
535 | { | ||
536 | u64 addr, last; | ||
537 | |||
538 | addr = round_up(ei_start, align); | ||
539 | if (addr < start) | ||
540 | addr = round_up(start, align); | ||
541 | if (addr >= ei_last) | ||
542 | goto out; | ||
543 | while (bad_addr(&addr, size, align) && addr+size <= ei_last) | ||
544 | ; | ||
545 | last = addr + size; | ||
546 | if (last > ei_last) | ||
547 | goto out; | ||
548 | if (last > end) | ||
549 | goto out; | ||
550 | |||
551 | return addr; | ||
552 | |||
553 | out: | ||
554 | return -1ULL; | ||
555 | } | ||
556 | |||
557 | u64 __init find_early_area_size(u64 ei_start, u64 ei_last, u64 start, | ||
558 | u64 *sizep, u64 align) | ||
559 | { | ||
560 | u64 addr, last; | ||
561 | |||
562 | addr = round_up(ei_start, align); | ||
563 | if (addr < start) | ||
564 | addr = round_up(start, align); | ||
565 | if (addr >= ei_last) | ||
566 | goto out; | ||
567 | *sizep = ei_last - addr; | ||
568 | while (bad_addr_size(&addr, sizep, align) && addr + *sizep <= ei_last) | ||
569 | ; | ||
570 | last = addr + *sizep; | ||
571 | if (last > ei_last) | ||
572 | goto out; | ||
573 | |||
574 | return addr; | ||
575 | |||
576 | out: | ||
577 | return -1ULL; | ||
578 | } | ||
diff --git a/kernel/range.c b/kernel/range.c new file mode 100644 index 000000000000..74e2e6114927 --- /dev/null +++ b/kernel/range.c | |||
@@ -0,0 +1,163 @@ | |||
1 | /* | ||
2 | * Range add and subtract | ||
3 | */ | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/init.h> | ||
6 | #include <linux/sort.h> | ||
7 | |||
8 | #include <linux/range.h> | ||
9 | |||
10 | #ifndef ARRAY_SIZE | ||
11 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) | ||
12 | #endif | ||
13 | |||
14 | int add_range(struct range *range, int az, int nr_range, u64 start, u64 end) | ||
15 | { | ||
16 | if (start >= end) | ||
17 | return nr_range; | ||
18 | |||
19 | /* Out of slots: */ | ||
20 | if (nr_range >= az) | ||
21 | return nr_range; | ||
22 | |||
23 | range[nr_range].start = start; | ||
24 | range[nr_range].end = end; | ||
25 | |||
26 | nr_range++; | ||
27 | |||
28 | return nr_range; | ||
29 | } | ||
30 | |||
31 | int add_range_with_merge(struct range *range, int az, int nr_range, | ||
32 | u64 start, u64 end) | ||
33 | { | ||
34 | int i; | ||
35 | |||
36 | if (start >= end) | ||
37 | return nr_range; | ||
38 | |||
39 | /* Try to merge it with old one: */ | ||
40 | for (i = 0; i < nr_range; i++) { | ||
41 | u64 final_start, final_end; | ||
42 | u64 common_start, common_end; | ||
43 | |||
44 | if (!range[i].end) | ||
45 | continue; | ||
46 | |||
47 | common_start = max(range[i].start, start); | ||
48 | common_end = min(range[i].end, end); | ||
49 | if (common_start > common_end) | ||
50 | continue; | ||
51 | |||
52 | final_start = min(range[i].start, start); | ||
53 | final_end = max(range[i].end, end); | ||
54 | |||
55 | range[i].start = final_start; | ||
56 | range[i].end = final_end; | ||
57 | return nr_range; | ||
58 | } | ||
59 | |||
60 | /* Need to add it: */ | ||
61 | return add_range(range, az, nr_range, start, end); | ||
62 | } | ||
63 | |||
64 | void subtract_range(struct range *range, int az, u64 start, u64 end) | ||
65 | { | ||
66 | int i, j; | ||
67 | |||
68 | if (start >= end) | ||
69 | return; | ||
70 | |||
71 | for (j = 0; j < az; j++) { | ||
72 | if (!range[j].end) | ||
73 | continue; | ||
74 | |||
75 | if (start <= range[j].start && end >= range[j].end) { | ||
76 | range[j].start = 0; | ||
77 | range[j].end = 0; | ||
78 | continue; | ||
79 | } | ||
80 | |||
81 | if (start <= range[j].start && end < range[j].end && | ||
82 | range[j].start < end) { | ||
83 | range[j].start = end; | ||
84 | continue; | ||
85 | } | ||
86 | |||
87 | |||
88 | if (start > range[j].start && end >= range[j].end && | ||
89 | range[j].end > start) { | ||
90 | range[j].end = start; | ||
91 | continue; | ||
92 | } | ||
93 | |||
94 | if (start > range[j].start && end < range[j].end) { | ||
95 | /* Find the new spare: */ | ||
96 | for (i = 0; i < az; i++) { | ||
97 | if (range[i].end == 0) | ||
98 | break; | ||
99 | } | ||
100 | if (i < az) { | ||
101 | range[i].end = range[j].end; | ||
102 | range[i].start = end; | ||
103 | } else { | ||
104 | printk(KERN_ERR "run of slot in ranges\n"); | ||
105 | } | ||
106 | range[j].end = start; | ||
107 | continue; | ||
108 | } | ||
109 | } | ||
110 | } | ||
111 | |||
112 | static int cmp_range(const void *x1, const void *x2) | ||
113 | { | ||
114 | const struct range *r1 = x1; | ||
115 | const struct range *r2 = x2; | ||
116 | s64 start1, start2; | ||
117 | |||
118 | start1 = r1->start; | ||
119 | start2 = r2->start; | ||
120 | |||
121 | return start1 - start2; | ||
122 | } | ||
123 | |||
124 | int clean_sort_range(struct range *range, int az) | ||
125 | { | ||
126 | int i, j, k = az - 1, nr_range = 0; | ||
127 | |||
128 | for (i = 0; i < k; i++) { | ||
129 | if (range[i].end) | ||
130 | continue; | ||
131 | for (j = k; j > i; j--) { | ||
132 | if (range[j].end) { | ||
133 | k = j; | ||
134 | break; | ||
135 | } | ||
136 | } | ||
137 | if (j == i) | ||
138 | break; | ||
139 | range[i].start = range[k].start; | ||
140 | range[i].end = range[k].end; | ||
141 | range[k].start = 0; | ||
142 | range[k].end = 0; | ||
143 | k--; | ||
144 | } | ||
145 | /* count it */ | ||
146 | for (i = 0; i < az; i++) { | ||
147 | if (!range[i].end) { | ||
148 | nr_range = i; | ||
149 | break; | ||
150 | } | ||
151 | } | ||
152 | |||
153 | /* sort them */ | ||
154 | sort(range, nr_range, sizeof(struct range), cmp_range, NULL); | ||
155 | |||
156 | return nr_range; | ||
157 | } | ||
158 | |||
159 | void sort_range(struct range *range, int nr_range) | ||
160 | { | ||
161 | /* sort them */ | ||
162 | sort(range, nr_range, sizeof(struct range), cmp_range, NULL); | ||
163 | } | ||
diff --git a/mm/Kconfig b/mm/Kconfig index d34c2b971032..9c61158308dc 100644 --- a/mm/Kconfig +++ b/mm/Kconfig | |||
@@ -115,6 +115,10 @@ config SPARSEMEM_EXTREME | |||
115 | config SPARSEMEM_VMEMMAP_ENABLE | 115 | config SPARSEMEM_VMEMMAP_ENABLE |
116 | bool | 116 | bool |
117 | 117 | ||
118 | config SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
119 | def_bool y | ||
120 | depends on SPARSEMEM && X86_64 | ||
121 | |||
118 | config SPARSEMEM_VMEMMAP | 122 | config SPARSEMEM_VMEMMAP |
119 | bool "Sparse Memory virtual memmap" | 123 | bool "Sparse Memory virtual memmap" |
120 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE | 124 | depends on SPARSEMEM && SPARSEMEM_VMEMMAP_ENABLE |
diff --git a/mm/bootmem.c b/mm/bootmem.c index 7d1486875e1c..d7c791ef0036 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/bootmem.h> | 13 | #include <linux/bootmem.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/kmemleak.h> | 15 | #include <linux/kmemleak.h> |
16 | #include <linux/range.h> | ||
16 | 17 | ||
17 | #include <asm/bug.h> | 18 | #include <asm/bug.h> |
18 | #include <asm/io.h> | 19 | #include <asm/io.h> |
@@ -32,6 +33,7 @@ unsigned long max_pfn; | |||
32 | unsigned long saved_max_pfn; | 33 | unsigned long saved_max_pfn; |
33 | #endif | 34 | #endif |
34 | 35 | ||
36 | #ifndef CONFIG_NO_BOOTMEM | ||
35 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; | 37 | bootmem_data_t bootmem_node_data[MAX_NUMNODES] __initdata; |
36 | 38 | ||
37 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); | 39 | static struct list_head bdata_list __initdata = LIST_HEAD_INIT(bdata_list); |
@@ -142,7 +144,7 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) | |||
142 | min_low_pfn = start; | 144 | min_low_pfn = start; |
143 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); | 145 | return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages); |
144 | } | 146 | } |
145 | 147 | #endif | |
146 | /* | 148 | /* |
147 | * free_bootmem_late - free bootmem pages directly to page allocator | 149 | * free_bootmem_late - free bootmem pages directly to page allocator |
148 | * @addr: starting address of the range | 150 | * @addr: starting address of the range |
@@ -167,6 +169,60 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size) | |||
167 | } | 169 | } |
168 | } | 170 | } |
169 | 171 | ||
172 | #ifdef CONFIG_NO_BOOTMEM | ||
173 | static void __init __free_pages_memory(unsigned long start, unsigned long end) | ||
174 | { | ||
175 | int i; | ||
176 | unsigned long start_aligned, end_aligned; | ||
177 | int order = ilog2(BITS_PER_LONG); | ||
178 | |||
179 | start_aligned = (start + (BITS_PER_LONG - 1)) & ~(BITS_PER_LONG - 1); | ||
180 | end_aligned = end & ~(BITS_PER_LONG - 1); | ||
181 | |||
182 | if (end_aligned <= start_aligned) { | ||
183 | #if 1 | ||
184 | printk(KERN_DEBUG " %lx - %lx\n", start, end); | ||
185 | #endif | ||
186 | for (i = start; i < end; i++) | ||
187 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
188 | |||
189 | return; | ||
190 | } | ||
191 | |||
192 | #if 1 | ||
193 | printk(KERN_DEBUG " %lx %lx - %lx %lx\n", | ||
194 | start, start_aligned, end_aligned, end); | ||
195 | #endif | ||
196 | for (i = start; i < start_aligned; i++) | ||
197 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
198 | |||
199 | for (i = start_aligned; i < end_aligned; i += BITS_PER_LONG) | ||
200 | __free_pages_bootmem(pfn_to_page(i), order); | ||
201 | |||
202 | for (i = end_aligned; i < end; i++) | ||
203 | __free_pages_bootmem(pfn_to_page(i), 0); | ||
204 | } | ||
205 | |||
206 | unsigned long __init free_all_memory_core_early(int nodeid) | ||
207 | { | ||
208 | int i; | ||
209 | u64 start, end; | ||
210 | unsigned long count = 0; | ||
211 | struct range *range = NULL; | ||
212 | int nr_range; | ||
213 | |||
214 | nr_range = get_free_all_memory_range(&range, nodeid); | ||
215 | |||
216 | for (i = 0; i < nr_range; i++) { | ||
217 | start = range[i].start; | ||
218 | end = range[i].end; | ||
219 | count += end - start; | ||
220 | __free_pages_memory(start, end); | ||
221 | } | ||
222 | |||
223 | return count; | ||
224 | } | ||
225 | #else | ||
170 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | 226 | static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) |
171 | { | 227 | { |
172 | int aligned; | 228 | int aligned; |
@@ -227,6 +283,7 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
227 | 283 | ||
228 | return count; | 284 | return count; |
229 | } | 285 | } |
286 | #endif | ||
230 | 287 | ||
231 | /** | 288 | /** |
232 | * free_all_bootmem_node - release a node's free pages to the buddy allocator | 289 | * free_all_bootmem_node - release a node's free pages to the buddy allocator |
@@ -237,7 +294,12 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata) | |||
237 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | 294 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) |
238 | { | 295 | { |
239 | register_page_bootmem_info_node(pgdat); | 296 | register_page_bootmem_info_node(pgdat); |
297 | #ifdef CONFIG_NO_BOOTMEM | ||
298 | /* free_all_memory_core_early(MAX_NUMNODES) will be called later */ | ||
299 | return 0; | ||
300 | #else | ||
240 | return free_all_bootmem_core(pgdat->bdata); | 301 | return free_all_bootmem_core(pgdat->bdata); |
302 | #endif | ||
241 | } | 303 | } |
242 | 304 | ||
243 | /** | 305 | /** |
@@ -247,9 +309,14 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | |||
247 | */ | 309 | */ |
248 | unsigned long __init free_all_bootmem(void) | 310 | unsigned long __init free_all_bootmem(void) |
249 | { | 311 | { |
312 | #ifdef CONFIG_NO_BOOTMEM | ||
313 | return free_all_memory_core_early(NODE_DATA(0)->node_id); | ||
314 | #else | ||
250 | return free_all_bootmem_core(NODE_DATA(0)->bdata); | 315 | return free_all_bootmem_core(NODE_DATA(0)->bdata); |
316 | #endif | ||
251 | } | 317 | } |
252 | 318 | ||
319 | #ifndef CONFIG_NO_BOOTMEM | ||
253 | static void __init __free(bootmem_data_t *bdata, | 320 | static void __init __free(bootmem_data_t *bdata, |
254 | unsigned long sidx, unsigned long eidx) | 321 | unsigned long sidx, unsigned long eidx) |
255 | { | 322 | { |
@@ -344,6 +411,7 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, | |||
344 | } | 411 | } |
345 | BUG(); | 412 | BUG(); |
346 | } | 413 | } |
414 | #endif | ||
347 | 415 | ||
348 | /** | 416 | /** |
349 | * free_bootmem_node - mark a page range as usable | 417 | * free_bootmem_node - mark a page range as usable |
@@ -358,6 +426,12 @@ static int __init mark_bootmem(unsigned long start, unsigned long end, | |||
358 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 426 | void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
359 | unsigned long size) | 427 | unsigned long size) |
360 | { | 428 | { |
429 | #ifdef CONFIG_NO_BOOTMEM | ||
430 | free_early(physaddr, physaddr + size); | ||
431 | #if 0 | ||
432 | printk(KERN_DEBUG "free %lx %lx\n", physaddr, size); | ||
433 | #endif | ||
434 | #else | ||
361 | unsigned long start, end; | 435 | unsigned long start, end; |
362 | 436 | ||
363 | kmemleak_free_part(__va(physaddr), size); | 437 | kmemleak_free_part(__va(physaddr), size); |
@@ -366,6 +440,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
366 | end = PFN_DOWN(physaddr + size); | 440 | end = PFN_DOWN(physaddr + size); |
367 | 441 | ||
368 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); | 442 | mark_bootmem_node(pgdat->bdata, start, end, 0, 0); |
443 | #endif | ||
369 | } | 444 | } |
370 | 445 | ||
371 | /** | 446 | /** |
@@ -379,6 +454,12 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
379 | */ | 454 | */ |
380 | void __init free_bootmem(unsigned long addr, unsigned long size) | 455 | void __init free_bootmem(unsigned long addr, unsigned long size) |
381 | { | 456 | { |
457 | #ifdef CONFIG_NO_BOOTMEM | ||
458 | free_early(addr, addr + size); | ||
459 | #if 0 | ||
460 | printk(KERN_DEBUG "free %lx %lx\n", addr, size); | ||
461 | #endif | ||
462 | #else | ||
382 | unsigned long start, end; | 463 | unsigned long start, end; |
383 | 464 | ||
384 | kmemleak_free_part(__va(addr), size); | 465 | kmemleak_free_part(__va(addr), size); |
@@ -387,6 +468,7 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
387 | end = PFN_DOWN(addr + size); | 468 | end = PFN_DOWN(addr + size); |
388 | 469 | ||
389 | mark_bootmem(start, end, 0, 0); | 470 | mark_bootmem(start, end, 0, 0); |
471 | #endif | ||
390 | } | 472 | } |
391 | 473 | ||
392 | /** | 474 | /** |
@@ -403,12 +485,17 @@ void __init free_bootmem(unsigned long addr, unsigned long size) | |||
403 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 485 | int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
404 | unsigned long size, int flags) | 486 | unsigned long size, int flags) |
405 | { | 487 | { |
488 | #ifdef CONFIG_NO_BOOTMEM | ||
489 | panic("no bootmem"); | ||
490 | return 0; | ||
491 | #else | ||
406 | unsigned long start, end; | 492 | unsigned long start, end; |
407 | 493 | ||
408 | start = PFN_DOWN(physaddr); | 494 | start = PFN_DOWN(physaddr); |
409 | end = PFN_UP(physaddr + size); | 495 | end = PFN_UP(physaddr + size); |
410 | 496 | ||
411 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); | 497 | return mark_bootmem_node(pgdat->bdata, start, end, 1, flags); |
498 | #endif | ||
412 | } | 499 | } |
413 | 500 | ||
414 | /** | 501 | /** |
@@ -424,14 +511,20 @@ int __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
424 | int __init reserve_bootmem(unsigned long addr, unsigned long size, | 511 | int __init reserve_bootmem(unsigned long addr, unsigned long size, |
425 | int flags) | 512 | int flags) |
426 | { | 513 | { |
514 | #ifdef CONFIG_NO_BOOTMEM | ||
515 | panic("no bootmem"); | ||
516 | return 0; | ||
517 | #else | ||
427 | unsigned long start, end; | 518 | unsigned long start, end; |
428 | 519 | ||
429 | start = PFN_DOWN(addr); | 520 | start = PFN_DOWN(addr); |
430 | end = PFN_UP(addr + size); | 521 | end = PFN_UP(addr + size); |
431 | 522 | ||
432 | return mark_bootmem(start, end, 1, flags); | 523 | return mark_bootmem(start, end, 1, flags); |
524 | #endif | ||
433 | } | 525 | } |
434 | 526 | ||
527 | #ifndef CONFIG_NO_BOOTMEM | ||
435 | static unsigned long __init align_idx(struct bootmem_data *bdata, | 528 | static unsigned long __init align_idx(struct bootmem_data *bdata, |
436 | unsigned long idx, unsigned long step) | 529 | unsigned long idx, unsigned long step) |
437 | { | 530 | { |
@@ -582,12 +675,33 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, | |||
582 | #endif | 675 | #endif |
583 | return NULL; | 676 | return NULL; |
584 | } | 677 | } |
678 | #endif | ||
585 | 679 | ||
586 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, | 680 | static void * __init ___alloc_bootmem_nopanic(unsigned long size, |
587 | unsigned long align, | 681 | unsigned long align, |
588 | unsigned long goal, | 682 | unsigned long goal, |
589 | unsigned long limit) | 683 | unsigned long limit) |
590 | { | 684 | { |
685 | #ifdef CONFIG_NO_BOOTMEM | ||
686 | void *ptr; | ||
687 | |||
688 | if (WARN_ON_ONCE(slab_is_available())) | ||
689 | return kzalloc(size, GFP_NOWAIT); | ||
690 | |||
691 | restart: | ||
692 | |||
693 | ptr = __alloc_memory_core_early(MAX_NUMNODES, size, align, goal, limit); | ||
694 | |||
695 | if (ptr) | ||
696 | return ptr; | ||
697 | |||
698 | if (goal != 0) { | ||
699 | goal = 0; | ||
700 | goto restart; | ||
701 | } | ||
702 | |||
703 | return NULL; | ||
704 | #else | ||
591 | bootmem_data_t *bdata; | 705 | bootmem_data_t *bdata; |
592 | void *region; | 706 | void *region; |
593 | 707 | ||
@@ -613,6 +727,7 @@ restart: | |||
613 | } | 727 | } |
614 | 728 | ||
615 | return NULL; | 729 | return NULL; |
730 | #endif | ||
616 | } | 731 | } |
617 | 732 | ||
618 | /** | 733 | /** |
@@ -631,7 +746,13 @@ restart: | |||
631 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, | 746 | void * __init __alloc_bootmem_nopanic(unsigned long size, unsigned long align, |
632 | unsigned long goal) | 747 | unsigned long goal) |
633 | { | 748 | { |
634 | return ___alloc_bootmem_nopanic(size, align, goal, 0); | 749 | unsigned long limit = 0; |
750 | |||
751 | #ifdef CONFIG_NO_BOOTMEM | ||
752 | limit = -1UL; | ||
753 | #endif | ||
754 | |||
755 | return ___alloc_bootmem_nopanic(size, align, goal, limit); | ||
635 | } | 756 | } |
636 | 757 | ||
637 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, | 758 | static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, |
@@ -665,9 +786,16 @@ static void * __init ___alloc_bootmem(unsigned long size, unsigned long align, | |||
665 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, | 786 | void * __init __alloc_bootmem(unsigned long size, unsigned long align, |
666 | unsigned long goal) | 787 | unsigned long goal) |
667 | { | 788 | { |
668 | return ___alloc_bootmem(size, align, goal, 0); | 789 | unsigned long limit = 0; |
790 | |||
791 | #ifdef CONFIG_NO_BOOTMEM | ||
792 | limit = -1UL; | ||
793 | #endif | ||
794 | |||
795 | return ___alloc_bootmem(size, align, goal, limit); | ||
669 | } | 796 | } |
670 | 797 | ||
798 | #ifndef CONFIG_NO_BOOTMEM | ||
671 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | 799 | static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, |
672 | unsigned long size, unsigned long align, | 800 | unsigned long size, unsigned long align, |
673 | unsigned long goal, unsigned long limit) | 801 | unsigned long goal, unsigned long limit) |
@@ -684,6 +812,7 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, | |||
684 | 812 | ||
685 | return ___alloc_bootmem(size, align, goal, limit); | 813 | return ___alloc_bootmem(size, align, goal, limit); |
686 | } | 814 | } |
815 | #endif | ||
687 | 816 | ||
688 | /** | 817 | /** |
689 | * __alloc_bootmem_node - allocate boot memory from a specific node | 818 | * __alloc_bootmem_node - allocate boot memory from a specific node |
@@ -706,7 +835,46 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | |||
706 | if (WARN_ON_ONCE(slab_is_available())) | 835 | if (WARN_ON_ONCE(slab_is_available())) |
707 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 836 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
708 | 837 | ||
838 | #ifdef CONFIG_NO_BOOTMEM | ||
839 | return __alloc_memory_core_early(pgdat->node_id, size, align, | ||
840 | goal, -1ULL); | ||
841 | #else | ||
709 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); | 842 | return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); |
843 | #endif | ||
844 | } | ||
845 | |||
846 | void * __init __alloc_bootmem_node_high(pg_data_t *pgdat, unsigned long size, | ||
847 | unsigned long align, unsigned long goal) | ||
848 | { | ||
849 | #ifdef MAX_DMA32_PFN | ||
850 | unsigned long end_pfn; | ||
851 | |||
852 | if (WARN_ON_ONCE(slab_is_available())) | ||
853 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | ||
854 | |||
855 | /* update goal according to MAX_DMA32_PFN */ | ||
856 | end_pfn = pgdat->node_start_pfn + pgdat->node_spanned_pages; | ||
857 | |||
858 | if (end_pfn > MAX_DMA32_PFN + (128 >> (20 - PAGE_SHIFT)) && | ||
859 | (goal >> PAGE_SHIFT) < MAX_DMA32_PFN) { | ||
860 | void *ptr; | ||
861 | unsigned long new_goal; | ||
862 | |||
863 | new_goal = MAX_DMA32_PFN << PAGE_SHIFT; | ||
864 | #ifdef CONFIG_NO_BOOTMEM | ||
865 | ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | ||
866 | new_goal, -1ULL); | ||
867 | #else | ||
868 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, | ||
869 | new_goal, 0); | ||
870 | #endif | ||
871 | if (ptr) | ||
872 | return ptr; | ||
873 | } | ||
874 | #endif | ||
875 | |||
876 | return __alloc_bootmem_node(pgdat, size, align, goal); | ||
877 | |||
710 | } | 878 | } |
711 | 879 | ||
712 | #ifdef CONFIG_SPARSEMEM | 880 | #ifdef CONFIG_SPARSEMEM |
@@ -720,6 +888,16 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | |||
720 | void * __init alloc_bootmem_section(unsigned long size, | 888 | void * __init alloc_bootmem_section(unsigned long size, |
721 | unsigned long section_nr) | 889 | unsigned long section_nr) |
722 | { | 890 | { |
891 | #ifdef CONFIG_NO_BOOTMEM | ||
892 | unsigned long pfn, goal, limit; | ||
893 | |||
894 | pfn = section_nr_to_pfn(section_nr); | ||
895 | goal = pfn << PAGE_SHIFT; | ||
896 | limit = section_nr_to_pfn(section_nr + 1) << PAGE_SHIFT; | ||
897 | |||
898 | return __alloc_memory_core_early(early_pfn_to_nid(pfn), size, | ||
899 | SMP_CACHE_BYTES, goal, limit); | ||
900 | #else | ||
723 | bootmem_data_t *bdata; | 901 | bootmem_data_t *bdata; |
724 | unsigned long pfn, goal, limit; | 902 | unsigned long pfn, goal, limit; |
725 | 903 | ||
@@ -729,6 +907,7 @@ void * __init alloc_bootmem_section(unsigned long size, | |||
729 | bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; | 907 | bdata = &bootmem_node_data[early_pfn_to_nid(pfn)]; |
730 | 908 | ||
731 | return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); | 909 | return alloc_bootmem_core(bdata, size, SMP_CACHE_BYTES, goal, limit); |
910 | #endif | ||
732 | } | 911 | } |
733 | #endif | 912 | #endif |
734 | 913 | ||
@@ -740,11 +919,16 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, | |||
740 | if (WARN_ON_ONCE(slab_is_available())) | 919 | if (WARN_ON_ONCE(slab_is_available())) |
741 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 920 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
742 | 921 | ||
922 | #ifdef CONFIG_NO_BOOTMEM | ||
923 | ptr = __alloc_memory_core_early(pgdat->node_id, size, align, | ||
924 | goal, -1ULL); | ||
925 | #else | ||
743 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); | 926 | ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); |
744 | if (ptr) | 927 | if (ptr) |
745 | return ptr; | 928 | return ptr; |
746 | 929 | ||
747 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); | 930 | ptr = alloc_bootmem_core(pgdat->bdata, size, align, goal, 0); |
931 | #endif | ||
748 | if (ptr) | 932 | if (ptr) |
749 | return ptr; | 933 | return ptr; |
750 | 934 | ||
@@ -795,6 +979,11 @@ void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, | |||
795 | if (WARN_ON_ONCE(slab_is_available())) | 979 | if (WARN_ON_ONCE(slab_is_available())) |
796 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); | 980 | return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); |
797 | 981 | ||
982 | #ifdef CONFIG_NO_BOOTMEM | ||
983 | return __alloc_memory_core_early(pgdat->node_id, size, align, | ||
984 | goal, ARCH_LOW_ADDRESS_LIMIT); | ||
985 | #else | ||
798 | return ___alloc_bootmem_node(pgdat->bdata, size, align, | 986 | return ___alloc_bootmem_node(pgdat->bdata, size, align, |
799 | goal, ARCH_LOW_ADDRESS_LIMIT); | 987 | goal, ARCH_LOW_ADDRESS_LIMIT); |
988 | #endif | ||
800 | } | 989 | } |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9a7aaae07ab4..a6b17aa4740b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -3374,6 +3374,61 @@ void __init free_bootmem_with_active_regions(int nid, | |||
3374 | } | 3374 | } |
3375 | } | 3375 | } |
3376 | 3376 | ||
3377 | int __init add_from_early_node_map(struct range *range, int az, | ||
3378 | int nr_range, int nid) | ||
3379 | { | ||
3380 | int i; | ||
3381 | u64 start, end; | ||
3382 | |||
3383 | /* need to go over early_node_map to find out good range for node */ | ||
3384 | for_each_active_range_index_in_nid(i, nid) { | ||
3385 | start = early_node_map[i].start_pfn; | ||
3386 | end = early_node_map[i].end_pfn; | ||
3387 | nr_range = add_range(range, az, nr_range, start, end); | ||
3388 | } | ||
3389 | return nr_range; | ||
3390 | } | ||
3391 | |||
3392 | #ifdef CONFIG_NO_BOOTMEM | ||
3393 | void * __init __alloc_memory_core_early(int nid, u64 size, u64 align, | ||
3394 | u64 goal, u64 limit) | ||
3395 | { | ||
3396 | int i; | ||
3397 | void *ptr; | ||
3398 | |||
3399 | /* need to go over early_node_map to find out good range for node */ | ||
3400 | for_each_active_range_index_in_nid(i, nid) { | ||
3401 | u64 addr; | ||
3402 | u64 ei_start, ei_last; | ||
3403 | |||
3404 | ei_last = early_node_map[i].end_pfn; | ||
3405 | ei_last <<= PAGE_SHIFT; | ||
3406 | ei_start = early_node_map[i].start_pfn; | ||
3407 | ei_start <<= PAGE_SHIFT; | ||
3408 | addr = find_early_area(ei_start, ei_last, | ||
3409 | goal, limit, size, align); | ||
3410 | |||
3411 | if (addr == -1ULL) | ||
3412 | continue; | ||
3413 | |||
3414 | #if 0 | ||
3415 | printk(KERN_DEBUG "alloc (nid=%d %llx - %llx) (%llx - %llx) %llx %llx => %llx\n", | ||
3416 | nid, | ||
3417 | ei_start, ei_last, goal, limit, size, | ||
3418 | align, addr); | ||
3419 | #endif | ||
3420 | |||
3421 | ptr = phys_to_virt(addr); | ||
3422 | memset(ptr, 0, size); | ||
3423 | reserve_early_without_check(addr, addr + size, "BOOTMEM"); | ||
3424 | return ptr; | ||
3425 | } | ||
3426 | |||
3427 | return NULL; | ||
3428 | } | ||
3429 | #endif | ||
3430 | |||
3431 | |||
3377 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) | 3432 | void __init work_with_active_regions(int nid, work_fn_t work_fn, void *data) |
3378 | { | 3433 | { |
3379 | int i; | 3434 | int i; |
@@ -4406,7 +4461,11 @@ void __init set_dma_reserve(unsigned long new_dma_reserve) | |||
4406 | } | 4461 | } |
4407 | 4462 | ||
4408 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 4463 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
4409 | struct pglist_data __refdata contig_page_data = { .bdata = &bootmem_node_data[0] }; | 4464 | struct pglist_data __refdata contig_page_data = { |
4465 | #ifndef CONFIG_NO_BOOTMEM | ||
4466 | .bdata = &bootmem_node_data[0] | ||
4467 | #endif | ||
4468 | }; | ||
4410 | EXPORT_SYMBOL(contig_page_data); | 4469 | EXPORT_SYMBOL(contig_page_data); |
4411 | #endif | 4470 | #endif |
4412 | 4471 | ||
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index d9714bdcb4a3..392b9bb5bc01 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c | |||
@@ -40,9 +40,11 @@ static void * __init_refok __earlyonly_bootmem_alloc(int node, | |||
40 | unsigned long align, | 40 | unsigned long align, |
41 | unsigned long goal) | 41 | unsigned long goal) |
42 | { | 42 | { |
43 | return __alloc_bootmem_node(NODE_DATA(node), size, align, goal); | 43 | return __alloc_bootmem_node_high(NODE_DATA(node), size, align, goal); |
44 | } | 44 | } |
45 | 45 | ||
46 | static void *vmemmap_buf; | ||
47 | static void *vmemmap_buf_end; | ||
46 | 48 | ||
47 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) | 49 | void * __meminit vmemmap_alloc_block(unsigned long size, int node) |
48 | { | 50 | { |
@@ -64,6 +66,24 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) | |||
64 | __pa(MAX_DMA_ADDRESS)); | 66 | __pa(MAX_DMA_ADDRESS)); |
65 | } | 67 | } |
66 | 68 | ||
69 | /* need to make sure size is all the same during early stage */ | ||
70 | void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) | ||
71 | { | ||
72 | void *ptr; | ||
73 | |||
74 | if (!vmemmap_buf) | ||
75 | return vmemmap_alloc_block(size, node); | ||
76 | |||
77 | /* take it from the buf */ | ||
78 | ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size); | ||
79 | if (ptr + size > vmemmap_buf_end) | ||
80 | return vmemmap_alloc_block(size, node); | ||
81 | |||
82 | vmemmap_buf = ptr + size; | ||
83 | |||
84 | return ptr; | ||
85 | } | ||
86 | |||
67 | void __meminit vmemmap_verify(pte_t *pte, int node, | 87 | void __meminit vmemmap_verify(pte_t *pte, int node, |
68 | unsigned long start, unsigned long end) | 88 | unsigned long start, unsigned long end) |
69 | { | 89 | { |
@@ -80,7 +100,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node) | |||
80 | pte_t *pte = pte_offset_kernel(pmd, addr); | 100 | pte_t *pte = pte_offset_kernel(pmd, addr); |
81 | if (pte_none(*pte)) { | 101 | if (pte_none(*pte)) { |
82 | pte_t entry; | 102 | pte_t entry; |
83 | void *p = vmemmap_alloc_block(PAGE_SIZE, node); | 103 | void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node); |
84 | if (!p) | 104 | if (!p) |
85 | return NULL; | 105 | return NULL; |
86 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); | 106 | entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); |
@@ -163,3 +183,55 @@ struct page * __meminit sparse_mem_map_populate(unsigned long pnum, int nid) | |||
163 | 183 | ||
164 | return map; | 184 | return map; |
165 | } | 185 | } |
186 | |||
187 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
188 | unsigned long pnum_begin, | ||
189 | unsigned long pnum_end, | ||
190 | unsigned long map_count, int nodeid) | ||
191 | { | ||
192 | unsigned long pnum; | ||
193 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
194 | void *vmemmap_buf_start; | ||
195 | |||
196 | size = ALIGN(size, PMD_SIZE); | ||
197 | vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count, | ||
198 | PMD_SIZE, __pa(MAX_DMA_ADDRESS)); | ||
199 | |||
200 | if (vmemmap_buf_start) { | ||
201 | vmemmap_buf = vmemmap_buf_start; | ||
202 | vmemmap_buf_end = vmemmap_buf_start + size * map_count; | ||
203 | } | ||
204 | |||
205 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
206 | struct mem_section *ms; | ||
207 | |||
208 | if (!present_section_nr(pnum)) | ||
209 | continue; | ||
210 | |||
211 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
212 | if (map_map[pnum]) | ||
213 | continue; | ||
214 | ms = __nr_to_section(pnum); | ||
215 | printk(KERN_ERR "%s: sparsemem memory map backing failed " | ||
216 | "some memory will not be available.\n", __func__); | ||
217 | ms->section_mem_map = 0; | ||
218 | } | ||
219 | |||
220 | if (vmemmap_buf_start) { | ||
221 | /* need to free left buf */ | ||
222 | #ifdef CONFIG_NO_BOOTMEM | ||
223 | free_early(__pa(vmemmap_buf_start), __pa(vmemmap_buf_end)); | ||
224 | if (vmemmap_buf_start < vmemmap_buf) { | ||
225 | char name[15]; | ||
226 | |||
227 | snprintf(name, sizeof(name), "MEMMAP %d", nodeid); | ||
228 | reserve_early_without_check(__pa(vmemmap_buf_start), | ||
229 | __pa(vmemmap_buf), name); | ||
230 | } | ||
231 | #else | ||
232 | free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); | ||
233 | #endif | ||
234 | vmemmap_buf = NULL; | ||
235 | vmemmap_buf_end = NULL; | ||
236 | } | ||
237 | } | ||
diff --git a/mm/sparse.c b/mm/sparse.c index 6ce4aab69e99..22896d589133 100644 --- a/mm/sparse.c +++ b/mm/sparse.c | |||
@@ -271,7 +271,8 @@ static unsigned long *__kmalloc_section_usemap(void) | |||
271 | 271 | ||
272 | #ifdef CONFIG_MEMORY_HOTREMOVE | 272 | #ifdef CONFIG_MEMORY_HOTREMOVE |
273 | static unsigned long * __init | 273 | static unsigned long * __init |
274 | sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | 274 | sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, |
275 | unsigned long count) | ||
275 | { | 276 | { |
276 | unsigned long section_nr; | 277 | unsigned long section_nr; |
277 | 278 | ||
@@ -286,7 +287,7 @@ sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | |||
286 | * this problem. | 287 | * this problem. |
287 | */ | 288 | */ |
288 | section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); | 289 | section_nr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT); |
289 | return alloc_bootmem_section(usemap_size(), section_nr); | 290 | return alloc_bootmem_section(usemap_size() * count, section_nr); |
290 | } | 291 | } |
291 | 292 | ||
292 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | 293 | static void __init check_usemap_section_nr(int nid, unsigned long *usemap) |
@@ -329,7 +330,8 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | |||
329 | } | 330 | } |
330 | #else | 331 | #else |
331 | static unsigned long * __init | 332 | static unsigned long * __init |
332 | sparse_early_usemap_alloc_pgdat_section(struct pglist_data *pgdat) | 333 | sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat, |
334 | unsigned long count) | ||
333 | { | 335 | { |
334 | return NULL; | 336 | return NULL; |
335 | } | 337 | } |
@@ -339,27 +341,40 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap) | |||
339 | } | 341 | } |
340 | #endif /* CONFIG_MEMORY_HOTREMOVE */ | 342 | #endif /* CONFIG_MEMORY_HOTREMOVE */ |
341 | 343 | ||
342 | static unsigned long *__init sparse_early_usemap_alloc(unsigned long pnum) | 344 | static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map, |
345 | unsigned long pnum_begin, | ||
346 | unsigned long pnum_end, | ||
347 | unsigned long usemap_count, int nodeid) | ||
343 | { | 348 | { |
344 | unsigned long *usemap; | 349 | void *usemap; |
345 | struct mem_section *ms = __nr_to_section(pnum); | 350 | unsigned long pnum; |
346 | int nid = sparse_early_nid(ms); | 351 | int size = usemap_size(); |
347 | |||
348 | usemap = sparse_early_usemap_alloc_pgdat_section(NODE_DATA(nid)); | ||
349 | if (usemap) | ||
350 | return usemap; | ||
351 | 352 | ||
352 | usemap = alloc_bootmem_node(NODE_DATA(nid), usemap_size()); | 353 | usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid), |
354 | usemap_count); | ||
353 | if (usemap) { | 355 | if (usemap) { |
354 | check_usemap_section_nr(nid, usemap); | 356 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { |
355 | return usemap; | 357 | if (!present_section_nr(pnum)) |
358 | continue; | ||
359 | usemap_map[pnum] = usemap; | ||
360 | usemap += size; | ||
361 | } | ||
362 | return; | ||
356 | } | 363 | } |
357 | 364 | ||
358 | /* Stupid: suppress gcc warning for SPARSEMEM && !NUMA */ | 365 | usemap = alloc_bootmem_node(NODE_DATA(nodeid), size * usemap_count); |
359 | nid = 0; | 366 | if (usemap) { |
367 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
368 | if (!present_section_nr(pnum)) | ||
369 | continue; | ||
370 | usemap_map[pnum] = usemap; | ||
371 | usemap += size; | ||
372 | check_usemap_section_nr(nodeid, usemap_map[pnum]); | ||
373 | } | ||
374 | return; | ||
375 | } | ||
360 | 376 | ||
361 | printk(KERN_WARNING "%s: allocation failed\n", __func__); | 377 | printk(KERN_WARNING "%s: allocation failed\n", __func__); |
362 | return NULL; | ||
363 | } | 378 | } |
364 | 379 | ||
365 | #ifndef CONFIG_SPARSEMEM_VMEMMAP | 380 | #ifndef CONFIG_SPARSEMEM_VMEMMAP |
@@ -375,8 +390,65 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid) | |||
375 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); | 390 | PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION)); |
376 | return map; | 391 | return map; |
377 | } | 392 | } |
393 | void __init sparse_mem_maps_populate_node(struct page **map_map, | ||
394 | unsigned long pnum_begin, | ||
395 | unsigned long pnum_end, | ||
396 | unsigned long map_count, int nodeid) | ||
397 | { | ||
398 | void *map; | ||
399 | unsigned long pnum; | ||
400 | unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; | ||
401 | |||
402 | map = alloc_remap(nodeid, size * map_count); | ||
403 | if (map) { | ||
404 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
405 | if (!present_section_nr(pnum)) | ||
406 | continue; | ||
407 | map_map[pnum] = map; | ||
408 | map += size; | ||
409 | } | ||
410 | return; | ||
411 | } | ||
412 | |||
413 | size = PAGE_ALIGN(size); | ||
414 | map = alloc_bootmem_pages_node(NODE_DATA(nodeid), size * map_count); | ||
415 | if (map) { | ||
416 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
417 | if (!present_section_nr(pnum)) | ||
418 | continue; | ||
419 | map_map[pnum] = map; | ||
420 | map += size; | ||
421 | } | ||
422 | return; | ||
423 | } | ||
424 | |||
425 | /* fallback */ | ||
426 | for (pnum = pnum_begin; pnum < pnum_end; pnum++) { | ||
427 | struct mem_section *ms; | ||
428 | |||
429 | if (!present_section_nr(pnum)) | ||
430 | continue; | ||
431 | map_map[pnum] = sparse_mem_map_populate(pnum, nodeid); | ||
432 | if (map_map[pnum]) | ||
433 | continue; | ||
434 | ms = __nr_to_section(pnum); | ||
435 | printk(KERN_ERR "%s: sparsemem memory map backing failed " | ||
436 | "some memory will not be available.\n", __func__); | ||
437 | ms->section_mem_map = 0; | ||
438 | } | ||
439 | } | ||
378 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ | 440 | #endif /* !CONFIG_SPARSEMEM_VMEMMAP */ |
379 | 441 | ||
442 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
443 | static void __init sparse_early_mem_maps_alloc_node(struct page **map_map, | ||
444 | unsigned long pnum_begin, | ||
445 | unsigned long pnum_end, | ||
446 | unsigned long map_count, int nodeid) | ||
447 | { | ||
448 | sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end, | ||
449 | map_count, nodeid); | ||
450 | } | ||
451 | #else | ||
380 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | 452 | static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) |
381 | { | 453 | { |
382 | struct page *map; | 454 | struct page *map; |
@@ -392,10 +464,12 @@ static struct page __init *sparse_early_mem_map_alloc(unsigned long pnum) | |||
392 | ms->section_mem_map = 0; | 464 | ms->section_mem_map = 0; |
393 | return NULL; | 465 | return NULL; |
394 | } | 466 | } |
467 | #endif | ||
395 | 468 | ||
396 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) | 469 | void __attribute__((weak)) __meminit vmemmap_populate_print_last(void) |
397 | { | 470 | { |
398 | } | 471 | } |
472 | |||
399 | /* | 473 | /* |
400 | * Allocate the accumulated non-linear sections, allocate a mem_map | 474 | * Allocate the accumulated non-linear sections, allocate a mem_map |
401 | * for each and record the physical to section mapping. | 475 | * for each and record the physical to section mapping. |
@@ -407,6 +481,14 @@ void __init sparse_init(void) | |||
407 | unsigned long *usemap; | 481 | unsigned long *usemap; |
408 | unsigned long **usemap_map; | 482 | unsigned long **usemap_map; |
409 | int size; | 483 | int size; |
484 | int nodeid_begin = 0; | ||
485 | unsigned long pnum_begin = 0; | ||
486 | unsigned long usemap_count; | ||
487 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
488 | unsigned long map_count; | ||
489 | int size2; | ||
490 | struct page **map_map; | ||
491 | #endif | ||
410 | 492 | ||
411 | /* | 493 | /* |
412 | * map is using big page (aka 2M in x86 64 bit) | 494 | * map is using big page (aka 2M in x86 64 bit) |
@@ -425,10 +507,81 @@ void __init sparse_init(void) | |||
425 | panic("can not allocate usemap_map\n"); | 507 | panic("can not allocate usemap_map\n"); |
426 | 508 | ||
427 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 509 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
510 | struct mem_section *ms; | ||
511 | |||
428 | if (!present_section_nr(pnum)) | 512 | if (!present_section_nr(pnum)) |
429 | continue; | 513 | continue; |
430 | usemap_map[pnum] = sparse_early_usemap_alloc(pnum); | 514 | ms = __nr_to_section(pnum); |
515 | nodeid_begin = sparse_early_nid(ms); | ||
516 | pnum_begin = pnum; | ||
517 | break; | ||
431 | } | 518 | } |
519 | usemap_count = 1; | ||
520 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
521 | struct mem_section *ms; | ||
522 | int nodeid; | ||
523 | |||
524 | if (!present_section_nr(pnum)) | ||
525 | continue; | ||
526 | ms = __nr_to_section(pnum); | ||
527 | nodeid = sparse_early_nid(ms); | ||
528 | if (nodeid == nodeid_begin) { | ||
529 | usemap_count++; | ||
530 | continue; | ||
531 | } | ||
532 | /* ok, we need to take care of pnum_begin to pnum - 1 */ | ||
533 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum, | ||
534 | usemap_count, nodeid_begin); | ||
535 | /* new start, update count etc*/ | ||
536 | nodeid_begin = nodeid; | ||
537 | pnum_begin = pnum; | ||
538 | usemap_count = 1; | ||
539 | } | ||
540 | /* ok, last chunk */ | ||
541 | sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS, | ||
542 | usemap_count, nodeid_begin); | ||
543 | |||
544 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
545 | size2 = sizeof(struct page *) * NR_MEM_SECTIONS; | ||
546 | map_map = alloc_bootmem(size2); | ||
547 | if (!map_map) | ||
548 | panic("can not allocate map_map\n"); | ||
549 | |||
550 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | ||
551 | struct mem_section *ms; | ||
552 | |||
553 | if (!present_section_nr(pnum)) | ||
554 | continue; | ||
555 | ms = __nr_to_section(pnum); | ||
556 | nodeid_begin = sparse_early_nid(ms); | ||
557 | pnum_begin = pnum; | ||
558 | break; | ||
559 | } | ||
560 | map_count = 1; | ||
561 | for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) { | ||
562 | struct mem_section *ms; | ||
563 | int nodeid; | ||
564 | |||
565 | if (!present_section_nr(pnum)) | ||
566 | continue; | ||
567 | ms = __nr_to_section(pnum); | ||
568 | nodeid = sparse_early_nid(ms); | ||
569 | if (nodeid == nodeid_begin) { | ||
570 | map_count++; | ||
571 | continue; | ||
572 | } | ||
573 | /* ok, we need to take care of pnum_begin to pnum - 1 */ | ||
574 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum, | ||
575 | map_count, nodeid_begin); | ||
576 | /* new start, update count etc*/ | ||
577 | nodeid_begin = nodeid; | ||
578 | pnum_begin = pnum; | ||
579 | map_count = 1; | ||
580 | } | ||
581 | /* ok, last chunk */ | ||
582 | sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS, | ||
583 | map_count, nodeid_begin); | ||
584 | #endif | ||
432 | 585 | ||
433 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { | 586 | for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) { |
434 | if (!present_section_nr(pnum)) | 587 | if (!present_section_nr(pnum)) |
@@ -438,7 +591,11 @@ void __init sparse_init(void) | |||
438 | if (!usemap) | 591 | if (!usemap) |
439 | continue; | 592 | continue; |
440 | 593 | ||
594 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
595 | map = map_map[pnum]; | ||
596 | #else | ||
441 | map = sparse_early_mem_map_alloc(pnum); | 597 | map = sparse_early_mem_map_alloc(pnum); |
598 | #endif | ||
442 | if (!map) | 599 | if (!map) |
443 | continue; | 600 | continue; |
444 | 601 | ||
@@ -448,6 +605,9 @@ void __init sparse_init(void) | |||
448 | 605 | ||
449 | vmemmap_populate_print_last(); | 606 | vmemmap_populate_print_last(); |
450 | 607 | ||
608 | #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER | ||
609 | free_bootmem(__pa(map_map), size2); | ||
610 | #endif | ||
451 | free_bootmem(__pa(usemap_map), size); | 611 | free_bootmem(__pa(usemap_map), size); |
452 | } | 612 | } |
453 | 613 | ||