 arch/x86/kernel/e820_64.c  |  13
 arch/x86/kernel/setup_64.c |   3
 arch/x86/mm/init_64.c      |  38
 arch/x86/mm/numa_64.c      |  42
 include/asm-x86/e820_64.h  |   2
 include/linux/mm.h         |   1
 mm/bootmem.c               | 164
 mm/sparse.c                |  37
 8 files changed, 228 insertions(+), 72 deletions(-)
diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c
index 79f0d52fa99a..645ee5e32a27 100644
--- a/arch/x86/kernel/e820_64.c
+++ b/arch/x86/kernel/e820_64.c
@@ -106,14 +106,19 @@ void __init free_early(unsigned long start, unsigned long end)
         early_res[j - 1].end = 0;
 }
 
-void __init early_res_to_bootmem(void)
+void __init early_res_to_bootmem(unsigned long start, unsigned long end)
 {
         int i;
+        unsigned long final_start, final_end;
         for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
                 struct early_res *r = &early_res[i];
-                printk(KERN_INFO "early res: %d [%lx-%lx] %s\n", i,
-                        r->start, r->end - 1, r->name);
-                reserve_bootmem_generic(r->start, r->end - r->start);
+                final_start = max(start, r->start);
+                final_end = min(end, r->end);
+                if (final_start >= final_end)
+                        continue;
+                printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
+                        final_start, final_end - 1, r->name);
+                reserve_bootmem_generic(final_start, final_end - final_start);
         }
 }
 
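
The new early_res_to_bootmem() takes a [start, end) window and converts only the early reservations that intersect it, clamping each entry with max()/min() and skipping empty intersections. A standalone C sketch of that clamping step (illustrative values and helper name, not kernel code):

    #include <stdio.h>

    struct early_res { unsigned long start, end; const char *name; };

    /* Clamp one early reservation to the window [start, end);
     * returns 0 when there is nothing left to reserve. */
    static int clamp_to_window(const struct early_res *r,
                               unsigned long start, unsigned long end,
                               unsigned long *fs, unsigned long *fe)
    {
        *fs = r->start > start ? r->start : start;  /* max(start, r->start) */
        *fe = r->end < end ? r->end : end;          /* min(end, r->end)     */
        return *fs < *fe;
    }

    int main(void)
    {
        struct early_res r = { 0x1000, 0x9000, "SAMPLE RANGE" };
        unsigned long fs, fe;

        if (clamp_to_window(&r, 0x0, 0x8000, &fs, &fe))
            printf("reserve [%lx-%lx] %s\n", fs, fe - 1, r.name);
        return 0;
    }

This is what lets setup_node_bootmem() below hand each node only the slice of the early reservations that falls inside that node's own range.
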
diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c
index b04e2c011e1a..60e64c8eee92 100644
--- a/arch/x86/kernel/setup_64.c
+++ b/arch/x86/kernel/setup_64.c
@@ -190,6 +190,7 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
         bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
         e820_register_active_regions(0, start_pfn, end_pfn);
         free_bootmem_with_active_regions(0, end_pfn);
+        early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
         reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
 }
 #endif
@@ -421,8 +422,6 @@ void __init setup_arch(char **cmdline_p)
         contig_initmem_init(0, end_pfn);
 #endif
 
-        early_res_to_bootmem();
-
         dma32_reserve_bootmem();
 
 #ifdef CONFIG_ACPI_SLEEP
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 0cca62663037..5fbb8652cf59 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -810,7 +810,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
 void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 {
 #ifdef CONFIG_NUMA
-        int nid = phys_to_nid(phys);
+        int nid, next_nid;
 #endif
         unsigned long pfn = phys >> PAGE_SHIFT;
 
@@ -829,10 +829,16 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
 
         /* Should check here against the e820 map to avoid double free */
 #ifdef CONFIG_NUMA
-        reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+        nid = phys_to_nid(phys);
+        next_nid = phys_to_nid(phys + len - 1);
+        if (nid == next_nid)
+                reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
+        else
+                reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #else
         reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
 #endif
+
         if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
                 dma_reserve += len / PAGE_SIZE;
                 set_dma_reserve(dma_reserve);
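
reserve_bootmem_generic() can now be handed a range whose first and last byte resolve to different NUMA nodes; only when both ends land on the same node is the per-node reserve_bootmem_node() path safe, otherwise it falls back to the node-spanning reserve_bootmem(). A minimal userspace model of that decision (phys_to_nid() is stubbed out; the real kernel consults the memnode map):

    #include <stdio.h>

    /* Stand-in for phys_to_nid(): pretend each node owns exactly 1 GiB. */
    static int phys_to_nid(unsigned long phys) { return (int)(phys >> 30); }

    static void reserve_generic(unsigned long phys, unsigned long len)
    {
        int nid = phys_to_nid(phys);
        int next_nid = phys_to_nid(phys + len - 1);

        if (nid == next_nid)
            printf("reserve_bootmem_node(node %d, %#lx, %#lx)\n", nid, phys, len);
        else    /* range crosses a node boundary: use the spanning API */
            printf("reserve_bootmem(%#lx, %#lx)\n", phys, len);
    }

    int main(void)
    {
        reserve_generic(0x3ff00000UL, 0x80000UL);   /* stays inside node 0 */
        reserve_generic(0x3ff00000UL, 0x400000UL);  /* crosses into node 1 */
        return 0;
    }
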
@@ -926,6 +932,10 @@ const char *arch_vma_name(struct vm_area_struct *vma)
 /*
  * Initialise the sparsemem vmemmap using huge-pages at the PMD level.
  */
+static long __meminitdata addr_start, addr_end;
+static void __meminitdata *p_start, *p_end;
+static int __meminitdata node_start;
+
 int __meminit
 vmemmap_populate(struct page *start_page, unsigned long size, int node)
 {
@@ -960,12 +970,32 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node)
                                         PAGE_KERNEL_LARGE);
                         set_pmd(pmd, __pmd(pte_val(entry)));
 
-                        printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n",
-                                addr, addr + PMD_SIZE - 1, p, node);
+                        /* check to see if we have contiguous blocks */
+                        if (p_end != p || node_start != node) {
+                                if (p_start)
+                                        printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+                                                addr_start, addr_end-1, p_start, p_end-1, node_start);
+                                addr_start = addr;
+                                node_start = node;
+                                p_start = p;
+                        }
+                        addr_end = addr + PMD_SIZE;
+                        p_end = p + PMD_SIZE;
                 } else {
                         vmemmap_verify((pte_t *)pmd, node, addr, next);
                 }
         }
         return 0;
 }
+
+void __meminit vmemmap_populate_print_last(void)
+{
+        if (p_start) {
+                printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+                        addr_start, addr_end-1, p_start, p_end-1, node_start);
+                p_start = NULL;
+                p_end = NULL;
+                node_start = 0;
+        }
+}
 #endif
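
Rather than printing one KERN_DEBUG line per 2 MB PMD mapping, vmemmap_populate() now tracks the current run of contiguous mappings on the same node and prints only when the run breaks; vmemmap_populate_print_last() flushes whatever run is still open once sparse_init() is done. The same coalescing idea in a self-contained sketch (sample addresses, no kernel APIs):

    #include <stdio.h>

    #define CHUNK 0x200000UL            /* stands in for PMD_SIZE */

    static unsigned long addr_start, addr_end;
    static int run_open;

    static void flush_run(void)         /* like vmemmap_populate_print_last() */
    {
        if (run_open)
            printf(" [%lx-%lx] mapped as one run\n", addr_start, addr_end - 1);
        run_open = 0;
    }

    static void note_mapping(unsigned long addr)
    {
        if (!run_open || addr != addr_end) {  /* run broken: report, restart */
            flush_run();
            addr_start = addr;
            run_open = 1;
        }
        addr_end = addr + CHUNK;
    }

    int main(void)
    {
        note_mapping(0);
        note_mapping(CHUNK);        /* contiguous: extends the current run */
        note_mapping(10 * CHUNK);   /* hole: the previous run gets printed */
        flush_run();
        return 0;
    }
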
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 9a6892200b27..c5066d519e5d 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -196,6 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
         unsigned long bootmap_start, nodedata_phys;
         void *bootmap;
         const int pgdat_size = round_up(sizeof(pg_data_t), PAGE_SIZE);
+        int nid;
 
         start = round_up(start, ZONE_ALIGN);
 
@@ -218,9 +219,19 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
         NODE_DATA(nodeid)->node_start_pfn = start_pfn;
         NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
 
-        /* Find a place for the bootmem map */
+        /*
+         * Find a place for the bootmem map
+         * nodedata_phys could be on other nodes by alloc_bootmem,
+         * so need to sure bootmap_start not to be small, otherwise
+         * early_node_mem will get that with find_e820_area instead
+         * of alloc_bootmem, that could clash with reserved range
+         */
         bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn);
-        bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+        nid = phys_to_nid(nodedata_phys);
+        if (nid == nodeid)
+                bootmap_start = round_up(nodedata_phys + pgdat_size, PAGE_SIZE);
+        else
+                bootmap_start = round_up(start, PAGE_SIZE);
         /*
          * SMP_CAHCE_BYTES could be enough, but init_bootmem_node like
          * to use that to align to PAGE_SIZE
@@ -245,10 +256,29 @@ void __init setup_node_bootmem(int nodeid, unsigned long start,
 
         free_bootmem_with_active_regions(nodeid, end);
 
-        reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys, pgdat_size,
-                        BOOTMEM_DEFAULT);
-        reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
-                        bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+        /*
+         * convert early reserve to bootmem reserve earlier
+         * otherwise early_node_mem could use early reserved mem
+         * on previous node
+         */
+        early_res_to_bootmem(start, end);
+
+        /*
+         * in some case early_node_mem could use alloc_bootmem
+         * to get range on other node, don't reserve that again
+         */
+        if (nid != nodeid)
+                printk(KERN_INFO " NODE_DATA(%d) on node %d\n", nodeid, nid);
+        else
+                reserve_bootmem_node(NODE_DATA(nodeid), nodedata_phys,
+                        pgdat_size, BOOTMEM_DEFAULT);
+        nid = phys_to_nid(bootmap_start);
+        if (nid != nodeid)
+                printk(KERN_INFO " bootmap(%d) on node %d\n", nodeid, nid);
+        else
+                reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start,
+                        bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT);
+
 #ifdef CONFIG_ACPI_NUMA
         srat_reserve_add_area(nodeid);
 #endif
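
Because early_node_mem() may fall back to alloc_bootmem() and place NODE_DATA(nodeid) on an earlier node, the patched setup_node_bootmem() derives the bootmem bitmap position from where the pgdat really ended up, and it only reserves the pgdat and the bitmap in this node's bootmem map when they actually live on this node. A rough sketch of the bitmap-placement choice alone (hypothetical helper name, 4 KB pages, stubbed phys_to_nid()):

    #include <stdio.h>

    static int phys_to_nid(unsigned long phys) { return (int)(phys >> 30); }

    /* Mimics the patched choice of bootmap_start for node 'nodeid',
     * whose memory begins at 'start'. */
    static unsigned long pick_bootmap_start(int nodeid, unsigned long start,
                                            unsigned long nodedata_phys,
                                            unsigned long pgdat_size)
    {
        if (phys_to_nid(nodedata_phys) == nodeid)   /* pgdat is local  */
            return (nodedata_phys + pgdat_size + 0xfffUL) & ~0xfffUL;
        return (start + 0xfffUL) & ~0xfffUL;        /* pgdat is remote */
    }

    int main(void)
    {
        /* node 1 starts at 1 GiB; first call: local pgdat, second: remote */
        printf("%#lx\n", pick_bootmap_start(1, 0x40000000UL, 0x40002000UL, 0x4000UL));
        printf("%#lx\n", pick_bootmap_start(1, 0x40000000UL, 0x00002000UL, 0x4000UL));
        return 0;
    }

Calling early_res_to_bootmem(start, end) before the reservations, as the hunk's comment says, turns this node's early reservations into real bootmem reservations before a later node's early_node_mem() can hand that memory out again.
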
diff --git a/include/asm-x86/e820_64.h b/include/asm-x86/e820_64.h
index b5e02e379af3..71c4d685d30d 100644
--- a/include/asm-x86/e820_64.h
+++ b/include/asm-x86/e820_64.h
@@ -49,7 +49,7 @@ extern void update_e820(void);
 
 extern void reserve_early(unsigned long start, unsigned long end, char *name);
 extern void free_early(unsigned long start, unsigned long end);
-extern void early_res_to_bootmem(void);
+extern void early_res_to_bootmem(unsigned long start, unsigned long end);
 
 #endif/*!__ASSEMBLY__*/
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b695875d63e3..286d31521605 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1229,6 +1229,7 @@ void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
 int vmemmap_populate_basepages(struct page *start_page,
                                                 unsigned long pages, int node);
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
+void vmemmap_populate_print_last(void);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 2ccea700968f..b6791646143e 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat,
  * might be used for boot-time allocations - or it might get added
  * to the free page pool later on.
  */
-static int __init reserve_bootmem_core(bootmem_data_t *bdata,
+static int __init can_reserve_bootmem_core(bootmem_data_t *bdata,
                         unsigned long addr, unsigned long size, int flags)
 {
         unsigned long sidx, eidx;
         unsigned long i;
-        int ret;
+
+        BUG_ON(!size);
+
+        /* out of range, don't hold other */
+        if (addr + size < bdata->node_boot_start ||
+                PFN_DOWN(addr) > bdata->node_low_pfn)
+                return 0;
 
         /*
-         * round up, partially reserved pages are considered
-         * fully reserved.
+         * Round up to index to the range.
          */
+        if (addr > bdata->node_boot_start)
+                sidx= PFN_DOWN(addr - bdata->node_boot_start);
+        else
+                sidx = 0;
+
+        eidx = PFN_UP(addr + size - bdata->node_boot_start);
+        if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+                eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
+
+        for (i = sidx; i < eidx; i++) {
+                if (test_bit(i, bdata->node_bootmem_map)) {
+                        if (flags & BOOTMEM_EXCLUSIVE)
+                                return -EBUSY;
+                }
+        }
+
+        return 0;
+
+}
+
+static void __init reserve_bootmem_core(bootmem_data_t *bdata,
+                        unsigned long addr, unsigned long size, int flags)
+{
+        unsigned long sidx, eidx;
+        unsigned long i;
+
         BUG_ON(!size);
-        BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn);
-        BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn);
-        BUG_ON(addr < bdata->node_boot_start);
 
-        sidx = PFN_DOWN(addr - bdata->node_boot_start);
+        /* out of range */
+        if (addr + size < bdata->node_boot_start ||
+                PFN_DOWN(addr) > bdata->node_low_pfn)
+                return;
+
+        /*
+         * Round up to index to the range.
+         */
+        if (addr > bdata->node_boot_start)
+                sidx= PFN_DOWN(addr - bdata->node_boot_start);
+        else
+                sidx = 0;
+
         eidx = PFN_UP(addr + size - bdata->node_boot_start);
+        if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start))
+                eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start);
 
-        for (i = sidx; i < eidx; i++)
+        for (i = sidx; i < eidx; i++) {
                 if (test_and_set_bit(i, bdata->node_bootmem_map)) {
 #ifdef CONFIG_DEBUG_BOOTMEM
                         printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE);
 #endif
-                        if (flags & BOOTMEM_EXCLUSIVE) {
-                                ret = -EBUSY;
-                                goto err;
-                        }
                 }
-
-        return 0;
-
-err:
-        /* unreserve memory we accidentally reserved */
-        for (i--; i >= sidx; i--)
-                clear_bit(i, bdata->node_bootmem_map);
-
-        return ret;
+        }
 }
 
 static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr,
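
Note how both new helpers clamp the request instead of BUG()ing on it: the old reserve_bootmem_core() asserted that the whole range belonged to the node, which no longer holds once a single reservation is offered to every node in turn, so each node now marks only the pages it actually owns. The index clamping, shown standalone with sample numbers (plain C, not the kernel structures):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long node_boot_start = 0x40000000UL;  /* node owns 1 GiB.. */
        unsigned long node_low_pfn = 0x80000UL;        /* ..up to 2 GiB     */
        unsigned long addr = 0x3ff00000UL, size = 0x300000UL; /* crosses in */

        unsigned long start_pfn = node_boot_start >> PAGE_SHIFT;
        unsigned long sidx = addr > node_boot_start
                ? (addr - node_boot_start) >> PAGE_SHIFT : 0;
        unsigned long eidx = (addr + size - node_boot_start
                + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT;   /* PFN_UP() */
        if (eidx > node_low_pfn - start_pfn)
            eidx = node_low_pfn - start_pfn;

        /* only bits [sidx, eidx) of this node's bitmap would be touched */
        printf("sidx=%lu eidx=%lu\n", sidx, eidx);
        return 0;
    }
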
@@ -206,9 +236,11 @@ void * __init
 __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
                 unsigned long align, unsigned long goal, unsigned long limit)
 {
-        unsigned long offset, remaining_size, areasize, preferred;
+        unsigned long areasize, preferred;
         unsigned long i, start = 0, incr, eidx, end_pfn;
         void *ret;
+        unsigned long node_boot_start;
+        void *node_bootmem_map;
 
         if (!size) {
                 printk("__alloc_bootmem_core(): zero-sized request\n");
@@ -216,70 +248,83 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size,
         }
         BUG_ON(align & (align-1));
 
-        if (limit && bdata->node_boot_start >= limit)
-                return NULL;
-
         /* on nodes without memory - bootmem_map is NULL */
         if (!bdata->node_bootmem_map)
                 return NULL;
 
+        /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */
+        node_boot_start = bdata->node_boot_start;
+        node_bootmem_map = bdata->node_bootmem_map;
+        if (align) {
+                node_boot_start = ALIGN(bdata->node_boot_start, align);
+                if (node_boot_start > bdata->node_boot_start)
+                        node_bootmem_map = (unsigned long *)bdata->node_bootmem_map +
+                                PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG;
+        }
+
+        if (limit && node_boot_start >= limit)
+                return NULL;
+
         end_pfn = bdata->node_low_pfn;
         limit = PFN_DOWN(limit);
         if (limit && end_pfn > limit)
                 end_pfn = limit;
 
-        eidx = end_pfn - PFN_DOWN(bdata->node_boot_start);
-        offset = 0;
-        if (align && (bdata->node_boot_start & (align - 1UL)) != 0)
-                offset = align - (bdata->node_boot_start & (align - 1UL));
-        offset = PFN_DOWN(offset);
+        eidx = end_pfn - PFN_DOWN(node_boot_start);
 
         /*
          * We try to allocate bootmem pages above 'goal'
          * first, then we try to allocate lower pages.
          */
-        if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) {
-                preferred = goal - bdata->node_boot_start;
+        preferred = 0;
+        if (goal && PFN_DOWN(goal) < end_pfn) {
+                if (goal > node_boot_start)
+                        preferred = goal - node_boot_start;
 
-                if (bdata->last_success >= preferred)
+                if (bdata->last_success > node_boot_start &&
+                        bdata->last_success - node_boot_start >= preferred)
                         if (!limit || (limit && limit > bdata->last_success))
-                                preferred = bdata->last_success;
-        } else
-                preferred = 0;
+                                preferred = bdata->last_success - node_boot_start;
+        }
 
-        preferred = PFN_DOWN(ALIGN(preferred, align)) + offset;
+        preferred = PFN_DOWN(ALIGN(preferred, align));
         areasize = (size + PAGE_SIZE-1) / PAGE_SIZE;
         incr = align >> PAGE_SHIFT ? : 1;
 
 restart_scan:
-        for (i = preferred; i < eidx; i += incr) {
+        for (i = preferred; i < eidx;) {
                 unsigned long j;
-                i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i);
+
+                i = find_next_zero_bit(node_bootmem_map, eidx, i);
                 i = ALIGN(i, incr);
                 if (i >= eidx)
                         break;
-                if (test_bit(i, bdata->node_bootmem_map))
+                if (test_bit(i, node_bootmem_map)) {
+                        i += incr;
                         continue;
+                }
                 for (j = i + 1; j < i + areasize; ++j) {
                         if (j >= eidx)
                                 goto fail_block;
-                        if (test_bit(j, bdata->node_bootmem_map))
+                        if (test_bit(j, node_bootmem_map))
                                 goto fail_block;
                 }
                 start = i;
                 goto found;
         fail_block:
                 i = ALIGN(j, incr);
+                if (i == j)
+                        i += incr;
         }
 
-        if (preferred > offset) {
-                preferred = offset;
+        if (preferred > 0) {
+                preferred = 0;
                 goto restart_scan;
         }
         return NULL;
 
 found:
-        bdata->last_success = PFN_PHYS(start);
+        bdata->last_success = PFN_PHYS(start) + node_boot_start;
         BUG_ON(start >= eidx);
 
         /*
@@ -289,6 +334,7 @@ found:
          */
         if (align < PAGE_SIZE &&
             bdata->last_offset && bdata->last_pos+1 == start) {
+                unsigned long offset, remaining_size;
                 offset = ALIGN(bdata->last_offset, align);
                 BUG_ON(offset > PAGE_SIZE);
                 remaining_size = PAGE_SIZE - offset;
@@ -297,14 +343,12 @@ found:
                         /* last_pos unchanged */
                         bdata->last_offset = offset + size;
                         ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                                           offset +
-                                           bdata->node_boot_start);
+                                           offset + node_boot_start);
                 } else {
                         remaining_size = size - remaining_size;
                         areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE;
                         ret = phys_to_virt(bdata->last_pos * PAGE_SIZE +
-                                           offset +
-                                           bdata->node_boot_start);
+                                           offset + node_boot_start);
                         bdata->last_pos = start + areasize - 1;
                         bdata->last_offset = remaining_size;
                 }
@@ -312,14 +356,14 @@ found:
         } else {
                 bdata->last_pos = start + areasize - 1;
                 bdata->last_offset = size & ~PAGE_MASK;
-                ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start);
+                ret = phys_to_virt(start * PAGE_SIZE + node_boot_start);
         }
 
         /*
          * Reserve the area now:
          */
         for (i = start; i < start + areasize; i++)
-                if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map)))
+                if (unlikely(test_and_set_bit(i, node_bootmem_map)))
                         BUG();
         memset(ret, 0, size);
         return ret;
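
The other thread running through the __alloc_bootmem_core() hunks above is the aligned base: instead of carrying a page 'offset' to compensate for a node_boot_start that is not aligned to the request, the rewrite aligns node_boot_start itself and steps the bitmap pointer forward by whole longs, so preferred, eidx and last_success are all indices relative to that aligned base. Roughly this arithmetic (sample values; ALIGN() and PFN_DOWN() spelled out by hand):

    #include <stdio.h>

    #define PAGE_SHIFT    12
    #define BITS_PER_LONG 64

    int main(void)
    {
        unsigned long boot_start = 0x804000UL;   /* example: only 16 KB aligned */
        unsigned long align = 0x200000UL;        /* caller wants 2 MB alignment */

        unsigned long aligned = (boot_start + align - 1) & ~(align - 1);
        unsigned long word_skip =
                ((aligned - boot_start) >> PAGE_SHIFT) / BITS_PER_LONG;

        printf("aligned base %#lx, skip %lu bitmap words\n", aligned, word_skip);
        return 0;
    }

The in-diff comment ("supposed to be (12+6)bits alignment on x86_64 ?") records the assumption this pointer bump relies on: the skipped page count should be a multiple of BITS_PER_LONG.
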
@@ -401,6 +445,11 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn,
 void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
                                  unsigned long size, int flags)
 {
+        int ret;
+
+        ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
+        if (ret < 0)
+                return;
         reserve_bootmem_core(pgdat->bdata, physaddr, size, flags);
 }
 
@@ -426,7 +475,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 int __init reserve_bootmem(unsigned long addr, unsigned long size,
                             int flags)
 {
-        return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags);
+        bootmem_data_t *bdata;
+        int ret;
+
+        list_for_each_entry(bdata, &bdata_list, list) {
+                ret = can_reserve_bootmem_core(bdata, addr, size, flags);
+                if (ret < 0)
+                        return ret;
+        }
+        list_for_each_entry(bdata, &bdata_list, list)
+                reserve_bootmem_core(bdata, addr, size, flags);
+
+        return 0;
 }
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
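
The split into can_reserve_bootmem_core() plus reserve_bootmem_core() exists for this caller: the old code set bits as it went and rolled them back on -EBUSY, but once reserve_bootmem() walks every node in bdata_list a late failure would also have to unwind bitmaps already committed on other nodes. Checking everything first and committing second makes a cross-node reservation all-or-nothing. A self-contained model of the pattern with two fake nodes (hypothetical names, an int array instead of the bootmem bitmap):

    #include <stdio.h>

    #define NODES  2
    #define NPAGES 8                      /* pages per fake node */
    static int reserved[NODES][NPAGES];

    /* phase 1: check every node the range touches, modify nothing */
    static int can_reserve(int lo, int hi, int exclusive)
    {
        for (int pfn = lo; pfn < hi; pfn++)
            if (reserved[pfn / NPAGES][pfn % NPAGES] && exclusive)
                return -1;
        return 0;
    }

    /* phase 2: commit; by construction this cannot fail halfway */
    static void do_reserve(int lo, int hi)
    {
        for (int pfn = lo; pfn < hi; pfn++)
            reserved[pfn / NPAGES][pfn % NPAGES] = 1;
    }

    int main(void)
    {
        if (can_reserve(6, 10, 1) == 0)   /* pfn 6..9 spans node 0 and 1 */
            do_reserve(6, 10);
        printf("node 1, page 1 reserved: %d\n", reserved[1][1]);
        return 0;
    }
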
diff --git a/mm/sparse.c b/mm/sparse.c
index 98d6b39c3472..7e9191381f86 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -295,6 +295,9 @@ struct page __init *sparse_early_mem_map_alloc(unsigned long pnum)
         return NULL;
 }
 
+void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
+{
+}
 /*
  * Allocate the accumulated non-linear sections, allocate a mem_map
  * for each and record the physical to section mapping.
@@ -304,22 +307,50 @@ void __init sparse_init(void)
         unsigned long pnum;
         struct page *map;
         unsigned long *usemap;
+        unsigned long **usemap_map;
+        int size;
+
+        /*
+         * map is using big page (aka 2M in x86 64 bit)
+         * usemap is less one page (aka 24 bytes)
+         * so alloc 2M (with 2M align) and 24 bytes in turn will
+         * make next 2M slip to one more 2M later.
+         * then in big system, the memory will have a lot of holes...
+         * here try to allocate 2M pages continously.
+         *
+         * powerpc need to call sparse_init_one_section right after each
+         * sparse_early_mem_map_alloc, so allocate usemap_map at first.
+         */
+        size = sizeof(unsigned long *) * NR_MEM_SECTIONS;
+        usemap_map = alloc_bootmem(size);
+        if (!usemap_map)
+                panic("can not allocate usemap_map\n");
 
         for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
                 if (!present_section_nr(pnum))
                         continue;
+                usemap_map[pnum] = sparse_early_usemap_alloc(pnum);
+        }
 
-                map = sparse_early_mem_map_alloc(pnum);
-                if (!map)
+        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+                if (!present_section_nr(pnum))
                         continue;
 
-                usemap = sparse_early_usemap_alloc(pnum);
+                usemap = usemap_map[pnum];
                 if (!usemap)
                         continue;
 
+                map = sparse_early_mem_map_alloc(pnum);
+                if (!map)
+                        continue;
+
                 sparse_init_one_section(__nr_to_section(pnum), pnum, map,
                                                         usemap);
         }
+
+        vmemmap_populate_print_last();
+
+        free_bootmem(__pa(usemap_map), size);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
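
The reworked sparse_init() allocates every small usemap first and only then the 2 MB section mem_maps, so the large, 2 MB-aligned allocations are no longer interleaved with 24-byte ones and can stay physically contiguous; keeping usemap before mem_map per section preserves the ordering that, per the in-code comment, powerpc depends on. The interleaving problem in miniature, with a toy bump allocator (nothing here comes from the kernel):

    #include <stdio.h>

    static unsigned long next;   /* toy bump-allocator cursor */

    static unsigned long balloc(unsigned long size, unsigned long align)
    {
        unsigned long p = (next + align - 1) & ~(align - 1);
        next = p + size;
        return p;
    }

    int main(void)
    {
        const unsigned long big = 0x200000UL, small = 24UL;
        int i;

        next = 0;       /* interleaved: mem_map, usemap, mem_map, usemap, ... */
        for (i = 0; i < 4; i++) { balloc(big, big); balloc(small, 8); }
        printf("interleaved footprint: %#lx\n", next);

        next = 0;       /* batched: all usemaps first, then all mem_maps */
        for (i = 0; i < 4; i++) balloc(small, 8);
        for (i = 0; i < 4; i++) balloc(big, big);
        printf("batched footprint:     %#lx\n", next);
        return 0;
    }

Each interleaved pair pushes the next 2 MB block one extra 2 MB slot to the right, which matches the "holes" the comment in the hunk above describes.
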
