diff options
Diffstat (limited to 'mm/bootmem.c')
-rw-r--r-- | mm/bootmem.c | 196 |
1 files changed, 144 insertions, 52 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 2ccea700968f..e8fb927392b9 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -111,44 +111,74 @@ static unsigned long __init init_bootmem_core(pg_data_t *pgdat, | |||
111 | * might be used for boot-time allocations - or it might get added | 111 | * might be used for boot-time allocations - or it might get added |
112 | * to the free page pool later on. | 112 | * to the free page pool later on. |
113 | */ | 113 | */ |
114 | static int __init reserve_bootmem_core(bootmem_data_t *bdata, | 114 | static int __init can_reserve_bootmem_core(bootmem_data_t *bdata, |
115 | unsigned long addr, unsigned long size, int flags) | 115 | unsigned long addr, unsigned long size, int flags) |
116 | { | 116 | { |
117 | unsigned long sidx, eidx; | 117 | unsigned long sidx, eidx; |
118 | unsigned long i; | 118 | unsigned long i; |
119 | int ret; | 119 | |
120 | BUG_ON(!size); | ||
121 | |||
122 | /* out of range, don't hold other */ | ||
123 | if (addr + size < bdata->node_boot_start || | ||
124 | PFN_DOWN(addr) > bdata->node_low_pfn) | ||
125 | return 0; | ||
120 | 126 | ||
121 | /* | 127 | /* |
122 | * round up, partially reserved pages are considered | 128 | * Round up to index to the range. |
123 | * fully reserved. | ||
124 | */ | 129 | */ |
130 | if (addr > bdata->node_boot_start) | ||
131 | sidx= PFN_DOWN(addr - bdata->node_boot_start); | ||
132 | else | ||
133 | sidx = 0; | ||
134 | |||
135 | eidx = PFN_UP(addr + size - bdata->node_boot_start); | ||
136 | if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) | ||
137 | eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); | ||
138 | |||
139 | for (i = sidx; i < eidx; i++) { | ||
140 | if (test_bit(i, bdata->node_bootmem_map)) { | ||
141 | if (flags & BOOTMEM_EXCLUSIVE) | ||
142 | return -EBUSY; | ||
143 | } | ||
144 | } | ||
145 | |||
146 | return 0; | ||
147 | |||
148 | } | ||
149 | |||
150 | static void __init reserve_bootmem_core(bootmem_data_t *bdata, | ||
151 | unsigned long addr, unsigned long size, int flags) | ||
152 | { | ||
153 | unsigned long sidx, eidx; | ||
154 | unsigned long i; | ||
155 | |||
125 | BUG_ON(!size); | 156 | BUG_ON(!size); |
126 | BUG_ON(PFN_DOWN(addr) >= bdata->node_low_pfn); | ||
127 | BUG_ON(PFN_UP(addr + size) > bdata->node_low_pfn); | ||
128 | BUG_ON(addr < bdata->node_boot_start); | ||
129 | 157 | ||
130 | sidx = PFN_DOWN(addr - bdata->node_boot_start); | 158 | /* out of range */ |
159 | if (addr + size < bdata->node_boot_start || | ||
160 | PFN_DOWN(addr) > bdata->node_low_pfn) | ||
161 | return; | ||
162 | |||
163 | /* | ||
164 | * Round up to index to the range. | ||
165 | */ | ||
166 | if (addr > bdata->node_boot_start) | ||
167 | sidx= PFN_DOWN(addr - bdata->node_boot_start); | ||
168 | else | ||
169 | sidx = 0; | ||
170 | |||
131 | eidx = PFN_UP(addr + size - bdata->node_boot_start); | 171 | eidx = PFN_UP(addr + size - bdata->node_boot_start); |
172 | if (eidx > bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start)) | ||
173 | eidx = bdata->node_low_pfn - PFN_DOWN(bdata->node_boot_start); | ||
132 | 174 | ||
133 | for (i = sidx; i < eidx; i++) | 175 | for (i = sidx; i < eidx; i++) { |
134 | if (test_and_set_bit(i, bdata->node_bootmem_map)) { | 176 | if (test_and_set_bit(i, bdata->node_bootmem_map)) { |
135 | #ifdef CONFIG_DEBUG_BOOTMEM | 177 | #ifdef CONFIG_DEBUG_BOOTMEM |
136 | printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); | 178 | printk("hm, page %08lx reserved twice.\n", i*PAGE_SIZE); |
137 | #endif | 179 | #endif |
138 | if (flags & BOOTMEM_EXCLUSIVE) { | ||
139 | ret = -EBUSY; | ||
140 | goto err; | ||
141 | } | ||
142 | } | 180 | } |
143 | 181 | } | |
144 | return 0; | ||
145 | |||
146 | err: | ||
147 | /* unreserve memory we accidentally reserved */ | ||
148 | for (i--; i >= sidx; i--) | ||
149 | clear_bit(i, bdata->node_bootmem_map); | ||
150 | |||
151 | return ret; | ||
152 | } | 182 | } |
153 | 183 | ||
154 | static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, | 184 | static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, |
@@ -206,9 +236,11 @@ void * __init | |||
206 | __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, | 236 | __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, |
207 | unsigned long align, unsigned long goal, unsigned long limit) | 237 | unsigned long align, unsigned long goal, unsigned long limit) |
208 | { | 238 | { |
209 | unsigned long offset, remaining_size, areasize, preferred; | 239 | unsigned long areasize, preferred; |
210 | unsigned long i, start = 0, incr, eidx, end_pfn; | 240 | unsigned long i, start = 0, incr, eidx, end_pfn; |
211 | void *ret; | 241 | void *ret; |
242 | unsigned long node_boot_start; | ||
243 | void *node_bootmem_map; | ||
212 | 244 | ||
213 | if (!size) { | 245 | if (!size) { |
214 | printk("__alloc_bootmem_core(): zero-sized request\n"); | 246 | printk("__alloc_bootmem_core(): zero-sized request\n"); |
@@ -216,70 +248,83 @@ __alloc_bootmem_core(struct bootmem_data *bdata, unsigned long size, | |||
216 | } | 248 | } |
217 | BUG_ON(align & (align-1)); | 249 | BUG_ON(align & (align-1)); |
218 | 250 | ||
219 | if (limit && bdata->node_boot_start >= limit) | ||
220 | return NULL; | ||
221 | |||
222 | /* on nodes without memory - bootmem_map is NULL */ | 251 | /* on nodes without memory - bootmem_map is NULL */ |
223 | if (!bdata->node_bootmem_map) | 252 | if (!bdata->node_bootmem_map) |
224 | return NULL; | 253 | return NULL; |
225 | 254 | ||
255 | /* bdata->node_boot_start is supposed to be (12+6)bits alignment on x86_64 ? */ | ||
256 | node_boot_start = bdata->node_boot_start; | ||
257 | node_bootmem_map = bdata->node_bootmem_map; | ||
258 | if (align) { | ||
259 | node_boot_start = ALIGN(bdata->node_boot_start, align); | ||
260 | if (node_boot_start > bdata->node_boot_start) | ||
261 | node_bootmem_map = (unsigned long *)bdata->node_bootmem_map + | ||
262 | PFN_DOWN(node_boot_start - bdata->node_boot_start)/BITS_PER_LONG; | ||
263 | } | ||
264 | |||
265 | if (limit && node_boot_start >= limit) | ||
266 | return NULL; | ||
267 | |||
226 | end_pfn = bdata->node_low_pfn; | 268 | end_pfn = bdata->node_low_pfn; |
227 | limit = PFN_DOWN(limit); | 269 | limit = PFN_DOWN(limit); |
228 | if (limit && end_pfn > limit) | 270 | if (limit && end_pfn > limit) |
229 | end_pfn = limit; | 271 | end_pfn = limit; |
230 | 272 | ||
231 | eidx = end_pfn - PFN_DOWN(bdata->node_boot_start); | 273 | eidx = end_pfn - PFN_DOWN(node_boot_start); |
232 | offset = 0; | ||
233 | if (align && (bdata->node_boot_start & (align - 1UL)) != 0) | ||
234 | offset = align - (bdata->node_boot_start & (align - 1UL)); | ||
235 | offset = PFN_DOWN(offset); | ||
236 | 274 | ||
237 | /* | 275 | /* |
238 | * We try to allocate bootmem pages above 'goal' | 276 | * We try to allocate bootmem pages above 'goal' |
239 | * first, then we try to allocate lower pages. | 277 | * first, then we try to allocate lower pages. |
240 | */ | 278 | */ |
241 | if (goal && goal >= bdata->node_boot_start && PFN_DOWN(goal) < end_pfn) { | 279 | preferred = 0; |
242 | preferred = goal - bdata->node_boot_start; | 280 | if (goal && PFN_DOWN(goal) < end_pfn) { |
281 | if (goal > node_boot_start) | ||
282 | preferred = goal - node_boot_start; | ||
243 | 283 | ||
244 | if (bdata->last_success >= preferred) | 284 | if (bdata->last_success > node_boot_start && |
285 | bdata->last_success - node_boot_start >= preferred) | ||
245 | if (!limit || (limit && limit > bdata->last_success)) | 286 | if (!limit || (limit && limit > bdata->last_success)) |
246 | preferred = bdata->last_success; | 287 | preferred = bdata->last_success - node_boot_start; |
247 | } else | 288 | } |
248 | preferred = 0; | ||
249 | 289 | ||
250 | preferred = PFN_DOWN(ALIGN(preferred, align)) + offset; | 290 | preferred = PFN_DOWN(ALIGN(preferred, align)); |
251 | areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; | 291 | areasize = (size + PAGE_SIZE-1) / PAGE_SIZE; |
252 | incr = align >> PAGE_SHIFT ? : 1; | 292 | incr = align >> PAGE_SHIFT ? : 1; |
253 | 293 | ||
254 | restart_scan: | 294 | restart_scan: |
255 | for (i = preferred; i < eidx; i += incr) { | 295 | for (i = preferred; i < eidx;) { |
256 | unsigned long j; | 296 | unsigned long j; |
257 | i = find_next_zero_bit(bdata->node_bootmem_map, eidx, i); | 297 | |
298 | i = find_next_zero_bit(node_bootmem_map, eidx, i); | ||
258 | i = ALIGN(i, incr); | 299 | i = ALIGN(i, incr); |
259 | if (i >= eidx) | 300 | if (i >= eidx) |
260 | break; | 301 | break; |
261 | if (test_bit(i, bdata->node_bootmem_map)) | 302 | if (test_bit(i, node_bootmem_map)) { |
303 | i += incr; | ||
262 | continue; | 304 | continue; |
305 | } | ||
263 | for (j = i + 1; j < i + areasize; ++j) { | 306 | for (j = i + 1; j < i + areasize; ++j) { |
264 | if (j >= eidx) | 307 | if (j >= eidx) |
265 | goto fail_block; | 308 | goto fail_block; |
266 | if (test_bit(j, bdata->node_bootmem_map)) | 309 | if (test_bit(j, node_bootmem_map)) |
267 | goto fail_block; | 310 | goto fail_block; |
268 | } | 311 | } |
269 | start = i; | 312 | start = i; |
270 | goto found; | 313 | goto found; |
271 | fail_block: | 314 | fail_block: |
272 | i = ALIGN(j, incr); | 315 | i = ALIGN(j, incr); |
316 | if (i == j) | ||
317 | i += incr; | ||
273 | } | 318 | } |
274 | 319 | ||
275 | if (preferred > offset) { | 320 | if (preferred > 0) { |
276 | preferred = offset; | 321 | preferred = 0; |
277 | goto restart_scan; | 322 | goto restart_scan; |
278 | } | 323 | } |
279 | return NULL; | 324 | return NULL; |
280 | 325 | ||
281 | found: | 326 | found: |
282 | bdata->last_success = PFN_PHYS(start); | 327 | bdata->last_success = PFN_PHYS(start) + node_boot_start; |
283 | BUG_ON(start >= eidx); | 328 | BUG_ON(start >= eidx); |
284 | 329 | ||
285 | /* | 330 | /* |
@@ -289,6 +334,7 @@ found: | |||
289 | */ | 334 | */ |
290 | if (align < PAGE_SIZE && | 335 | if (align < PAGE_SIZE && |
291 | bdata->last_offset && bdata->last_pos+1 == start) { | 336 | bdata->last_offset && bdata->last_pos+1 == start) { |
337 | unsigned long offset, remaining_size; | ||
292 | offset = ALIGN(bdata->last_offset, align); | 338 | offset = ALIGN(bdata->last_offset, align); |
293 | BUG_ON(offset > PAGE_SIZE); | 339 | BUG_ON(offset > PAGE_SIZE); |
294 | remaining_size = PAGE_SIZE - offset; | 340 | remaining_size = PAGE_SIZE - offset; |
@@ -297,14 +343,12 @@ found: | |||
297 | /* last_pos unchanged */ | 343 | /* last_pos unchanged */ |
298 | bdata->last_offset = offset + size; | 344 | bdata->last_offset = offset + size; |
299 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + | 345 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + |
300 | offset + | 346 | offset + node_boot_start); |
301 | bdata->node_boot_start); | ||
302 | } else { | 347 | } else { |
303 | remaining_size = size - remaining_size; | 348 | remaining_size = size - remaining_size; |
304 | areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; | 349 | areasize = (remaining_size + PAGE_SIZE-1) / PAGE_SIZE; |
305 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + | 350 | ret = phys_to_virt(bdata->last_pos * PAGE_SIZE + |
306 | offset + | 351 | offset + node_boot_start); |
307 | bdata->node_boot_start); | ||
308 | bdata->last_pos = start + areasize - 1; | 352 | bdata->last_pos = start + areasize - 1; |
309 | bdata->last_offset = remaining_size; | 353 | bdata->last_offset = remaining_size; |
310 | } | 354 | } |
@@ -312,14 +356,14 @@ found: | |||
312 | } else { | 356 | } else { |
313 | bdata->last_pos = start + areasize - 1; | 357 | bdata->last_pos = start + areasize - 1; |
314 | bdata->last_offset = size & ~PAGE_MASK; | 358 | bdata->last_offset = size & ~PAGE_MASK; |
315 | ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); | 359 | ret = phys_to_virt(start * PAGE_SIZE + node_boot_start); |
316 | } | 360 | } |
317 | 361 | ||
318 | /* | 362 | /* |
319 | * Reserve the area now: | 363 | * Reserve the area now: |
320 | */ | 364 | */ |
321 | for (i = start; i < start + areasize; i++) | 365 | for (i = start; i < start + areasize; i++) |
322 | if (unlikely(test_and_set_bit(i, bdata->node_bootmem_map))) | 366 | if (unlikely(test_and_set_bit(i, node_bootmem_map))) |
323 | BUG(); | 367 | BUG(); |
324 | memset(ret, 0, size); | 368 | memset(ret, 0, size); |
325 | return ret; | 369 | return ret; |
@@ -401,6 +445,11 @@ unsigned long __init init_bootmem_node(pg_data_t *pgdat, unsigned long freepfn, | |||
401 | void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | 445 | void __init reserve_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, |
402 | unsigned long size, int flags) | 446 | unsigned long size, int flags) |
403 | { | 447 | { |
448 | int ret; | ||
449 | |||
450 | ret = can_reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | ||
451 | if (ret < 0) | ||
452 | return; | ||
404 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); | 453 | reserve_bootmem_core(pgdat->bdata, physaddr, size, flags); |
405 | } | 454 | } |
406 | 455 | ||
@@ -412,6 +461,7 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr, | |||
412 | 461 | ||
413 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) | 462 | unsigned long __init free_all_bootmem_node(pg_data_t *pgdat) |
414 | { | 463 | { |
464 | register_page_bootmem_info_node(pgdat); | ||
415 | return free_all_bootmem_core(pgdat); | 465 | return free_all_bootmem_core(pgdat); |
416 | } | 466 | } |
417 | 467 | ||
@@ -426,7 +476,18 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages) | |||
426 | int __init reserve_bootmem(unsigned long addr, unsigned long size, | 476 | int __init reserve_bootmem(unsigned long addr, unsigned long size, |
427 | int flags) | 477 | int flags) |
428 | { | 478 | { |
429 | return reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size, flags); | 479 | bootmem_data_t *bdata; |
480 | int ret; | ||
481 | |||
482 | list_for_each_entry(bdata, &bdata_list, list) { | ||
483 | ret = can_reserve_bootmem_core(bdata, addr, size, flags); | ||
484 | if (ret < 0) | ||
485 | return ret; | ||
486 | } | ||
487 | list_for_each_entry(bdata, &bdata_list, list) | ||
488 | reserve_bootmem_core(bdata, addr, size, flags); | ||
489 | |||
490 | return 0; | ||
430 | } | 491 | } |
431 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ | 492 | #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */ |
432 | 493 | ||
@@ -484,6 +545,37 @@ void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, | |||
484 | return __alloc_bootmem(size, align, goal); | 545 | return __alloc_bootmem(size, align, goal); |
485 | } | 546 | } |
486 | 547 | ||
548 | #ifdef CONFIG_SPARSEMEM | ||
549 | void * __init alloc_bootmem_section(unsigned long size, | ||
550 | unsigned long section_nr) | ||
551 | { | ||
552 | void *ptr; | ||
553 | unsigned long limit, goal, start_nr, end_nr, pfn; | ||
554 | struct pglist_data *pgdat; | ||
555 | |||
556 | pfn = section_nr_to_pfn(section_nr); | ||
557 | goal = PFN_PHYS(pfn); | ||
558 | limit = PFN_PHYS(section_nr_to_pfn(section_nr + 1)) - 1; | ||
559 | pgdat = NODE_DATA(early_pfn_to_nid(pfn)); | ||
560 | ptr = __alloc_bootmem_core(pgdat->bdata, size, SMP_CACHE_BYTES, goal, | ||
561 | limit); | ||
562 | |||
563 | if (!ptr) | ||
564 | return NULL; | ||
565 | |||
566 | start_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr))); | ||
567 | end_nr = pfn_to_section_nr(PFN_DOWN(__pa(ptr) + size)); | ||
568 | if (start_nr != section_nr || end_nr != section_nr) { | ||
569 | printk(KERN_WARNING "alloc_bootmem failed on section %ld.\n", | ||
570 | section_nr); | ||
571 | free_bootmem_core(pgdat->bdata, __pa(ptr), size); | ||
572 | ptr = NULL; | ||
573 | } | ||
574 | |||
575 | return ptr; | ||
576 | } | ||
577 | #endif | ||
578 | |||
487 | #ifndef ARCH_LOW_ADDRESS_LIMIT | 579 | #ifndef ARCH_LOW_ADDRESS_LIMIT |
488 | #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL | 580 | #define ARCH_LOW_ADDRESS_LIMIT 0xffffffffUL |
489 | #endif | 581 | #endif |