author		Ingo Molnar <mingo@elte.hu>	2008-02-13 08:37:52 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-02-26 06:55:50 -0500
commit		92cb54a37a42a41cfb2ef7f1478bfa4395198258 (patch)
tree		fab9504379894da8d941cc385abd071604f5f7bb /arch/x86
parent		1ce70c4fac3c3954bd48c035f448793867592bc0 (diff)
x86: make DEBUG_PAGEALLOC and CPA more robust
Use PF_MEMALLOC to prevent recursive calls in the DEBUG_PAGEALLOC
case. This makes the code simpler and more robust against allocation
failures.
This fixes the following fallback to non-mmconfig:
http://lkml.org/lkml/2008/2/20/551
http://bugzilla.kernel.org/show_bug.cgi?id=10083
Also, for DEBUG_PAGEALLOC=n reduce the pool size to one page.
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
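
The heart of the change is the re-entrancy guard in cpa_fill_pool(). A condensed sketch of the new function, with the refill loop trimmed (kernel-internal APIs, see the full hunk below for the real code):

static void cpa_fill_pool(struct page **ret)
{
	gfp_t gfp = GFP_KERNEL;

	/*
	 * PF_MEMALLOC doubles as a per-task re-entrancy guard: with
	 * DEBUG_PAGEALLOC every alloc_pages() goes back through
	 * kernel_map_pages() and thus through CPA, which may itself
	 * want pool pages.  A nested call simply returns instead of
	 * recursing, and the flag also signals our priority to the
	 * allocator for these pagetable pages.
	 */
	if (current->flags & PF_MEMALLOC)
		return;
	current->flags |= PF_MEMALLOC;

	/* Never sleep when called from atomic context: */
	if (in_atomic() || irqs_disabled() || debug_pagealloc)
		gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;

	/* ... refill page_pool and/or hand a page back via *ret ... */

	current->flags &= ~PF_MEMALLOC;
}

For DEBUG_PAGEALLOC=n kernels the pool shrinks to a single emergency page, since cpa_fill_pool() can now hand a freshly allocated page straight to split_large_page() through the new ret argument.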
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/mm/pageattr.c	84
1 files changed, 51 insertions, 33 deletions
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc21ce6..14e48b5a94ba 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -44,6 +44,12 @@ static inline unsigned long highmap_end_pfn(void)
 
 #endif
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -355,45 +361,48 @@ out_unlock:
 
 static LIST_HEAD(page_pool);
 static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
 
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
 {
-	struct page *p;
 	gfp_t gfp = GFP_KERNEL;
+	unsigned long flags;
+	struct page *p;
 
-	/* Do not allocate from interrupt context */
-	if (in_irq() || irqs_disabled())
-		return;
 	/*
-	 * Check unlocked. I does not matter when we have one more
-	 * page in the pool. The bit lock avoids recursive pool
-	 * allocations:
+	 * Avoid recursion (on debug-pagealloc) and also signal
+	 * our priority to get to these pagetables:
 	 */
-	if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+	if (current->flags & PF_MEMALLOC)
 		return;
+	current->flags |= PF_MEMALLOC;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
 	/*
-	 * We could do:
-	 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
-	 * but this fails on !PREEMPT kernels
+	 * Allocate atomically from atomic contexts:
 	 */
-	gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+	if (in_atomic() || irqs_disabled() || debug_pagealloc)
+		gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
 
-	while (pool_pages < pool_size) {
+	while (pool_pages < pool_size || (ret && !*ret)) {
 		p = alloc_pages(gfp, 0);
 		if (!p) {
 			pool_failed++;
 			break;
 		}
-		spin_lock_irq(&pgd_lock);
+		/*
+		 * If the call site needs a page right now, provide it:
+		 */
+		if (ret && !*ret) {
+			*ret = p;
+			continue;
+		}
+		spin_lock_irqsave(&pgd_lock, flags);
 		list_add(&p->lru, &page_pool);
 		pool_pages++;
-		spin_unlock_irq(&pgd_lock);
+		spin_unlock_irqrestore(&pgd_lock, flags);
 	}
-	clear_bit_unlock(0, &pool_refill);
+
+	current->flags &= ~PF_MEMALLOC;
 }
 
 #define SHIFT_MB		(20 - PAGE_SHIFT)
@@ -414,11 +423,15 @@ void __init cpa_init(void)
 	 * GiB. Shift MiB to Gib and multiply the result by
 	 * POOL_PAGES_PER_GB:
 	 */
-	gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
-	pool_size = POOL_PAGES_PER_GB * gb;
+	if (debug_pagealloc) {
+		gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+		pool_size = POOL_PAGES_PER_GB * gb;
+	} else {
+		pool_size = 1;
+	}
 	pool_low = pool_size;
 
-	cpa_fill_pool();
+	cpa_fill_pool(NULL);
 	printk(KERN_DEBUG
 	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
 	       pool_pages, pool_size);
@@ -440,16 +453,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	spin_lock_irqsave(&pgd_lock, flags);
 	if (list_empty(&page_pool)) {
 		spin_unlock_irqrestore(&pgd_lock, flags);
-		return -ENOMEM;
+		base = NULL;
+		cpa_fill_pool(&base);
+		if (!base)
+			return -ENOMEM;
+		spin_lock_irqsave(&pgd_lock, flags);
+	} else {
+		base = list_first_entry(&page_pool, struct page, lru);
+		list_del(&base->lru);
+		pool_pages--;
+
+		if (pool_pages < pool_low)
+			pool_low = pool_pages;
 	}
 
-	base = list_first_entry(&page_pool, struct page, lru);
-	list_del(&base->lru);
-	pool_pages--;
-
-	if (pool_pages < pool_low)
-		pool_low = pool_pages;
-
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
@@ -734,7 +751,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 		cpa_flush_all(cache);
 
 out:
-	cpa_fill_pool();
+	cpa_fill_pool(NULL);
+
 	return ret;
 }
 
@@ -897,7 +915,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	 * Try to refill the page pool here. We can do this only after
 	 * the tlb flush.
 	 */
-	cpa_fill_pool();
+	cpa_fill_pool(NULL);
 }
 
 #ifdef CONFIG_HIBERNATION