Diffstat (limited to 'arch/x86/mm/pageattr.c')

-rw-r--r--  arch/x86/mm/pageattr.c | 105
1 files changed, 62 insertions, 43 deletions
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc21ce6..7049294fb469 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -26,6 +26,7 @@ struct cpa_data {
         pgprot_t mask_set;
         pgprot_t mask_clr;
         int numpages;
+        int processed;
         int flushtlb;
         unsigned long pfn;
 };
@@ -44,6 +45,12 @@ static inline unsigned long highmap_end_pfn(void)
 
 #endif
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
@@ -284,8 +291,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
          */
         nextpage_addr = (address + psize) & pmask;
         numpages = (nextpage_addr - address) >> PAGE_SHIFT;
-        if (numpages < cpa->numpages)
-                cpa->numpages = numpages;
+        if (numpages < cpa->processed)
+                cpa->processed = numpages;
 
         /*
          * We are safe now. Check whether the new pgprot is the same:
@@ -312,7 +319,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
          */
         addr = address + PAGE_SIZE;
         pfn++;
-        for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
+        for (i = 1; i < cpa->processed; i++, addr += PAGE_SIZE, pfn++) {
                 pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
 
                 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
@@ -336,7 +343,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
          * that we limited the number of possible pages already to
          * the number of pages in the large page.
          */
-        if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+        if (address == (nextpage_addr - psize) && cpa->processed == numpages) {
                 /*
                  * The address is aligned and the number of pages
                  * covers the full page.
@@ -355,45 +362,48 @@ out_unlock:
 
 static LIST_HEAD(page_pool);
 static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
 
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
 {
-        struct page *p;
         gfp_t gfp = GFP_KERNEL;
+        unsigned long flags;
+        struct page *p;
 
-        /* Do not allocate from interrupt context */
-        if (in_irq() || irqs_disabled())
-                return;
         /*
-         * Check unlocked. I does not matter when we have one more
-         * page in the pool. The bit lock avoids recursive pool
-         * allocations:
+         * Avoid recursion (on debug-pagealloc) and also signal
+         * our priority to get to these pagetables:
          */
-        if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+        if (current->flags & PF_MEMALLOC)
                 return;
+        current->flags |= PF_MEMALLOC;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
         /*
-         * We could do:
-         * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
-         * but this fails on !PREEMPT kernels
+         * Allocate atomically from atomic contexts:
          */
-        gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+        if (in_atomic() || irqs_disabled() || debug_pagealloc)
+                gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
 
-        while (pool_pages < pool_size) {
+        while (pool_pages < pool_size || (ret && !*ret)) {
                 p = alloc_pages(gfp, 0);
                 if (!p) {
                         pool_failed++;
                         break;
                 }
-                spin_lock_irq(&pgd_lock);
+                /*
+                 * If the call site needs a page right now, provide it:
+                 */
+                if (ret && !*ret) {
+                        *ret = p;
+                        continue;
+                }
+                spin_lock_irqsave(&pgd_lock, flags);
                 list_add(&p->lru, &page_pool);
                 pool_pages++;
-                spin_unlock_irq(&pgd_lock);
+                spin_unlock_irqrestore(&pgd_lock, flags);
         }
-        clear_bit_unlock(0, &pool_refill);
+
+        current->flags &= ~PF_MEMALLOC;
 }
 
 #define SHIFT_MB (20 - PAGE_SHIFT)
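For readers without the kernel tree at hand, the reworked cpa_fill_pool() above follows a simple pattern: guard against re-entry with a per-task flag (PF_MEMALLOC), keep allocating until the pool is stocked, and hand one allocation straight back when the caller passes in a slot for it. A minimal user-space sketch of that pattern, with hypothetical names, plain malloc() and a boolean flag standing in for alloc_pages() and PF_MEMALLOC, and all locking omitted:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define POOL_SIZE 4

static void *pool[POOL_SIZE];   /* spare buffers, analogous to page_pool  */
static size_t pool_pages;       /* how many spares are currently stocked  */
static bool refilling;          /* stands in for the PF_MEMALLOC guard    */

/*
 * Refill the pool; if 'ret' is non-NULL and empty, hand the first
 * successful allocation directly to the caller instead of stocking it.
 */
static void pool_fill(void **ret)
{
        if (refilling)          /* avoid recursing into ourselves */
                return;
        refilling = true;

        while (pool_pages < POOL_SIZE || (ret && !*ret)) {
                void *p = malloc(4096);
                if (!p)
                        break;          /* allocation failed, give up for now */
                if (ret && !*ret) {     /* call site needs one right now */
                        *ret = p;
                        continue;
                }
                pool[pool_pages++] = p;
        }

        refilling = false;
}

int main(void)
{
        void *page = NULL;

        pool_fill(NULL);        /* pre-stock the pool, like cpa_init()       */
        pool_fill(&page);       /* pool already full: still hands one back   */
        printf("stocked %zu spare pages, got %p directly\n", pool_pages, page);

        free(page);
        while (pool_pages)
                free(pool[--pool_pages]);
        return 0;
}

The direct-return slot is what lets split_large_page() further down fall back to an on-demand allocation when the pool is empty.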
@@ -414,11 +424,15 @@ void __init cpa_init(void)
          * GiB. Shift MiB to Gib and multiply the result by
          * POOL_PAGES_PER_GB:
          */
-        gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
-        pool_size = POOL_PAGES_PER_GB * gb;
+        if (debug_pagealloc) {
+                gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+                pool_size = POOL_PAGES_PER_GB * gb;
+        } else {
+                pool_size = 1;
+        }
         pool_low = pool_size;
 
-        cpa_fill_pool();
+        cpa_fill_pool(NULL);
         printk(KERN_DEBUG
                "CPA: page pool initialized %lu of %lu pages preallocated\n",
                pool_pages, pool_size);
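The sizing logic in cpa_init() converts si.totalram (a page count) to MiB, rounds to the nearest GiB, and scales by POOL_PAGES_PER_GB; without CONFIG_DEBUG_PAGEALLOC the pool is cut down to a single emergency page. A rough worked example, assuming 4 KiB pages; the constant values below mirror the defines elsewhere in pageattr.c but are assumptions, not part of this patch:

#include <stdio.h>

/* Assumed values, mirroring (not quoting) the defines in pageattr.c */
#define PAGE_SHIFT        12
#define SHIFT_MB          (20 - PAGE_SHIFT)   /* pages -> MiB */
#define ROUND_MB_GB       512                 /* round to nearest GiB */
#define SHIFT_MB_GB       10                  /* MiB -> GiB */
#define POOL_PAGES_PER_GB 16

int main(void)
{
        unsigned long totalram = 4UL << (30 - PAGE_SHIFT);  /* 4 GiB of RAM, in pages */
        unsigned long gb = ((totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
        unsigned long pool_size = POOL_PAGES_PER_GB * gb;

        /* debug_pagealloc case: 4 GiB -> gb = 4 -> pool_size = 64 pages */
        printf("gb=%lu pool_size=%lu\n", gb, pool_size);
        return 0;
}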
@@ -440,16 +454,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
         spin_lock_irqsave(&pgd_lock, flags);
         if (list_empty(&page_pool)) {
                 spin_unlock_irqrestore(&pgd_lock, flags);
-                return -ENOMEM;
+                base = NULL;
+                cpa_fill_pool(&base);
+                if (!base)
+                        return -ENOMEM;
+                spin_lock_irqsave(&pgd_lock, flags);
+        } else {
+                base = list_first_entry(&page_pool, struct page, lru);
+                list_del(&base->lru);
+                pool_pages--;
+
+                if (pool_pages < pool_low)
+                        pool_low = pool_pages;
         }
 
-        base = list_first_entry(&page_pool, struct page, lru);
-        list_del(&base->lru);
-        pool_pages--;
-
-        if (pool_pages < pool_low)
-                pool_low = pool_pages;
-
         /*
          * Check for races, another CPU might have split this page
          * up for us already:
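On the consumer side, split_large_page() now has two paths: take a spare page from page_pool under pgd_lock, or, with the lock dropped, ask cpa_fill_pool() for one page directly and retake the lock. Continuing the user-space sketch above (same hypothetical pool_fill() and pool[] globals, still with no real locking), the shape of that fallback is roughly:

/* Continuing the sketch above: take a spare, or fall back to a direct fill. */
static void *pool_take(void)
{
        void *base = NULL;

        if (pool_pages == 0) {
                /* pool empty: drop the (imaginary) lock and allocate directly */
                pool_fill(&base);
                if (!base)
                        return NULL;            /* genuinely out of memory */
        } else {
                base = pool[--pool_pages];      /* normal fast path */
        }
        return base;
}

A caller would invoke pool_take() wherever the real code pulls a page out of page_pool.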
@@ -555,7 +573,7 @@ repeat:
                         set_pte_atomic(kpte, new_pte);
                         cpa->flushtlb = 1;
                 }
-                cpa->numpages = 1;
+                cpa->processed = 1;
                 return 0;
         }
 
@@ -566,7 +584,7 @@ repeat:
         do_split = try_preserve_large_page(kpte, address, cpa);
         /*
          * When the range fits into the existing large page,
-         * return. cp->numpages and cpa->tlbflush have been updated in
+         * return. cp->processed and cpa->tlbflush have been updated in
          * try_large_page:
          */
         if (do_split <= 0)
@@ -645,7 +663,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                  * Store the remaining nr of pages for the large page
                  * preservation check.
                  */
-                cpa->numpages = numpages;
+                cpa->numpages = cpa->processed = numpages;
 
                 ret = __change_page_attr(cpa, checkalias);
                 if (ret)
@@ -662,9 +680,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                  * CPA operation. Either a large page has been
                  * preserved or a single page update happened.
                  */
-                BUG_ON(cpa->numpages > numpages);
-                numpages -= cpa->numpages;
-                cpa->vaddr += cpa->numpages * PAGE_SIZE;
+                BUG_ON(cpa->processed > numpages);
+                numpages -= cpa->processed;
+                cpa->vaddr += cpa->processed * PAGE_SIZE;
         }
         return 0;
 }
@@ -734,7 +752,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
                 cpa_flush_all(cache);
 
 out:
-        cpa_fill_pool();
+        cpa_fill_pool(NULL);
+
         return ret;
 }
 
@@ -897,7 +916,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
          * Try to refill the page pool here. We can do this only after
          * the tlb flush.
          */
-        cpa_fill_pool();
+        cpa_fill_pool(NULL);
 }
 
 #ifdef CONFIG_HIBERNATION