Diffstat (limited to 'arch/x86/mm/pageattr.c')
-rw-r--r--	arch/x86/mm/pageattr.c	105
1 file changed, 62 insertions, 43 deletions
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 464d8fc21ce6..7049294fb469 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -26,6 +26,7 @@ struct cpa_data {
 	pgprot_t mask_set;
 	pgprot_t mask_clr;
 	int numpages;
+	int processed;
 	int flushtlb;
 	unsigned long pfn;
 };
@@ -44,6 +45,12 @@ static inline unsigned long highmap_end_pfn(void)
 
 #endif
 
+#ifdef CONFIG_DEBUG_PAGEALLOC
+# define debug_pagealloc 1
+#else
+# define debug_pagealloc 0
+#endif
+
 static inline int
 within(unsigned long addr, unsigned long start, unsigned long end)
 {
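The hunk above replaces scattered #ifdef CONFIG_DEBUG_PAGEALLOC blocks with a compile-time 0/1 constant, so later code can write ordinary `if (debug_pagealloc)` tests that the compiler folds away when the option is off, while both branches still get parsed and type-checked. A minimal standalone illustration of the same pattern (not kernel code; the printf bodies are stand-ins):

#include <stdio.h>

/* Compile-time flag folded to a constant, mirroring the kernel's
 * "#define debug_pagealloc 0/1" trick above. */
#ifdef CONFIG_DEBUG_PAGEALLOC
# define debug_pagealloc 1
#else
# define debug_pagealloc 0
#endif

int main(void)
{
	/* Both branches are compiled and checked, but the dead one is
	 * eliminated because debug_pagealloc is a constant. */
	if (debug_pagealloc)
		printf("debug page allocator enabled\n");
	else
		printf("debug page allocator disabled\n");
	return 0;
}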
@@ -284,8 +291,8 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 */
 	nextpage_addr = (address + psize) & pmask;
 	numpages = (nextpage_addr - address) >> PAGE_SHIFT;
-	if (numpages < cpa->numpages)
-		cpa->numpages = numpages;
+	if (numpages < cpa->processed)
+		cpa->processed = numpages;
 
 	/*
 	 * We are safe now. Check whether the new pgprot is the same:
@@ -312,7 +319,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 */
 	addr = address + PAGE_SIZE;
 	pfn++;
-	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE, pfn++) {
+	for (i = 1; i < cpa->processed; i++, addr += PAGE_SIZE, pfn++) {
 		pgprot_t chk_prot = static_protections(new_prot, addr, pfn);
 
 		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
@@ -336,7 +343,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	 * that we limited the number of possible pages already to
 	 * the number of pages in the large page.
 	 */
-	if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
+	if (address == (nextpage_addr - psize) && cpa->processed == numpages) {
 		/*
 		 * The address is aligned and the number of pages
 		 * covers the full page.
@@ -355,45 +362,48 @@ out_unlock:
 
 static LIST_HEAD(page_pool);
 static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed, pool_refill;
+static unsigned long pool_used, pool_failed;
 
-static void cpa_fill_pool(void)
+static void cpa_fill_pool(struct page **ret)
 {
-	struct page *p;
 	gfp_t gfp = GFP_KERNEL;
+	unsigned long flags;
+	struct page *p;
 
-	/* Do not allocate from interrupt context */
-	if (in_irq() || irqs_disabled())
-		return;
 	/*
-	 * Check unlocked. I does not matter when we have one more
-	 * page in the pool. The bit lock avoids recursive pool
-	 * allocations:
+	 * Avoid recursion (on debug-pagealloc) and also signal
+	 * our priority to get to these pagetables:
 	 */
-	if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+	if (current->flags & PF_MEMALLOC)
 		return;
+	current->flags |= PF_MEMALLOC;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
 	/*
-	 * We could do:
-	 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
-	 * but this fails on !PREEMPT kernels
+	 * Allocate atomically from atomic contexts:
 	 */
-	gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-#endif
+	if (in_atomic() || irqs_disabled() || debug_pagealloc)
+		gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
 
-	while (pool_pages < pool_size) {
+	while (pool_pages < pool_size || (ret && !*ret)) {
 		p = alloc_pages(gfp, 0);
 		if (!p) {
 			pool_failed++;
 			break;
 		}
-		spin_lock_irq(&pgd_lock);
+		/*
+		 * If the call site needs a page right now, provide it:
+		 */
+		if (ret && !*ret) {
+			*ret = p;
+			continue;
+		}
+		spin_lock_irqsave(&pgd_lock, flags);
 		list_add(&p->lru, &page_pool);
 		pool_pages++;
-		spin_unlock_irq(&pgd_lock);
+		spin_unlock_irqrestore(&pgd_lock, flags);
 	}
-	clear_bit_unlock(0, &pool_refill);
+
+	current->flags &= ~PF_MEMALLOC;
 }
 
 #define SHIFT_MB		(20 - PAGE_SHIFT)
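The rewritten cpa_fill_pool() drops the pool_refill bit lock and the "never from IRQ context" bail-out. Instead it guards against recursion with PF_MEMALLOC on the current task, switches to atomic allocations when called from atomic context or under debug-pagealloc, and can hand a freshly allocated page straight back to the caller through the new `ret` argument instead of parking it on the pool. The following userspace sketch models only that refill loop and the direct handoff; the `refilling` flag, the fixed-size pool and malloc() are simplified stand-ins, not the kernel's types or locking:

#include <stdio.h>
#include <stdlib.h>

#define POOL_SIZE 4

static void *page_pool[POOL_SIZE];
static int pool_pages;
static int refilling;   /* stand-in for the PF_MEMALLOC recursion guard */

/*
 * Refill the pool; if 'ret' points at a NULL pointer, hand one fresh
 * allocation straight back to the caller instead of parking it.
 */
static void fill_pool(void **ret)
{
	if (refilling)  /* avoid recursing into ourselves */
		return;
	refilling = 1;

	while (pool_pages < POOL_SIZE || (ret && !*ret)) {
		void *p = malloc(4096);

		if (!p)
			break;
		if (ret && !*ret) {     /* caller needs a page right now */
			*ret = p;
			continue;
		}
		page_pool[pool_pages++] = p;
	}

	refilling = 0;
}

int main(void)
{
	void *page = NULL;

	fill_pool(NULL);        /* plain refill, like cpa_fill_pool(NULL) */
	fill_pool(&page);       /* refill and hand one page back directly */
	printf("pool=%d direct=%p\n", pool_pages, page);

	free(page);
	while (pool_pages > 0)
		free(page_pool[--pool_pages]);
	return 0;
}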
@@ -414,11 +424,15 @@ void __init cpa_init(void)
 	 * GiB. Shift MiB to Gib and multiply the result by
 	 * POOL_PAGES_PER_GB:
 	 */
-	gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
-	pool_size = POOL_PAGES_PER_GB * gb;
+	if (debug_pagealloc) {
+		gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+		pool_size = POOL_PAGES_PER_GB * gb;
+	} else {
+		pool_size = 1;
+	}
 	pool_low = pool_size;
 
-	cpa_fill_pool();
+	cpa_fill_pool(NULL);
 	printk(KERN_DEBUG
 	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
 	       pool_pages, pool_size);
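cpa_init() now only sizes the pool from total RAM when debug_pagealloc is active; otherwise a single spare page is kept, since splits are rare and the pool can be refilled on demand. As a rough worked example of the pages-to-GiB conversion (SHIFT_MB_GB, ROUND_MB_GB and POOL_PAGES_PER_GB are defined earlier in pageattr.c and are not part of this diff, so the values below are illustrative assumptions only):

#include <stdio.h>

/* Illustrative stand-ins for constants defined elsewhere in pageattr.c. */
#define PAGE_SHIFT        12
#define SHIFT_MB          (20 - PAGE_SHIFT)  /* pages -> MiB */
#define SHIFT_MB_GB       10                 /* MiB -> GiB, assumed */
#define ROUND_MB_GB       512                /* round to nearest GiB, assumed */
#define POOL_PAGES_PER_GB 16                 /* assumed pool density */

int main(void)
{
	unsigned long totalram = 1UL << 20;  /* 1M pages of 4 KiB = 4 GiB */
	unsigned long gb, pool_size;

	gb = ((totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
	pool_size = POOL_PAGES_PER_GB * gb;

	/* 4096 MiB + 512, shifted by 10 -> 4 GiB -> 64 pool pages here */
	printf("gb=%lu pool_size=%lu\n", gb, pool_size);
	return 0;
}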
@@ -440,16 +454,20 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	spin_lock_irqsave(&pgd_lock, flags);
 	if (list_empty(&page_pool)) {
 		spin_unlock_irqrestore(&pgd_lock, flags);
-		return -ENOMEM;
+		base = NULL;
+		cpa_fill_pool(&base);
+		if (!base)
+			return -ENOMEM;
+		spin_lock_irqsave(&pgd_lock, flags);
+	} else {
+		base = list_first_entry(&page_pool, struct page, lru);
+		list_del(&base->lru);
+		pool_pages--;
+
+		if (pool_pages < pool_low)
+			pool_low = pool_pages;
 	}
 
-	base = list_first_entry(&page_pool, struct page, lru);
-	list_del(&base->lru);
-	pool_pages--;
-
-	if (pool_pages < pool_low)
-		pool_low = pool_pages;
-
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
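With the pool no longer kept aggressively full, split_large_page() can find it empty. Instead of failing with -ENOMEM, it now drops pgd_lock, asks cpa_fill_pool() for one page directly via `base`, and retakes the lock before the existing race check runs. A minimal sketch of that drop-lock, allocate, relock shape, using a pthread mutex and malloc() as stand-ins for the kernel spinlock and page allocator:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static void *page_pool[4];
static int pool_pages;

/* Hypothetical direct allocator used when the pool is empty. */
static void alloc_direct(void **ret)
{
	if (!*ret)
		*ret = malloc(4096);
}

static int split_one(void)
{
	void *base = NULL;

	pthread_mutex_lock(&pool_lock);
	if (pool_pages == 0) {
		/* Pool empty: drop the lock, allocate directly, retake it. */
		pthread_mutex_unlock(&pool_lock);
		alloc_direct(&base);
		if (!base)
			return -1;      /* -ENOMEM in the kernel version */
		pthread_mutex_lock(&pool_lock);
	} else {
		base = page_pool[--pool_pages];
	}

	/* ... the real code re-checks for races here, then uses 'base' ... */
	pthread_mutex_unlock(&pool_lock);
	free(base);
	return 0;
}

int main(void)
{
	printf("split_one() -> %d\n", split_one());
	return 0;
}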
@@ -555,7 +573,7 @@ repeat:
 			set_pte_atomic(kpte, new_pte);
 			cpa->flushtlb = 1;
 		}
-		cpa->numpages = 1;
+		cpa->processed = 1;
 		return 0;
 	}
 
@@ -566,7 +584,7 @@ repeat:
 	do_split = try_preserve_large_page(kpte, address, cpa);
 	/*
 	 * When the range fits into the existing large page,
-	 * return. cp->numpages and cpa->tlbflush have been updated in
+	 * return. cp->processed and cpa->tlbflush have been updated in
 	 * try_large_page:
 	 */
 	if (do_split <= 0)
@@ -645,7 +663,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 * Store the remaining nr of pages for the large page
 		 * preservation check.
 		 */
-		cpa->numpages = numpages;
+		cpa->numpages = cpa->processed = numpages;
 
 		ret = __change_page_attr(cpa, checkalias);
 		if (ret)
@@ -662,9 +680,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 * CPA operation. Either a large page has been
 		 * preserved or a single page update happened.
 		 */
-		BUG_ON(cpa->numpages > numpages);
-		numpages -= cpa->numpages;
-		cpa->vaddr += cpa->numpages * PAGE_SIZE;
+		BUG_ON(cpa->processed > numpages);
+		numpages -= cpa->processed;
+		cpa->vaddr += cpa->processed * PAGE_SIZE;
 	}
 	return 0;
 }
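The driver loop in __change_page_attr_set_clr() now seeds both fields before each iteration and advances by `processed` afterwards: numpages stays the caller's remaining request, while processed reports how many pages the inner call actually covered (the whole large page when it could be preserved, otherwise one). A standalone sketch of the same chunked-progress loop; change_one() is a fake handler that only imitates the large-page case:

#include <stdio.h>

#define PAGE_SIZE 4096UL

struct cpa_like {
	unsigned long vaddr;
	int numpages;    /* remaining request, for the preservation check */
	int processed;   /* how many pages this iteration actually covered */
};

/* Hypothetical stand-in for __change_page_attr(): pretend every address
 * aligned to 512 pages sits in a large page covering the request. */
static int change_one(struct cpa_like *cpa)
{
	unsigned long idx = cpa->vaddr / PAGE_SIZE;

	if ((idx % 512) == 0 && cpa->processed >= 512)
		cpa->processed = 512;   /* whole large page handled at once */
	else
		cpa->processed = 1;     /* single 4k page updated */
	return 0;
}

int main(void)
{
	struct cpa_like cpa = { .vaddr = 0 };
	int numpages = 1024;

	while (numpages) {
		cpa.numpages = cpa.processed = numpages;
		if (change_one(&cpa))
			break;
		numpages -= cpa.processed;
		cpa.vaddr += cpa.processed * PAGE_SIZE;
		printf("advanced by %d, %d left\n", cpa.processed, numpages);
	}
	return 0;
}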
@@ -734,7 +752,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 		cpa_flush_all(cache);
 
 out:
-	cpa_fill_pool();
+	cpa_fill_pool(NULL);
+
 	return ret;
 }
 
@@ -897,7 +916,7 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	 * Try to refill the page pool here. We can do this only after
 	 * the tlb flush.
 	 */
-	cpa_fill_pool();
+	cpa_fill_pool(NULL);
 }
 
 #ifdef CONFIG_HIBERNATION