Diffstat (limited to 'arch/x86/mm/pageattr.c')
-rw-r--r--	arch/x86/mm/pageattr.c | 140
1 file changed, 125 insertions(+), 15 deletions(-)

diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8493c855582b..440210a2277d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -8,6 +8,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
+#include <linux/interrupt.h>
 
 #include <asm/e820.h>
 #include <asm/processor.h>
@@ -191,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
  * or when the present bit is not set. Otherwise we would return a
  * pointer to a nonexisting mapping.
  */
-pte_t *lookup_address(unsigned long address, int *level)
+pte_t *lookup_address(unsigned long address, unsigned int *level)
 {
 	pgd_t *pgd = pgd_offset_k(address);
 	pud_t *pud;
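
The level argument is a pure out-parameter, so widening it from int to unsigned int only requires matching declarations at each call site. A minimal sketch of a caller after this change (hypothetical helper, not part of the patch; PG_LEVEL_2M is the enum pg_level value this kernel uses for large mappings):

	static int addr_maps_large_page(unsigned long address)
	{
		unsigned int level;
		pte_t *pte = lookup_address(address, &level);

		return pte && pte_present(*pte) && level == PG_LEVEL_2M;
	}
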
@@ -252,10 +253,11 @@ static int
 try_preserve_large_page(pte_t *kpte, unsigned long address,
 			struct cpa_data *cpa)
 {
-	unsigned long nextpage_addr, numpages, pmask, psize, flags;
+	unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
 	pte_t new_pte, old_pte, *tmp;
 	pgprot_t old_prot, new_prot;
-	int level, do_split = 1;
+	int i, do_split = 1;
+	unsigned int level;
 
 	spin_lock_irqsave(&pgd_lock, flags);
 	/*
@@ -302,6 +304,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 	new_prot = static_protections(new_prot, address);
 
 	/*
+	 * We need to check the full range, whether
+	 * static_protections() requires a different pgprot for one of
+	 * the pages in the range we try to preserve:
+	 */
+	addr = address + PAGE_SIZE;
+	for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
+		pgprot_t chk_prot = static_protections(new_prot, addr);
+
+		if (pgprot_val(chk_prot) != pgprot_val(new_prot))
+			goto out_unlock;
+	}
+
+	/*
 	 * If there are no changes, return. maxpages has been updated
 	 * above:
 	 */
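
Before this hunk, only the first page of the range was run through static_protections(), so a request could wrongly preserve a large page whose tail needs stricter protections. A concrete scenario (illustrative only, using the .rodata handling that static_protections() in this file already implements): a 2M kpte covering the end of .rodata, where _PAGE_RW must be filtered below __end_rodata but not above it:

	/* hypothetical address straddling the .rodata boundary */
	unsigned long addr = (unsigned long)__end_rodata - PAGE_SIZE;

	pgprot_t below = static_protections(PAGE_KERNEL, addr);		/* _PAGE_RW filtered */
	pgprot_t above = static_protections(PAGE_KERNEL, addr + PAGE_SIZE); /* _PAGE_RW kept */

	/* pgprot_val(below) != pgprot_val(above) -> goto out_unlock */

The mismatch makes the loop bail out with do_split still 1, so the large page is split instead of being preserved with too-permissive protections.
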
@@ -335,23 +350,103 @@ out_unlock:
 	return do_split;
 }
 
+static LIST_HEAD(page_pool);
+static unsigned long pool_size, pool_pages, pool_low;
+static unsigned long pool_used, pool_failed, pool_refill;
+
+static void cpa_fill_pool(void)
+{
+	struct page *p;
+	gfp_t gfp = GFP_KERNEL;
+
+	/* Do not allocate from interrupt context */
+	if (in_irq() || irqs_disabled())
+		return;
+	/*
+	 * Check unlocked. It does not matter when we have one more
+	 * page in the pool. The bit lock avoids recursive pool
+	 * allocations:
+	 */
+	if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
+		return;
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
+	/*
+	 * We could do:
+	 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
+	 * but this fails on !PREEMPT kernels
+	 */
+	gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
+#endif
+
+	while (pool_pages < pool_size) {
+		p = alloc_pages(gfp, 0);
+		if (!p) {
+			pool_failed++;
+			break;
+		}
+		spin_lock_irq(&pgd_lock);
+		list_add(&p->lru, &page_pool);
+		pool_pages++;
+		spin_unlock_irq(&pgd_lock);
+	}
+	clear_bit_unlock(0, &pool_refill);
+}
+
+#define SHIFT_MB		(20 - PAGE_SHIFT)
+#define ROUND_MB_GB		((1 << 10) - 1)
+#define SHIFT_MB_GB		10
+#define POOL_PAGES_PER_GB	16
+
+void __init cpa_init(void)
+{
+	struct sysinfo si;
+	unsigned long gb;
+
+	si_meminfo(&si);
+	/*
+	 * Calculate the number of pool pages:
+	 *
+	 * Convert totalram (nr of pages) to MiB and round to the next
+	 * GiB. Shift MiB to GiB and multiply the result by
+	 * POOL_PAGES_PER_GB:
+	 */
+	gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
+	pool_size = POOL_PAGES_PER_GB * gb;
+	pool_low = pool_size;
+
+	cpa_fill_pool();
+	printk(KERN_DEBUG
+	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
+	       pool_pages, pool_size);
+}
+
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	unsigned long flags, pfn, pfninc = 1;
-	gfp_t gfp_flags = GFP_KERNEL;
 	unsigned int i, level;
 	pte_t *pbase, *tmp;
 	pgprot_t ref_prot;
 	struct page *base;
 
-#ifdef CONFIG_DEBUG_PAGEALLOC
-	gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
-#endif
-	base = alloc_pages(gfp_flags, 0);
-	if (!base)
+	/*
+	 * Get a page from the pool. The pool list is protected by the
+	 * pgd_lock, which we have to take anyway for the split
+	 * operation:
+	 */
+	spin_lock_irqsave(&pgd_lock, flags);
+	if (list_empty(&page_pool)) {
+		spin_unlock_irqrestore(&pgd_lock, flags);
 		return -ENOMEM;
+	}
+
+	base = list_first_entry(&page_pool, struct page, lru);
+	list_del(&base->lru);
+	pool_pages--;
+
+	if (pool_pages < pool_low)
+		pool_low = pool_pages;
 
-	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
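
The pool sizing in cpa_init() is plain shift arithmetic and easy to sanity-check in userspace. A standalone sketch (assumes PAGE_SHIFT = 12, i.e. 4k pages; the totalram sample values are invented):

	#include <stdio.h>

	#define PAGE_SHIFT		12
	#define SHIFT_MB		(20 - PAGE_SHIFT)
	#define ROUND_MB_GB		((1 << 10) - 1)
	#define SHIFT_MB_GB		10
	#define POOL_PAGES_PER_GB	16

	int main(void)
	{
		/* 512 MiB, 4 GiB and 4 GiB + 1 MiB of RAM, in pages */
		unsigned long samples[] = { 131072, 1048576, 1048832 };
		int i;

		for (i = 0; i < 3; i++) {
			unsigned long mb = samples[i] >> SHIFT_MB;
			unsigned long gb = (mb + ROUND_MB_GB) >> SHIFT_MB_GB;

			printf("%8lu pages = %4lu MiB -> pool_size = %lu\n",
			       samples[i], mb, POOL_PAGES_PER_GB * gb);
		}
		return 0;
	}

This prints pool sizes of 16, 64 and 80 pages respectively: any partial GiB rounds up to a full POOL_PAGES_PER_GB quantum.
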
@@ -396,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	base = NULL;
 
 out_unlock:
+	/*
+	 * If we dropped out via the lookup_address check under
+	 * pgd_lock then stick the page back into the pool:
+	 */
+	if (base) {
+		list_add(&base->lru, &page_pool);
+		pool_pages++;
+	} else
+		pool_used++;
 	spin_unlock_irqrestore(&pgd_lock, flags);
 
-	if (base)
-		__free_pages(base, 0);
-
 	return 0;
 }
 
 static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
 {
-	int level, do_split, err;
+	int do_split, err;
+	unsigned int level;
 	struct page *kpte_page;
 	pte_t *kpte;
 
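
Note the ownership discipline on the race path: when another CPU has already split the page, base goes back on the list instead of through __free_pages(), and the accounting happens under the same pgd_lock the split already holds. A simplified userspace model of that take/put-back pattern (a singly-linked stand-in for the kernel's list_head on page->lru):

	struct pool_page {
		struct pool_page *next;
	};

	static struct pool_page *pool;
	static unsigned long pool_pages, pool_used;

	static struct pool_page *pool_take(void)
	{
		struct pool_page *p = pool;

		if (p) {
			pool = p->next;
			pool_pages--;
		}
		return p;
	}

	static void pool_put(struct pool_page *p)
	{
		p->next = pool;
		pool = p;
		pool_pages++;
	}

	static int split(int raced)
	{
		struct pool_page *base = pool_take();

		if (!base)
			return -1;		/* -ENOMEM in the kernel */
		if (raced) {
			pool_put(base);		/* another CPU split it for us */
			return 0;
		}
		pool_used++;			/* page now backs the new pte table */
		return 0;
	}
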
@@ -598,7 +700,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	 * Check whether we really changed something:
 	 */
 	if (!cpa.flushtlb)
-		return ret;
+		goto out;
 
 	/*
 	 * No need to flush, when we did not set any of the caching
@@ -617,6 +719,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
 	else
 		cpa_flush_all(cache);
 
+out:
+	cpa_fill_pool();
 	return ret;
 }
 
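
With the goto, every exit from change_page_attr_set_clr() now passes through cpa_fill_pool(), including the nothing-changed case, so the pool is replenished from sleepable context as soon as possible. The guard at the top of cpa_fill_pool() is a try-lock, not a spinlock: a concurrent or recursive refill simply bails out. A userspace model of that pattern with C11 atomics (simplified; the kernel uses test_and_set_bit_lock()/clear_bit_unlock() on bit 0 of pool_refill):

	#include <stdatomic.h>

	static atomic_flag refill_lock = ATOMIC_FLAG_INIT;
	static unsigned long pool_pages, pool_size = 16;

	static void fill_pool(void)
	{
		/* unlocked check: one page more or less does not matter */
		if (pool_pages >= pool_size)
			return;

		/* try-lock: bail out if a refill is already running,
		   which also breaks recursion via the page allocator */
		if (atomic_flag_test_and_set(&refill_lock))
			return;

		while (pool_pages < pool_size)
			pool_pages++;	/* stands in for alloc_pages() + list_add() */

		atomic_flag_clear(&refill_lock);
	}
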
@@ -770,6 +874,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	 * but that can deadlock->flush only current cpu:
 	 */
 	__flush_tlb_all();
+
+	/*
+	 * Try to refill the page pool here. We can do this only after
+	 * the tlb flush.
+	 */
+	cpa_fill_pool();
 }
 #endif
 