aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/mm/pageattr.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/pageattr.c')
-rw-r--r--arch/x86/mm/pageattr.c140
1 files changed, 125 insertions, 15 deletions
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 8493c855582b..440210a2277d 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -8,6 +8,7 @@
8#include <linux/sched.h> 8#include <linux/sched.h>
9#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/mm.h> 10#include <linux/mm.h>
11#include <linux/interrupt.h>
11 12
12#include <asm/e820.h> 13#include <asm/e820.h>
13#include <asm/processor.h> 14#include <asm/processor.h>
@@ -191,7 +192,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
191 * or when the present bit is not set. Otherwise we would return a 192 * or when the present bit is not set. Otherwise we would return a
192 * pointer to a nonexisting mapping. 193 * pointer to a nonexisting mapping.
193 */ 194 */
194pte_t *lookup_address(unsigned long address, int *level) 195pte_t *lookup_address(unsigned long address, unsigned int *level)
195{ 196{
196 pgd_t *pgd = pgd_offset_k(address); 197 pgd_t *pgd = pgd_offset_k(address);
197 pud_t *pud; 198 pud_t *pud;
@@ -252,10 +253,11 @@ static int
252try_preserve_large_page(pte_t *kpte, unsigned long address, 253try_preserve_large_page(pte_t *kpte, unsigned long address,
253 struct cpa_data *cpa) 254 struct cpa_data *cpa)
254{ 255{
255 unsigned long nextpage_addr, numpages, pmask, psize, flags; 256 unsigned long nextpage_addr, numpages, pmask, psize, flags, addr;
256 pte_t new_pte, old_pte, *tmp; 257 pte_t new_pte, old_pte, *tmp;
257 pgprot_t old_prot, new_prot; 258 pgprot_t old_prot, new_prot;
258 int level, do_split = 1; 259 int i, do_split = 1;
260 unsigned int level;
259 261
260 spin_lock_irqsave(&pgd_lock, flags); 262 spin_lock_irqsave(&pgd_lock, flags);
261 /* 263 /*
@@ -302,6 +304,19 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
302 new_prot = static_protections(new_prot, address); 304 new_prot = static_protections(new_prot, address);
303 305
304 /* 306 /*
307 * We need to check the full range, whether
308 * static_protection() requires a different pgprot for one of
309 * the pages in the range we try to preserve:
310 */
311 addr = address + PAGE_SIZE;
312 for (i = 1; i < cpa->numpages; i++, addr += PAGE_SIZE) {
313 pgprot_t chk_prot = static_protections(new_prot, addr);
314
315 if (pgprot_val(chk_prot) != pgprot_val(new_prot))
316 goto out_unlock;
317 }
318
319 /*
305 * If there are no changes, return. maxpages has been updated 320 * If there are no changes, return. maxpages has been updated
306 * above: 321 * above:
307 */ 322 */
@@ -335,23 +350,103 @@ out_unlock:
335 return do_split; 350 return do_split;
336} 351}
337 352
353static LIST_HEAD(page_pool);
354static unsigned long pool_size, pool_pages, pool_low;
355static unsigned long pool_used, pool_failed, pool_refill;
356
357static void cpa_fill_pool(void)
358{
359 struct page *p;
360 gfp_t gfp = GFP_KERNEL;
361
362 /* Do not allocate from interrupt context */
363 if (in_irq() || irqs_disabled())
364 return;
365 /*
366 * Check unlocked. I does not matter when we have one more
367 * page in the pool. The bit lock avoids recursive pool
368 * allocations:
369 */
370 if (pool_pages >= pool_size || test_and_set_bit_lock(0, &pool_refill))
371 return;
372
373#ifdef CONFIG_DEBUG_PAGEALLOC
374 /*
375 * We could do:
376 * gfp = in_atomic() ? GFP_ATOMIC : GFP_KERNEL;
377 * but this fails on !PREEMPT kernels
378 */
379 gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
380#endif
381
382 while (pool_pages < pool_size) {
383 p = alloc_pages(gfp, 0);
384 if (!p) {
385 pool_failed++;
386 break;
387 }
388 spin_lock_irq(&pgd_lock);
389 list_add(&p->lru, &page_pool);
390 pool_pages++;
391 spin_unlock_irq(&pgd_lock);
392 }
393 clear_bit_unlock(0, &pool_refill);
394}
395
396#define SHIFT_MB (20 - PAGE_SHIFT)
397#define ROUND_MB_GB ((1 << 10) - 1)
398#define SHIFT_MB_GB 10
399#define POOL_PAGES_PER_GB 16
400
401void __init cpa_init(void)
402{
403 struct sysinfo si;
404 unsigned long gb;
405
406 si_meminfo(&si);
407 /*
408 * Calculate the number of pool pages:
409 *
410 * Convert totalram (nr of pages) to MiB and round to the next
411 * GiB. Shift MiB to Gib and multiply the result by
412 * POOL_PAGES_PER_GB:
413 */
414 gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
415 pool_size = POOL_PAGES_PER_GB * gb;
416 pool_low = pool_size;
417
418 cpa_fill_pool();
419 printk(KERN_DEBUG
420 "CPA: page pool initialized %lu of %lu pages preallocated\n",
421 pool_pages, pool_size);
422}
423
338static int split_large_page(pte_t *kpte, unsigned long address) 424static int split_large_page(pte_t *kpte, unsigned long address)
339{ 425{
340 unsigned long flags, pfn, pfninc = 1; 426 unsigned long flags, pfn, pfninc = 1;
341 gfp_t gfp_flags = GFP_KERNEL;
342 unsigned int i, level; 427 unsigned int i, level;
343 pte_t *pbase, *tmp; 428 pte_t *pbase, *tmp;
344 pgprot_t ref_prot; 429 pgprot_t ref_prot;
345 struct page *base; 430 struct page *base;
346 431
347#ifdef CONFIG_DEBUG_PAGEALLOC 432 /*
348 gfp_flags = GFP_ATOMIC | __GFP_NOWARN; 433 * Get a page from the pool. The pool list is protected by the
349#endif 434 * pgd_lock, which we have to take anyway for the split
350 base = alloc_pages(gfp_flags, 0); 435 * operation:
351 if (!base) 436 */
437 spin_lock_irqsave(&pgd_lock, flags);
438 if (list_empty(&page_pool)) {
439 spin_unlock_irqrestore(&pgd_lock, flags);
352 return -ENOMEM; 440 return -ENOMEM;
441 }
442
443 base = list_first_entry(&page_pool, struct page, lru);
444 list_del(&base->lru);
445 pool_pages--;
446
447 if (pool_pages < pool_low)
448 pool_low = pool_pages;
353 449
354 spin_lock_irqsave(&pgd_lock, flags);
355 /* 450 /*
356 * Check for races, another CPU might have split this page 451 * Check for races, another CPU might have split this page
357 * up for us already: 452 * up for us already:
@@ -396,17 +491,24 @@ static int split_large_page(pte_t *kpte, unsigned long address)
396 base = NULL; 491 base = NULL;
397 492
398out_unlock: 493out_unlock:
494 /*
495 * If we dropped out via the lookup_address check under
496 * pgd_lock then stick the page back into the pool:
497 */
498 if (base) {
499 list_add(&base->lru, &page_pool);
500 pool_pages++;
501 } else
502 pool_used++;
399 spin_unlock_irqrestore(&pgd_lock, flags); 503 spin_unlock_irqrestore(&pgd_lock, flags);
400 504
401 if (base)
402 __free_pages(base, 0);
403
404 return 0; 505 return 0;
405} 506}
406 507
407static int __change_page_attr(unsigned long address, struct cpa_data *cpa) 508static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
408{ 509{
409 int level, do_split, err; 510 int do_split, err;
511 unsigned int level;
410 struct page *kpte_page; 512 struct page *kpte_page;
411 pte_t *kpte; 513 pte_t *kpte;
412 514
@@ -598,7 +700,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
598 * Check whether we really changed something: 700 * Check whether we really changed something:
599 */ 701 */
600 if (!cpa.flushtlb) 702 if (!cpa.flushtlb)
601 return ret; 703 goto out;
602 704
603 /* 705 /*
604 * No need to flush, when we did not set any of the caching 706 * No need to flush, when we did not set any of the caching
@@ -617,6 +719,8 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
617 else 719 else
618 cpa_flush_all(cache); 720 cpa_flush_all(cache);
619 721
722out:
723 cpa_fill_pool();
620 return ret; 724 return ret;
621} 725}
622 726
@@ -770,6 +874,12 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
770 * but that can deadlock->flush only current cpu: 874 * but that can deadlock->flush only current cpu:
771 */ 875 */
772 __flush_tlb_all(); 876 __flush_tlb_all();
877
878 /*
879 * Try to refill the page pool here. We can do this only after
880 * the tlb flush.
881 */
882 cpa_fill_pool();
773} 883}
774#endif 884#endif
775 885