Diffstat (limited to 'arch/x86/mm/pageattr.c')
-rw-r--r--	arch/x86/mm/pageattr.c	490
1 file changed, 270 insertions(+), 220 deletions(-)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 65c6e46bf059..a9ec89c3fbca 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -25,15 +25,27 @@
  * The current flushing context - we pass it instead of 5 arguments:
  */
 struct cpa_data {
-	unsigned long	vaddr;
+	unsigned long	*vaddr;
 	pgprot_t	mask_set;
 	pgprot_t	mask_clr;
 	int		numpages;
-	int		flushtlb;
+	int		flags;
 	unsigned long	pfn;
 	unsigned	force_split : 1;
+	int		curpage;
 };
 
+/*
+ * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
+ * using cpa_lock, so that we don't allow any other CPU with stale large TLB
+ * entries to change the page attributes in parallel while another CPU is
+ * splitting a large page entry and changing the attributes.
+ */
+static DEFINE_SPINLOCK(cpa_lock);
+
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+
 #ifdef CONFIG_PROC_FS
 static unsigned long direct_pages_count[PG_LEVEL_NUM];
 
@@ -55,13 +67,19 @@ static void split_page_count(int level)
 
 int arch_report_meminfo(char *page)
 {
-	int n = sprintf(page, "DirectMap4k:  %8lu\n"
-			"DirectMap2M:  %8lu\n",
-			direct_pages_count[PG_LEVEL_4K],
-			direct_pages_count[PG_LEVEL_2M]);
+	int n = sprintf(page, "DirectMap4k:  %8lu kB\n",
+			direct_pages_count[PG_LEVEL_4K] << 2);
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+	n += sprintf(page + n, "DirectMap2M:  %8lu kB\n",
+			direct_pages_count[PG_LEVEL_2M] << 11);
+#else
+	n += sprintf(page + n, "DirectMap4M:  %8lu kB\n",
+			direct_pages_count[PG_LEVEL_2M] << 12);
+#endif
 #ifdef CONFIG_X86_64
-	n += sprintf(page + n, "DirectMap1G:  %8lu\n",
-			direct_pages_count[PG_LEVEL_1G]);
+	if (direct_gbpages)
+		n += sprintf(page + n, "DirectMap1G:  %8lu kB\n",
+			direct_pages_count[PG_LEVEL_1G] << 20);
 #endif
 	return n;
 }
@@ -78,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
 
 static inline unsigned long highmap_end_pfn(void)
 {
-	return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+	return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
@@ -184,6 +202,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
 	}
 }
 
+static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+{
+	unsigned int i, level;
+	unsigned long *addr;
+
+	BUG_ON(irqs_disabled());
+
+	on_each_cpu(__cpa_flush_range, NULL, 1);
+
+	if (!cache)
+		return;
+
+	/* 4M threshold */
+	if (numpages >= 1024) {
+		if (boot_cpu_data.x86_model >= 4)
+			wbinvd();
+		return;
+	}
+	/*
+	 * We only need to flush on one CPU:
+	 * clflush is a MESI-coherent instruction that
+	 * will cause all other CPUs to flush the same
+	 * cachelines:
+	 */
+	for (i = 0, addr = start; i < numpages; i++, addr++) {
+		pte_t *pte = lookup_address(*addr, &level);
+
+		/*
+		 * Only flush present addresses:
+		 */
+		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+			clflush_cache_range((void *) *addr, PAGE_SIZE);
+	}
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
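
The 4M threshold above follows directly from the page size: 1024 pages x 4 KiB = 4 MiB, beyond which a single wbinvd is cheaper than issuing clflush per page. A minimal sketch of that decision in isolation; cache_flush_all() and cache_flush_page() are hypothetical stand-ins for wbinvd() and clflush_cache_range(), not kernel APIs:

	#include <stddef.h>

	#define PAGE_SIZE_SK	4096UL		/* assumption: 4 KiB pages */
	#define WBINVD_LIMIT	(4UL << 20)	/* 4 MiB threshold */

	void cache_flush_all(void);		/* hypothetical wbinvd() stand-in */
	void cache_flush_page(void *addr);	/* hypothetical clflush stand-in */

	/* Flush a sparse page set: whole cache above the threshold,
	 * per-page cacheline flushes below it. */
	static void flush_page_set(void **pages, size_t count)
	{
		size_t i;

		if (count * PAGE_SIZE_SK >= WBINVD_LIMIT) {
			cache_flush_all();
			return;
		}
		for (i = 0; i < count; i++)
			cache_flush_page(pages[i]);
	}
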
@@ -392,7 +445,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
 		 */
 		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
 		__set_pmd_pte(kpte, address, new_pte);
-		cpa->flushtlb = 1;
+		cpa->flags |= CPA_FLUSHTLB;
 		do_split = 0;
 	}
 
@@ -402,84 +455,6 @@ out_unlock:
 	return do_split;
 }
 
-static LIST_HEAD(page_pool);
-static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed;
-
-static void cpa_fill_pool(struct page **ret)
-{
-	gfp_t gfp = GFP_KERNEL;
-	unsigned long flags;
-	struct page *p;
-
-	/*
-	 * Avoid recursion (on debug-pagealloc) and also signal
-	 * our priority to get to these pagetables:
-	 */
-	if (current->flags & PF_MEMALLOC)
-		return;
-	current->flags |= PF_MEMALLOC;
-
-	/*
-	 * Allocate atomically from atomic contexts:
-	 */
-	if (in_atomic() || irqs_disabled() || debug_pagealloc)
-		gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-
-	while (pool_pages < pool_size || (ret && !*ret)) {
-		p = alloc_pages(gfp, 0);
-		if (!p) {
-			pool_failed++;
-			break;
-		}
-		/*
-		 * If the call site needs a page right now, provide it:
-		 */
-		if (ret && !*ret) {
-			*ret = p;
-			continue;
-		}
-		spin_lock_irqsave(&pgd_lock, flags);
-		list_add(&p->lru, &page_pool);
-		pool_pages++;
-		spin_unlock_irqrestore(&pgd_lock, flags);
-	}
-
-	current->flags &= ~PF_MEMALLOC;
-}
-
-#define SHIFT_MB		(20 - PAGE_SHIFT)
-#define ROUND_MB_GB		((1 << 10) - 1)
-#define SHIFT_MB_GB		10
-#define POOL_PAGES_PER_GB	16
-
-void __init cpa_init(void)
-{
-	struct sysinfo si;
-	unsigned long gb;
-
-	si_meminfo(&si);
-	/*
-	 * Calculate the number of pool pages:
-	 *
-	 * Convert totalram (nr of pages) to MiB and round to the next
-	 * GiB. Shift MiB to Gib and multiply the result by
-	 * POOL_PAGES_PER_GB:
-	 */
-	if (debug_pagealloc) {
-		gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
-		pool_size = POOL_PAGES_PER_GB * gb;
-	} else {
-		pool_size = 1;
-	}
-	pool_low = pool_size;
-
-	cpa_fill_pool(NULL);
-	printk(KERN_DEBUG
-	       "CPA: page pool initialized %lu of %lu pages preallocated\n",
-	       pool_pages, pool_size);
-}
-
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
 	unsigned long flags, pfn, pfninc = 1;
@@ -488,28 +463,15 @@ static int split_large_page(pte_t *kpte, unsigned long address)
 	pgprot_t ref_prot;
 	struct page *base;
 
-	/*
-	 * Get a page from the pool. The pool list is protected by the
-	 * pgd_lock, which we have to take anyway for the split
-	 * operation:
-	 */
-	spin_lock_irqsave(&pgd_lock, flags);
-	if (list_empty(&page_pool)) {
-		spin_unlock_irqrestore(&pgd_lock, flags);
-		base = NULL;
-		cpa_fill_pool(&base);
-		if (!base)
-			return -ENOMEM;
-		spin_lock_irqsave(&pgd_lock, flags);
-	} else {
-		base = list_first_entry(&page_pool, struct page, lru);
-		list_del(&base->lru);
-		pool_pages--;
-
-		if (pool_pages < pool_low)
-			pool_low = pool_pages;
-	}
+	if (!debug_pagealloc)
+		spin_unlock(&cpa_lock);
+	base = alloc_pages(GFP_KERNEL, 0);
+	if (!debug_pagealloc)
+		spin_lock(&cpa_lock);
+	if (!base)
+		return -ENOMEM;
 
+	spin_lock_irqsave(&pgd_lock, flags);
 	/*
 	 * Check for races, another CPU might have split this page
 	 * up for us already:
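
Note the allocation pattern replacing the pool: alloc_pages(GFP_KERNEL, 0) may sleep, so cpa_lock is dropped around the allocation and retaken before the page tables are touched (under DEBUG_PAGEALLOC the lock is not used at all, hence the !debug_pagealloc guards). The pattern in isolation, with stubbed helpers rather than the kernel's spinlock API:

	void lock_acquire(void);		/* stand-in for spin_lock(&cpa_lock) */
	void lock_release(void);		/* stand-in for spin_unlock(&cpa_lock) */
	void *alloc_page_may_sleep(void);	/* stand-in for alloc_pages(GFP_KERNEL, 0) */

	/* Called with the lock held; returns with the lock held. */
	static void *get_split_page(int locked)
	{
		void *page;

		if (locked)
			lock_release();		/* never sleep under a spinlock */
		page = alloc_page_may_sleep();
		if (locked)
			lock_acquire();
		return page;			/* NULL -> caller returns -ENOMEM */
	}
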
@@ -566,11 +528,8 @@ out_unlock:
 	 * If we dropped out via the lookup_address check under
 	 * pgd_lock then stick the page back into the pool:
 	 */
-	if (base) {
-		list_add(&base->lru, &page_pool);
-		pool_pages++;
-	} else
-		pool_used++;
+	if (base)
+		__free_page(base);
 	spin_unlock_irqrestore(&pgd_lock, flags);
 
 	return 0;
@@ -578,11 +537,16 @@ out_unlock:
 
 static int __change_page_attr(struct cpa_data *cpa, int primary)
 {
-	unsigned long address = cpa->vaddr;
+	unsigned long address;
 	int do_split, err;
 	unsigned int level;
 	pte_t *kpte, old_pte;
 
+	if (cpa->flags & CPA_ARRAY)
+		address = cpa->vaddr[cpa->curpage];
+	else
+		address = *cpa->vaddr;
+
 repeat:
 	kpte = lookup_address(address, &level);
 	if (!kpte)
@@ -592,10 +556,9 @@ repeat:
 	if (!pte_val(old_pte)) {
 		if (!primary)
 			return 0;
-		printk(KERN_WARNING "CPA: called for zero pte. "
+		WARN(1, KERN_WARNING "CPA: called for zero pte. "
 		       "vaddr = %lx cpa->vaddr = %lx\n", address,
-		       cpa->vaddr);
-		WARN_ON(1);
+		       *cpa->vaddr);
 		return -EINVAL;
 	}
 
@@ -621,7 +584,7 @@ repeat:
 		 */
 		if (pte_val(old_pte) != pte_val(new_pte)) {
 			set_pte_atomic(kpte, new_pte);
-			cpa->flushtlb = 1;
+			cpa->flags |= CPA_FLUSHTLB;
 		}
 		cpa->numpages = 1;
 		return 0;
@@ -645,7 +608,25 @@ repeat:
 	 */
 	err = split_large_page(kpte, address);
 	if (!err) {
-		cpa->flushtlb = 1;
+		/*
+		 * Do a global flush tlb after splitting the large page
+		 * and before we do the actual change page attribute in the PTE.
+		 *
+		 * Without this, we violate the TLB application note, which says
+		 * "The TLBs may contain both ordinary and large-page
+		 * translations for a 4-KByte range of linear addresses. This
+		 * may occur if software modifies the paging structures so that
+		 * the page size used for the address range changes. If the two
+		 * translations differ with respect to page frame or attributes
+		 * (e.g., permissions), processor behavior is undefined and may
+		 * be implementation-specific."
+		 *
+		 * We do this global tlb flush inside the cpa_lock, so that we
+		 * don't allow any other cpu, with stale tlb entries, to change
+		 * the page attributes in parallel for an address range that
+		 * falls into the just split large page entry.
+		 */
+		flush_tlb_all();
 		goto repeat;
 	}
 
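
The invariant described in the new comment is: split the large page, flush the stale large-page TLB entries, and only then write the 4K PTE, all without letting another CPU run cpa() in between. A stubbed ordering sketch (set_4k_attr() and the lock helpers are illustrative, not the kernel's functions):

	typedef unsigned long pte_t;	/* stub type for the sketch */

	int split_large_page(pte_t *kpte, unsigned long address);
	void flush_tlb_all(void);
	void set_4k_attr(unsigned long address);	/* hypothetical PTE update */
	void cpa_lock_acquire(void);			/* hypothetical lock helpers */
	void cpa_lock_release(void);

	static void change_attr_ordered(pte_t *kpte, unsigned long address)
	{
		cpa_lock_acquire();		/* keep other CPUs out of cpa() */
		if (!split_large_page(kpte, address)) {
			flush_tlb_all();	/* drop mixed large/4K translations */
			set_4k_attr(address);	/* now only 4K translations exist */
		}
		cpa_lock_release();
	}
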
@@ -658,6 +639,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
 	int ret = 0;
+	unsigned long temp_cpa_vaddr, vaddr;
 
 	if (cpa->pfn >= max_pfn_mapped)
 		return 0;
@@ -670,16 +652,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * No need to redo, when the primary call touched the direct
 	 * mapping already:
 	 */
-	if (!(within(cpa->vaddr, PAGE_OFFSET,
+	if (cpa->flags & CPA_ARRAY)
+		vaddr = cpa->vaddr[cpa->curpage];
+	else
+		vaddr = *cpa->vaddr;
+
+	if (!(within(vaddr, PAGE_OFFSET,
 		    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
 #ifdef CONFIG_X86_64
-		|| within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+		|| within(vaddr, PAGE_OFFSET + (1UL<<32),
 		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
 #endif
 	)) {
 
 		alias_cpa = *cpa;
-		alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+		temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+		alias_cpa.vaddr = &temp_cpa_vaddr;
+		alias_cpa.flags &= ~CPA_ARRAY;
+
 
 		ret = __change_page_attr_set_clr(&alias_cpa, 0);
 	}
@@ -691,7 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	 * No need to redo, when the primary call touched the high
 	 * mapping already:
 	 */
-	if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
+	if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
 		return 0;
 
 	/*
@@ -702,8 +692,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
 		return 0;
 
 	alias_cpa = *cpa;
-	alias_cpa.vaddr =
-		(cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+	temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+	alias_cpa.vaddr = &temp_cpa_vaddr;
+	alias_cpa.flags &= ~CPA_ARRAY;
 
 	/*
 	 * The high mapping range is imprecise, so ignore the return value.
@@ -723,8 +714,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 	 * preservation check.
 	 */
 	cpa->numpages = numpages;
+	/* for array changes, we can't use large pages */
+	if (cpa->flags & CPA_ARRAY)
+		cpa->numpages = 1;
 
+	if (!debug_pagealloc)
+		spin_lock(&cpa_lock);
 	ret = __change_page_attr(cpa, checkalias);
+	if (!debug_pagealloc)
+		spin_unlock(&cpa_lock);
 	if (ret)
 		return ret;
 
@@ -741,7 +739,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		 */
 		BUG_ON(cpa->numpages > numpages);
 		numpages -= cpa->numpages;
-		cpa->vaddr += cpa->numpages * PAGE_SIZE;
+		if (cpa->flags & CPA_ARRAY)
+			cpa->curpage++;
+		else
+			*cpa->vaddr += cpa->numpages * PAGE_SIZE;
+
 	}
 	return 0;
 }
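
In array mode the walk handles exactly one page per iteration and advances an index; in range mode it advances the virtual address itself. A self-contained sketch of the loop skeleton (struct cpa_sketch and do_one_chunk() are illustrative, and the real __change_page_attr() may shrink numpages further for large-page preservation):

	#define CPA_ARRAY	2
	#define PAGE_SZ		4096UL	/* assumption: 4 KiB pages */

	struct cpa_sketch {
		unsigned long *vaddr;	/* range: the address; array: array base */
		int numpages;		/* pages still to process */
		int curpage;		/* current index in array mode */
		int flags;
	};

	static void do_one_chunk(struct cpa_sketch *cpa)
	{
		/* ... attribute change for cpa->numpages pages ... */
	}

	static void cpa_walk(struct cpa_sketch *cpa)
	{
		int numpages = cpa->numpages;

		while (numpages) {
			/* arrays can't use large pages: one page per step */
			cpa->numpages = (cpa->flags & CPA_ARRAY) ? 1 : numpages;
			do_one_chunk(cpa);
			numpages -= cpa->numpages;
			if (cpa->flags & CPA_ARRAY)
				cpa->curpage++;
			else
				*cpa->vaddr += cpa->numpages * PAGE_SZ;
		}
	}
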
@@ -752,9 +754,9 @@ static inline int cache_attr(pgprot_t attr)
 		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
 }
 
-static int change_page_attr_set_clr(unsigned long addr, int numpages,
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 				    pgprot_t mask_set, pgprot_t mask_clr,
-				    int force_split)
+				    int force_split, int array)
 {
 	struct cpa_data cpa;
 	int ret, cache, checkalias;
@@ -769,21 +771,38 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 		return 0;
 
 	/* Ensure we are PAGE_SIZE aligned */
-	if (addr & ~PAGE_MASK) {
-		addr &= PAGE_MASK;
-		/*
-		 * People should not be passing in unaligned addresses:
-		 */
-		WARN_ON_ONCE(1);
+	if (!array) {
+		if (*addr & ~PAGE_MASK) {
+			*addr &= PAGE_MASK;
+			/*
+			 * People should not be passing in unaligned addresses:
+			 */
+			WARN_ON_ONCE(1);
+		}
+	} else {
+		int i;
+		for (i = 0; i < numpages; i++) {
+			if (addr[i] & ~PAGE_MASK) {
+				addr[i] &= PAGE_MASK;
+				WARN_ON_ONCE(1);
+			}
+		}
 	}
 
+	/* Must avoid aliasing mappings in the highmem code */
+	kmap_flush_unused();
+
 	cpa.vaddr = addr;
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
 	cpa.mask_clr = mask_clr;
-	cpa.flushtlb = 0;
+	cpa.flags = 0;
+	cpa.curpage = 0;
 	cpa.force_split = force_split;
 
+	if (array)
+		cpa.flags |= CPA_ARRAY;
+
 	/* No alias checking for _NX bit modifications */
 	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
 
@@ -792,7 +811,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	/*
 	 * Check whether we really changed something:
 	 */
-	if (!cpa.flushtlb)
+	if (!(cpa.flags & CPA_FLUSHTLB))
 		goto out;
 
 	/*
@@ -807,27 +826,30 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	 * error case we fall back to cpa_flush_all (which uses
 	 * wbindv):
 	 */
-	if (!ret && cpu_has_clflush)
-		cpa_flush_range(addr, numpages, cache);
-	else
+	if (!ret && cpu_has_clflush) {
+		if (cpa.flags & CPA_ARRAY)
+			cpa_flush_array(addr, numpages, cache);
+		else
+			cpa_flush_range(*addr, numpages, cache);
+	} else
 		cpa_flush_all(cache);
 
 out:
-	cpa_fill_pool(NULL);
-
 	return ret;
 }
 
-static inline int change_page_attr_set(unsigned long addr, int numpages,
-				       pgprot_t mask)
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+				       pgprot_t mask, int array)
 {
-	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+		array);
 }
 
-static inline int change_page_attr_clear(unsigned long addr, int numpages,
-					 pgprot_t mask)
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+					 pgprot_t mask, int array)
 {
-	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+		array);
 }
 
 int _set_memory_uc(unsigned long addr, int numpages)
@@ -835,8 +857,8 @@ int _set_memory_uc(unsigned long addr, int numpages)
 	/*
 	 * for now UC MINUS. see comments in ioremap_nocache()
 	 */
-	return change_page_attr_set(addr, numpages,
-				    __pgprot(_PAGE_CACHE_UC_MINUS));
+	return change_page_attr_set(&addr, numpages,
+				    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
 }
 
 int set_memory_uc(unsigned long addr, int numpages)
@@ -844,7 +866,7 @@ int set_memory_uc(unsigned long addr, int numpages)
 	/*
 	 * for now UC MINUS. see comments in ioremap_nocache()
 	 */
-	if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+	if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
 			    _PAGE_CACHE_UC_MINUS, NULL))
 		return -EINVAL;
 
@@ -852,10 +874,48 @@ int set_memory_uc(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL(set_memory_uc);
 
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+	unsigned long start;
+	unsigned long end;
+	int i;
+	/*
+	 * for now UC MINUS. see comments in ioremap_nocache()
+	 */
+	for (i = 0; i < addrinarray; i++) {
+		start = __pa(addr[i]);
+		for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+			if (end != __pa(addr[i + 1]))
+				break;
+			i++;
+		}
+		if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
+			goto out;
+	}
+
+	return change_page_attr_set(addr, addrinarray,
+				__pgprot(_PAGE_CACHE_UC_MINUS), 1);
+out:
+	for (i = 0; i < addrinarray; i++) {
+		unsigned long tmp = __pa(addr[i]);
+
+		if (tmp == start)
+			break;
+		for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+			if (end != __pa(addr[i + 1]))
+				break;
+			i++;
+		}
+		free_memtype(tmp, end);
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
 int _set_memory_wc(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(addr, numpages,
-				    __pgprot(_PAGE_CACHE_WC));
+	return change_page_attr_set(&addr, numpages,
+				    __pgprot(_PAGE_CACHE_WC), 0);
 }
 
 int set_memory_wc(unsigned long addr, int numpages)
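
The inner loops above coalesce physically contiguous pages so that reserve_memtype() is called once per contiguous range, and the out: path frees only the reservations made before the failure. A typical caller pairs the two new array APIs roughly like this (a usage sketch, assuming <asm/cacheflush.h> and that page_addrs[] already holds one kernel virtual address per page):

	/* Switch a scattered page set to UC for device use, restore WB after. */
	static int use_pages_uncached(unsigned long *page_addrs, int count)
	{
		int ret;

		ret = set_memory_array_uc(page_addrs, count);
		if (ret)
			return ret;	/* memtype reservation or cpa failed */

		/* ... hand the now-uncached pages to the device ... */

		return set_memory_array_wb(page_addrs, count);
	}
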
@@ -863,7 +923,7 @@ int set_memory_wc(unsigned long addr, int numpages)
 	if (!pat_enabled)
 		return set_memory_uc(addr, numpages);
 
-	if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+	if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
 		_PAGE_CACHE_WC, NULL))
 		return -EINVAL;
 
@@ -873,49 +933,71 @@ EXPORT_SYMBOL(set_memory_wc);
 
 int _set_memory_wb(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages,
-				      __pgprot(_PAGE_CACHE_MASK));
+	return change_page_attr_clear(&addr, numpages,
+				      __pgprot(_PAGE_CACHE_MASK), 0);
 }
 
 int set_memory_wb(unsigned long addr, int numpages)
 {
-	free_memtype(addr, addr + numpages * PAGE_SIZE);
+	free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
 
 	return _set_memory_wb(addr, numpages);
 }
 EXPORT_SYMBOL(set_memory_wb);
 
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+	int i;
+
+	for (i = 0; i < addrinarray; i++) {
+		unsigned long start = __pa(addr[i]);
+		unsigned long end;
+
+		for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+			if (end != __pa(addr[i + 1]))
+				break;
+			i++;
+		}
+		free_memtype(start, end);
+	}
+	return change_page_attr_clear(addr, addrinarray,
+				__pgprot(_PAGE_CACHE_MASK), 1);
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
 int set_memory_x(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
+	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_nx);
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
+EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
+EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
-	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
 }
 
 int set_memory_4k(unsigned long addr, int numpages)
 {
-	return change_page_attr_set_clr(addr, numpages, __pgprot(0),
-					__pgprot(0), 1);
+	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+					__pgprot(0), 1, 0);
 }
 
 int set_pages_uc(struct page *page, int numpages)
@@ -968,22 +1050,38 @@ int set_pages_rw(struct page *page, int numpages)
 
 static int __set_pages_p(struct page *page, int numpages)
 {
-	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+	unsigned long tempaddr = (unsigned long) page_address(page);
+	struct cpa_data cpa = { .vaddr = &tempaddr,
 				.numpages = numpages,
 				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-				.mask_clr = __pgprot(0)};
+				.mask_clr = __pgprot(0),
+				.flags = 0};
 
-	return __change_page_attr_set_clr(&cpa, 1);
+	/*
+	 * No alias checking needed for setting the present flag. Otherwise,
+	 * we may need to break up large pages for 64-bit kernel text
+	 * mappings (this adds to complexity if we want to do this from
+	 * atomic context especially). Let's keep it simple!
+	 */
+	return __change_page_attr_set_clr(&cpa, 0);
 }
 
 static int __set_pages_np(struct page *page, int numpages)
 {
-	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+	unsigned long tempaddr = (unsigned long) page_address(page);
+	struct cpa_data cpa = { .vaddr = &tempaddr,
 				.numpages = numpages,
 				.mask_set = __pgprot(0),
-				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
+				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+				.flags = 0};
 
-	return __change_page_attr_set_clr(&cpa, 1);
+	/*
+	 * No alias checking needed for clearing the present flag. Otherwise,
+	 * we may need to break up large pages for 64-bit kernel text
+	 * mappings (this adds to complexity if we want to do this from
+	 * atomic context especially). Let's keep it simple!
+	 */
+	return __change_page_attr_set_clr(&cpa, 0);
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
@@ -1003,11 +1101,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 
 	/*
 	 * The return value is ignored as the calls cannot fail.
-	 * Large pages are kept enabled at boot time, and are
-	 * split up quickly with DEBUG_PAGEALLOC. If a splitup
-	 * fails here (due to temporary memory shortage) no damage
-	 * is done because we just keep the largepage intact up
-	 * to the next attempt when it will likely be split up:
+	 * Large pages for identity mappings are not used at boot time
+	 * and hence no memory allocation is needed during a large page split.
 	 */
 	if (enable)
 		__set_pages_p(page, numpages);
@@ -1019,53 +1114,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 	 * but that can deadlock->flush only current cpu:
 	 */
 	__flush_tlb_all();
-
-	/*
-	 * Try to refill the page pool here. We can do this only after
-	 * the tlb flush.
-	 */
-	cpa_fill_pool(NULL);
 }
 
-#ifdef CONFIG_DEBUG_FS
-static int dpa_show(struct seq_file *m, void *v)
-{
-	seq_puts(m, "DEBUG_PAGEALLOC\n");
-	seq_printf(m, "pool_size     : %lu\n", pool_size);
-	seq_printf(m, "pool_pages    : %lu\n", pool_pages);
-	seq_printf(m, "pool_low      : %lu\n", pool_low);
-	seq_printf(m, "pool_used     : %lu\n", pool_used);
-	seq_printf(m, "pool_failed   : %lu\n", pool_failed);
-
-	return 0;
-}
-
-static int dpa_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, dpa_show, NULL);
-}
-
-static const struct file_operations dpa_fops = {
-	.open		= dpa_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-static int __init debug_pagealloc_proc_init(void)
-{
-	struct dentry *de;
-
-	de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
-				 &dpa_fops);
-	if (!de)
-		return -ENOMEM;
-
-	return 0;
-}
-__initcall(debug_pagealloc_proc_init);
-#endif
-
 #ifdef CONFIG_HIBERNATION
 
 bool kernel_page_present(struct page *page)