diff options
Diffstat (limited to 'arch/x86/mm/pageattr.c')
-rw-r--r-- | arch/x86/mm/pageattr.c | 463 |
1 file changed, 254 insertions, 209 deletions
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 43e2f8483e4f..a9ec89c3fbca 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -25,15 +25,27 @@ | |||
25 | * The current flushing context - we pass it instead of 5 arguments: | 25 | * The current flushing context - we pass it instead of 5 arguments: |
26 | */ | 26 | */ |
27 | struct cpa_data { | 27 | struct cpa_data { |
28 | unsigned long vaddr; | 28 | unsigned long *vaddr; |
29 | pgprot_t mask_set; | 29 | pgprot_t mask_set; |
30 | pgprot_t mask_clr; | 30 | pgprot_t mask_clr; |
31 | int numpages; | 31 | int numpages; |
32 | int flushtlb; | 32 | int flags; |
33 | unsigned long pfn; | 33 | unsigned long pfn; |
34 | unsigned force_split : 1; | 34 | unsigned force_split : 1; |
35 | int curpage; | ||
35 | }; | 36 | }; |
36 | 37 | ||
38 | /* | ||
39 | * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings) | ||
40 | * using cpa_lock, so that no other cpu with stale large tlb entries can | ||
41 | * change the page attribute in parallel with some other cpu that is | ||
42 | * splitting a large page entry along with changing the attribute. | ||
43 | */ | ||
44 | static DEFINE_SPINLOCK(cpa_lock); | ||
45 | |||
46 | #define CPA_FLUSHTLB 1 | ||
47 | #define CPA_ARRAY 2 | ||
48 | |||
37 | #ifdef CONFIG_PROC_FS | 49 | #ifdef CONFIG_PROC_FS |
38 | static unsigned long direct_pages_count[PG_LEVEL_NUM]; | 50 | static unsigned long direct_pages_count[PG_LEVEL_NUM]; |
39 | 51 | ||
@@ -84,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void) | |||
84 | 96 | ||
85 | static inline unsigned long highmap_end_pfn(void) | 97 | static inline unsigned long highmap_end_pfn(void) |
86 | { | 98 | { |
87 | return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; | 99 | return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT; |
88 | } | 100 | } |
89 | 101 | ||
90 | #endif | 102 | #endif |
@@ -190,6 +202,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) | |||
190 | } | 202 | } |
191 | } | 203 | } |
192 | 204 | ||
205 | static void cpa_flush_array(unsigned long *start, int numpages, int cache) | ||
206 | { | ||
207 | unsigned int i, level; | ||
208 | unsigned long *addr; | ||
209 | |||
210 | BUG_ON(irqs_disabled()); | ||
211 | |||
212 | on_each_cpu(__cpa_flush_range, NULL, 1); | ||
213 | |||
214 | if (!cache) | ||
215 | return; | ||
216 | |||
217 | /* 4M threshold */ | ||
218 | if (numpages >= 1024) { | ||
219 | if (boot_cpu_data.x86_model >= 4) | ||
220 | wbinvd(); | ||
221 | return; | ||
222 | } | ||
223 | /* | ||
224 | * We only need to flush on one CPU, | ||
225 | * clflush is a MESI-coherent instruction that | ||
226 | * will cause all other CPUs to flush the same | ||
227 | * cachelines: | ||
228 | */ | ||
229 | for (i = 0, addr = start; i < numpages; i++, addr++) { | ||
230 | pte_t *pte = lookup_address(*addr, &level); | ||
231 | |||
232 | /* | ||
233 | * Only flush present addresses: | ||
234 | */ | ||
235 | if (pte && (pte_val(*pte) & _PAGE_PRESENT)) | ||
236 | clflush_cache_range((void *) *addr, PAGE_SIZE); | ||
237 | } | ||
238 | } | ||
239 | |||
193 | /* | 240 | /* |
194 | * Certain areas of memory on x86 require very specific protection flags, | 241 | * Certain areas of memory on x86 require very specific protection flags, |
195 | * for example the BIOS area or kernel text. Callers don't always get this | 242 | * for example the BIOS area or kernel text. Callers don't always get this |
@@ -398,7 +445,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
398 | */ | 445 | */ |
399 | new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); | 446 | new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot)); |
400 | __set_pmd_pte(kpte, address, new_pte); | 447 | __set_pmd_pte(kpte, address, new_pte); |
401 | cpa->flushtlb = 1; | 448 | cpa->flags |= CPA_FLUSHTLB; |
402 | do_split = 0; | 449 | do_split = 0; |
403 | } | 450 | } |
404 | 451 | ||
@@ -408,84 +455,6 @@ out_unlock: | |||
408 | return do_split; | 455 | return do_split; |
409 | } | 456 | } |
410 | 457 | ||
411 | static LIST_HEAD(page_pool); | ||
412 | static unsigned long pool_size, pool_pages, pool_low; | ||
413 | static unsigned long pool_used, pool_failed; | ||
414 | |||
415 | static void cpa_fill_pool(struct page **ret) | ||
416 | { | ||
417 | gfp_t gfp = GFP_KERNEL; | ||
418 | unsigned long flags; | ||
419 | struct page *p; | ||
420 | |||
421 | /* | ||
422 | * Avoid recursion (on debug-pagealloc) and also signal | ||
423 | * our priority to get to these pagetables: | ||
424 | */ | ||
425 | if (current->flags & PF_MEMALLOC) | ||
426 | return; | ||
427 | current->flags |= PF_MEMALLOC; | ||
428 | |||
429 | /* | ||
430 | * Allocate atomically from atomic contexts: | ||
431 | */ | ||
432 | if (in_atomic() || irqs_disabled() || debug_pagealloc) | ||
433 | gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN; | ||
434 | |||
435 | while (pool_pages < pool_size || (ret && !*ret)) { | ||
436 | p = alloc_pages(gfp, 0); | ||
437 | if (!p) { | ||
438 | pool_failed++; | ||
439 | break; | ||
440 | } | ||
441 | /* | ||
442 | * If the call site needs a page right now, provide it: | ||
443 | */ | ||
444 | if (ret && !*ret) { | ||
445 | *ret = p; | ||
446 | continue; | ||
447 | } | ||
448 | spin_lock_irqsave(&pgd_lock, flags); | ||
449 | list_add(&p->lru, &page_pool); | ||
450 | pool_pages++; | ||
451 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
452 | } | ||
453 | |||
454 | current->flags &= ~PF_MEMALLOC; | ||
455 | } | ||
456 | |||
457 | #define SHIFT_MB (20 - PAGE_SHIFT) | ||
458 | #define ROUND_MB_GB ((1 << 10) - 1) | ||
459 | #define SHIFT_MB_GB 10 | ||
460 | #define POOL_PAGES_PER_GB 16 | ||
461 | |||
462 | void __init cpa_init(void) | ||
463 | { | ||
464 | struct sysinfo si; | ||
465 | unsigned long gb; | ||
466 | |||
467 | si_meminfo(&si); | ||
468 | /* | ||
469 | * Calculate the number of pool pages: | ||
470 | * | ||
471 | * Convert totalram (nr of pages) to MiB and round to the next | ||
472 | * GiB. Shift MiB to GiB and multiply the result by | ||
473 | * POOL_PAGES_PER_GB: | ||
474 | */ | ||
475 | if (debug_pagealloc) { | ||
476 | gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB; | ||
477 | pool_size = POOL_PAGES_PER_GB * gb; | ||
478 | } else { | ||
479 | pool_size = 1; | ||
480 | } | ||
481 | pool_low = pool_size; | ||
482 | |||
483 | cpa_fill_pool(NULL); | ||
484 | printk(KERN_DEBUG | ||
485 | "CPA: page pool initialized %lu of %lu pages preallocated\n", | ||
486 | pool_pages, pool_size); | ||
487 | } | ||
488 | |||
489 | static int split_large_page(pte_t *kpte, unsigned long address) | 458 | static int split_large_page(pte_t *kpte, unsigned long address) |
490 | { | 459 | { |
491 | unsigned long flags, pfn, pfninc = 1; | 460 | unsigned long flags, pfn, pfninc = 1; |
@@ -494,28 +463,15 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
494 | pgprot_t ref_prot; | 463 | pgprot_t ref_prot; |
495 | struct page *base; | 464 | struct page *base; |
496 | 465 | ||
497 | /* | 466 | if (!debug_pagealloc) |
498 | * Get a page from the pool. The pool list is protected by the | 467 | spin_unlock(&cpa_lock); |
499 | * pgd_lock, which we have to take anyway for the split | 468 | base = alloc_pages(GFP_KERNEL, 0); |
500 | * operation: | 469 | if (!debug_pagealloc) |
501 | */ | 470 | spin_lock(&cpa_lock); |
502 | spin_lock_irqsave(&pgd_lock, flags); | 471 | if (!base) |
503 | if (list_empty(&page_pool)) { | 472 | return -ENOMEM; |
504 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
505 | base = NULL; | ||
506 | cpa_fill_pool(&base); | ||
507 | if (!base) | ||
508 | return -ENOMEM; | ||
509 | spin_lock_irqsave(&pgd_lock, flags); | ||
510 | } else { | ||
511 | base = list_first_entry(&page_pool, struct page, lru); | ||
512 | list_del(&base->lru); | ||
513 | pool_pages--; | ||
514 | |||
515 | if (pool_pages < pool_low) | ||
516 | pool_low = pool_pages; | ||
517 | } | ||
518 | 473 | ||
474 | spin_lock_irqsave(&pgd_lock, flags); | ||
519 | /* | 475 | /* |
520 | * Check for races, another CPU might have split this page | 476 | * Check for races, another CPU might have split this page |
521 | * up for us already: | 477 | * up for us already: |
@@ -572,11 +528,8 @@ out_unlock: | |||
572 | * If we dropped out via the lookup_address check under | 528 | * If we dropped out via the lookup_address check under |
573 | * pgd_lock then stick the page back into the pool: | 529 | * pgd_lock then stick the page back into the pool: |
574 | */ | 530 | */ |
575 | if (base) { | 531 | if (base) |
576 | list_add(&base->lru, &page_pool); | 532 | __free_page(base); |
577 | pool_pages++; | ||
578 | } else | ||
579 | pool_used++; | ||
580 | spin_unlock_irqrestore(&pgd_lock, flags); | 533 | spin_unlock_irqrestore(&pgd_lock, flags); |
581 | 534 | ||
582 | return 0; | 535 | return 0; |
@@ -584,11 +537,16 @@ out_unlock: | |||
584 | 537 | ||
585 | static int __change_page_attr(struct cpa_data *cpa, int primary) | 538 | static int __change_page_attr(struct cpa_data *cpa, int primary) |
586 | { | 539 | { |
587 | unsigned long address = cpa->vaddr; | 540 | unsigned long address; |
588 | int do_split, err; | 541 | int do_split, err; |
589 | unsigned int level; | 542 | unsigned int level; |
590 | pte_t *kpte, old_pte; | 543 | pte_t *kpte, old_pte; |
591 | 544 | ||
545 | if (cpa->flags & CPA_ARRAY) | ||
546 | address = cpa->vaddr[cpa->curpage]; | ||
547 | else | ||
548 | address = *cpa->vaddr; | ||
549 | |||
592 | repeat: | 550 | repeat: |
593 | kpte = lookup_address(address, &level); | 551 | kpte = lookup_address(address, &level); |
594 | if (!kpte) | 552 | if (!kpte) |
@@ -600,7 +558,7 @@ repeat: | |||
600 | return 0; | 558 | return 0; |
601 | WARN(1, KERN_WARNING "CPA: called for zero pte. " | 559 | WARN(1, KERN_WARNING "CPA: called for zero pte. " |
602 | "vaddr = %lx cpa->vaddr = %lx\n", address, | 560 | "vaddr = %lx cpa->vaddr = %lx\n", address, |
603 | cpa->vaddr); | 561 | *cpa->vaddr); |
604 | return -EINVAL; | 562 | return -EINVAL; |
605 | } | 563 | } |
606 | 564 | ||
@@ -626,7 +584,7 @@ repeat: | |||
626 | */ | 584 | */ |
627 | if (pte_val(old_pte) != pte_val(new_pte)) { | 585 | if (pte_val(old_pte) != pte_val(new_pte)) { |
628 | set_pte_atomic(kpte, new_pte); | 586 | set_pte_atomic(kpte, new_pte); |
629 | cpa->flushtlb = 1; | 587 | cpa->flags |= CPA_FLUSHTLB; |
630 | } | 588 | } |
631 | cpa->numpages = 1; | 589 | cpa->numpages = 1; |
632 | return 0; | 590 | return 0; |
@@ -650,7 +608,25 @@ repeat: | |||
650 | */ | 608 | */ |
651 | err = split_large_page(kpte, address); | 609 | err = split_large_page(kpte, address); |
652 | if (!err) { | 610 | if (!err) { |
653 | cpa->flushtlb = 1; | 611 | /* |
612 | * Do a global flush tlb after splitting the large page | ||
613 | * and before we do the actual change page attribute in the PTE. | ||
614 | * | ||
615 | * Without this, we violate the TLB application note, which says | ||
616 | * "The TLBs may contain both ordinary and large-page | ||
617 | * translations for a 4-KByte range of linear addresses. This | ||
618 | * may occur if software modifies the paging structures so that | ||
619 | * the page size used for the address range changes. If the two | ||
620 | * translations differ with respect to page frame or attributes | ||
621 | * (e.g., permissions), processor behavior is undefined and may | ||
622 | * be implementation-specific." | ||
623 | * | ||
624 | * We do this global tlb flush inside the cpa_lock, so that no | ||
625 | * other cpu with stale tlb entries can change, in parallel, the | ||
626 | * page attribute of an address that also falls into the | ||
627 | * just split large page entry. | ||
628 | */ | ||
629 | flush_tlb_all(); | ||
654 | goto repeat; | 630 | goto repeat; |
655 | } | 631 | } |
656 | 632 | ||
@@ -663,6 +639,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
663 | { | 639 | { |
664 | struct cpa_data alias_cpa; | 640 | struct cpa_data alias_cpa; |
665 | int ret = 0; | 641 | int ret = 0; |
642 | unsigned long temp_cpa_vaddr, vaddr; | ||
666 | 643 | ||
667 | if (cpa->pfn >= max_pfn_mapped) | 644 | if (cpa->pfn >= max_pfn_mapped) |
668 | return 0; | 645 | return 0; |
@@ -675,16 +652,24 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
675 | * No need to redo, when the primary call touched the direct | 652 | * No need to redo, when the primary call touched the direct |
676 | * mapping already: | 653 | * mapping already: |
677 | */ | 654 | */ |
678 | if (!(within(cpa->vaddr, PAGE_OFFSET, | 655 | if (cpa->flags & CPA_ARRAY) |
656 | vaddr = cpa->vaddr[cpa->curpage]; | ||
657 | else | ||
658 | vaddr = *cpa->vaddr; | ||
659 | |||
660 | if (!(within(vaddr, PAGE_OFFSET, | ||
679 | PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) | 661 | PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) |
680 | #ifdef CONFIG_X86_64 | 662 | #ifdef CONFIG_X86_64 |
681 | || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32), | 663 | || within(vaddr, PAGE_OFFSET + (1UL<<32), |
682 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) | 664 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) |
683 | #endif | 665 | #endif |
684 | )) { | 666 | )) { |
685 | 667 | ||
686 | alias_cpa = *cpa; | 668 | alias_cpa = *cpa; |
687 | alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | 669 | temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); |
670 | alias_cpa.vaddr = &temp_cpa_vaddr; | ||
671 | alias_cpa.flags &= ~CPA_ARRAY; | ||
672 | |||
688 | 673 | ||
689 | ret = __change_page_attr_set_clr(&alias_cpa, 0); | 674 | ret = __change_page_attr_set_clr(&alias_cpa, 0); |
690 | } | 675 | } |
@@ -696,7 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
696 | * No need to redo, when the primary call touched the high | 681 | * No need to redo, when the primary call touched the high |
697 | * mapping already: | 682 | * mapping already: |
698 | */ | 683 | */ |
699 | if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end)) | 684 | if (within(vaddr, (unsigned long) _text, (unsigned long) _end)) |
700 | return 0; | 685 | return 0; |
701 | 686 | ||
702 | /* | 687 | /* |
@@ -707,8 +692,9 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
707 | return 0; | 692 | return 0; |
708 | 693 | ||
709 | alias_cpa = *cpa; | 694 | alias_cpa = *cpa; |
710 | alias_cpa.vaddr = | 695 | temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; |
711 | (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base; | 696 | alias_cpa.vaddr = &temp_cpa_vaddr; |
697 | alias_cpa.flags &= ~CPA_ARRAY; | ||
712 | 698 | ||
713 | /* | 699 | /* |
714 | * The high mapping range is imprecise, so ignore the return value. | 700 | * The high mapping range is imprecise, so ignore the return value. |
@@ -728,8 +714,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
728 | * preservation check. | 714 | * preservation check. |
729 | */ | 715 | */ |
730 | cpa->numpages = numpages; | 716 | cpa->numpages = numpages; |
717 | /* for array changes, we can't use large page */ | ||
718 | if (cpa->flags & CPA_ARRAY) | ||
719 | cpa->numpages = 1; | ||
731 | 720 | ||
721 | if (!debug_pagealloc) | ||
722 | spin_lock(&cpa_lock); | ||
732 | ret = __change_page_attr(cpa, checkalias); | 723 | ret = __change_page_attr(cpa, checkalias); |
724 | if (!debug_pagealloc) | ||
725 | spin_unlock(&cpa_lock); | ||
733 | if (ret) | 726 | if (ret) |
734 | return ret; | 727 | return ret; |
735 | 728 | ||
@@ -746,7 +739,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias) | |||
746 | */ | 739 | */ |
747 | BUG_ON(cpa->numpages > numpages); | 740 | BUG_ON(cpa->numpages > numpages); |
748 | numpages -= cpa->numpages; | 741 | numpages -= cpa->numpages; |
749 | cpa->vaddr += cpa->numpages * PAGE_SIZE; | 742 | if (cpa->flags & CPA_ARRAY) |
743 | cpa->curpage++; | ||
744 | else | ||
745 | *cpa->vaddr += cpa->numpages * PAGE_SIZE; | ||
746 | |||
750 | } | 747 | } |
751 | return 0; | 748 | return 0; |
752 | } | 749 | } |
@@ -757,9 +754,9 @@ static inline int cache_attr(pgprot_t attr) | |||
757 | (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); | 754 | (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD); |
758 | } | 755 | } |
759 | 756 | ||
760 | static int change_page_attr_set_clr(unsigned long addr, int numpages, | 757 | static int change_page_attr_set_clr(unsigned long *addr, int numpages, |
761 | pgprot_t mask_set, pgprot_t mask_clr, | 758 | pgprot_t mask_set, pgprot_t mask_clr, |
762 | int force_split) | 759 | int force_split, int array) |
763 | { | 760 | { |
764 | struct cpa_data cpa; | 761 | struct cpa_data cpa; |
765 | int ret, cache, checkalias; | 762 | int ret, cache, checkalias; |
@@ -774,21 +771,38 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
774 | return 0; | 771 | return 0; |
775 | 772 | ||
776 | /* Ensure we are PAGE_SIZE aligned */ | 773 | /* Ensure we are PAGE_SIZE aligned */ |
777 | if (addr & ~PAGE_MASK) { | 774 | if (!array) { |
778 | addr &= PAGE_MASK; | 775 | if (*addr & ~PAGE_MASK) { |
779 | /* | 776 | *addr &= PAGE_MASK; |
780 | * People should not be passing in unaligned addresses: | 777 | /* |
781 | */ | 778 | * People should not be passing in unaligned addresses: |
782 | WARN_ON_ONCE(1); | 779 | */ |
780 | WARN_ON_ONCE(1); | ||
781 | } | ||
782 | } else { | ||
783 | int i; | ||
784 | for (i = 0; i < numpages; i++) { | ||
785 | if (addr[i] & ~PAGE_MASK) { | ||
786 | addr[i] &= PAGE_MASK; | ||
787 | WARN_ON_ONCE(1); | ||
788 | } | ||
789 | } | ||
783 | } | 790 | } |
784 | 791 | ||
792 | /* Must avoid aliasing mappings in the highmem code */ | ||
793 | kmap_flush_unused(); | ||
794 | |||
785 | cpa.vaddr = addr; | 795 | cpa.vaddr = addr; |
786 | cpa.numpages = numpages; | 796 | cpa.numpages = numpages; |
787 | cpa.mask_set = mask_set; | 797 | cpa.mask_set = mask_set; |
788 | cpa.mask_clr = mask_clr; | 798 | cpa.mask_clr = mask_clr; |
789 | cpa.flushtlb = 0; | 799 | cpa.flags = 0; |
800 | cpa.curpage = 0; | ||
790 | cpa.force_split = force_split; | 801 | cpa.force_split = force_split; |
791 | 802 | ||
803 | if (array) | ||
804 | cpa.flags |= CPA_ARRAY; | ||
805 | |||
792 | /* No alias checking for _NX bit modifications */ | 806 | /* No alias checking for _NX bit modifications */ |
793 | checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; | 807 | checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX; |
794 | 808 | ||
@@ -797,7 +811,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
797 | /* | 811 | /* |
798 | * Check whether we really changed something: | 812 | * Check whether we really changed something: |
799 | */ | 813 | */ |
800 | if (!cpa.flushtlb) | 814 | if (!(cpa.flags & CPA_FLUSHTLB)) |
801 | goto out; | 815 | goto out; |
802 | 816 | ||
803 | /* | 817 | /* |
@@ -812,27 +826,30 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages, | |||
812 | * error case we fall back to cpa_flush_all (which uses | 826 | * error case we fall back to cpa_flush_all (which uses |
813 | * wbinvd): | 827 | * wbinvd): |
814 | */ | 828 | */ |
815 | if (!ret && cpu_has_clflush) | 829 | if (!ret && cpu_has_clflush) { |
816 | cpa_flush_range(addr, numpages, cache); | 830 | if (cpa.flags & CPA_ARRAY) |
817 | else | 831 | cpa_flush_array(addr, numpages, cache); |
832 | else | ||
833 | cpa_flush_range(*addr, numpages, cache); | ||
834 | } else | ||
818 | cpa_flush_all(cache); | 835 | cpa_flush_all(cache); |
819 | 836 | ||
820 | out: | 837 | out: |
821 | cpa_fill_pool(NULL); | ||
822 | |||
823 | return ret; | 838 | return ret; |
824 | } | 839 | } |
825 | 840 | ||
826 | static inline int change_page_attr_set(unsigned long addr, int numpages, | 841 | static inline int change_page_attr_set(unsigned long *addr, int numpages, |
827 | pgprot_t mask) | 842 | pgprot_t mask, int array) |
828 | { | 843 | { |
829 | return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0); | 844 | return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0, |
845 | array); | ||
830 | } | 846 | } |
831 | 847 | ||
832 | static inline int change_page_attr_clear(unsigned long addr, int numpages, | 848 | static inline int change_page_attr_clear(unsigned long *addr, int numpages, |
833 | pgprot_t mask) | 849 | pgprot_t mask, int array) |
834 | { | 850 | { |
835 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0); | 851 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0, |
852 | array); | ||
836 | } | 853 | } |
837 | 854 | ||
838 | int _set_memory_uc(unsigned long addr, int numpages) | 855 | int _set_memory_uc(unsigned long addr, int numpages) |
@@ -840,8 +857,8 @@ int _set_memory_uc(unsigned long addr, int numpages) | |||
840 | /* | 857 | /* |
841 | * for now UC MINUS. see comments in ioremap_nocache() | 858 | * for now UC MINUS. see comments in ioremap_nocache() |
842 | */ | 859 | */ |
843 | return change_page_attr_set(addr, numpages, | 860 | return change_page_attr_set(&addr, numpages, |
844 | __pgprot(_PAGE_CACHE_UC_MINUS)); | 861 | __pgprot(_PAGE_CACHE_UC_MINUS), 0); |
845 | } | 862 | } |
846 | 863 | ||
847 | int set_memory_uc(unsigned long addr, int numpages) | 864 | int set_memory_uc(unsigned long addr, int numpages) |
@@ -857,10 +874,48 @@ int set_memory_uc(unsigned long addr, int numpages) | |||
857 | } | 874 | } |
858 | EXPORT_SYMBOL(set_memory_uc); | 875 | EXPORT_SYMBOL(set_memory_uc); |
859 | 876 | ||
877 | int set_memory_array_uc(unsigned long *addr, int addrinarray) | ||
878 | { | ||
879 | unsigned long start; | ||
880 | unsigned long end; | ||
881 | int i; | ||
882 | /* | ||
883 | * for now UC MINUS. see comments in ioremap_nocache() | ||
884 | */ | ||
885 | for (i = 0; i < addrinarray; i++) { | ||
886 | start = __pa(addr[i]); | ||
887 | for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { | ||
888 | if (end != __pa(addr[i + 1])) | ||
889 | break; | ||
890 | i++; | ||
891 | } | ||
892 | if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) | ||
893 | goto out; | ||
894 | } | ||
895 | |||
896 | return change_page_attr_set(addr, addrinarray, | ||
897 | __pgprot(_PAGE_CACHE_UC_MINUS), 1); | ||
898 | out: | ||
899 | for (i = 0; i < addrinarray; i++) { | ||
900 | unsigned long tmp = __pa(addr[i]); | ||
901 | |||
902 | if (tmp == start) | ||
903 | break; | ||
904 | for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { | ||
905 | if (end != __pa(addr[i + 1])) | ||
906 | break; | ||
907 | i++; | ||
908 | } | ||
909 | free_memtype(tmp, end); | ||
910 | } | ||
911 | return -EINVAL; | ||
912 | } | ||
913 | EXPORT_SYMBOL(set_memory_array_uc); | ||
914 | |||
860 | int _set_memory_wc(unsigned long addr, int numpages) | 915 | int _set_memory_wc(unsigned long addr, int numpages) |
861 | { | 916 | { |
862 | return change_page_attr_set(addr, numpages, | 917 | return change_page_attr_set(&addr, numpages, |
863 | __pgprot(_PAGE_CACHE_WC)); | 918 | __pgprot(_PAGE_CACHE_WC), 0); |
864 | } | 919 | } |
865 | 920 | ||
866 | int set_memory_wc(unsigned long addr, int numpages) | 921 | int set_memory_wc(unsigned long addr, int numpages) |
@@ -878,8 +933,8 @@ EXPORT_SYMBOL(set_memory_wc); | |||
878 | 933 | ||
879 | int _set_memory_wb(unsigned long addr, int numpages) | 934 | int _set_memory_wb(unsigned long addr, int numpages) |
880 | { | 935 | { |
881 | return change_page_attr_clear(addr, numpages, | 936 | return change_page_attr_clear(&addr, numpages, |
882 | __pgprot(_PAGE_CACHE_MASK)); | 937 | __pgprot(_PAGE_CACHE_MASK), 0); |
883 | } | 938 | } |
884 | 939 | ||
885 | int set_memory_wb(unsigned long addr, int numpages) | 940 | int set_memory_wb(unsigned long addr, int numpages) |
@@ -890,37 +945,59 @@ int set_memory_wb(unsigned long addr, int numpages) | |||
890 | } | 945 | } |
891 | EXPORT_SYMBOL(set_memory_wb); | 946 | EXPORT_SYMBOL(set_memory_wb); |
892 | 947 | ||
948 | int set_memory_array_wb(unsigned long *addr, int addrinarray) | ||
949 | { | ||
950 | int i; | ||
951 | |||
952 | for (i = 0; i < addrinarray; i++) { | ||
953 | unsigned long start = __pa(addr[i]); | ||
954 | unsigned long end; | ||
955 | |||
956 | for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) { | ||
957 | if (end != __pa(addr[i + 1])) | ||
958 | break; | ||
959 | i++; | ||
960 | } | ||
961 | free_memtype(start, end); | ||
962 | } | ||
963 | return change_page_attr_clear(addr, addrinarray, | ||
964 | __pgprot(_PAGE_CACHE_MASK), 1); | ||
965 | } | ||
966 | EXPORT_SYMBOL(set_memory_array_wb); | ||
967 | |||
893 | int set_memory_x(unsigned long addr, int numpages) | 968 | int set_memory_x(unsigned long addr, int numpages) |
894 | { | 969 | { |
895 | return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX)); | 970 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0); |
896 | } | 971 | } |
897 | EXPORT_SYMBOL(set_memory_x); | 972 | EXPORT_SYMBOL(set_memory_x); |
898 | 973 | ||
899 | int set_memory_nx(unsigned long addr, int numpages) | 974 | int set_memory_nx(unsigned long addr, int numpages) |
900 | { | 975 | { |
901 | return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX)); | 976 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0); |
902 | } | 977 | } |
903 | EXPORT_SYMBOL(set_memory_nx); | 978 | EXPORT_SYMBOL(set_memory_nx); |
904 | 979 | ||
905 | int set_memory_ro(unsigned long addr, int numpages) | 980 | int set_memory_ro(unsigned long addr, int numpages) |
906 | { | 981 | { |
907 | return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW)); | 982 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0); |
908 | } | 983 | } |
984 | EXPORT_SYMBOL_GPL(set_memory_ro); | ||
909 | 985 | ||
910 | int set_memory_rw(unsigned long addr, int numpages) | 986 | int set_memory_rw(unsigned long addr, int numpages) |
911 | { | 987 | { |
912 | return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW)); | 988 | return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0); |
913 | } | 989 | } |
990 | EXPORT_SYMBOL_GPL(set_memory_rw); | ||
914 | 991 | ||
915 | int set_memory_np(unsigned long addr, int numpages) | 992 | int set_memory_np(unsigned long addr, int numpages) |
916 | { | 993 | { |
917 | return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT)); | 994 | return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0); |
918 | } | 995 | } |
919 | 996 | ||
920 | int set_memory_4k(unsigned long addr, int numpages) | 997 | int set_memory_4k(unsigned long addr, int numpages) |
921 | { | 998 | { |
922 | return change_page_attr_set_clr(addr, numpages, __pgprot(0), | 999 | return change_page_attr_set_clr(&addr, numpages, __pgprot(0), |
923 | __pgprot(0), 1); | 1000 | __pgprot(0), 1, 0); |
924 | } | 1001 | } |
925 | 1002 | ||
926 | int set_pages_uc(struct page *page, int numpages) | 1003 | int set_pages_uc(struct page *page, int numpages) |
@@ -973,22 +1050,38 @@ int set_pages_rw(struct page *page, int numpages) | |||
973 | 1050 | ||
974 | static int __set_pages_p(struct page *page, int numpages) | 1051 | static int __set_pages_p(struct page *page, int numpages) |
975 | { | 1052 | { |
976 | struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), | 1053 | unsigned long tempaddr = (unsigned long) page_address(page); |
1054 | struct cpa_data cpa = { .vaddr = &tempaddr, | ||
977 | .numpages = numpages, | 1055 | .numpages = numpages, |
978 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1056 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
979 | .mask_clr = __pgprot(0)}; | 1057 | .mask_clr = __pgprot(0), |
1058 | .flags = 0}; | ||
980 | 1059 | ||
981 | return __change_page_attr_set_clr(&cpa, 1); | 1060 | /* |
1061 | * No alias checking needed for setting present flag. Otherwise, | ||
1062 | * we may need to break large pages for 64-bit kernel text | ||
1063 | * mappings (this adds to complexity if we want to do this from | ||
1064 | * atomic context especially). Let's keep it simple! | ||
1065 | */ | ||
1066 | return __change_page_attr_set_clr(&cpa, 0); | ||
982 | } | 1067 | } |
983 | 1068 | ||
984 | static int __set_pages_np(struct page *page, int numpages) | 1069 | static int __set_pages_np(struct page *page, int numpages) |
985 | { | 1070 | { |
986 | struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page), | 1071 | unsigned long tempaddr = (unsigned long) page_address(page); |
1072 | struct cpa_data cpa = { .vaddr = &tempaddr, | ||
987 | .numpages = numpages, | 1073 | .numpages = numpages, |
988 | .mask_set = __pgprot(0), | 1074 | .mask_set = __pgprot(0), |
989 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)}; | 1075 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
1076 | .flags = 0}; | ||
990 | 1077 | ||
991 | return __change_page_attr_set_clr(&cpa, 1); | 1078 | /* |
1079 | * No alias checking needed for setting not present flag. Otherwise, | ||
1080 | * we may need to break large pages for 64-bit kernel text | ||
1081 | * mappings (this adds to complexity if we want to do this from | ||
1082 | * atomic context especially). Let's keep it simple! | ||
1083 | */ | ||
1084 | return __change_page_attr_set_clr(&cpa, 0); | ||
992 | } | 1085 | } |
993 | 1086 | ||
994 | void kernel_map_pages(struct page *page, int numpages, int enable) | 1087 | void kernel_map_pages(struct page *page, int numpages, int enable) |
@@ -1008,11 +1101,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
1008 | 1101 | ||
1009 | /* | 1102 | /* |
1010 | * The return value is ignored as the calls cannot fail. | 1103 | * The return value is ignored as the calls cannot fail. |
1011 | * Large pages are kept enabled at boot time, and are | 1104 | * Large pages for identity mappings are not used at boot time |
1012 | * split up quickly with DEBUG_PAGEALLOC. If a splitup | 1105 | * and hence no memory allocations during large page split. |
1013 | * fails here (due to temporary memory shortage) no damage | ||
1014 | * is done because we just keep the largepage intact up | ||
1015 | * to the next attempt when it will likely be split up: | ||
1016 | */ | 1106 | */ |
1017 | if (enable) | 1107 | if (enable) |
1018 | __set_pages_p(page, numpages); | 1108 | __set_pages_p(page, numpages); |
@@ -1024,53 +1114,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable) | |||
1024 | * but that can deadlock->flush only current cpu: | 1114 | * but that can deadlock->flush only current cpu: |
1025 | */ | 1115 | */ |
1026 | __flush_tlb_all(); | 1116 | __flush_tlb_all(); |
1027 | |||
1028 | /* | ||
1029 | * Try to refill the page pool here. We can do this only after | ||
1030 | * the tlb flush. | ||
1031 | */ | ||
1032 | cpa_fill_pool(NULL); | ||
1033 | } | 1117 | } |
1034 | 1118 | ||
1035 | #ifdef CONFIG_DEBUG_FS | ||
1036 | static int dpa_show(struct seq_file *m, void *v) | ||
1037 | { | ||
1038 | seq_puts(m, "DEBUG_PAGEALLOC\n"); | ||
1039 | seq_printf(m, "pool_size : %lu\n", pool_size); | ||
1040 | seq_printf(m, "pool_pages : %lu\n", pool_pages); | ||
1041 | seq_printf(m, "pool_low : %lu\n", pool_low); | ||
1042 | seq_printf(m, "pool_used : %lu\n", pool_used); | ||
1043 | seq_printf(m, "pool_failed : %lu\n", pool_failed); | ||
1044 | |||
1045 | return 0; | ||
1046 | } | ||
1047 | |||
1048 | static int dpa_open(struct inode *inode, struct file *filp) | ||
1049 | { | ||
1050 | return single_open(filp, dpa_show, NULL); | ||
1051 | } | ||
1052 | |||
1053 | static const struct file_operations dpa_fops = { | ||
1054 | .open = dpa_open, | ||
1055 | .read = seq_read, | ||
1056 | .llseek = seq_lseek, | ||
1057 | .release = single_release, | ||
1058 | }; | ||
1059 | |||
1060 | static int __init debug_pagealloc_proc_init(void) | ||
1061 | { | ||
1062 | struct dentry *de; | ||
1063 | |||
1064 | de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL, | ||
1065 | &dpa_fops); | ||
1066 | if (!de) | ||
1067 | return -ENOMEM; | ||
1068 | |||
1069 | return 0; | ||
1070 | } | ||
1071 | __initcall(debug_pagealloc_proc_init); | ||
1072 | #endif | ||
1073 | |||
1074 | #ifdef CONFIG_HIBERNATION | 1119 | #ifdef CONFIG_HIBERNATION |
1075 | 1120 | ||
1076 | bool kernel_page_present(struct page *page) | 1121 | bool kernel_page_present(struct page *page) |