Diffstat (limited to 'arch/x86/mm/pageattr.c')
-rw-r--r--  arch/x86/mm/pageattr.c  490
 1 file changed, 270 insertions(+), 220 deletions(-)
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index 65c6e46bf059..a9ec89c3fbca 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -25,15 +25,27 @@
  * The current flushing context - we pass it instead of 5 arguments:
  */
 struct cpa_data {
-        unsigned long vaddr;
+        unsigned long *vaddr;
         pgprot_t mask_set;
         pgprot_t mask_clr;
         int numpages;
-        int flushtlb;
+        int flags;
         unsigned long pfn;
         unsigned force_split : 1;
+        int curpage;
 };
 
+/*
+ * Serialize cpa() (for !DEBUG_PAGEALLOC which uses large identity mappings)
+ * using cpa_lock. So that we don't allow any other cpu, with stale large tlb
+ * entries change the page attribute in parallel to some other cpu
+ * splitting a large page entry along with changing the attribute.
+ */
+static DEFINE_SPINLOCK(cpa_lock);
+
+#define CPA_FLUSHTLB 1
+#define CPA_ARRAY 2
+
 #ifdef CONFIG_PROC_FS
 static unsigned long direct_pages_count[PG_LEVEL_NUM];
 
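The single flushtlb boolean becomes a flags word so that more than one piece of state can ride along in cpa_data, and curpage becomes the cursor into the address array when CPA_ARRAY is set. A minimal sketch, not part of the patch, of how the two bits and the cursor are used together (it mirrors later hunks in this same diff):

/*
 * Illustration only: intended use of the new flags word and curpage cursor.
 */
static void cpa_flags_illustration(struct cpa_data *cpa)
{
        /* replaces the old "cpa->flushtlb = 1": record that a TLB flush is due */
        cpa->flags |= CPA_FLUSHTLB;

        /* CPA_ARRAY selects per-entry stepping instead of a linear range */
        if (cpa->flags & CPA_ARRAY)
                cpa->curpage++;
        else
                *cpa->vaddr += cpa->numpages * PAGE_SIZE;
}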
@@ -55,13 +67,19 @@ static void split_page_count(int level)
 
 int arch_report_meminfo(char *page)
 {
-        int n = sprintf(page, "DirectMap4k: %8lu\n"
-                        "DirectMap2M: %8lu\n",
-                        direct_pages_count[PG_LEVEL_4K],
-                        direct_pages_count[PG_LEVEL_2M]);
+        int n = sprintf(page, "DirectMap4k: %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_4K] << 2);
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+        n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_2M] << 11);
+#else
+        n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_2M] << 12);
+#endif
 #ifdef CONFIG_X86_64
-        n += sprintf(page + n, "DirectMap1G: %8lu\n",
-                        direct_pages_count[PG_LEVEL_1G]);
+        if (direct_gbpages)
+                n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
+                        direct_pages_count[PG_LEVEL_1G] << 20);
 #endif
         return n;
 }
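The shifts in this hunk convert a count of mappings at each level into kilobytes: direct_pages_count[PG_LEVEL_2M] << 11 multiplies by 2048 because a 2 MB mapping is 2048 kB, and so on for the other levels. A hedged restatement of the constants (the macro names here are invented for illustration and are not part of the patch):

/* Illustration only: the shift counts above are "mapping size in kB" as powers of two */
#define KB_PER_4K_PAGE   (1UL <<  2)    /*       4 kB */
#define KB_PER_2M_PAGE   (1UL << 11)    /*    2048 kB */
#define KB_PER_4M_PAGE   (1UL << 12)    /*    4096 kB */
#define KB_PER_1G_PAGE   (1UL << 20)    /* 1048576 kB */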
@@ -78,7 +96,7 @@ static inline unsigned long highmap_start_pfn(void)
 
 static inline unsigned long highmap_end_pfn(void)
 {
-        return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
+        return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
 }
 
 #endif
@@ -184,6 +202,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
         }
 }
 
+static void cpa_flush_array(unsigned long *start, int numpages, int cache)
+{
+        unsigned int i, level;
+        unsigned long *addr;
+
+        BUG_ON(irqs_disabled());
+
+        on_each_cpu(__cpa_flush_range, NULL, 1);
+
+        if (!cache)
+                return;
+
+        /* 4M threshold */
+        if (numpages >= 1024) {
+                if (boot_cpu_data.x86_model >= 4)
+                        wbinvd();
+                return;
+        }
+        /*
+         * We only need to flush on one CPU,
+         * clflush is a MESI-coherent instruction that
+         * will cause all other CPUs to flush the same
+         * cachelines:
+         */
+        for (i = 0, addr = start; i < numpages; i++, addr++) {
+                pte_t *pte = lookup_address(*addr, &level);
+
+                /*
+                 * Only flush present addresses:
+                 */
+                if (pte && (pte_val(*pte) & _PAGE_PRESENT))
+                        clflush_cache_range((void *) *addr, PAGE_SIZE);
+        }
+}
+
 /*
  * Certain areas of memory on x86 require very specific protection flags,
  * for example the BIOS area or kernel text. Callers don't always get this
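The cache-flush policy in cpa_flush_array() switches strategy on batch size: 1024 pages of 4 kB each is the "4M threshold" in the comment, beyond which a single wbinvd is assumed cheaper than looping clflush over every page. A minimal restatement of that decision, for illustration only (the helper name is invented; the patch open-codes the test):

/* Illustration only: flush policy used by cpa_flush_array above */
static inline int cpa_flush_whole_cache(int numpages)
{
        /* 1024 pages * 4 kB/page = 4 MB worth of clflush work */
        return numpages >= 1024;
}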
@@ -392,7 +445,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
          */
         new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
         __set_pmd_pte(kpte, address, new_pte);
-        cpa->flushtlb = 1;
+        cpa->flags |= CPA_FLUSHTLB;
         do_split = 0;
         }
 
@@ -402,84 +455,6 @@ out_unlock:
         return do_split;
 }
 
-static LIST_HEAD(page_pool);
-static unsigned long pool_size, pool_pages, pool_low;
-static unsigned long pool_used, pool_failed;
-
-static void cpa_fill_pool(struct page **ret)
-{
-        gfp_t gfp = GFP_KERNEL;
-        unsigned long flags;
-        struct page *p;
-
-        /*
-         * Avoid recursion (on debug-pagealloc) and also signal
-         * our priority to get to these pagetables:
-         */
-        if (current->flags & PF_MEMALLOC)
-                return;
-        current->flags |= PF_MEMALLOC;
-
-        /*
-         * Allocate atomically from atomic contexts:
-         */
-        if (in_atomic() || irqs_disabled() || debug_pagealloc)
-                gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-
-        while (pool_pages < pool_size || (ret && !*ret)) {
-                p = alloc_pages(gfp, 0);
-                if (!p) {
-                        pool_failed++;
-                        break;
-                }
-                /*
-                 * If the call site needs a page right now, provide it:
-                 */
-                if (ret && !*ret) {
-                        *ret = p;
-                        continue;
-                }
-                spin_lock_irqsave(&pgd_lock, flags);
-                list_add(&p->lru, &page_pool);
-                pool_pages++;
-                spin_unlock_irqrestore(&pgd_lock, flags);
-        }
-
-        current->flags &= ~PF_MEMALLOC;
-}
-
-#define SHIFT_MB (20 - PAGE_SHIFT)
-#define ROUND_MB_GB ((1 << 10) - 1)
-#define SHIFT_MB_GB 10
-#define POOL_PAGES_PER_GB 16
-
-void __init cpa_init(void)
-{
-        struct sysinfo si;
-        unsigned long gb;
-
-        si_meminfo(&si);
-        /*
-         * Calculate the number of pool pages:
-         *
-         * Convert totalram (nr of pages) to MiB and round to the next
-         * GiB. Shift MiB to Gib and multiply the result by
-         * POOL_PAGES_PER_GB:
-         */
-        if (debug_pagealloc) {
-                gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
-                pool_size = POOL_PAGES_PER_GB * gb;
-        } else {
-                pool_size = 1;
-        }
-        pool_low = pool_size;
-
-        cpa_fill_pool(NULL);
-        printk(KERN_DEBUG
-               "CPA: page pool initialized %lu of %lu pages preallocated\n",
-               pool_pages, pool_size);
-}
-
 static int split_large_page(pte_t *kpte, unsigned long address)
 {
         unsigned long flags, pfn, pfninc = 1;
@@ -488,28 +463,15 @@ static int split_large_page(pte_t *kpte, unsigned long address)
         pgprot_t ref_prot;
         struct page *base;
 
-        /*
-         * Get a page from the pool. The pool list is protected by the
-         * pgd_lock, which we have to take anyway for the split
-         * operation:
-         */
-        spin_lock_irqsave(&pgd_lock, flags);
-        if (list_empty(&page_pool)) {
-                spin_unlock_irqrestore(&pgd_lock, flags);
-                base = NULL;
-                cpa_fill_pool(&base);
-                if (!base)
-                        return -ENOMEM;
-                spin_lock_irqsave(&pgd_lock, flags);
-        } else {
-                base = list_first_entry(&page_pool, struct page, lru);
-                list_del(&base->lru);
-                pool_pages--;
-
-                if (pool_pages < pool_low)
-                        pool_low = pool_pages;
-        }
+        if (!debug_pagealloc)
+                spin_unlock(&cpa_lock);
+        base = alloc_pages(GFP_KERNEL, 0);
+        if (!debug_pagealloc)
+                spin_lock(&cpa_lock);
+        if (!base)
+                return -ENOMEM;
 
+        spin_lock_irqsave(&pgd_lock, flags);
         /*
          * Check for races, another CPU might have split this page
          * up for us already:
@@ -566,11 +528,8 @@ out_unlock:
          * If we dropped out via the lookup_address check under
          * pgd_lock then stick the page back into the pool:
          */
-        if (base) {
-                list_add(&base->lru, &page_pool);
-                pool_pages++;
-        } else
-                pool_used++;
+        if (base)
+                __free_page(base);
         spin_unlock_irqrestore(&pgd_lock, flags);
 
         return 0;
@@ -578,11 +537,16 @@ out_unlock:
 
 static int __change_page_attr(struct cpa_data *cpa, int primary)
 {
-        unsigned long address = cpa->vaddr;
+        unsigned long address;
         int do_split, err;
         unsigned int level;
         pte_t *kpte, old_pte;
 
+        if (cpa->flags & CPA_ARRAY)
+                address = cpa->vaddr[cpa->curpage];
+        else
+                address = *cpa->vaddr;
+
 repeat:
         kpte = lookup_address(address, &level);
         if (!kpte)
@@ -592,10 +556,9 @@ repeat:
         if (!pte_val(old_pte)) {
                 if (!primary)
                         return 0;
-                printk(KERN_WARNING "CPA: called for zero pte. "
-                       "vaddr = %lx cpa->vaddr = %lx\n", address,
-                       cpa->vaddr);
-                WARN_ON(1);
+                WARN(1, KERN_WARNING "CPA: called for zero pte. "
+                       "vaddr = %lx cpa->vaddr = %lx\n", address,
+                       *cpa->vaddr);
                 return -EINVAL;
         }
 
@@ -621,7 +584,7 @@ repeat:
          */
         if (pte_val(old_pte) != pte_val(new_pte)) {
                 set_pte_atomic(kpte, new_pte);
-                cpa->flushtlb = 1;
+                cpa->flags |= CPA_FLUSHTLB;
         }
         cpa->numpages = 1;
         return 0;
@@ -645,7 +608,25 @@ repeat:
          */
         err = split_large_page(kpte, address);
         if (!err) {
-                cpa->flushtlb = 1;
+                /*
+                 * Do a global flush tlb after splitting the large page
+                 * and before we do the actual change page attribute in the PTE.
+                 *
+                 * With out this, we violate the TLB application note, that says
+                 * "The TLBs may contain both ordinary and large-page
+                 * translations for a 4-KByte range of linear addresses. This
+                 * may occur if software modifies the paging structures so that
+                 * the page size used for the address range changes. If the two
+                 * translations differ with respect to page frame or attributes
+                 * (e.g., permissions), processor behavior is undefined and may
+                 * be implementation-specific."
+                 *
+                 * We do this global tlb flush inside the cpa_lock, so that we
+                 * don't allow any other cpu, with stale tlb entries change the
+                 * page attribute in parallel, that also falls into the
+                 * just split large page entry.
+                 */
+                flush_tlb_all();
                 goto repeat;
         }
 
@@ -658,6 +639,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 {
         struct cpa_data alias_cpa;
         int ret = 0;
+        unsigned long temp_cpa_vaddr, vaddr;
 
         if (cpa->pfn >= max_pfn_mapped)
                 return 0;
@@ -670,16 +652,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
          * No need to redo, when the primary call touched the direct
          * mapping already:
          */
-        if (!(within(cpa->vaddr, PAGE_OFFSET,
+        if (cpa->flags & CPA_ARRAY)
+                vaddr = cpa->vaddr[cpa->curpage];
+        else
+                vaddr = *cpa->vaddr;
+
+        if (!(within(vaddr, PAGE_OFFSET,
                     PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
 #ifdef CONFIG_X86_64
-            || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
+            || within(vaddr, PAGE_OFFSET + (1UL<<32),
                     PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
 #endif
            )) {
 
                 alias_cpa = *cpa;
-                alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+                temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
+                alias_cpa.vaddr = &temp_cpa_vaddr;
+                alias_cpa.flags &= ~CPA_ARRAY;
+
 
                 ret = __change_page_attr_set_clr(&alias_cpa, 0);
         }
@@ -691,7 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
          * No need to redo, when the primary call touched the high
          * mapping already:
          */
-        if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
+        if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
                 return 0;
 
         /*
@@ -702,8 +692,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
                 return 0;
 
         alias_cpa = *cpa;
-        alias_cpa.vaddr =
-                (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+        temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
+        alias_cpa.vaddr = &temp_cpa_vaddr;
+        alias_cpa.flags &= ~CPA_ARRAY;
 
         /*
          * The high mapping range is imprecise, so ignore the return value.
@@ -723,8 +714,15 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                  * preservation check.
                  */
                 cpa->numpages = numpages;
+                /* for array changes, we can't use large page */
+                if (cpa->flags & CPA_ARRAY)
+                        cpa->numpages = 1;
 
+                if (!debug_pagealloc)
+                        spin_lock(&cpa_lock);
                 ret = __change_page_attr(cpa, checkalias);
+                if (!debug_pagealloc)
+                        spin_unlock(&cpa_lock);
                 if (ret)
                         return ret;
 
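The same conditional-lock pattern appears here and in split_large_page(): cpa_lock is only taken when DEBUG_PAGEALLOC is off, because, per the serialization comment at the top of this patch and the kernel_map_pages() comment further down, a DEBUG_PAGEALLOC kernel does not use large identity mappings, so there are no large-page splits to serialize. A sketch of the pattern pulled out into helpers (the helper names are invented for illustration; the patch open-codes the checks):

/* Illustration only: the conditional serialization used around cpa work */
static inline void cpa_lock_maybe(void)
{
        if (!debug_pagealloc)
                spin_lock(&cpa_lock);
}

static inline void cpa_unlock_maybe(void)
{
        if (!debug_pagealloc)
                spin_unlock(&cpa_lock);
}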
@@ -741,7 +739,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
                  */
                 BUG_ON(cpa->numpages > numpages);
                 numpages -= cpa->numpages;
-                cpa->vaddr += cpa->numpages * PAGE_SIZE;
+                if (cpa->flags & CPA_ARRAY)
+                        cpa->curpage++;
+                else
+                        *cpa->vaddr += cpa->numpages * PAGE_SIZE;
+
         }
         return 0;
 }
@@ -752,9 +754,9 @@ static inline int cache_attr(pgprot_t attr)
                 (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
 }
 
-static int change_page_attr_set_clr(unsigned long addr, int numpages,
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
                                     pgprot_t mask_set, pgprot_t mask_clr,
-                                    int force_split)
+                                    int force_split, int array)
 {
         struct cpa_data cpa;
         int ret, cache, checkalias;
@@ -769,21 +771,38 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
                 return 0;
 
         /* Ensure we are PAGE_SIZE aligned */
-        if (addr & ~PAGE_MASK) {
-                addr &= PAGE_MASK;
-                /*
-                 * People should not be passing in unaligned addresses:
-                 */
-                WARN_ON_ONCE(1);
+        if (!array) {
+                if (*addr & ~PAGE_MASK) {
+                        *addr &= PAGE_MASK;
+                        /*
+                         * People should not be passing in unaligned addresses:
+                         */
+                        WARN_ON_ONCE(1);
+                }
+        } else {
+                int i;
+                for (i = 0; i < numpages; i++) {
+                        if (addr[i] & ~PAGE_MASK) {
+                                addr[i] &= PAGE_MASK;
+                                WARN_ON_ONCE(1);
+                        }
+                }
         }
 
+        /* Must avoid aliasing mappings in the highmem code */
+        kmap_flush_unused();
+
         cpa.vaddr = addr;
         cpa.numpages = numpages;
         cpa.mask_set = mask_set;
         cpa.mask_clr = mask_clr;
-        cpa.flushtlb = 0;
+        cpa.flags = 0;
+        cpa.curpage = 0;
         cpa.force_split = force_split;
 
+        if (array)
+                cpa.flags |= CPA_ARRAY;
+
         /* No alias checking for _NX bit modifications */
         checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
 
@@ -792,7 +811,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
         /*
          * Check whether we really changed something:
          */
-        if (!cpa.flushtlb)
+        if (!(cpa.flags & CPA_FLUSHTLB))
                 goto out;
 
         /*
@@ -807,27 +826,30 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
          * error case we fall back to cpa_flush_all (which uses
          * wbindv):
          */
-        if (!ret && cpu_has_clflush)
-                cpa_flush_range(addr, numpages, cache);
-        else
+        if (!ret && cpu_has_clflush) {
+                if (cpa.flags & CPA_ARRAY)
+                        cpa_flush_array(addr, numpages, cache);
+                else
+                        cpa_flush_range(*addr, numpages, cache);
+        } else
                 cpa_flush_all(cache);
 
 out:
-        cpa_fill_pool(NULL);
-
         return ret;
 }
 
-static inline int change_page_attr_set(unsigned long addr, int numpages,
-                                       pgprot_t mask)
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
+                                       pgprot_t mask, int array)
 {
-        return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
+        return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
+                array);
 }
 
-static inline int change_page_attr_clear(unsigned long addr, int numpages,
-                                         pgprot_t mask)
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
+                                         pgprot_t mask, int array)
 {
-        return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
+        return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
+                array);
 }
 
 int _set_memory_uc(unsigned long addr, int numpages)
@@ -835,8 +857,8 @@ int _set_memory_uc(unsigned long addr, int numpages)
         /*
          * for now UC MINUS. see comments in ioremap_nocache()
          */
-        return change_page_attr_set(addr, numpages,
-                                    __pgprot(_PAGE_CACHE_UC_MINUS));
+        return change_page_attr_set(&addr, numpages,
+                                    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
 }
 
 int set_memory_uc(unsigned long addr, int numpages)
@@ -844,7 +866,7 @@ int set_memory_uc(unsigned long addr, int numpages)
         /*
          * for now UC MINUS. see comments in ioremap_nocache()
          */
-        if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+        if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
                             _PAGE_CACHE_UC_MINUS, NULL))
                 return -EINVAL;
 
@@ -852,10 +874,48 @@ int set_memory_uc(unsigned long addr, int numpages)
 }
 EXPORT_SYMBOL(set_memory_uc);
 
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
+{
+        unsigned long start;
+        unsigned long end;
+        int i;
+        /*
+         * for now UC MINUS. see comments in ioremap_nocache()
+         */
+        for (i = 0; i < addrinarray; i++) {
+                start = __pa(addr[i]);
+                for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+                        if (end != __pa(addr[i + 1]))
+                                break;
+                        i++;
+                }
+                if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
+                        goto out;
+        }
+
+        return change_page_attr_set(addr, addrinarray,
+                                __pgprot(_PAGE_CACHE_UC_MINUS), 1);
+out:
+        for (i = 0; i < addrinarray; i++) {
+                unsigned long tmp = __pa(addr[i]);
+
+                if (tmp == start)
+                        break;
+                for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+                        if (end != __pa(addr[i + 1]))
+                                break;
+                        i++;
+                }
+                free_memtype(tmp, end);
+        }
+        return -EINVAL;
+}
+EXPORT_SYMBOL(set_memory_array_uc);
+
 int _set_memory_wc(unsigned long addr, int numpages)
 {
-        return change_page_attr_set(addr, numpages,
-                                    __pgprot(_PAGE_CACHE_WC));
+        return change_page_attr_set(&addr, numpages,
+                                    __pgprot(_PAGE_CACHE_WC), 0);
 }
 
 int set_memory_wc(unsigned long addr, int numpages)
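The inner loop in set_memory_array_uc() above coalesces physically contiguous array entries so reserve_memtype() sees as few ranges as possible. As a worked example with made-up numbers: if addr[0..2] translate to physical pages 0x10000, 0x11000 and 0x40000, the first two are merged into one reservation [0x10000, 0x12000) and the third gets its own [0x40000, 0x41000). A sketch of the merge test used by both loops (the helper name is invented; the patch open-codes the comparison):

/* Illustration only: two entries stay in one run when the next page continues it */
static inline int phys_contiguous(unsigned long end, unsigned long next_vaddr)
{
        return end == __pa(next_vaddr);
}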
@@ -863,7 +923,7 @@ int set_memory_wc(unsigned long addr, int numpages)
         if (!pat_enabled)
                 return set_memory_uc(addr, numpages);
 
-        if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
+        if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
                             _PAGE_CACHE_WC, NULL))
                 return -EINVAL;
 
@@ -873,49 +933,71 @@ EXPORT_SYMBOL(set_memory_wc);
 
 int _set_memory_wb(unsigned long addr, int numpages)
 {
-        return change_page_attr_clear(addr, numpages,
-                                      __pgprot(_PAGE_CACHE_MASK));
+        return change_page_attr_clear(&addr, numpages,
+                                      __pgprot(_PAGE_CACHE_MASK), 0);
 }
 
 int set_memory_wb(unsigned long addr, int numpages)
 {
-        free_memtype(addr, addr + numpages * PAGE_SIZE);
+        free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
 
         return _set_memory_wb(addr, numpages);
 }
 EXPORT_SYMBOL(set_memory_wb);
 
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
+{
+        int i;
+
+        for (i = 0; i < addrinarray; i++) {
+                unsigned long start = __pa(addr[i]);
+                unsigned long end;
+
+                for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
+                        if (end != __pa(addr[i + 1]))
+                                break;
+                        i++;
+                }
+                free_memtype(start, end);
+        }
+        return change_page_attr_clear(addr, addrinarray,
+                                        __pgprot(_PAGE_CACHE_MASK), 1);
+}
+EXPORT_SYMBOL(set_memory_array_wb);
+
 int set_memory_x(unsigned long addr, int numpages)
 {
-        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
+        return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_x);
 
 int set_memory_nx(unsigned long addr, int numpages)
 {
-        return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
+        return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
 }
 EXPORT_SYMBOL(set_memory_nx);
 
 int set_memory_ro(unsigned long addr, int numpages)
 {
-        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
+        return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
+EXPORT_SYMBOL_GPL(set_memory_ro);
 
 int set_memory_rw(unsigned long addr, int numpages)
 {
-        return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
+        return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
+EXPORT_SYMBOL_GPL(set_memory_rw);
 
 int set_memory_np(unsigned long addr, int numpages)
 {
-        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
+        return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
 }
 
 int set_memory_4k(unsigned long addr, int numpages)
 {
-        return change_page_attr_set_clr(addr, numpages, __pgprot(0),
-                                        __pgprot(0), 1);
+        return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
+                                        __pgprot(0), 1, 0);
 }
 
 int set_pages_uc(struct page *page, int numpages)
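The new array entry points are aimed at callers that change attributes on many scattered pages at once and want a single cross-CPU TLB/cache flush instead of one per page. A hypothetical caller, for illustration only (the function and parameter names here are invented; only set_memory_array_uc(), set_memory_array_wb() and page_address() come from this patch and the kernel):

static int example_map_pages_uncached(struct page **pages, unsigned long *addrs,
                                      int count)
{
        int i;

        /* collect the kernel virtual address of every page to convert */
        for (i = 0; i < count; i++)
                addrs[i] = (unsigned long) page_address(pages[i]);

        /* one call, one flush for the whole batch */
        return set_memory_array_uc(addrs, count);
}

static void example_unmap_pages_uncached(unsigned long *addrs, int count)
{
        /* put the pages back to normal write-back caching */
        set_memory_array_wb(addrs, count);
}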
@@ -968,22 +1050,38 @@ int set_pages_rw(struct page *page, int numpages)
 
 static int __set_pages_p(struct page *page, int numpages)
 {
-        struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+        unsigned long tempaddr = (unsigned long) page_address(page);
+        struct cpa_data cpa = { .vaddr = &tempaddr,
                                 .numpages = numpages,
                                 .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-                                .mask_clr = __pgprot(0)};
+                                .mask_clr = __pgprot(0),
+                                .flags = 0};
 
-        return __change_page_attr_set_clr(&cpa, 1);
+        /*
+         * No alias checking needed for setting present flag. otherwise,
+         * we may need to break large pages for 64-bit kernel text
+         * mappings (this adds to complexity if we want to do this from
+         * atomic context especially). Let's keep it simple!
+         */
+        return __change_page_attr_set_clr(&cpa, 0);
 }
 
 static int __set_pages_np(struct page *page, int numpages)
 {
-        struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
+        unsigned long tempaddr = (unsigned long) page_address(page);
+        struct cpa_data cpa = { .vaddr = &tempaddr,
                                 .numpages = numpages,
                                 .mask_set = __pgprot(0),
-                                .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
+                                .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
+                                .flags = 0};
 
-        return __change_page_attr_set_clr(&cpa, 1);
+        /*
+         * No alias checking needed for setting not present flag. otherwise,
+         * we may need to break large pages for 64-bit kernel text
+         * mappings (this adds to complexity if we want to do this from
+         * atomic context especially). Let's keep it simple!
+         */
+        return __change_page_attr_set_clr(&cpa, 0);
 }
 
 void kernel_map_pages(struct page *page, int numpages, int enable)
@@ -1003,11 +1101,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
 
         /*
          * The return value is ignored as the calls cannot fail.
-         * Large pages are kept enabled at boot time, and are
-         * split up quickly with DEBUG_PAGEALLOC. If a splitup
-         * fails here (due to temporary memory shortage) no damage
-         * is done because we just keep the largepage intact up
-         * to the next attempt when it will likely be split up:
+         * Large pages for identity mappings are not used at boot time
+         * and hence no memory allocations during large page split.
          */
         if (enable)
                 __set_pages_p(page, numpages);
@@ -1019,53 +1114,8 @@ void kernel_map_pages(struct page *page, int numpages, int enable)
          * but that can deadlock->flush only current cpu:
          */
         __flush_tlb_all();
-
-        /*
-         * Try to refill the page pool here. We can do this only after
-         * the tlb flush.
-         */
-        cpa_fill_pool(NULL);
 }
 
-#ifdef CONFIG_DEBUG_FS
-static int dpa_show(struct seq_file *m, void *v)
-{
-        seq_puts(m, "DEBUG_PAGEALLOC\n");
-        seq_printf(m, "pool_size : %lu\n", pool_size);
-        seq_printf(m, "pool_pages : %lu\n", pool_pages);
-        seq_printf(m, "pool_low : %lu\n", pool_low);
-        seq_printf(m, "pool_used : %lu\n", pool_used);
-        seq_printf(m, "pool_failed : %lu\n", pool_failed);
-
-        return 0;
-}
-
-static int dpa_open(struct inode *inode, struct file *filp)
-{
-        return single_open(filp, dpa_show, NULL);
-}
-
-static const struct file_operations dpa_fops = {
-        .open = dpa_open,
-        .read = seq_read,
-        .llseek = seq_lseek,
-        .release = single_release,
-};
-
-static int __init debug_pagealloc_proc_init(void)
-{
-        struct dentry *de;
-
-        de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
-                                 &dpa_fops);
-        if (!de)
-                return -ENOMEM;
-
-        return 0;
-}
-__initcall(debug_pagealloc_proc_init);
-#endif
-
 #ifdef CONFIG_HIBERNATION
 
 bool kernel_page_present(struct page *page)