aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/mm/hugetlbpage.c
diff options
context:
space:
mode:
authorBenjamin Herrenschmidt <benh@kernel.crashing.org>2005-11-06 19:06:55 -0500
committerLinus Torvalds <torvalds@g5.osdl.org>2005-11-06 19:56:47 -0500
commit3c726f8dee6f55e96475574e9f645327e461884c (patch)
treef67c381e8f57959aa4a94bda4c68e24253cd8171 /arch/powerpc/mm/hugetlbpage.c
parentf912696ab330bf539231d1f8032320f2a08b850f (diff)
[PATCH] ppc64: support 64k pages
Adds a new CONFIG_PPC_64K_PAGES which, when enabled, changes the kernel base page size to 64K. The resulting kernel still boots on any hardware. On current machines with 4K pages support only, the kernel will maintain 16 "subpages" for each 64K page transparently. Note that while real 64K capable HW has been tested, the current patch will not enable it yet as such hardware is not released yet, and I'm still verifying with the firmware architects the proper to get the information from the newer hypervisors. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--arch/powerpc/mm/hugetlbpage.c134
1 files changed, 75 insertions, 59 deletions
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..0073a04047e4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
47 pu = pud_offset(pg, addr); 47 pu = pud_offset(pg, addr);
48 if (!pud_none(*pu)) { 48 if (!pud_none(*pu)) {
49 pm = pmd_offset(pu, addr); 49 pm = pmd_offset(pu, addr);
50#ifdef CONFIG_PPC_64K_PAGES
51 /* Currently, we use the normal PTE offset within full
52 * size PTE pages, thus our huge PTEs are scattered in
53 * the PTE page and we do waste some. We may change
54 * that in the future, but the current mecanism keeps
55 * things much simpler
56 */
57 if (!pmd_none(*pm)) {
58 /* Note: pte_offset_* are all equivalent on
59 * ppc64 as we don't have HIGHMEM
60 */
61 pt = pte_offset_kernel(pm, addr);
62 return pt;
63 }
64#else /* CONFIG_PPC_64K_PAGES */
65 /* On 4k pages, we put huge PTEs in the PMD page */
50 pt = (pte_t *)pm; 66 pt = (pte_t *)pm;
51 BUG_ON(!pmd_none(*pm)
52 && !(pte_present(*pt) && pte_huge(*pt)));
53 return pt; 67 return pt;
68#endif /* CONFIG_PPC_64K_PAGES */
54 } 69 }
55 } 70 }
56 71
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
74 if (pu) { 89 if (pu) {
75 pm = pmd_alloc(mm, pu, addr); 90 pm = pmd_alloc(mm, pu, addr);
76 if (pm) { 91 if (pm) {
92#ifdef CONFIG_PPC_64K_PAGES
93 /* See comment in huge_pte_offset. Note that if we ever
94 * want to put the page size in the PMD, we would have
95 * to open code our own pte_alloc* function in order
96 * to populate and set the size atomically
97 */
98 pt = pte_alloc_map(mm, pm, addr);
99#else /* CONFIG_PPC_64K_PAGES */
77 pt = (pte_t *)pm; 100 pt = (pte_t *)pm;
78 BUG_ON(!pmd_none(*pm) 101#endif /* CONFIG_PPC_64K_PAGES */
79 && !(pte_present(*pt) && pte_huge(*pt)));
80 return pt; 102 return pt;
81 } 103 }
82 } 104 }
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
84 return NULL; 106 return NULL;
85} 107}
86 108
87#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE)
88
89void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, 109void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
90 pte_t *ptep, pte_t pte) 110 pte_t *ptep, pte_t pte)
91{ 111{
92 int i;
93
94 if (pte_present(*ptep)) { 112 if (pte_present(*ptep)) {
95 pte_clear(mm, addr, ptep); 113 /* We open-code pte_clear because we need to pass the right
114 * argument to hpte_update (huge / !huge)
115 */
116 unsigned long old = pte_update(ptep, ~0UL);
117 if (old & _PAGE_HASHPTE)
118 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
96 flush_tlb_pending(); 119 flush_tlb_pending();
97 } 120 }
98 121 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
99 for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
100 *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
101 ptep++;
102 }
103} 122}
104 123
105pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, 124pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
106 pte_t *ptep) 125 pte_t *ptep)
107{ 126{
108 unsigned long old = pte_update(ptep, ~0UL); 127 unsigned long old = pte_update(ptep, ~0UL);
109 int i;
110 128
111 if (old & _PAGE_HASHPTE) 129 if (old & _PAGE_HASHPTE)
112 hpte_update(mm, addr, old, 0); 130 hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
113 131 *ptep = __pte(0);
114 for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
115 ptep[i] = __pte(0);
116 132
117 return __pte(old); 133 return __pte(old);
118} 134}
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
563 int lastshift; 579 int lastshift;
564 u16 areamask, curareas; 580 u16 areamask, curareas;
565 581
582 if (HPAGE_SHIFT == 0)
583 return -EINVAL;
566 if (len & ~HPAGE_MASK) 584 if (len & ~HPAGE_MASK)
567 return -EINVAL; 585 return -EINVAL;
568 586
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
619 unsigned long ea, unsigned long vsid, int local) 637 unsigned long ea, unsigned long vsid, int local)
620{ 638{
621 pte_t *ptep; 639 pte_t *ptep;
622 unsigned long va, vpn; 640 unsigned long old_pte, new_pte;
623 pte_t old_pte, new_pte; 641 unsigned long va, rflags, pa;
624 unsigned long rflags, prpn;
625 long slot; 642 long slot;
626 int err = 1; 643 int err = 1;
627 644
628 spin_lock(&mm->page_table_lock);
629
630 ptep = huge_pte_offset(mm, ea); 645 ptep = huge_pte_offset(mm, ea);
631 646
632 /* Search the Linux page table for a match with va */ 647 /* Search the Linux page table for a match with va */
633 va = (vsid << 28) | (ea & 0x0fffffff); 648 va = (vsid << 28) | (ea & 0x0fffffff);
634 vpn = va >> HPAGE_SHIFT;
635 649
636 /* 650 /*
637 * If no pte found or not present, send the problem up to 651 * If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
640 if (unlikely(!ptep || pte_none(*ptep))) 654 if (unlikely(!ptep || pte_none(*ptep)))
641 goto out; 655 goto out;
642 656
643/* BUG_ON(pte_bad(*ptep)); */
644
645 /* 657 /*
646 * Check the user's access rights to the page. If access should be 658 * Check the user's access rights to the page. If access should be
647 * prevented then send the problem up to do_page_fault. 659 * prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
661 */ 673 */
662 674
663 675
664 old_pte = *ptep; 676 do {
665 new_pte = old_pte; 677 old_pte = pte_val(*ptep);
666 678 if (old_pte & _PAGE_BUSY)
667 rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW)); 679 goto out;
680 new_pte = old_pte | _PAGE_BUSY |
681 _PAGE_ACCESSED | _PAGE_HASHPTE;
682 } while(old_pte != __cmpxchg_u64((unsigned long *)ptep,
683 old_pte, new_pte));
684
685 rflags = 0x2 | (!(new_pte & _PAGE_RW));
668 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */ 686 /* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
669 rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC); 687 rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
670 688
671 /* Check if pte already has an hpte (case 2) */ 689 /* Check if pte already has an hpte (case 2) */
672 if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) { 690 if (unlikely(old_pte & _PAGE_HASHPTE)) {
673 /* There MIGHT be an HPTE for this pte */ 691 /* There MIGHT be an HPTE for this pte */
674 unsigned long hash, slot; 692 unsigned long hash, slot;
675 693
676 hash = hpt_hash(vpn, 1); 694 hash = hpt_hash(va, HPAGE_SHIFT);
677 if (pte_val(old_pte) & _PAGE_SECONDARY) 695 if (old_pte & _PAGE_F_SECOND)
678 hash = ~hash; 696 hash = ~hash;
679 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP; 697 slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
680 slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12; 698 slot += (old_pte & _PAGE_F_GIX) >> 12;
681 699
682 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1) 700 if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
683 pte_val(old_pte) &= ~_PAGE_HPTEFLAGS; 701 old_pte &= ~_PAGE_HPTEFLAGS;
684 } 702 }
685 703
686 if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) { 704 if (likely(!(old_pte & _PAGE_HASHPTE))) {
687 unsigned long hash = hpt_hash(vpn, 1); 705 unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
688 unsigned long hpte_group; 706 unsigned long hpte_group;
689 707
690 prpn = pte_pfn(old_pte); 708 pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
691 709
692repeat: 710repeat:
693 hpte_group = ((hash & htab_hash_mask) * 711 hpte_group = ((hash & htab_hash_mask) *
694 HPTES_PER_GROUP) & ~0x7UL; 712 HPTES_PER_GROUP) & ~0x7UL;
695 713
696 /* Update the linux pte with the HPTE slot */ 714 /* clear HPTE slot informations in new PTE */
697 pte_val(new_pte) &= ~_PAGE_HPTEFLAGS; 715 new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
698 pte_val(new_pte) |= _PAGE_HASHPTE;
699 716
700 /* Add in WIMG bits */ 717 /* Add in WIMG bits */
701 /* XXX We should store these in the pte */ 718 /* XXX We should store these in the pte */
719 /* --BenH: I think they are ... */
702 rflags |= _PAGE_COHERENT; 720 rflags |= _PAGE_COHERENT;
703 721
704 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 722 /* Insert into the hash table, primary slot */
705 HPTE_V_LARGE, rflags); 723 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
724 mmu_huge_psize);
706 725
707 /* Primary is full, try the secondary */ 726 /* Primary is full, try the secondary */
708 if (unlikely(slot == -1)) { 727 if (unlikely(slot == -1)) {
709 pte_val(new_pte) |= _PAGE_SECONDARY; 728 new_pte |= _PAGE_F_SECOND;
710 hpte_group = ((~hash & htab_hash_mask) * 729 hpte_group = ((~hash & htab_hash_mask) *
711 HPTES_PER_GROUP) & ~0x7UL; 730 HPTES_PER_GROUP) & ~0x7UL;
712 slot = ppc_md.hpte_insert(hpte_group, va, prpn, 731 slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
713 HPTE_V_LARGE |
714 HPTE_V_SECONDARY, 732 HPTE_V_SECONDARY,
715 rflags); 733 mmu_huge_psize);
716 if (slot == -1) { 734 if (slot == -1) {
717 if (mftb() & 0x1) 735 if (mftb() & 0x1)
718 hpte_group = ((hash & htab_hash_mask) * 736 hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
726 if (unlikely(slot == -2)) 744 if (unlikely(slot == -2))
727 panic("hash_huge_page: pte_insert failed\n"); 745 panic("hash_huge_page: pte_insert failed\n");
728 746
729 pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX; 747 new_pte |= (slot << 12) & _PAGE_F_GIX;
730
731 /*
732 * No need to use ldarx/stdcx here because all who
733 * might be updating the pte will hold the
734 * page_table_lock
735 */
736 *ptep = new_pte;
737 } 748 }
738 749
750 /*
751 * No need to use ldarx/stdcx here because all who
752 * might be updating the pte will hold the
753 * page_table_lock
754 */
755 *ptep = __pte(new_pte & ~_PAGE_BUSY);
756
739 err = 0; 757 err = 0;
740 758
741 out: 759 out:
742 spin_unlock(&mm->page_table_lock);
743
744 return err; 760 return err;
745} 761}