author	Benjamin Herrenschmidt <benh@kernel.crashing.org>	2005-11-06 19:06:55 -0500
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-11-06 19:56:47 -0500
commit	3c726f8dee6f55e96475574e9f645327e461884c (patch)
tree	f67c381e8f57959aa4a94bda4c68e24253cd8171 /arch/powerpc/mm/hugetlbpage.c
parent	f912696ab330bf539231d1f8032320f2a08b850f (diff)
[PATCH] ppc64: support 64k pages
Adds a new CONFIG_PPC_64K_PAGES option which, when enabled, changes the kernel
base page size to 64K. The resulting kernel still boots on any hardware. On
current machines that support only 4K pages, the kernel transparently
maintains 16 "subpages" for each 64K page.
Note that while real 64K-capable HW has been tested, the current patch does
not enable it, as such hardware has not been released yet and I'm still
verifying with the firmware architects the proper way to get that information
from the newer hypervisors.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
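The transparent-subpage arithmetic above is easy to sanity-check. Below is a minimal standalone C sketch (userspace, not kernel code; the macro names are invented for the example): a 64K software page over 4K hardware pages yields 2^(16-12) = 16 subpages, and an effective address selects one of them with a shift and a mask.

```c
/* Standalone illustration of the 64K-page / 4K-subpage split.
 * These macros are local to the example, not kernel symbols. */
#include <stdio.h>

#define LINUX_PAGE_SHIFT  16    /* 64K software page */
#define HW_PAGE_SHIFT     12    /* 4K hardware page */
#define SUBPAGES_PER_PAGE (1UL << (LINUX_PAGE_SHIFT - HW_PAGE_SHIFT))

int main(void)
{
        unsigned long ea = 0x12345678UL;
        /* Index of the 4K hardware subpage within its 64K page. */
        unsigned long subpage = (ea >> HW_PAGE_SHIFT) & (SUBPAGES_PER_PAGE - 1);

        printf("subpages per 64K page: %lu\n", SUBPAGES_PER_PAGE);  /* 16 */
        printf("ea 0x%lx is in subpage %lu\n", ea, subpage);        /* 5 */
        return 0;
}
```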
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--	arch/powerpc/mm/hugetlbpage.c	134
1 file changed, 75 insertions(+), 59 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..0073a04047e4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
 			pm = pmd_offset(pu, addr);
+#ifdef CONFIG_PPC_64K_PAGES
+			/* Currently, we use the normal PTE offset within full
+			 * size PTE pages, thus our huge PTEs are scattered in
+			 * the PTE page and we do waste some. We may change
+			 * that in the future, but the current mechanism keeps
+			 * things much simpler
+			 */
+			if (!pmd_none(*pm)) {
+				/* Note: pte_offset_* are all equivalent on
+				 * ppc64 as we don't have HIGHMEM
+				 */
+				pt = pte_offset_kernel(pm, addr);
+				return pt;
+			}
+#else /* CONFIG_PPC_64K_PAGES */
+			/* On 4k pages, we put huge PTEs in the PMD page */
 			pt = (pte_t *)pm;
-			BUG_ON(!pmd_none(*pm)
-			       && !(pte_present(*pt) && pte_huge(*pt)));
 			return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
 		}
 	}
 
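The "scattered in the PTE page and we do waste some" comment in the new 64K path can be made concrete. The sketch below is a userspace back-of-the-envelope calculation under stated assumptions (8-byte PTEs and 16M huge pages are illustrative values, not read out of this file): consecutive huge pages land only every HPAGE_SIZE/PAGE_SIZE slots apart in a full-size PTE page, so most slots go unused.

```c
/* Back-of-the-envelope: how sparsely huge PTEs sit in a 64K PTE page.
 * Assumed sizes: 64K base pages, 8-byte PTEs, 16M huge pages. */
#include <stdio.h>

int main(void)
{
        unsigned long page_shift  = 16;  /* 64K base page */
        unsigned long hpage_shift = 24;  /* assumed 16M huge page */
        unsigned long pte_size    = 8;   /* assumed 8-byte PTE */
        unsigned long slots  = (1UL << page_shift) / pte_size;       /* 8192 */
        unsigned long stride = 1UL << (hpage_shift - page_shift);    /* 256 */

        printf("%lu PTE slots per page, a huge PTE in every %lu-th slot\n",
               slots, stride);
        return 0;
}
```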
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	if (pu) {
 		pm = pmd_alloc(mm, pu, addr);
 		if (pm) {
+#ifdef CONFIG_PPC_64K_PAGES
+			/* See comment in huge_pte_offset. Note that if we ever
+			 * want to put the page size in the PMD, we would have
+			 * to open code our own pte_alloc* function in order
+			 * to populate and set the size atomically
+			 */
+			pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
 			pt = (pte_t *)pm;
-			BUG_ON(!pmd_none(*pm)
-			       && !(pte_present(*pt) && pte_huge(*pt)));
+#endif /* CONFIG_PPC_64K_PAGES */
 			return pt;
 		}
 	}
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	return NULL;
 }
 
-#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)
-
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte)
 {
-	int i;
-
 	if (pte_present(*ptep)) {
-		pte_clear(mm, addr, ptep);
+		/* We open-code pte_clear because we need to pass the right
+		 * argument to hpte_update (huge / !huge)
+		 */
+		unsigned long old = pte_update(ptep, ~0UL);
+		if (old & _PAGE_HASHPTE)
+			hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
 		flush_tlb_pending();
 	}
-
-	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
-		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
-		ptep++;
-	}
+	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }
 
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep)
 {
 	unsigned long old = pte_update(ptep, ~0UL);
-	int i;
 
 	if (old & _PAGE_HASHPTE)
-		hpte_update(mm, addr, old, 0);
-
-	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
-		ptep[i] = __pte(0);
+		hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
+	*ptep = __pte(0);
 
 	return __pte(old);
 }
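The removed HUGEPTE_BATCH_SIZE machinery existed because, under the old 4K-only layout, one huge page was backed by several PMD-level entries and the same huge PTE had to be replicated into each of them; after this patch a single PTE represents the mapping, so set_huge_pte_at and huge_ptep_get_and_clear collapse to one store. A hedged sketch of the old replication factor, with assumed sizes (16M huge page, 2M region per PMD slot; neither value is stated in this hunk):

```c
/* Why a batch loop was ever needed: one huge page spanned several
 * PMD-level slots, each holding a copy of the same huge PTE.
 * The sizes below are assumptions for illustration only. */
#include <stdio.h>

int main(void)
{
        unsigned long hpage_size = 16UL << 20;  /* assumed 16M huge page */
        unsigned long pmd_size   = 2UL << 20;   /* assumed 2M per PMD slot */

        /* The old HUGEPTE_BATCH_SIZE was HPAGE_SIZE / PMD_SIZE. */
        printf("old batch size: %lu identical PTE writes\n",
               hpage_size / pmd_size);          /* 8 under these assumptions */
        return 0;
}
```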
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	int lastshift;
 	u16 areamask, curareas;
 
+	if (HPAGE_SHIFT == 0)
+		return -EINVAL;
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 
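The new early return guards the case where a kernel built with 64K pages boots without a usable huge-page size, leaving HPAGE_SHIFT at 0. The standalone check below (with HPAGE_MASK reconstructed the conventional way, as an assumption) shows why the existing alignment test alone would not catch it: with a zero shift the mask degenerates and every length passes.

```c
/* Why "if (HPAGE_SHIFT == 0) return -EINVAL;" must come first:
 * with a zero shift, the usual mask construction degenerates. */
#include <stdio.h>

int main(void)
{
        unsigned long hpage_shift = 0;          /* huge pages unconfigured */
        unsigned long hpage_mask  = ~((1UL << hpage_shift) - 1);  /* ~0UL */
        unsigned long len = 12345;              /* arbitrary, unaligned */

        /* (len & ~mask) is always 0 here, so the alignment check
         * alone would wrongly accept any length. */
        printf("len & ~mask = %lu\n", len & ~hpage_mask);
        return 0;
}
```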
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		   unsigned long ea, unsigned long vsid, int local)
 {
 	pte_t *ptep;
-	unsigned long va, vpn;
-	pte_t old_pte, new_pte;
-	unsigned long rflags, prpn;
+	unsigned long old_pte, new_pte;
+	unsigned long va, rflags, pa;
 	long slot;
 	int err = 1;
 
-	spin_lock(&mm->page_table_lock);
-
 	ptep = huge_pte_offset(mm, ea);
 
 	/* Search the Linux page table for a match with va */
 	va = (vsid << 28) | (ea & 0x0fffffff);
-	vpn = va >> HPAGE_SHIFT;
 
 	/*
 	 * If no pte found or not present, send the problem up to
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	if (unlikely(!ptep || pte_none(*ptep)))
 		goto out;
 
-	/* BUG_ON(pte_bad(*ptep)); */
-
 	/*
 	 * Check the user's access rights to the page. If access should be
 	 * prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	 */
 
 
-	old_pte = *ptep;
-	new_pte = old_pte;
-
-	rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+	do {
+		old_pte = pte_val(*ptep);
+		if (old_pte & _PAGE_BUSY)
+			goto out;
+		new_pte = old_pte | _PAGE_BUSY |
+			_PAGE_ACCESSED | _PAGE_HASHPTE;
+	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
+					  old_pte, new_pte));
+
+	rflags = 0x2 | (!(new_pte & _PAGE_RW));
 	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
-	rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
 
 	/* Check if pte already has an hpte (case 2) */
-	if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+	if (unlikely(old_pte & _PAGE_HASHPTE)) {
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
-		hash = hpt_hash(vpn, 1);
-		if (pte_val(old_pte) & _PAGE_SECONDARY)
+		hash = hpt_hash(va, HPAGE_SHIFT);
+		if (old_pte & _PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
+		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
 		if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
-			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
-	if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(vpn, 1);
+	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+		unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
 		unsigned long hpte_group;
 
-		prpn = pte_pfn(old_pte);
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
 
 repeat:
 		hpte_group = ((hash & htab_hash_mask) *
 			      HPTES_PER_GROUP) & ~0x7UL;
 
-		/* Update the linux pte with the HPTE slot */
-		pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
-		pte_val(new_pte) |= _PAGE_HASHPTE;
+		/* clear HPTE slot information in new PTE */
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
 
 		/* Add in WIMG bits */
 		/* XXX We should store these in the pte */
+		/* --BenH: I think they are ... */
 		rflags |= _PAGE_COHERENT;
 
-		slot = ppc_md.hpte_insert(hpte_group, va, prpn,
-					  HPTE_V_LARGE, rflags);
+		/* Insert into the hash table, primary slot */
+		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+					  mmu_huge_psize);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
-			pte_val(new_pte) |= _PAGE_SECONDARY;
+			new_pte |= _PAGE_F_SECOND;
 			hpte_group = ((~hash & htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL;
-			slot = ppc_md.hpte_insert(hpte_group, va, prpn,
-						  HPTE_V_LARGE |
+			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
 						  HPTE_V_SECONDARY,
-						  rflags);
+						  mmu_huge_psize);
 			if (slot == -1) {
 				if (mftb() & 0x1)
 					hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
 		if (unlikely(slot == -2))
 			panic("hash_huge_page: pte_insert failed\n");
 
-		pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
-		/*
-		 * No need to use ldarx/stdcx here because all who
-		 * might be updating the pte will hold the
-		 * page_table_lock
-		 */
-		*ptep = new_pte;
+		new_pte |= (slot << 12) & _PAGE_F_GIX;
 	}
 
+	/*
+	 * No need to use ldarx/stdcx here because all who
+	 * might be updating the pte will hold the
+	 * page_table_lock
+	 */
+	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+
 	err = 0;
 
 out:
-	spin_unlock(&mm->page_table_lock);
-
 	return err;
 }
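The structural change running through the last hunks: hash_huge_page no longer serializes on mm->page_table_lock; instead it claims the PTE by atomically setting _PAGE_BUSY with __cmpxchg_u64, does the hash-table work, and publishes the result with the busy bit cleared. The userspace sketch below reproduces that claim/publish pattern with GCC's __sync builtin; the bit values and helper name are invented for the example, not the kernel's.

```c
/* Claim-with-busy-bit pattern from hash_huge_page, in miniature.
 * Bit layout and names are illustrative, not the kernel's. */
#include <stdio.h>

#define PAGE_BUSY     (1UL << 0)
#define PAGE_ACCESSED (1UL << 1)
#define PAGE_HASHPTE  (1UL << 2)

/* Atomically set BUSY (plus bookkeeping bits); returns 0 when someone
 * else already holds the PTE and the caller should bail out. */
static unsigned long claim_pte(unsigned long *ptep)
{
        unsigned long old, new;

        do {
                old = *ptep;
                if (old & PAGE_BUSY)
                        return 0;
                new = old | PAGE_BUSY | PAGE_ACCESSED | PAGE_HASHPTE;
        } while (old != __sync_val_compare_and_swap(ptep, old, new));

        return new;
}

int main(void)
{
        unsigned long pte = 0x1000;             /* fake PTE value */
        unsigned long claimed = claim_pte(&pte);

        if (claimed) {
                /* ... hash-table insertion would happen here ... */
                pte = claimed & ~PAGE_BUSY;     /* publish, busy bit cleared */
        }
        printf("final pte: 0x%lx\n", pte);      /* 0x1006 */
        return 0;
}
```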