Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r-- | arch/powerpc/mm/hugetlbpage.c | 134
1 file changed, 75 insertions(+), 59 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 0ea0994ed974..0073a04047e4 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -47,10 +47,25 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
 			pm = pmd_offset(pu, addr);
+#ifdef CONFIG_PPC_64K_PAGES
+			/* Currently, we use the normal PTE offset within full
+			 * size PTE pages, thus our huge PTEs are scattered in
+			 * the PTE page and we do waste some. We may change
+			 * that in the future, but the current mechanism keeps
+			 * things much simpler
+			 */
+			if (!pmd_none(*pm)) {
+				/* Note: pte_offset_* are all equivalent on
+				 * ppc64 as we don't have HIGHMEM
+				 */
+				pt = pte_offset_kernel(pm, addr);
+				return pt;
+			}
+#else /* CONFIG_PPC_64K_PAGES */
+			/* On 4k pages, we put huge PTEs in the PMD page */
 			pt = (pte_t *)pm;
-			BUG_ON(!pmd_none(*pm)
-			       && !(pte_present(*pt) && pte_huge(*pt)));
 			return pt;
+#endif /* CONFIG_PPC_64K_PAGES */
 		}
 	}
 
@@ -74,9 +89,16 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	if (pu) {
 		pm = pmd_alloc(mm, pu, addr);
 		if (pm) {
+#ifdef CONFIG_PPC_64K_PAGES
+			/* See comment in huge_pte_offset. Note that if we ever
+			 * want to put the page size in the PMD, we would have
+			 * to open code our own pte_alloc* function in order
+			 * to populate and set the size atomically
+			 */
+			pt = pte_alloc_map(mm, pm, addr);
+#else /* CONFIG_PPC_64K_PAGES */
 			pt = (pte_t *)pm;
-			BUG_ON(!pmd_none(*pm)
-			       && !(pte_present(*pt) && pte_huge(*pt)));
+#endif /* CONFIG_PPC_64K_PAGES */
 			return pt;
 		}
 	}
@@ -84,35 +106,29 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	return NULL;
 }
 
-#define HUGEPTE_BATCH_SIZE	(HPAGE_SIZE / PMD_SIZE)
-
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
 		     pte_t *ptep, pte_t pte)
 {
-	int i;
-
 	if (pte_present(*ptep)) {
-		pte_clear(mm, addr, ptep);
+		/* We open-code pte_clear because we need to pass the right
+		 * argument to hpte_update (huge / !huge)
+		 */
+		unsigned long old = pte_update(ptep, ~0UL);
+		if (old & _PAGE_HASHPTE)
+			hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
 		flush_tlb_pending();
 	}
-
-	for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) {
-		*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
-		ptep++;
-	}
+	*ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
 }
 
 pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep)
 {
 	unsigned long old = pte_update(ptep, ~0UL);
-	int i;
 
 	if (old & _PAGE_HASHPTE)
-		hpte_update(mm, addr, old, 0);
-
-	for (i = 1; i < HUGEPTE_BATCH_SIZE; i++)
-		ptep[i] = __pte(0);
+		hpte_update(mm, addr & HPAGE_MASK, ptep, old, 1);
+	*ptep = __pte(0);
 
 	return __pte(old);
 }
@@ -563,6 +579,8 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 	int lastshift;
 	u16 areamask, curareas;
 
+	if (HPAGE_SHIFT == 0)
+		return -EINVAL;
 	if (len & ~HPAGE_MASK)
 		return -EINVAL;
 
@@ -619,19 +637,15 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 		   unsigned long ea, unsigned long vsid, int local)
 {
 	pte_t *ptep;
-	unsigned long va, vpn;
-	pte_t old_pte, new_pte;
-	unsigned long rflags, prpn;
+	unsigned long old_pte, new_pte;
+	unsigned long va, rflags, pa;
 	long slot;
 	int err = 1;
 
-	spin_lock(&mm->page_table_lock);
-
 	ptep = huge_pte_offset(mm, ea);
 
 	/* Search the Linux page table for a match with va */
 	va = (vsid << 28) | (ea & 0x0fffffff);
-	vpn = va >> HPAGE_SHIFT;
 
@@ -640,8 +654,6 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	if (unlikely(!ptep || pte_none(*ptep)))
 		goto out;
 
-	/* BUG_ON(pte_bad(*ptep)); */
-
 	/*
 	 * Check the user's access rights to the page. If access should be
 	 * prevented then send the problem up to do_page_fault.
@@ -661,58 +673,64 @@ int hash_huge_page(struct mm_struct *mm, unsigned long access,
 	 */
 
 
-	old_pte = *ptep;
-	new_pte = old_pte;
-
-	rflags = 0x2 | (! (pte_val(new_pte) & _PAGE_RW));
+	do {
+		old_pte = pte_val(*ptep);
+		if (old_pte & _PAGE_BUSY)
+			goto out;
+		new_pte = old_pte | _PAGE_BUSY |
+			_PAGE_ACCESSED | _PAGE_HASHPTE;
+	} while (old_pte != __cmpxchg_u64((unsigned long *)ptep,
+					  old_pte, new_pte));
+
+	rflags = 0x2 | (!(new_pte & _PAGE_RW));
 	/* _PAGE_EXEC -> HW_NO_EXEC since it's inverted */
-	rflags |= ((pte_val(new_pte) & _PAGE_EXEC) ? 0 : HW_NO_EXEC);
+	rflags |= ((new_pte & _PAGE_EXEC) ? 0 : HPTE_R_N);
 
 	/* Check if pte already has an hpte (case 2) */
-	if (unlikely(pte_val(old_pte) & _PAGE_HASHPTE)) {
+	if (unlikely(old_pte & _PAGE_HASHPTE)) {
 		/* There MIGHT be an HPTE for this pte */
 		unsigned long hash, slot;
 
-		hash = hpt_hash(vpn, 1);
-		if (pte_val(old_pte) & _PAGE_SECONDARY)
+		hash = hpt_hash(va, HPAGE_SHIFT);
+		if (old_pte & _PAGE_F_SECOND)
 			hash = ~hash;
 		slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
-		slot += (pte_val(old_pte) & _PAGE_GROUP_IX) >> 12;
+		slot += (old_pte & _PAGE_F_GIX) >> 12;
 
 		if (ppc_md.hpte_updatepp(slot, rflags, va, 1, local) == -1)
-			pte_val(old_pte) &= ~_PAGE_HPTEFLAGS;
+			old_pte &= ~_PAGE_HPTEFLAGS;
 	}
 
-	if (likely(!(pte_val(old_pte) & _PAGE_HASHPTE))) {
-		unsigned long hash = hpt_hash(vpn, 1);
+	if (likely(!(old_pte & _PAGE_HASHPTE))) {
+		unsigned long hash = hpt_hash(va, HPAGE_SHIFT);
 		unsigned long hpte_group;
 
-		prpn = pte_pfn(old_pte);
+		pa = pte_pfn(__pte(old_pte)) << PAGE_SHIFT;
 
 repeat:
 		hpte_group = ((hash & htab_hash_mask) *
 			      HPTES_PER_GROUP) & ~0x7UL;
 
-		/* Update the linux pte with the HPTE slot */
-		pte_val(new_pte) &= ~_PAGE_HPTEFLAGS;
-		pte_val(new_pte) |= _PAGE_HASHPTE;
+		/* clear HPTE slot information in new PTE */
+		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | _PAGE_HASHPTE;
 
 		/* Add in WIMG bits */
 		/* XXX We should store these in the pte */
+		/* --BenH: I think they are ... */
 		rflags |= _PAGE_COHERENT;
 
-		slot = ppc_md.hpte_insert(hpte_group, va, prpn,
-					  HPTE_V_LARGE, rflags);
+		/* Insert into the hash table, primary slot */
+		slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags, 0,
+					  mmu_huge_psize);
 
 		/* Primary is full, try the secondary */
 		if (unlikely(slot == -1)) {
-			pte_val(new_pte) |= _PAGE_SECONDARY;
+			new_pte |= _PAGE_F_SECOND;
 			hpte_group = ((~hash & htab_hash_mask) *
 				      HPTES_PER_GROUP) & ~0x7UL;
-			slot = ppc_md.hpte_insert(hpte_group, va, prpn,
-						  HPTE_V_LARGE |
-						  HPTE_V_SECONDARY,
-						  rflags);
+			slot = ppc_md.hpte_insert(hpte_group, va, pa, rflags,
+						  HPTE_V_SECONDARY,
+						  mmu_huge_psize);
 			if (slot == -1) {
 				if (mftb() & 0x1)
 					hpte_group = ((hash & htab_hash_mask) *
@@ -726,20 +744,18 @@ repeat:
 		if (unlikely(slot == -2))
 			panic("hash_huge_page: pte_insert failed\n");
 
-		pte_val(new_pte) |= (slot<<12) & _PAGE_GROUP_IX;
-
-		/*
-		 * No need to use ldarx/stdcx here because all who
-		 * might be updating the pte will hold the
-		 * page_table_lock
-		 */
-		*ptep = new_pte;
+		new_pte |= (slot << 12) & _PAGE_F_GIX;
 	}
 
+	/*
+	 * No need to use ldarx/stdcx here because all who
+	 * might be updating the pte will hold the
+	 * page_table_lock
+	 */
+	*ptep = __pte(new_pte & ~_PAGE_BUSY);
+
 	err = 0;
 
 out:
-	spin_unlock(&mm->page_table_lock);
-
 	return err;
 }
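
The most significant behavioural change in this patch is in hash_huge_page(): the spin_lock/spin_unlock of mm->page_table_lock around the hash fault path is gone, replaced by a lock-free claim on the PTE itself, taken by setting _PAGE_BUSY with a 64-bit compare-and-exchange and released by the final store that clears it. Below is a minimal user-space sketch of that claim/release pattern, assuming C11 atomics in place of the kernel's __cmpxchg_u64() and made-up flag values; it illustrates the technique only and is not the kernel code.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

/* Hypothetical bit values for illustration only; the real definitions
 * live in the powerpc pgtable headers. */
#define PAGE_BUSY	0x0800UL
#define PAGE_ACCESSED	0x0008UL
#define PAGE_HASHPTE	0x0400UL

/* Claim a PTE the way the reworked hash_huge_page() does: loop until
 * we install a copy with the busy bit set, backing off if another
 * CPU already holds it. Returns false if the PTE was busy. */
static bool pte_claim(_Atomic uint64_t *ptep, uint64_t *oldp, uint64_t *newp)
{
	do {
		*oldp = atomic_load(ptep);
		if (*oldp & PAGE_BUSY)
			return false;	/* someone else is updating it */
		*newp = *oldp | PAGE_BUSY | PAGE_ACCESSED | PAGE_HASHPTE;
		/* compare_exchange stands in for the kernel's __cmpxchg_u64() */
	} while (!atomic_compare_exchange_weak(ptep, oldp, *newp));
	return true;
}

/* Publish the final PTE and drop the busy bit, mirroring the closing
 * "*ptep = __pte(new_pte & ~_PAGE_BUSY)" store in the patch. */
static void pte_release(_Atomic uint64_t *ptep, uint64_t new)
{
	atomic_store(ptep, new & ~PAGE_BUSY);
}

The do/while retry is the point of the pattern: if another CPU changes the PTE between the load and the exchange, the exchange fails and the loop re-reads, so the flags are never derived from a stale copy of the entry.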