path: root/arch/powerpc/mm/hugetlbpage.c
Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 299
1 file changed, 174 insertions(+), 125 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 4210549ac95e..834ca8eb38f2 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -21,6 +21,9 @@
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
 #include <asm/setup.h>
+#include <asm/hugetlb.h>
+
+#ifdef CONFIG_HUGETLB_PAGE
 
 #define PAGE_SHIFT_64K  16
 #define PAGE_SHIFT_16M  24
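
The #ifdef opened here is closed again by the last hunk of this patch, so the file ends up split into a hugetlbfs-only half and an always-built half. A rough sketch of the resulting layout (illustrative, not verbatim source):

/* layout of hugetlbpage.c after this patch (sketch, not verbatim) */
#ifdef CONFIG_HUGETLB_PAGE
/* hugetlbfs-only code: huge_pte_offset(), follow_huge_addr(),
 * flush_dcache_icache_hugepage(), ... */
#endif /* CONFIG_HUGETLB_PAGE */

/* built unconditionally, so THP-enabled kernels get them too:
 * find_linux_pte_or_hugepte(), gup_hugepte() */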
@@ -100,68 +103,9 @@ int pgd_huge(pgd_t pgd)
 }
 #endif
 
-/*
- * We have 4 cases for pgds and pmds:
- * (1) invalid (all zeroes)
- * (2) pointer to next table, as normal; bottom 6 bits == 0
- * (3) leaf pte for huge page, bottom two bits != 00
- * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
- */
-pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
-{
-        pgd_t *pg;
-        pud_t *pu;
-        pmd_t *pm;
-        pte_t *ret_pte;
-        hugepd_t *hpdp = NULL;
-        unsigned pdshift = PGDIR_SHIFT;
-
-        if (shift)
-                *shift = 0;
-
-        pg = pgdir + pgd_index(ea);
-
-        if (pgd_huge(*pg)) {
-                ret_pte = (pte_t *) pg;
-                goto out;
-        } else if (is_hugepd(pg))
-                hpdp = (hugepd_t *)pg;
-        else if (!pgd_none(*pg)) {
-                pdshift = PUD_SHIFT;
-                pu = pud_offset(pg, ea);
-
-                if (pud_huge(*pu)) {
-                        ret_pte = (pte_t *) pu;
-                        goto out;
-                } else if (is_hugepd(pu))
-                        hpdp = (hugepd_t *)pu;
-                else if (!pud_none(*pu)) {
-                        pdshift = PMD_SHIFT;
-                        pm = pmd_offset(pu, ea);
-
-                        if (pmd_huge(*pm)) {
-                                ret_pte = (pte_t *) pm;
-                                goto out;
-                        } else if (is_hugepd(pm))
-                                hpdp = (hugepd_t *)pm;
-                        else if (!pmd_none(*pm))
-                                return pte_offset_kernel(pm, ea);
-                }
-        }
-        if (!hpdp)
-                return NULL;
-
-        ret_pte = hugepte_offset(hpdp, ea, pdshift);
-        pdshift = hugepd_shift(*hpdp);
-out:
-        if (shift)
-                *shift = pdshift;
-        return ret_pte;
-}
-EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
-
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
+        /* Only called for hugetlbfs pages, hence can ignore THP */
         return find_linux_pte_or_hugepte(mm->pgd, addr, NULL);
 }
 
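
huge_pte_offset() can pass NULL for shift because hugetlbfs callers already know their page size. A hypothetical caller that does want the size could look roughly like this (a sketch only: mapping_size_of() is not part of this patch, and the caller is assumed to run with interrupts disabled so the RCU-freed page tables stay live, per the comments in the new walker below):

static unsigned long mapping_size_of(struct mm_struct *mm, unsigned long ea)
{
        unsigned int shift;
        pte_t *ptep;

        ptep = find_linux_pte_or_hugepte(mm->pgd, ea, &shift);
        if (!ptep)
                return 0;               /* nothing mapped at ea */
        if (!shift)
                shift = PAGE_SHIFT;     /* 0 means a normal page table page */
        return 1UL << shift;            /* size of the mapping backing ea */
}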
@@ -736,11 +680,14 @@ follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
         struct page *page;
         unsigned shift;
         unsigned long mask;
-
+        /*
+         * Transparent hugepages are handled by generic code. We can skip them
+         * here.
+         */
         ptep = find_linux_pte_or_hugepte(mm->pgd, address, &shift);
 
         /* Verify it is a huge page else bail. */
-        if (!ptep || !shift)
+        if (!ptep || !shift || pmd_trans_huge(*(pmd_t *)ptep))
                 return ERR_PTR(-EINVAL);
 
         mask = (1UL << shift) - 1;
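
The shift reported back by find_linux_pte_or_hugepte() feeds the mask computed on the last line of this hunk; the rest of follow_huge_addr(), outside the hunk, uses it to index the 4K subpage inside the huge page. A standalone sketch of that arithmetic, assuming a 4K base page and a 16M huge page (constants illustrative, not taken from the kernel headers):

#include <stdio.h>

#define PAGE_SHIFT 12                           /* assumed 4K base pages */

int main(void)
{
        unsigned long address = 0x12345678UL;   /* example effective address */
        unsigned int shift = 24;                /* 16M huge page */
        unsigned long mask = (1UL << shift) - 1;

        /* index of the 4K subpage within the 16M huge page, mirroring
         * page += (address & mask) >> PAGE_SHIFT in the kernel */
        printf("subpage index: %lu\n", (address & mask) >> PAGE_SHIFT);
        return 0;
}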
@@ -759,69 +706,6 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
         return NULL;
 }
 
-int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
-                unsigned long end, int write, struct page **pages, int *nr)
-{
-        unsigned long mask;
-        unsigned long pte_end;
-        struct page *head, *page, *tail;
-        pte_t pte;
-        int refs;
-
-        pte_end = (addr + sz) & ~(sz-1);
-        if (pte_end < end)
-                end = pte_end;
-
-        pte = *ptep;
-        mask = _PAGE_PRESENT | _PAGE_USER;
-        if (write)
-                mask |= _PAGE_RW;
-
-        if ((pte_val(pte) & mask) != mask)
-                return 0;
-
-        /* hugepages are never "special" */
-        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-
-        refs = 0;
-        head = pte_page(pte);
-
-        page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
-        tail = page;
-        do {
-                VM_BUG_ON(compound_head(page) != head);
-                pages[*nr] = page;
-                (*nr)++;
-                page++;
-                refs++;
-        } while (addr += PAGE_SIZE, addr != end);
-
-        if (!page_cache_add_speculative(head, refs)) {
-                *nr -= refs;
-                return 0;
-        }
-
-        if (unlikely(pte_val(pte) != pte_val(*ptep))) {
-                /* Could be optimized better */
-                *nr -= refs;
-                while (refs--)
-                        put_page(head);
-                return 0;
-        }
-
-        /*
-         * Any tail page need their mapcount reference taken before we
-         * return.
-         */
-        while (refs--) {
-                if (PageTail(tail))
-                        get_huge_page_tail(tail);
-                tail++;
-        }
-
-        return 1;
-}
-
 static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
                                       unsigned long sz)
 {
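
hugepte_addr_end()'s body lies outside this hunk. It presumably returns the next sz-aligned boundary after addr, clamped to end, in the style of the generic pmd_addr_end() helpers; the overflow-safe comparison below is an assumption modelled on that pattern, not quoted from the file:

static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
                                      unsigned long sz)
{
        unsigned long boundary = (addr + sz) & ~(sz - 1);

        /* the "- 1" keeps the comparison correct if boundary wraps to 0 */
        return (boundary - 1 < end - 1) ? boundary : end;
}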
@@ -1038,3 +922,168 @@ void flush_dcache_icache_hugepage(struct page *page)
                 }
         }
 }
+
+#endif /* CONFIG_HUGETLB_PAGE */
+
+/*
+ * We have 4 cases for pgds and pmds:
+ * (1) invalid (all zeroes)
+ * (2) pointer to next table, as normal; bottom 6 bits == 0
+ * (3) leaf pte for huge page, bottom two bits != 00
+ * (4) hugepd pointer, bottom two bits == 00, next 4 bits indicate size of table
+ *
+ * So long as we atomically load page table pointers we are safe against
+ * teardown, and we can follow the address down to the page and take a ref on it.
+ */
+
+pte_t *find_linux_pte_or_hugepte(pgd_t *pgdir, unsigned long ea, unsigned *shift)
+{
+        pgd_t pgd, *pgdp;
+        pud_t pud, *pudp;
+        pmd_t pmd, *pmdp;
+        pte_t *ret_pte;
+        hugepd_t *hpdp = NULL;
+        unsigned pdshift = PGDIR_SHIFT;
+
+        if (shift)
+                *shift = 0;
+
+        pgdp = pgdir + pgd_index(ea);
+        pgd = ACCESS_ONCE(*pgdp);
+        /*
+         * Always operate on the local stack value. This makes sure the
+         * value doesn't get updated by a parallel THP split/collapse,
+         * page fault or a page unmap. The returned pte_t * is still not
+         * stable, so it should be checked there for the above conditions.
+         */
+        if (pgd_none(pgd))
+                return NULL;
+        else if (pgd_huge(pgd)) {
+                ret_pte = (pte_t *) pgdp;
+                goto out;
+        } else if (is_hugepd(&pgd))
+                hpdp = (hugepd_t *)&pgd;
+        else {
+                /*
+                 * Even if we end up with an unmap, the pgtable will not
+                 * be freed, because we do an RCU free and here we have
+                 * interrupts disabled.
+                 */
+                pdshift = PUD_SHIFT;
+                pudp = pud_offset(&pgd, ea);
+                pud = ACCESS_ONCE(*pudp);
+
+                if (pud_none(pud))
+                        return NULL;
+                else if (pud_huge(pud)) {
+                        ret_pte = (pte_t *) pudp;
+                        goto out;
+                } else if (is_hugepd(&pud))
+                        hpdp = (hugepd_t *)&pud;
+                else {
+                        pdshift = PMD_SHIFT;
+                        pmdp = pmd_offset(&pud, ea);
+                        pmd = ACCESS_ONCE(*pmdp);
+                        /*
+                         * A hugepage collapse is captured by pmd_none,
+                         * because it marks the pmd none and does a hpte
+                         * invalidate.
+                         *
+                         * A hugepage split is captured by
+                         * pmd_trans_splitting, because we mark the pmd
+                         * trans splitting and do a hpte invalidate.
+                         */
+                        if (pmd_none(pmd) || pmd_trans_splitting(pmd))
+                                return NULL;
+
+                        if (pmd_huge(pmd) || pmd_large(pmd)) {
+                                ret_pte = (pte_t *) pmdp;
+                                goto out;
+                        } else if (is_hugepd(&pmd))
+                                hpdp = (hugepd_t *)&pmd;
+                        else
+                                return pte_offset_kernel(&pmd, ea);
+                }
+        }
+        if (!hpdp)
+                return NULL;
+
+        ret_pte = hugepte_offset(hpdp, ea, pdshift);
+        pdshift = hugepd_shift(*hpdp);
+out:
+        if (shift)
+                *shift = pdshift;
+        return ret_pte;
+}
+EXPORT_SYMBOL_GPL(find_linux_pte_or_hugepte);
+
+int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
+                unsigned long end, int write, struct page **pages, int *nr)
+{
+        unsigned long mask;
+        unsigned long pte_end;
+        struct page *head, *page, *tail;
+        pte_t pte;
+        int refs;
+
+        pte_end = (addr + sz) & ~(sz-1);
+        if (pte_end < end)
+                end = pte_end;
+
+        pte = ACCESS_ONCE(*ptep);
+        mask = _PAGE_PRESENT | _PAGE_USER;
+        if (write)
+                mask |= _PAGE_RW;
+
+        if ((pte_val(pte) & mask) != mask)
+                return 0;
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+        /*
+         * Check for a hugepage split in progress.
+         */
+        if (pmd_trans_splitting(pte_pmd(pte)))
+                return 0;
+#endif
+
+        /* hugepages are never "special" */
+        VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
+
+        refs = 0;
+        head = pte_page(pte);
+
+        page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
+        tail = page;
+        do {
+                VM_BUG_ON(compound_head(page) != head);
+                pages[*nr] = page;
+                (*nr)++;
+                page++;
+                refs++;
+        } while (addr += PAGE_SIZE, addr != end);
+
+        if (!page_cache_add_speculative(head, refs)) {
+                *nr -= refs;
+                return 0;
+        }
+
+        if (unlikely(pte_val(pte) != pte_val(*ptep))) {
+                /* Could be optimized better */
+                *nr -= refs;
+                while (refs--)
+                        put_page(head);
+                return 0;
+        }
+
+        /*
+         * Any tail pages need their mapcount reference taken before we
+         * return.
+         */
+        while (refs--) {
+                if (PageTail(tail))
+                        get_huge_page_tail(tail);
+                tail++;
+        }
+
+        return 1;
+}
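
gup_hugepte() relies on the classic lockless snapshot-pin-recheck pattern: read the pte once, speculatively take the references, then re-read and back off if the entry changed underneath. A compilable userspace model of just that pattern (names are illustrative; the kernel uses page_cache_add_speculative(), which can additionally fail on a zero refcount, and put_page(), not the bare atomics shown here):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
        atomic_int refcount;
};

static bool pin_if_unchanged(_Atomic unsigned long *pte,
                             struct fake_page *head, int refs)
{
        unsigned long snap = atomic_load(pte);          /* pte = ACCESS_ONCE(*ptep) */

        atomic_fetch_add(&head->refcount, refs);        /* page_cache_add_speculative() */

        if (atomic_load(pte) != snap) {                 /* pte_val(pte) != pte_val(*ptep) */
                atomic_fetch_sub(&head->refcount, refs);/* the put_page() loop */
                return false;                           /* raced; fall back to slow path */
        }
        return true;
}

int main(void)
{
        _Atomic unsigned long pte = 0x1234;
        struct fake_page head = { .refcount = 1 };

        printf("pinned: %d, refcount now: %d\n",
               pin_if_unchanged(&pte, &head, 8),
               atomic_load(&head.refcount));
        return 0;
}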