Diffstat (limited to 'arch/powerpc/mm/hugetlbpage.c')
-rw-r--r--  arch/powerpc/mm/hugetlbpage.c | 295
 1 file changed, 259 insertions(+), 36 deletions(-)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 7370f9f33e29..266b8b2ceac9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -30,13 +30,66 @@
 #define NUM_LOW_AREAS	(0x100000000UL >> SID_SHIFT)
 #define NUM_HIGH_AREAS	(PGTABLE_RANGE >> HTLB_AREA_SHIFT)
 
+#ifdef CONFIG_PPC_64K_PAGES
+#define HUGEPTE_INDEX_SIZE	(PMD_SHIFT-HPAGE_SHIFT)
+#else
+#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT-HPAGE_SHIFT)
+#endif
+#define PTRS_PER_HUGEPTE	(1 << HUGEPTE_INDEX_SIZE)
+#define HUGEPTE_TABLE_SIZE	(sizeof(pte_t) << HUGEPTE_INDEX_SIZE)
+
+#define HUGEPD_SHIFT		(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
+#define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
+#define HUGEPD_MASK		(~(HUGEPD_SIZE-1))
+
+#define huge_pgtable_cache	(pgtable_cache[HUGEPTE_CACHE_NUM])
+
+/* Flag to mark huge PD pointers.  This means pmd_bad() and pud_bad()
+ * will choke on pointers to hugepte tables, which is handy for
+ * catching screwups early. */
+#define HUGEPD_OK	0x1
+
+typedef struct { unsigned long pd; } hugepd_t;
+
+#define hugepd_none(hpd)	((hpd).pd == 0)
+
+static inline pte_t *hugepd_page(hugepd_t hpd)
+{
+	BUG_ON(!(hpd.pd & HUGEPD_OK));
+	return (pte_t *)(hpd.pd & ~HUGEPD_OK);
+}
+
+static inline pte_t *hugepte_offset(hugepd_t *hpdp, unsigned long addr)
+{
+	unsigned long idx = ((addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE-1));
+	pte_t *dir = hugepd_page(*hpdp);
+
+	return dir + idx;
+}
+
+static int __hugepte_alloc(struct mm_struct *mm, hugepd_t *hpdp,
+			   unsigned long address)
+{
+	pte_t *new = kmem_cache_alloc(huge_pgtable_cache,
+				      GFP_KERNEL|__GFP_REPEAT);
+
+	if (! new)
+		return -ENOMEM;
+
+	spin_lock(&mm->page_table_lock);
+	if (!hugepd_none(*hpdp))
+		kmem_cache_free(huge_pgtable_cache, new);
+	else
+		hpdp->pd = (unsigned long)new | HUGEPD_OK;
+	spin_unlock(&mm->page_table_lock);
+	return 0;
+}
+
 /* Modelled after find_linux_pte() */
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
-	pmd_t *pm;
-	pte_t *pt;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
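The hunk above introduces the hugepd scheme: instead of spending a full PTE page on each hugepage segment, a huge page directory entry holds a pointer to a small hugepte table, with bit 0 (HUGEPD_OK) set so that pmd_bad() and pud_bad() trip on it. Below is a minimal stand-alone sketch of that tagged-pointer and index arithmetic. The shift values are assumptions chosen for illustration (4K base pages, 16M huge pages, a PUD_SHIFT of 28, giving a 16-entry table), not values quoted from the patch.

/* Sketch of the hugepd tagged-pointer scheme; shifts are assumed. */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

#define HPAGE_SHIFT		24	/* assumed: 16M huge pages */
#define PUD_SHIFT		28	/* assumed */
#define HUGEPTE_INDEX_SIZE	(PUD_SHIFT - HPAGE_SHIFT)	/* 4 -> 16 entries */
#define PTRS_PER_HUGEPTE	(1UL << HUGEPTE_INDEX_SIZE)
#define HUGEPD_OK		0x1UL

typedef struct { unsigned long pd; } hugepd_t;

static unsigned long *hugepd_page(hugepd_t hpd)
{
	assert(hpd.pd & HUGEPD_OK);	/* untagged values trip here, like the BUG_ON */
	return (unsigned long *)(hpd.pd & ~HUGEPD_OK);
}

int main(void)
{
	size_t bytes = PTRS_PER_HUGEPTE * sizeof(unsigned long);	/* 128 bytes */
	/* alignment >= object size keeps bit 0 free for the tag */
	unsigned long *table = aligned_alloc(bytes, bytes);
	hugepd_t hpd = { (unsigned long)table | HUGEPD_OK };
	unsigned long addr = 0x12345678UL;

	/* same index arithmetic as hugepte_offset() above */
	unsigned long idx = (addr >> HPAGE_SHIFT) & (PTRS_PER_HUGEPTE - 1);

	printf("entry for %#lx is slot %lu of table %p\n",
	       addr, idx, (void *)hugepd_page(hpd));
	free(table);
	return 0;
}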
@@ -46,26 +99,14 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
 	if (!pgd_none(*pg)) {
 		pu = pud_offset(pg, addr);
 		if (!pud_none(*pu)) {
-			pm = pmd_offset(pu, addr);
 #ifdef CONFIG_PPC_64K_PAGES
-			/* Currently, we use the normal PTE offset within full
-			 * size PTE pages, thus our huge PTEs are scattered in
-			 * the PTE page and we do waste some. We may change
-			 * that in the future, but the current mecanism keeps
-			 * things much simpler
-			 */
-			if (!pmd_none(*pm)) {
-				/* Note: pte_offset_* are all equivalent on
-				 * ppc64 as we don't have HIGHMEM
-				 */
-				pt = pte_offset_kernel(pm, addr);
-				return pt;
-			}
-#else /* CONFIG_PPC_64K_PAGES */
-			/* On 4k pages, we put huge PTEs in the PMD page */
-			pt = (pte_t *)pm;
-			return pt;
-#endif /* CONFIG_PPC_64K_PAGES */
+			pmd_t *pm;
+			pm = pmd_offset(pu, addr);
+			if (!pmd_none(*pm))
+				return hugepte_offset((hugepd_t *)pm, addr);
+#else
+			return hugepte_offset((hugepd_t *)pu, addr);
+#endif
 		}
 	}
 
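With huge PTEs grouped into hugepd tables, huge_pte_offset() no longer descends to a PTE page: under 4K base pages the hugepd sits directly in the PUD slot, and under 64K pages in the PMD slot. It may help to see how much address space one hugepd then covers; a small sketch, reusing the assumed shifts from the previous example (so HUGEPD_SHIFT comes out to 28):

/* Sketch: address-space coverage of a single hugepd; shifts assumed. */
#include <stdio.h>

#define HPAGE_SHIFT		24	/* assumed */
#define HUGEPTE_INDEX_SIZE	4	/* assumed */
#define HUGEPD_SHIFT		(HPAGE_SHIFT + HUGEPTE_INDEX_SIZE)
#define HUGEPD_SIZE		(1UL << HUGEPD_SHIFT)
#define HUGEPD_MASK		(~(HUGEPD_SIZE - 1))

int main(void)
{
	unsigned long addr = 0x5234567890UL;

	/* 16 huge pages x 16M = 256M per hugepte table */
	printf("one hugepd maps %lu MiB\n", HUGEPD_SIZE >> 20);
	printf("the hugepd region containing %#lx starts at %#lx\n",
	       addr, addr & HUGEPD_MASK);
	return 0;
}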
@@ -76,8 +117,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pg;
 	pud_t *pu;
-	pmd_t *pm;
-	pte_t *pt;
+	hugepd_t *hpdp = NULL;
 
 	BUG_ON(! in_hugepage_area(mm->context, addr));
 
@@ -87,23 +127,182 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr)
 	pu = pud_alloc(mm, pg, addr);
 
 	if (pu) {
+#ifdef CONFIG_PPC_64K_PAGES
+		pmd_t *pm;
 		pm = pmd_alloc(mm, pu, addr);
-		if (pm) {
+		if (pm)
+			hpdp = (hugepd_t *)pm;
+#else
+		hpdp = (hugepd_t *)pu;
+#endif
+	}
+
+	if (! hpdp)
+		return NULL;
+
+	if (hugepd_none(*hpdp) && __hugepte_alloc(mm, hpdp, addr))
+		return NULL;
+
+	return hugepte_offset(hpdp, addr);
+}
+
+static void free_hugepte_range(struct mmu_gather *tlb, hugepd_t *hpdp)
+{
+	pte_t *hugepte = hugepd_page(*hpdp);
+
+	hpdp->pd = 0;
+	tlb->need_flush = 1;
+	pgtable_free_tlb(tlb, pgtable_free_cache(hugepte, HUGEPTE_CACHE_NUM,
+						 HUGEPTE_TABLE_SIZE-1));
+}
+
 #ifdef CONFIG_PPC_64K_PAGES
-			/* See comment in huge_pte_offset.  Note that if we ever
-			 * want to put the page size in the PMD, we would have
-			 * to open code our own pte_alloc* function in order
-			 * to populate and set the size atomically
-			 */
-			pt = pte_alloc_map(mm, pm, addr);
-#else /* CONFIG_PPC_64K_PAGES */
-			pt = (pte_t *)pm;
-#endif /* CONFIG_PPC_64K_PAGES */
-			return pt;
-		}
+static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pmd_t *pmd;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none(*pmd))
+			continue;
+		free_hugepte_range(tlb, (hugepd_t *)pmd);
+	} while (pmd++, addr = next, addr != end);
+
+	start &= PUD_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PUD_MASK;
+		if (!ceiling)
+			return;
 	}
+	if (end - 1 > ceiling - 1)
+		return;
 
-	return NULL;
+	pmd = pmd_offset(pud, start);
+	pud_clear(pud);
+	pmd_free_tlb(tlb, pmd);
+}
+#endif
+
+static void hugetlb_free_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+				   unsigned long addr, unsigned long end,
+				   unsigned long floor, unsigned long ceiling)
+{
+	pud_t *pud;
+	unsigned long next;
+	unsigned long start;
+
+	start = addr;
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+#ifdef CONFIG_PPC_64K_PAGES
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		hugetlb_free_pmd_range(tlb, pud, addr, next, floor, ceiling);
+#else
+		if (pud_none(*pud))
+			continue;
+		free_hugepte_range(tlb, (hugepd_t *)pud);
+#endif
+	} while (pud++, addr = next, addr != end);
+
+	start &= PGDIR_MASK;
+	if (start < floor)
+		return;
+	if (ceiling) {
+		ceiling &= PGDIR_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		return;
+
+	pud = pud_offset(pgd, start);
+	pgd_clear(pgd);
+	pud_free_tlb(tlb, pud);
+}
+
+/*
+ * This function frees user-level page tables of a process.
+ *
+ * Must be called with pagetable lock held.
+ */
+void hugetlb_free_pgd_range(struct mmu_gather **tlb,
+			    unsigned long addr, unsigned long end,
+			    unsigned long floor, unsigned long ceiling)
+{
+	pgd_t *pgd;
+	unsigned long next;
+	unsigned long start;
+
+	/*
+	 * Comments below take from the normal free_pgd_range().  They
+	 * apply here too.  The tests against HUGEPD_MASK below are
+	 * essential, because we *don't* test for this at the bottom
+	 * level.  Without them we'll attempt to free a hugepte table
+	 * when we unmap just part of it, even if there are other
+	 * active mappings using it.
+	 *
+	 * The next few lines have given us lots of grief...
+	 *
+	 * Why are we testing HUGEPD* at this top level?  Because
+	 * often there will be no work to do at all, and we'd prefer
+	 * not to go all the way down to the bottom just to discover
+	 * that.
+	 *
+	 * Why all these "- 1"s?  Because 0 represents both the bottom
+	 * of the address space and the top of it (using -1 for the
+	 * top wouldn't help much: the masks would do the wrong thing).
+	 * The rule is that addr 0 and floor 0 refer to the bottom of
+	 * the address space, but end 0 and ceiling 0 refer to the top
+	 * Comparisons need to use "end - 1" and "ceiling - 1" (though
+	 * that end 0 case should be mythical).
+	 *
+	 * Wherever addr is brought up or ceiling brought down, we
+	 * must be careful to reject "the opposite 0" before it
+	 * confuses the subsequent tests.  But what about where end is
+	 * brought down by HUGEPD_SIZE below? no, end can't go down to
+	 * 0 there.
+	 *
+	 * Whereas we round start (addr) and ceiling down, by different
+	 * masks at different levels, in order to test whether a table
+	 * now has no other vmas using it, so can be freed, we don't
+	 * bother to round floor or end up - the tests don't need that.
+	 */
+
+	addr &= HUGEPD_MASK;
+	if (addr < floor) {
+		addr += HUGEPD_SIZE;
+		if (!addr)
+			return;
+	}
+	if (ceiling) {
+		ceiling &= HUGEPD_MASK;
+		if (!ceiling)
+			return;
+	}
+	if (end - 1 > ceiling - 1)
+		end -= HUGEPD_SIZE;
+	if (addr > end - 1)
+		return;
+
+	start = addr;
+	pgd = pgd_offset((*tlb)->mm, addr);
+	do {
+		BUG_ON(! in_hugepage_area((*tlb)->mm->context, addr));
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		hugetlb_free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
+	} while (pgd++, addr = next, addr != end);
 }
 
 void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
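The floor/ceiling dance in hugetlb_free_pgd_range() above is dense, so a worked example may help. The sketch below reproduces only the top-level trimming, with the assumed 256M HUGEPD_SIZE from the earlier examples; try_free() and its two scenarios are hypothetical, and a printf stands in for the actual page-table walk.

/* Worked example of the top-level floor/ceiling trimming above. */
#include <stdio.h>

#define HUGEPD_SHIFT	28	/* assumed */
#define HUGEPD_SIZE	(1UL << HUGEPD_SHIFT)
#define HUGEPD_MASK	(~(HUGEPD_SIZE - 1))

static void try_free(unsigned long addr, unsigned long end,
		     unsigned long floor, unsigned long ceiling)
{
	addr &= HUGEPD_MASK;	/* round start down to a hugepd boundary */
	if (addr < floor) {	/* boundary is below floor: another mapping
				 * may still be using this hugepte table */
		addr += HUGEPD_SIZE;
		if (!addr)	/* wrapped to "the opposite 0" */
			return;
	}
	if (ceiling) {		/* ceiling 0 means top of address space */
		ceiling &= HUGEPD_MASK;
		if (!ceiling)
			return;
	}
	if (end - 1 > ceiling - 1)	/* "- 1" so that end == 0 means top */
		end -= HUGEPD_SIZE;
	if (addr > end - 1)
		return;
	printf("descend and free page tables in [%#lx, %#lx)\n", addr, end);
}

int main(void)
{
	/* another vma still lives below us in the same 256M region
	 * (floor sits above the region boundary): nothing is freed */
	try_free(0x14000000, 0x18000000, 0x14000000, 0);
	/* region otherwise empty: the walk proceeds */
	try_free(0x14000000, 0x18000000, 0x08000000, 0x30000000);
	return 0;
}

In the first call the rounded-down start falls below floor, so addr is bumped past end and the function returns without touching the shared table; in the second, the whole region belongs to the range being torn down.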
@@ -841,3 +1040,27 @@ repeat:
 out:
 	return err;
 }
+
+static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags)
+{
+	memset(addr, 0, kmem_cache_size(cache));
+}
+
+static int __init hugetlbpage_init(void)
+{
+	if (!cpu_has_feature(CPU_FTR_16M_PAGE))
+		return -ENODEV;
+
+	huge_pgtable_cache = kmem_cache_create("hugepte_cache",
+					       HUGEPTE_TABLE_SIZE,
+					       HUGEPTE_TABLE_SIZE,
+					       SLAB_HWCACHE_ALIGN |
+					       SLAB_MUST_HWCACHE_ALIGN,
+					       zero_ctor, NULL);
+	if (! huge_pgtable_cache)
+		panic("hugetlbpage_init(): could not create hugepte cache\n");
+
+	return 0;
+}
+
+module_init(hugetlbpage_init);
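Two details of the cache setup above tie back to the rest of the patch: zero_ctor() guarantees that a freshly constructed table reads as hugepd_none() (pd == 0) in every slot, and aligning objects to HUGEPTE_TABLE_SIZE keeps bit 0 of their addresses clear for the HUGEPD_OK tag. __hugepte_alloc() then uses the classic populate pattern: allocate outside the lock, recheck under it, and discard the allocation if another thread won the race. Below is a user-space sketch of that pattern; pthreads stand in for mm->page_table_lock, calloc for the zeroing constructor, and hugepte_alloc() is a hypothetical stand-in, not kernel code.

/* Sketch of the alloc-outside-lock, recheck-under-lock pattern. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define HUGEPD_OK 0x1UL

typedef struct { unsigned long pd; } hugepd_t;

static pthread_mutex_t page_table_lock = PTHREAD_MUTEX_INITIALIZER;

static int hugepte_alloc(hugepd_t *hpdp, size_t table_bytes)
{
	void *new = calloc(1, table_bytes);	/* zeroed, like zero_ctor() */

	if (!new)
		return -1;

	pthread_mutex_lock(&page_table_lock);
	if (hpdp->pd)		/* someone beat us to it: toss ours */
		free(new);
	else			/* we won: publish the tagged pointer */
		hpdp->pd = (unsigned long)new | HUGEPD_OK;
	pthread_mutex_unlock(&page_table_lock);
	return 0;
}

int main(void)
{
	hugepd_t hpd = { 0 };

	hugepte_alloc(&hpd, 128);
	hugepte_alloc(&hpd, 128);	/* second call discards its allocation */
	printf("pd = %#lx\n", hpd.pd);
	free((void *)(hpd.pd & ~HUGEPD_OK));
	return 0;
}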