diff options
Diffstat (limited to 'arch/tile/mm/pgtable.c')
-rw-r--r-- | arch/tile/mm/pgtable.c | 170 |
1 files changed, 141 insertions, 29 deletions
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c index 2c850d9864e3..1a2b36f8866d 100644 --- a/arch/tile/mm/pgtable.c +++ b/arch/tile/mm/pgtable.c | |||
@@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address) | |||
142 | } | 142 | } |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | /** | ||
146 | * shatter_huge_page() - ensure a given address is mapped by a small page. | ||
147 | * | ||
148 | * This function converts a huge PTE mapping kernel LOWMEM into a bunch | ||
149 | * of small PTEs with the same caching. No cache flush required, but we | ||
150 | * must do a global TLB flush. | ||
151 | * | ||
152 | * Any caller that wishes to modify a kernel mapping that might | ||
153 | * have been made with a huge page should call this function, | ||
154 | * since doing so properly avoids race conditions with installing the | ||
155 | * newly-shattered page and then flushing all the TLB entries. | ||
156 | * | ||
157 | * @addr: Address at which to shatter any existing huge page. | ||
158 | */ | ||
159 | void shatter_huge_page(unsigned long addr) | ||
160 | { | ||
161 | pgd_t *pgd; | ||
162 | pud_t *pud; | ||
163 | pmd_t *pmd; | ||
164 | unsigned long flags = 0; /* happy compiler */ | ||
165 | #ifdef __PAGETABLE_PMD_FOLDED | ||
166 | struct list_head *pos; | ||
167 | #endif | ||
168 | |||
169 | /* Get a pointer to the pmd entry that we need to change. */ | ||
170 | addr &= HPAGE_MASK; | ||
171 | BUG_ON(pgd_addr_invalid(addr)); | ||
172 | BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */ | ||
173 | pgd = swapper_pg_dir + pgd_index(addr); | ||
174 | pud = pud_offset(pgd, addr); | ||
175 | BUG_ON(!pud_present(*pud)); | ||
176 | pmd = pmd_offset(pud, addr); | ||
177 | BUG_ON(!pmd_present(*pmd)); | ||
178 | if (!pmd_huge_page(*pmd)) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Grab the pgd_lock, since we may need it to walk the pgd_list, | ||
183 | * and since we need some kind of lock here to avoid races. | ||
184 | */ | ||
185 | spin_lock_irqsave(&pgd_lock, flags); | ||
186 | if (!pmd_huge_page(*pmd)) { | ||
187 | /* Lost the race to convert the huge page. */ | ||
188 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
189 | return; | ||
190 | } | ||
191 | |||
192 | /* Shatter the huge page into the preallocated L2 page table. */ | ||
193 | pmd_populate_kernel(&init_mm, pmd, | ||
194 | get_prealloc_pte(pte_pfn(*(pte_t *)pmd))); | ||
195 | |||
196 | #ifdef __PAGETABLE_PMD_FOLDED | ||
197 | /* Walk every pgd on the system and update the pmd there. */ | ||
198 | list_for_each(pos, &pgd_list) { | ||
199 | pmd_t *copy_pmd; | ||
200 | pgd = list_to_pgd(pos) + pgd_index(addr); | ||
201 | pud = pud_offset(pgd, addr); | ||
202 | copy_pmd = pmd_offset(pud, addr); | ||
203 | __set_pmd(copy_pmd, *pmd); | ||
204 | } | ||
205 | #endif | ||
206 | |||
207 | /* Tell every cpu to notice the change. */ | ||
208 | flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE, | ||
209 | cpu_possible_mask, NULL, 0); | ||
210 | |||
211 | /* Hold the lock until the TLB flush is finished to avoid races. */ | ||
212 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
213 | } | ||
214 | |||
145 | /* | 215 | /* |
146 | * List of all pgd's needed so it can invalidate entries in both cached | 216 | * List of all pgd's needed so it can invalidate entries in both cached |
147 | * and uncached pgd's. This is essentially codepath-based locking | 217 | * and uncached pgd's. This is essentially codepath-based locking |
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd) | |||
184 | BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); | 254 | BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); |
185 | #endif | 255 | #endif |
186 | 256 | ||
187 | clone_pgd_range(pgd + KERNEL_PGD_INDEX_START, | 257 | memcpy(pgd + KERNEL_PGD_INDEX_START, |
188 | swapper_pg_dir + KERNEL_PGD_INDEX_START, | 258 | swapper_pg_dir + KERNEL_PGD_INDEX_START, |
189 | KERNEL_PGD_PTRS); | 259 | KERNEL_PGD_PTRS * sizeof(pgd_t)); |
190 | 260 | ||
191 | pgd_list_add(pgd); | 261 | pgd_list_add(pgd); |
192 | spin_unlock_irqrestore(&pgd_lock, flags); | 262 | spin_unlock_irqrestore(&pgd_lock, flags); |
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd) | |||
220 | 290 | ||
221 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | 291 | struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) |
222 | { | 292 | { |
223 | gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP; | 293 | gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO; |
224 | struct page *p; | 294 | struct page *p; |
295 | #if L2_USER_PGTABLE_ORDER > 0 | ||
296 | int i; | ||
297 | #endif | ||
225 | 298 | ||
226 | #ifdef CONFIG_HIGHPTE | 299 | #ifdef CONFIG_HIGHPTE |
227 | flags |= __GFP_HIGHMEM; | 300 | flags |= __GFP_HIGHMEM; |
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
231 | if (p == NULL) | 304 | if (p == NULL) |
232 | return NULL; | 305 | return NULL; |
233 | 306 | ||
307 | #if L2_USER_PGTABLE_ORDER > 0 | ||
308 | /* | ||
309 | * Make every page have a page_count() of one, not just the first. | ||
310 | * We don't use __GFP_COMP since it doesn't look like it works | ||
311 | * correctly with tlb_remove_page(). | ||
312 | */ | ||
313 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
314 | init_page_count(p+i); | ||
315 | inc_zone_page_state(p+i, NR_PAGETABLE); | ||
316 | } | ||
317 | #endif | ||
318 | |||
234 | pgtable_page_ctor(p); | 319 | pgtable_page_ctor(p); |
235 | return p; | 320 | return p; |
236 | } | 321 | } |
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
242 | */ | 327 | */ |
243 | void pte_free(struct mm_struct *mm, struct page *p) | 328 | void pte_free(struct mm_struct *mm, struct page *p) |
244 | { | 329 | { |
330 | int i; | ||
331 | |||
245 | pgtable_page_dtor(p); | 332 | pgtable_page_dtor(p); |
246 | __free_pages(p, L2_USER_PGTABLE_ORDER); | 333 | __free_page(p); |
334 | |||
335 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
336 | __free_page(p+i); | ||
337 | dec_zone_page_state(p+i, NR_PAGETABLE); | ||
338 | } | ||
247 | } | 339 | } |
248 | 340 | ||
249 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | 341 | void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, |
@@ -252,8 +344,12 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, | |||
252 | int i; | 344 | int i; |
253 | 345 | ||
254 | pgtable_page_dtor(pte); | 346 | pgtable_page_dtor(pte); |
255 | for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) | 347 | tlb_remove_page(tlb, pte); |
348 | |||
349 | for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) { | ||
256 | tlb_remove_page(tlb, pte + i); | 350 | tlb_remove_page(tlb, pte + i); |
351 | dec_zone_page_state(pte + i, NR_PAGETABLE); | ||
352 | } | ||
257 | } | 353 | } |
258 | 354 | ||
259 | #ifndef __tilegx__ | 355 | #ifndef __tilegx__ |
@@ -335,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot) | |||
335 | return x + y * smp_width; | 431 | return x + y * smp_width; |
336 | } | 432 | } |
337 | 433 | ||
338 | void set_pte_order(pte_t *ptep, pte_t pte, int order) | 434 | /* |
435 | * Convert a kernel VA to a PA and homing information. | ||
436 | */ | ||
437 | int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte) | ||
339 | { | 438 | { |
340 | unsigned long pfn = pte_pfn(pte); | 439 | struct page *page = virt_to_page(va); |
341 | struct page *page = pfn_to_page(pfn); | 440 | pte_t null_pte = { 0 }; |
342 | 441 | ||
343 | /* Update the home of a PTE if necessary */ | 442 | *cpa = __pa(va); |
344 | pte = pte_set_home(pte, page_home(page)); | 443 | |
444 | /* Note that this is not writing a page table, just returning a pte. */ | ||
445 | *pte = pte_set_home(null_pte, page_home(page)); | ||
446 | |||
447 | return 0; /* return non-zero if not hfh? */ | ||
448 | } | ||
449 | EXPORT_SYMBOL(va_to_cpa_and_pte); | ||
345 | 450 | ||
451 | void __set_pte(pte_t *ptep, pte_t pte) | ||
452 | { | ||
346 | #ifdef __tilegx__ | 453 | #ifdef __tilegx__ |
347 | *ptep = pte; | 454 | *ptep = pte; |
348 | #else | 455 | #else |
349 | /* | 456 | # if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 |
350 | * When setting a PTE, write the high bits first, then write | 457 | # error Must write the present and migrating bits last |
351 | * the low bits. This sets the "present" bit only after the | 458 | # endif |
352 | * other bits are in place. If a particular PTE update | 459 | if (pte_present(pte)) { |
353 | * involves transitioning from one valid PTE to another, it | 460 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); |
354 | * may be necessary to call set_pte_order() more than once, | 461 | barrier(); |
355 | * transitioning via a suitable intermediate state. | 462 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); |
356 | * Note that this sequence also means that if we are transitioning | 463 | } else { |
357 | * from any migrating PTE to a non-migrating one, we will not | 464 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); |
358 | * see a half-updated PTE with the migrating bit off. | 465 | barrier(); |
359 | */ | 466 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); |
360 | #if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 | 467 | } |
361 | # error Must write the present and migrating bits last | 468 | #endif /* __tilegx__ */ |
362 | #endif | 469 | } |
363 | ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); | 470 | |
364 | barrier(); | 471 | void set_pte(pte_t *ptep, pte_t pte) |
365 | ((u32 *)ptep)[0] = (u32)(pte_val(pte)); | 472 | { |
366 | #endif | 473 | struct page *page = pfn_to_page(pte_pfn(pte)); |
474 | |||
475 | /* Update the home of a PTE if necessary */ | ||
476 | pte = pte_set_home(pte, page_home(page)); | ||
477 | |||
478 | __set_pte(ptep, pte); | ||
367 | } | 479 | } |
368 | 480 | ||
369 | /* Can this mm load a PTE with cached_priority set? */ | 481 | /* Can this mm load a PTE with cached_priority set? */ |