aboutsummaryrefslogtreecommitdiffstats
path: root/arch/tile/mm/pgtable.c
diff options
context:
space:
mode:
author Chris Metcalf <cmetcalf@tilera.com> 2011-02-28 16:37:34 -0500
committer Chris Metcalf <cmetcalf@tilera.com> 2011-03-10 13:17:53 -0500
commit 76c567fbba50c3da2f4d40e2e551bab26cfd4381 (patch)
tree 6e3c92a266d0ec255e1930adf5ba5268cd71dee9 /arch/tile/mm/pgtable.c
parent 09c17eab075ceeafb53935d858c575b6776394d1 (diff)
arch/tile: support 4KB page size as well as 64KB
The Tilera architecture traditionally supports 64KB page sizes to improve TLB utilization and improve performance when the hardware is being used primarily to run a single application. For more generic server scenarios, it can be beneficial to run with 4KB page sizes, so this commit allows that to be specified (by modifying the arch/tile/include/hv/pagesize.h header).

As part of this change, we also re-worked the PTE management slightly so that PTE writes all go through a __set_pte() function where we can do some additional validation. The set_pte_order() function was eliminated since the "order" argument wasn't being used.

One bug uncovered was in the PCI DMA code, which wasn't properly flushing the specified range. This was benign with 64KB pages, but with 4KB pages we were getting some larger flushes wrong.

The per-cpu memory reservation code also needed updating to conform with the newer percpu stuff; before it always chose 64KB, and that was always correct, but with 4KB granularity we now have to pay closer attention and reserve the amount of memory that will be requested when the percpu code starts allocating.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>
Diffstat (limited to 'arch/tile/mm/pgtable.c')
-rw-r--r-- arch/tile/mm/pgtable.c 170
1 files changed, 141 insertions, 29 deletions
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index 2c850d9864e3..1a2b36f8866d 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -142,6 +142,76 @@ pte_t *_pte_offset_map(pmd_t *dir, unsigned long address)
142} 142}
143#endif 143#endif
144 144
145/**
146 * shatter_huge_page() - ensure a given address is mapped by a small page.
147 * @addr: Address at which to shatter any existing huge page.
148 *
149 * This function converts a huge PTE mapping kernel LOWMEM into a bunch
150 * of small PTEs with the same caching. No cache flush required, but we
151 * must do a global TLB flush.
152 *
153 * Any caller that wishes to modify a kernel mapping that might
154 * have been made with a huge page should call this function,
155 * since doing so properly avoids race conditions with installing the
156 * newly-shattered page and then flushing all the TLB entries.
157 * Returns early, changing nothing, if pmd_huge_page() is false for @addr.
158 */
159void shatter_huge_page(unsigned long addr)
160{
161 pgd_t *pgd;
162 pud_t *pud;
163 pmd_t *pmd;
164 unsigned long flags = 0; /* initialized only to keep the compiler happy */
165#ifdef __PAGETABLE_PMD_FOLDED
166 struct list_head *pos;
167#endif
168
169 /* Get a pointer to the pmd entry that we need to change. */
170 addr &= HPAGE_MASK;
171 BUG_ON(pgd_addr_invalid(addr));
172 BUG_ON(addr < PAGE_OFFSET); /* only for kernel LOWMEM */
173 pgd = swapper_pg_dir + pgd_index(addr);
174 pud = pud_offset(pgd, addr);
175 BUG_ON(!pud_present(*pud));
176 pmd = pmd_offset(pud, addr);
177 BUG_ON(!pmd_present(*pmd));
178 if (!pmd_huge_page(*pmd))
179 return;
180
181 /*
182 * Grab the pgd_lock, since we may need it to walk the pgd_list,
183 * and since we need some kind of lock here to avoid races.
184 */
185 spin_lock_irqsave(&pgd_lock, flags);
186 if (!pmd_huge_page(*pmd)) {
187 /* Lost the race to convert the huge page. */
188 spin_unlock_irqrestore(&pgd_lock, flags);
189 return;
190 }
191
192 /* Shatter the huge page into the preallocated L2 page table. */
193 pmd_populate_kernel(&init_mm, pmd,
194 get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
195
196#ifdef __PAGETABLE_PMD_FOLDED
197 /* Walk every pgd on the system and update the pmd there. */
198 list_for_each(pos, &pgd_list) {
199 pmd_t *copy_pmd;
200 pgd = list_to_pgd(pos) + pgd_index(addr);
201 pud = pud_offset(pgd, addr);
202 copy_pmd = pmd_offset(pud, addr);
203 __set_pmd(copy_pmd, *pmd);
204 }
205#endif
206
207 /* Tell every cpu to notice the change. */
208 flush_remote(0, 0, NULL, addr, HPAGE_SIZE, HPAGE_SIZE,
209 cpu_possible_mask, NULL, 0);
210
211 /* Hold the lock until the TLB flush is finished to avoid races. */
212 spin_unlock_irqrestore(&pgd_lock, flags);
213}
214
145/* 215/*
146 * List of all pgd's needed so it can invalidate entries in both cached 216 * List of all pgd's needed so it can invalidate entries in both cached
147 * and uncached pgd's. This is essentially codepath-based locking 217 * and uncached pgd's. This is essentially codepath-based locking
@@ -184,9 +254,9 @@ static void pgd_ctor(pgd_t *pgd)
184 BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0); 254 BUG_ON(((u64 *)swapper_pg_dir)[pgd_index(MEM_USER_INTRPT)] != 0);
185#endif 255#endif
186 256
187 clone_pgd_range(pgd + KERNEL_PGD_INDEX_START, 257 memcpy(pgd + KERNEL_PGD_INDEX_START,
188 swapper_pg_dir + KERNEL_PGD_INDEX_START, 258 swapper_pg_dir + KERNEL_PGD_INDEX_START,
189 KERNEL_PGD_PTRS); 259 KERNEL_PGD_PTRS * sizeof(pgd_t));
190 260
191 pgd_list_add(pgd); 261 pgd_list_add(pgd);
192 spin_unlock_irqrestore(&pgd_lock, flags); 262 spin_unlock_irqrestore(&pgd_lock, flags);
@@ -220,8 +290,11 @@ void pgd_free(struct mm_struct *mm, pgd_t *pgd)
220 290
221struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) 291struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
222{ 292{
223 gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO|__GFP_COMP; 293 gfp_t flags = GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO;
224 struct page *p; 294 struct page *p;
295#if L2_USER_PGTABLE_ORDER > 0
296 int i;
297#endif
225 298
226#ifdef CONFIG_HIGHPTE 299#ifdef CONFIG_HIGHPTE
227 flags |= __GFP_HIGHMEM; 300 flags |= __GFP_HIGHMEM;
@@ -231,6 +304,18 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
231 if (p == NULL) 304 if (p == NULL)
232 return NULL; 305 return NULL;
233 306
307#if L2_USER_PGTABLE_ORDER > 0
308 /*
309 * Make every page have a page_count() of one, not just the first.
310 * We don't use __GFP_COMP since it doesn't look like it works
311 * correctly with tlb_remove_page().
312 */
313 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
314 init_page_count(p+i);
315 inc_zone_page_state(p+i, NR_PAGETABLE);
316 }
317#endif
318
234 pgtable_page_ctor(p); 319 pgtable_page_ctor(p);
235 return p; 320 return p;
236} 321}
@@ -242,8 +327,15 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address)
242 */ 327 */
243void pte_free(struct mm_struct *mm, struct page *p) 328void pte_free(struct mm_struct *mm, struct page *p)
244{ 329{
330 int i;
331
245 pgtable_page_dtor(p); 332 pgtable_page_dtor(p);
246 __free_pages(p, L2_USER_PGTABLE_ORDER); 333 __free_page(p);
334
335 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
336 __free_page(p+i);
337 dec_zone_page_state(p+i, NR_PAGETABLE);
338 }
247} 339}
248 340
249void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte, 341void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
@@ -252,8 +344,12 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
252 int i; 344 int i;
253 345
254 pgtable_page_dtor(pte); 346 pgtable_page_dtor(pte);
255 for (i = 0; i < L2_USER_PGTABLE_PAGES; ++i) 347 tlb_remove_page(tlb, pte);
348
349 for (i = 1; i < L2_USER_PGTABLE_PAGES; ++i) {
256 tlb_remove_page(tlb, pte + i); 350 tlb_remove_page(tlb, pte + i);
351 dec_zone_page_state(pte + i, NR_PAGETABLE);
352 }
257} 353}
258 354
259#ifndef __tilegx__ 355#ifndef __tilegx__
@@ -335,35 +431,51 @@ int get_remote_cache_cpu(pgprot_t prot)
335 return x + y * smp_width; 431 return x + y * smp_width;
336} 432}
337 433
338void set_pte_order(pte_t *ptep, pte_t pte, int order) 434/*
435 * Convert a kernel VA to a PA and homing information.
436 */
437int va_to_cpa_and_pte(void *va, unsigned long long *cpa, pte_t *pte)
339{ 438{
340 unsigned long pfn = pte_pfn(pte); 439 struct page *page = virt_to_page(va);
341 struct page *page = pfn_to_page(pfn); 440 pte_t null_pte = { 0 };
342 441
343 /* Update the home of a PTE if necessary */ 442 *cpa = __pa(va);
344 pte = pte_set_home(pte, page_home(page)); 443
444 /* Note that this is not writing a page table, just returning a pte. */
445 *pte = pte_set_home(null_pte, page_home(page));
446
447 return 0; /* return non-zero if not hfh? */
448}
449EXPORT_SYMBOL(va_to_cpa_and_pte);
345 450
451void __set_pte(pte_t *ptep, pte_t pte)
452{
346#ifdef __tilegx__ 453#ifdef __tilegx__
347 *ptep = pte; 454 *ptep = pte;
348#else 455#else
349 /* 456# if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32
350 * When setting a PTE, write the high bits first, then write 457# error Must write the present and migrating bits last
351 * the low bits. This sets the "present" bit only after the 458# endif
352 * other bits are in place. If a particular PTE update 459 if (pte_present(pte)) {
353 * involves transitioning from one valid PTE to another, it 460 ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
354 * may be necessary to call set_pte_order() more than once, 461 barrier();
355 * transitioning via a suitable intermediate state. 462 ((u32 *)ptep)[0] = (u32)(pte_val(pte));
356 * Note that this sequence also means that if we are transitioning 463 } else {
357 * from any migrating PTE to a non-migrating one, we will not 464 ((u32 *)ptep)[0] = (u32)(pte_val(pte));
358 * see a half-updated PTE with the migrating bit off. 465 barrier();
359 */ 466 ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32);
360#if HV_PTE_INDEX_PRESENT >= 32 || HV_PTE_INDEX_MIGRATING >= 32 467 }
361# error Must write the present and migrating bits last 468#endif /* __tilegx__ */
362#endif 469}
363 ((u32 *)ptep)[1] = (u32)(pte_val(pte) >> 32); 470
364 barrier(); 471void set_pte(pte_t *ptep, pte_t pte)
365 ((u32 *)ptep)[0] = (u32)(pte_val(pte)); 472{
366#endif 473 struct page *page = pfn_to_page(pte_pfn(pte));
474
475 /* Update the home of a PTE if necessary */
476 pte = pte_set_home(pte, page_home(page));
477
478 __set_pte(ptep, pte);
367} 479}
368 480
369/* Can this mm load a PTE with cached_priority set? */ 481/* Can this mm load a PTE with cached_priority set? */