Diffstat (limited to 'arch/sparc64/mm/tsb.c')
-rw-r--r--  arch/sparc64/mm/tsb.c  185
1 files changed, 82 insertions, 103 deletions
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index f36799b7152c..7fbe1e0cd105 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -48,11 +48,15 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end)
 void flush_tsb_user(struct mmu_gather *mp)
 {
 	struct mm_struct *mm = mp->mm;
-	struct tsb *tsb = mm->context.tsb;
-	unsigned long nentries = mm->context.tsb_nentries;
-	unsigned long base;
+	unsigned long nentries, base, flags;
+	struct tsb *tsb;
 	int i;
 
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	tsb = mm->context.tsb;
+	nentries = mm->context.tsb_nentries;
+
 	if (tlb_type == cheetah_plus || tlb_type == hypervisor)
 		base = __pa(tsb);
 	else
@@ -70,6 +74,8 @@ void flush_tsb_user(struct mmu_gather *mp)
 
 		tsb_flush(ent, tag);
 	}
+
+	spin_unlock_irqrestore(&mm->context.lock, flags);
 }
 
 static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
@@ -201,86 +207,9 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
 	}
 }
 
-/* The page tables are locked against modifications while this
- * runs.
- *
- * XXX do some prefetching...
- */
-static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
-		     struct tsb *new_tsb, unsigned long new_size)
-{
-	unsigned long old_nentries = old_size / sizeof(struct tsb);
-	unsigned long new_nentries = new_size / sizeof(struct tsb);
-	unsigned long i;
-
-	for (i = 0; i < old_nentries; i++) {
-		register unsigned long tag asm("o4");
-		register unsigned long pte asm("o5");
-		unsigned long v, hash;
-
-		if (tlb_type == hypervisor) {
-			__asm__ __volatile__(
-				"ldda [%2] %3, %0"
-				: "=r" (tag), "=r" (pte)
-				: "r" (__pa(&old_tsb[i])),
-				  "i" (ASI_QUAD_LDD_PHYS_4V));
-		} else if (tlb_type == cheetah_plus) {
-			__asm__ __volatile__(
-				"ldda [%2] %3, %0"
-				: "=r" (tag), "=r" (pte)
-				: "r" (__pa(&old_tsb[i])),
-				  "i" (ASI_QUAD_LDD_PHYS));
-		} else {
-			__asm__ __volatile__(
-				"ldda [%2] %3, %0"
-				: "=r" (tag), "=r" (pte)
-				: "r" (&old_tsb[i]),
-				  "i" (ASI_NUCLEUS_QUAD_LDD));
-		}
-
-		if (tag & ((1UL << TSB_TAG_LOCK_BIT) |
-			   (1UL << TSB_TAG_INVALID_BIT)))
-			continue;
-
-		/* We only put base page size PTEs into the TSB,
-		 * but that might change in the future.  This code
-		 * would need to be changed if we start putting larger
-		 * page size PTEs into there.
-		 */
-		WARN_ON((pte & _PAGE_ALL_SZ_BITS) != _PAGE_SZBITS);
-
-		/* The tag holds bits 22 to 63 of the virtual address
-		 * and the context.  Clear out the context, and shift
-		 * up to make a virtual address.
-		 */
-		v = (tag & ((1UL << 42UL) - 1UL)) << 22UL;
-
-		/* The implied bits of the tag (bits 13 to 21) are
-		 * determined by the TSB entry index, so fill that in.
-		 */
-		v |= (i & (512UL - 1UL)) << 13UL;
-
-		hash = tsb_hash(v, new_nentries);
-		if (tlb_type == cheetah_plus ||
-		    tlb_type == hypervisor) {
-			__asm__ __volatile__(
-				"stxa %0, [%1] %2\n\t"
-				"stxa %3, [%4] %2"
-				: /* no outputs */
-				: "r" (tag),
-				  "r" (__pa(&new_tsb[hash].tag)),
-				  "i" (ASI_PHYS_USE_EC),
-				  "r" (pte),
-				  "r" (__pa(&new_tsb[hash].pte)));
-		} else {
-			new_tsb[hash].tag = tag;
-			new_tsb[hash].pte = pte;
-		}
-	}
-}
-
 /* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
- * update_mmu_cache() invokes this routine to try and grow the TSB.
+ * do_sparc64_fault() invokes this routine to try and grow the TSB.
+ *
  * When we reach the maximum TSB size supported, we stick ~0UL into
  * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
  * will not trigger any longer.
@@ -293,12 +222,12 @@ static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
  * the number of entries that the current TSB can hold at once.  Currently,
  * we trigger when the RSS hits 3/4 of the TSB capacity.
  */
-void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
+void tsb_grow(struct mm_struct *mm, unsigned long rss)
 {
 	unsigned long max_tsb_size = 1 * 1024 * 1024;
-	unsigned long size, old_size;
+	unsigned long size, old_size, flags;
 	struct page *page;
-	struct tsb *old_tsb;
+	struct tsb *old_tsb, *new_tsb;
 
 	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
 		max_tsb_size = (PAGE_SIZE << MAX_ORDER);
@@ -311,12 +240,51 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
 			break;
 	}
 
-	page = alloc_pages(gfp_flags, get_order(size));
+	page = alloc_pages(GFP_KERNEL, get_order(size));
 	if (unlikely(!page))
 		return;
 
 	/* Mark all tags as invalid.  */
-	memset(page_address(page), 0x40, size);
+	new_tsb = page_address(page);
+	memset(new_tsb, 0x40, size);
+
+	/* Ok, we are about to commit the changes.  If we are
+	 * growing an existing TSB the locking is very tricky,
+	 * so WATCH OUT!
+	 *
+	 * We have to hold mm->context.lock while committing to the
+	 * new TSB, this synchronizes us with processors in
+	 * flush_tsb_user() and switch_mm() for this address space.
+	 *
+	 * But even with that lock held, processors run asynchronously
+	 * accessing the old TSB via TLB miss handling.  This is OK
+	 * because those actions are just propagating state from the
+	 * Linux page tables into the TSB, page table mappings are not
+	 * being changed.  If a real fault occurs, the processor will
+	 * synchronize with us when it hits flush_tsb_user(), this is
+	 * also true for the case where vmscan is modifying the page
+	 * tables.  The only thing we need to be careful with is to
+	 * skip any locked TSB entries during copy_tsb().
+	 *
+	 * When we finish committing to the new TSB, we have to drop
+	 * the lock and ask all other cpus running this address space
+	 * to run tsb_context_switch() to see the new TSB table.
+	 */
+	spin_lock_irqsave(&mm->context.lock, flags);
+
+	old_tsb = mm->context.tsb;
+	old_size = mm->context.tsb_nentries * sizeof(struct tsb);
+
+	/* Handle multiple threads trying to grow the TSB at the same time.
+	 * One will get in here first, and bump the size and the RSS limit.
+	 * The others will get in here next and hit this check.
+	 */
+	if (unlikely(old_tsb && (rss < mm->context.tsb_rss_limit))) {
+		spin_unlock_irqrestore(&mm->context.lock, flags);
+
+		free_pages((unsigned long) new_tsb, get_order(size));
+		return;
+	}
 
 	if (size == max_tsb_size)
 		mm->context.tsb_rss_limit = ~0UL;
@@ -324,30 +292,37 @@ void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
 	mm->context.tsb_rss_limit =
 		((size / sizeof(struct tsb)) * 3) / 4;
 
-	old_tsb = mm->context.tsb;
-	old_size = mm->context.tsb_nentries * sizeof(struct tsb);
-
-	if (old_tsb)
-		copy_tsb(old_tsb, old_size, page_address(page), size);
+	if (old_tsb) {
+		extern void copy_tsb(unsigned long old_tsb_base,
+				     unsigned long old_tsb_size,
+				     unsigned long new_tsb_base,
+				     unsigned long new_tsb_size);
+		unsigned long old_tsb_base = (unsigned long) old_tsb;
+		unsigned long new_tsb_base = (unsigned long) new_tsb;
+
+		if (tlb_type == cheetah_plus || tlb_type == hypervisor) {
+			old_tsb_base = __pa(old_tsb_base);
+			new_tsb_base = __pa(new_tsb_base);
+		}
+		copy_tsb(old_tsb_base, old_size, new_tsb_base, size);
+	}
 
-	mm->context.tsb = page_address(page);
+	mm->context.tsb = new_tsb;
 	setup_tsb_params(mm, size);
 
+	spin_unlock_irqrestore(&mm->context.lock, flags);
+
 	/* If old_tsb is NULL, we're being invoked for the first time
 	 * from init_new_context().
 	 */
 	if (old_tsb) {
-		/* Now force all other processors to reload the new
-		 * TSB state.
-		 */
-		smp_tsb_sync(mm);
-
-		/* Finally reload it on the local cpu.  No further
-		 * references will remain to the old TSB and we can
-		 * thus free it up.
-		 */
+		/* Reload it on the local cpu.  */
 		tsb_context_switch(mm);
 
+		/* Now force other processors to do the same.  */
+		smp_tsb_sync(mm);
+
+		/* Now it is safe to free the old tsb.  */
 		free_pages((unsigned long) old_tsb, get_order(old_size));
 	}
 }
@@ -363,7 +338,11 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 	 * will be confused and think there is an older TSB to free up.
 	 */
 	mm->context.tsb = NULL;
-	tsb_grow(mm, 0, GFP_KERNEL);
+
+	/* If this is fork, inherit the parent's TSB size.  We would
+	 * grow it to that size on the first page fault anyways.
+	 */
+	tsb_grow(mm, get_mm_rss(mm));
 
 	if (unlikely(!mm->context.tsb))
 		return -ENOMEM;
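
For context, a minimal sketch of the caller side this change implies: the comment above tsb_grow() now names do_sparc64_fault() as the routine that drives growth, and the gfp_flags argument has been dropped. The hunk touching do_sparc64_fault() is not part of this file's diff, so the placement and the mm_rss local below are illustrative assumptions, not the literal commit.

	/* Illustrative only -- not part of the tsb.c diff above.  The fault
	 * path samples the address space's RSS and asks tsb_grow() for a
	 * larger TSB once RSS crosses the 3/4-capacity threshold recorded
	 * in mm->context.tsb_rss_limit; tsb_grow() re-checks that limit
	 * under mm->context.lock, so concurrent faulting threads are safe.
	 */
	unsigned long mm_rss = get_mm_rss(mm);

	if (unlikely(mm_rss >= mm->context.tsb_rss_limit))
		tsb_grow(mm, mm_rss);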