3 files changed, 176 insertions, 10 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 8c245859d212..3c14b549cf91 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -581,11 +581,11 @@ extern unsigned long xcall_call_function;
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
-int smp_call_function(void (*func)(void *info), void *info,
+static int smp_call_function_mask(void (*func)(void *info), void *info,
-                      int nonatomic, int wait)
+                                  int nonatomic, int wait, cpumask_t mask)
 {
        struct call_data_struct data;
-        int cpus = num_online_cpus() - 1;
+        int cpus = cpus_weight(mask) - 1;
        long timeout;
        if (!cpus)
@@ -603,7 +603,7 @@ int smp_call_function(void (*func)(void *info), void *info,
        call_data = &data;
-        smp_cross_call(&xcall_call_function, 0, 0, 0);
+        smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
        /* 
         * Wait for other cpus to complete function or at
@@ -629,6 +629,13 @@ out_timeout:
        return 0;
 }
+int smp_call_function(void (*func)(void *info), void *info,
+                      int nonatomic, int wait)
+{
+        return smp_call_function_mask(func, info, nonatomic, wait,
+                                      cpu_online_map);
+}
 void smp_call_function_client(int irq, struct pt_regs *regs)
 {
        void (*func) (void *info) = call_data->func;
@@ -646,6 +653,19 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
        }
 }
+static void tsb_sync(void *info)
+{
+        struct mm_struct *mm = info;
+        if (current->active_mm == mm)
+                tsb_context_switch(mm);
+}
+void smp_tsb_sync(struct mm_struct *mm)
+{
+        smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+}
 extern unsigned long xcall_flush_tlb_mm;
 extern unsigned long xcall_flush_tlb_pending;
 extern unsigned long xcall_flush_tlb_kernel_range;
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 7c456afaa9a5..a8119cb4fa32 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -246,9 +246,11 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
+        struct mm_struct *mm;
        struct page *page;
        unsigned long pfn;
        unsigned long pg_flags;
+        unsigned long mm_rss;
        pfn = pte_pfn(pte);
        if (pfn_valid(pfn) &&
@@ -270,6 +272,11 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
                put_cpu();
        }
+        mm = vma->vm_mm;
+        mm_rss = get_mm_rss(mm);
+        if (mm_rss >= mm->context.tsb_rss_limit)
+                tsb_grow(mm, mm_rss, GFP_ATOMIC);
 }
 void flush_dcache_page(struct page *page)
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index dfe7144fcdf6..707af4b84a0e 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -10,6 +10,7 @@
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
+#include <asm/tsb.h>
 /* We use an 8K TSB for the whole kernel, this allows to
 * handle about 4MB of modules and vmalloc mappings without
@@ -146,6 +147,9 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
                tte |= _PAGE_SZ4MB;
                page_sz = 4 * 1024 * 1024;
                break;
+        default:
+                BUG();
        };
        tsb_reg |= base;
@@ -157,23 +161,158 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
        mm->context.tsb_map_pte = tte;
 }
+/* Rehash the live entries of an old TSB into a newly allocated one.
+ *
+ * old_tsb, old_size: source table and its size in bytes.
+ * new_tsb, new_size: destination table and its size in bytes.
+ *
+ * Both sizes are turned into entry counts by dividing by
+ * sizeof(struct tsb).  Entries whose tag is zero or has TSB_TAG_LOCK
+ * set are skipped.  Every other entry is written to the slot that
+ * tsb_hash() selects for its virtual address in the new table,
+ * replacing whatever an earlier iteration may have stored there.
+ *
+ * The page tables are locked against modifications while this
+ * runs.
+ *
+ * NOTE(review): tag and pte are pinned to %o4/%o5, presumably because
+ * the single ldda below fills a register pair starting at operand %0
+ * -- confirm against the SPARC V9 LDDA definition.
+ *
+ * XXX do some prefetching...
+ */
+static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
+                     struct tsb *new_tsb, unsigned long new_size)
+{
+        unsigned long old_nentries = old_size / sizeof(struct tsb);
+        unsigned long new_nentries = new_size / sizeof(struct tsb);
+        unsigned long i;
+        for (i = 0; i < old_nentries; i++) {
+                register unsigned long tag asm("o4");
+                register unsigned long pte asm("o5");
+                unsigned long v;
+                unsigned int hash;
+                __asm__ __volatile__(
+                        "ldda [%2] %3, %0"
+                        : "=r" (tag), "=r" (pte)
+                        : "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD));
+                if (!tag || (tag & TSB_TAG_LOCK))
+                        continue;
+                /* We only put base page size PTEs into the TSB,
+                 * but that might change in the future.  This code
+                 * would need to be changed if we start putting larger
+                 * page size PTEs into there.
+                 */
+                WARN_ON((pte & _PAGE_ALL_SZ_BITS) != _PAGE_SZBITS);
+                /* The tag holds bits 22 to 63 of the virtual address
+                 * and the context.  Clear out the context, and shift
+                 * up to make a virtual address.
+                 */
+                v = (tag & ((1UL << 42UL) - 1UL)) << 22UL;
+                /* The implied bits of the tag (bits 13 to 21) are
+                 * determined by the TSB entry index, so fill that in.
+                 */
+                v |= (i & (512UL - 1UL)) << 13UL;
+                hash = tsb_hash(v, new_nentries);
+                new_tsb[hash].tag = tag;
+                new_tsb[hash].pte = pte;
+        }
+}
+/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
+ * update_mmu_cache() invokes this routine to try and grow the TSB.
+ * When we reach the maximum TSB size supported, we stick ~0UL into
+ * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
+ * will not trigger any longer.
+ *
+ * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
+ * of two.  The TSB must be aligned to its size, so e.g. a 512K TSB
+ * must be 512K aligned.
+ *
+ * The idea here is to grow the TSB when the RSS of the process approaches
+ * the number of entries that the current TSB can hold at once.  Currently,
+ * we trigger when the RSS hits 3/4 of the TSB capacity.
+ *
+ * mm:        address space whose TSB is allocated or replaced.
+ * rss:       resident set size used to pick the new TSB size.
+ * gfp_flags: allocation flags passed to alloc_pages(); __GFP_ZERO is
+ *            always OR-ed in.
+ *
+ * The size chosen is the smallest power-of-two multiple of PAGE_SIZE
+ * whose 3/4 capacity is strictly greater than rss, capped at
+ * max_tsb_size.  If the allocation fails we return without modifying
+ * mm and without reporting the failure, so a caller that requires a
+ * TSB has to inspect mm->context.tsb afterwards.
+ *
+ * NOTE(review): max_tsb_size is clamped to PAGE_SIZE << MAX_ORDER, so
+ * get_order(size) may equal MAX_ORDER -- confirm the page allocator
+ * can satisfy a request of that order.
+ */
+void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
+{
+        unsigned long max_tsb_size = 1 * 1024 * 1024;
+        unsigned long size, old_size;
+        struct page *page;
+        struct tsb *old_tsb;
+        if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
+                max_tsb_size = (PAGE_SIZE << MAX_ORDER);
+        for (size = PAGE_SIZE; size < max_tsb_size; size <<= 1UL) {
+                unsigned long n_entries = size / sizeof(struct tsb);
+                n_entries = (n_entries * 3) / 4;
+                if (n_entries > rss)
+                        break;
+        }
+        page = alloc_pages(gfp_flags | __GFP_ZERO, get_order(size));
+        if (unlikely(!page))
+                return;
+        if (size == max_tsb_size)
+                mm->context.tsb_rss_limit = ~0UL;
+        else
+                mm->context.tsb_rss_limit =
+                        ((size / sizeof(struct tsb)) * 3) / 4;
+        old_tsb = mm->context.tsb;
+        old_size = mm->context.tsb_nentries * sizeof(struct tsb);
+        if (old_tsb)
+                copy_tsb(old_tsb, old_size, page_address(page), size);
+        mm->context.tsb = page_address(page);
+        setup_tsb_params(mm, size);
+        /* If old_tsb is NULL, we're being invoked for the first time
+         * from init_new_context().
+         */
+        if (old_tsb) {
+                /* Now force all other processors to reload the new
+                 * TSB state.
+                 */
+                smp_tsb_sync(mm);
+                /* Finally reload it on the local cpu.  No further
+                 * references will remain to the old TSB and we can
+                 * thus free it up.
+                 */
+                tsb_context_switch(mm);
+                free_pages((unsigned long) old_tsb, get_order(old_size));
+        }
+}
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-        unsigned long page = get_zeroed_page(GFP_KERNEL);
+        unsigned long initial_rss;
         mm->context.sparc64_ctx_val = 0UL;
-        if (unlikely(!page))
-                return -ENOMEM;
-        mm->context.tsb = (struct tsb *) page;
+        /* copy_mm() copies over the parent's mm_struct before calling
-        setup_tsb_params(mm, PAGE_SIZE);
+         * us, so we need to zero out the TSB pointer or else tsb_grow()
+         * will be confused and think there is an older TSB to free up.
+         */
+        mm->context.tsb = NULL;
+        /* If this is fork, inherit the parent's TSB size.  We would
+         * grow it to that size on the first page fault anyway.
+         *
+         * NOTE(review): tsb_grow() picks the smallest size whose 3/4
+         * capacity is strictly greater than the rss it is given, so
+         * passing tsb_nentries - 1 looks like it selects a TSB one
+         * step larger than the parent's rather than the same size --
+         * confirm this is intended.
+         */
+        initial_rss = mm->context.tsb_nentries;
+        if (initial_rss)
+                initial_rss -= 1;
+        tsb_grow(mm, initial_rss, GFP_KERNEL);
+        if (unlikely(!mm->context.tsb))
+                return -ENOMEM;
         return 0;
 }
 void destroy_context(struct mm_struct *mm)
 {
-        free_page((unsigned long) mm->context.tsb);
+        unsigned long size = mm->context.tsb_nentries * sizeof(struct tsb);
+        free_pages((unsigned long) mm->context.tsb, get_order(size));
        /* We can remove these later, but for now it's useful
         * to catch any bogus post-destroy_context() references

diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index 8c245859d212..3c14b549cf91 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c
@@ -581,11 +581,11 @@ extern unsigned long xcall_call_function;
581	* You must not call this function with disabled interrupts or from a	581	* You must not call this function with disabled interrupts or from a
582	* hardware interrupt handler or from a bottom half handler.	582	* hardware interrupt handler or from a bottom half handler.
583	*/	583	*/
584	int smp_call_function(void (func)(void info), void *info,	584	static int smp_call_function_mask(void (func)(void info), void *info,
585	int nonatomic, int wait)	585	int nonatomic, int wait, cpumask_t mask)
586	{	586	{
587	struct call_data_struct data;	587	struct call_data_struct data;
588	int cpus = num_online_cpus() - 1;	588	int cpus = cpus_weight(mask) - 1;
589	long timeout;	589	long timeout;
590		590
591	if (!cpus)	591	if (!cpus)
@@ -603,7 +603,7 @@ int smp_call_function(void (func)(void info), void *info,
603		603
604	call_data = &data;	604	call_data = &data;
605		605
606	smp_cross_call(&xcall_call_function, 0, 0, 0);	606	smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
607		607
608	/*	608	/*
609	* Wait for other cpus to complete function or at	609	* Wait for other cpus to complete function or at
@@ -629,6 +629,13 @@ out_timeout:
629	return 0;	629	return 0;
630	}	630	}
631		631
		632	int smp_call_function(void (func)(void info), void *info,
		633	int nonatomic, int wait)
		634	{
		635	return smp_call_function_mask(func, info, nonatomic, wait,
		636	cpu_online_map);
		637	}
		638
632	void smp_call_function_client(int irq, struct pt_regs *regs)	639	void smp_call_function_client(int irq, struct pt_regs *regs)
633	{	640	{
634	void (func) (void info) = call_data->func;	641	void (func) (void info) = call_data->func;
@@ -646,6 +653,19 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
646	}	653	}
647	}	654	}
648		655
		656	static void tsb_sync(void *info)
		657	{
		658	struct mm_struct *mm = info;
		659
		660	if (current->active_mm == mm)
		661	tsb_context_switch(mm);
		662	}
		663
		664	void smp_tsb_sync(struct mm_struct *mm)
		665	{
		666	smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
		667	}
		668
649	extern unsigned long xcall_flush_tlb_mm;	669	extern unsigned long xcall_flush_tlb_mm;
650	extern unsigned long xcall_flush_tlb_pending;	670	extern unsigned long xcall_flush_tlb_pending;
651	extern unsigned long xcall_flush_tlb_kernel_range;	671	extern unsigned long xcall_flush_tlb_kernel_range;


diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c index 7c456afaa9a5..a8119cb4fa32 100644 --- a/arch/sparc64/mm/init.c +++ b/arch/sparc64/mm/init.c
@@ -246,9 +246,11 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
246		246
247	void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)	247	void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
248	{	248	{
		249	struct mm_struct *mm;
249	struct page *page;	250	struct page *page;
250	unsigned long pfn;	251	unsigned long pfn;
251	unsigned long pg_flags;	252	unsigned long pg_flags;
		253	unsigned long mm_rss;
252		254
253	pfn = pte_pfn(pte);	255	pfn = pte_pfn(pte);
254	if (pfn_valid(pfn) &&	256	if (pfn_valid(pfn) &&
@@ -270,6 +272,11 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
270		272
271	put_cpu();	273	put_cpu();
272	}	274	}
		275
		276	mm = vma->vm_mm;
		277	mm_rss = get_mm_rss(mm);
		278	if (mm_rss >= mm->context.tsb_rss_limit)
		279	tsb_grow(mm, mm_rss, GFP_ATOMIC);
273	}	280	}
274		281
275	void flush_dcache_page(struct page *page)	282	void flush_dcache_page(struct page *page)


diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c index dfe7144fcdf6..707af4b84a0e 100644 --- a/arch/sparc64/mm/tsb.c +++ b/arch/sparc64/mm/tsb.c
@@ -10,6 +10,7 @@
10	#include <asm/tlb.h>	10	#include <asm/tlb.h>
11	#include <asm/mmu_context.h>	11	#include <asm/mmu_context.h>
12	#include <asm/pgtable.h>	12	#include <asm/pgtable.h>
		13	#include <asm/tsb.h>
13		14
14	/* We use an 8K TSB for the whole kernel, this allows to	15	/* We use an 8K TSB for the whole kernel, this allows to
15	* handle about 4MB of modules and vmalloc mappings without	16	* handle about 4MB of modules and vmalloc mappings without
@@ -146,6 +147,9 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
146	tte \|= _PAGE_SZ4MB;	147	tte \|= _PAGE_SZ4MB;
147	page_sz = 4 * 1024 * 1024;	148	page_sz = 4 * 1024 * 1024;
148	break;	149	break;
		150
		151	default:
		152	BUG();
149	};	153	};
150		154
151	tsb_reg \|= base;	155	tsb_reg \|= base;
@@ -157,23 +161,158 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
157	mm->context.tsb_map_pte = tte;	161	mm->context.tsb_map_pte = tte;
158	}	162	}
159		163
		164	/* The page tables are locked against modifications while this
		165	* runs.
		166	*
		167	* XXX do some prefetching...
		168	*/
		169	static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
		170	struct tsb *new_tsb, unsigned long new_size)
		171	{
		172	unsigned long old_nentries = old_size / sizeof(struct tsb);
		173	unsigned long new_nentries = new_size / sizeof(struct tsb);
		174	unsigned long i;
		175
		176	for (i = 0; i < old_nentries; i++) {
		177	register unsigned long tag asm("o4");
		178	register unsigned long pte asm("o5");
		179	unsigned long v;
		180	unsigned int hash;
		181
		182	__asm__ __volatile__(
		183	"ldda [%2] %3, %0"
		184	: "=r" (tag), "=r" (pte)
		185	: "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD));
		186
		187	if (!tag \|\| (tag & TSB_TAG_LOCK))
		188	continue;
		189
		190	/* We only put base page size PTEs into the TSB,
		191	* but that might change in the future. This code
		192	* would need to be changed if we start putting larger
		193	* page size PTEs into there.
		194	*/
		195	WARN_ON((pte & _PAGE_ALL_SZ_BITS) != _PAGE_SZBITS);
		196
		197	/* The tag holds bits 22 to 63 of the virtual address
		198	* and the context. Clear out the context, and shift
		199	* up to make a virtual address.
		200	*/
		201	v = (tag & ((1UL << 42UL) - 1UL)) << 22UL;
		202
		203	/* The implied bits of the tag (bits 13 to 21) are
		204	* determined by the TSB entry index, so fill that in.
		205	*/
		206	v \|= (i & (512UL - 1UL)) << 13UL;
		207
		208	hash = tsb_hash(v, new_nentries);
		209	new_tsb[hash].tag = tag;
		210	new_tsb[hash].pte = pte;
		211	}
		212	}
		213
		214	/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
		215	* update_mmu_cache() invokes this routine to try and grow the TSB.
		216	* When we reach the maximum TSB size supported, we stick ~0UL into
		217	* mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
		218	* will not trigger any longer.
		219	*
		220	* The TSB can be anywhere from 8K to 1MB in size, in increasing powers
		221	* of two. The TSB must be aligned to its size, so e.g. a 512K TSB
		222	* must be 512K aligned.
		223	*
		224	* The idea here is to grow the TSB when the RSS of the process approaches
		225	* the number of entries that the current TSB can hold at once. Currently,
		226	* we trigger when the RSS hits 3/4 of the TSB capacity.
		227	*/
		228	void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
		229	{
		230	unsigned long max_tsb_size = 1 * 1024 * 1024;
		231	unsigned long size, old_size;
		232	struct page *page;
		233	struct tsb *old_tsb;
		234
		235	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
		236	max_tsb_size = (PAGE_SIZE << MAX_ORDER);
		237
		238	for (size = PAGE_SIZE; size < max_tsb_size; size <<= 1UL) {
		239	unsigned long n_entries = size / sizeof(struct tsb);
		240
		241	n_entries = (n_entries * 3) / 4;
		242	if (n_entries > rss)
		243	break;
		244	}
		245
		246	page = alloc_pages(gfp_flags \| __GFP_ZERO, get_order(size));
		247	if (unlikely(!page))
		248	return;
		249
		250	if (size == max_tsb_size)
		251	mm->context.tsb_rss_limit = ~0UL;
		252	else
		253	mm->context.tsb_rss_limit =
		254	((size / sizeof(struct tsb)) * 3) / 4;
		255
		256	old_tsb = mm->context.tsb;
		257	old_size = mm->context.tsb_nentries * sizeof(struct tsb);
		258
		259	if (old_tsb)
		260	copy_tsb(old_tsb, old_size, page_address(page), size);
		261
		262	mm->context.tsb = page_address(page);
		263	setup_tsb_params(mm, size);
		264
		265	/* If old_tsb is NULL, we're being invoked for the first time
		266	* from init_new_context().
		267	*/
		268	if (old_tsb) {
		269	/* Now force all other processors to reload the new
		270	* TSB state.
		271	*/
		272	smp_tsb_sync(mm);
		273
		274	/* Finally reload it on the local cpu. No further
		275	* references will remain to the old TSB and we can
		276	* thus free it up.
		277	*/
		278	tsb_context_switch(mm);
		279
		280	free_pages((unsigned long) old_tsb, get_order(old_size));
		281	}
		282	}
		283
160	int init_new_context(struct task_struct tsk, struct mm_struct mm)	284	int init_new_context(struct task_struct tsk, struct mm_struct mm)
161	{	285	{
162	unsigned long page = get_zeroed_page(GFP_KERNEL);	286	unsigned long initial_rss;
163		287
164	mm->context.sparc64_ctx_val = 0UL;	288	mm->context.sparc64_ctx_val = 0UL;
165	if (unlikely(!page))
166	return -ENOMEM;
167		289
168	mm->context.tsb = (struct tsb *) page;	290	/* copy_mm() copies over the parent's mm_struct before calling
169	setup_tsb_params(mm, PAGE_SIZE);	291	* us, so we need to zero out the TSB pointer or else tsb_grow()
		292	* will be confused and think there is an older TSB to free up.
		293	*/
		294	mm->context.tsb = NULL;
		295
		296	/* If this is fork, inherit the parent's TSB size. We would
		297	* grow it to that size on the first page fault anyways.
		298	*/
		299	initial_rss = mm->context.tsb_nentries;
		300	if (initial_rss)
		301	initial_rss -= 1;
		302
		303	tsb_grow(mm, initial_rss, GFP_KERNEL);
		304
		305	if (unlikely(!mm->context.tsb))
		306	return -ENOMEM;
170		307
171	return 0;	308	return 0;
172	}	309	}
173		310
174	void destroy_context(struct mm_struct *mm)	311	void destroy_context(struct mm_struct *mm)
175	{	312	{
176	free_page((unsigned long) mm->context.tsb);	313	unsigned long size = mm->context.tsb_nentries * sizeof(struct tsb);
		314
		315	free_pages((unsigned long) mm->context.tsb, get_order(size));
177		316
178	/* We can remove these later, but for now it's useful	317	/* We can remove these later, but for now it's useful
179	* to catch any bogus post-destroy_context() references	318	* to catch any bogus post-destroy_context() references