author    David S. Miller <davem@davemloft.net>         2006-01-31 21:31:38 -0500
committer David S. Miller <davem@sunset.davemloft.net>  2006-03-20 04:11:18 -0500
commit    bd40791e1d289d807b8580abe1f117e9c62894e4 (patch)
tree      2b47e24c8dc0e668dfd7ba0e3879165180c49c65
parent    98c5584cfc47932c4f3ccf5eee2e0bae1447b85e (diff)
[SPARC64]: Dynamically grow TSB in response to RSS growth.
As the RSS grows, grow the TSB in order to reduce the likelihood
of hash collisions and thus poor hit rates in the TSB.

This definitely needs some serious tuning.

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--  arch/sparc64/kernel/smp.c            28
-rw-r--r--  arch/sparc64/mm/init.c                7
-rw-r--r--  arch/sparc64/mm/tsb.c               151
-rw-r--r--  include/asm-sparc64/mmu.h             1
-rw-r--r--  include/asm-sparc64/mmu_context.h     7
5 files changed, 184 insertions, 10 deletions
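The tsb_grow() routine added in arch/sparc64/mm/tsb.c below picks the smallest power-of-two TSB whose three-quarters-full entry count still exceeds the current RSS, capping the table at 1MB. The following stand-alone C sketch mirrors only that size-selection loop; the 8K base page size, the 16-byte TSB entry, and the pick_tsb_size() helper name are assumptions made for illustration, not part of the patch.

#include <stdio.h>

#define BASE_PAGE_SIZE	8192UL			/* sparc64 PAGE_SIZE (assumed 8K)  */
#define TSB_ENTRY_SIZE	16UL			/* sizeof(struct tsb): tag + pte   */
#define MAX_TSB_SIZE	(1UL * 1024 * 1024)	/* 1MB cap, as in tsb_grow()       */

/* Mirror of the selection loop in tsb_grow(): walk power-of-two sizes from
 * one page up to 1MB and stop at the first size whose 3/4-full entry count
 * exceeds the resident set size.
 */
static unsigned long pick_tsb_size(unsigned long rss)
{
	unsigned long size;

	for (size = BASE_PAGE_SIZE; size < MAX_TSB_SIZE; size <<= 1UL) {
		unsigned long n_entries = (size / TSB_ENTRY_SIZE) * 3 / 4;

		if (n_entries > rss)
			break;
	}
	return size;
}

int main(void)
{
	unsigned long rss;

	/* An 8K TSB holds 512 entries, so growth triggers once RSS reaches
	 * 384 resident pages; each doubling of the TSB doubles that limit.
	 */
	for (rss = 100; rss <= 100000; rss *= 10)
		printf("rss = %6lu pages -> TSB size %7lu bytes\n",
		       rss, pick_tsb_size(rss));
	return 0;
}

Stopping at three-quarters of capacity keeps the table sparsely loaded, which is what the commit message means by reducing the likelihood of hash collisions.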
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 8c245859d212..3c14b549cf91 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -581,11 +581,11 @@ extern unsigned long xcall_call_function;
  * You must not call this function with disabled interrupts or from a
  * hardware interrupt handler or from a bottom half handler.
  */
 -int smp_call_function(void (*func)(void *info), void *info,
 -		      int nonatomic, int wait)
 +static int smp_call_function_mask(void (*func)(void *info), void *info,
 +				  int nonatomic, int wait, cpumask_t mask)
  {
  	struct call_data_struct data;
 -	int cpus = num_online_cpus() - 1;
 +	int cpus = cpus_weight(mask) - 1;
  	long timeout;
  
  	if (!cpus)
@@ -603,7 +603,7 @@ int smp_call_function(void (*func)(void *info), void *info,
  
  	call_data = &data;
  
 -	smp_cross_call(&xcall_call_function, 0, 0, 0);
 +	smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
  
  	/*
  	 * Wait for other cpus to complete function or at
@@ -629,6 +629,13 @@ out_timeout:
  	return 0;
  }
  
 +int smp_call_function(void (*func)(void *info), void *info,
 +		      int nonatomic, int wait)
 +{
 +	return smp_call_function_mask(func, info, nonatomic, wait,
 +				      cpu_online_map);
 +}
 +
  void smp_call_function_client(int irq, struct pt_regs *regs)
  {
  	void (*func) (void *info) = call_data->func;
@@ -646,6 +653,19 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
  	}
  }
  
 +static void tsb_sync(void *info)
 +{
 +	struct mm_struct *mm = info;
 +
 +	if (current->active_mm == mm)
 +		tsb_context_switch(mm);
 +}
 +
 +void smp_tsb_sync(struct mm_struct *mm)
 +{
 +	smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
 +}
 +
  extern unsigned long xcall_flush_tlb_mm;
  extern unsigned long xcall_flush_tlb_pending;
  extern unsigned long xcall_flush_tlb_kernel_range;
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 7c456afaa9a5..a8119cb4fa32 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -246,9 +246,11 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
  
  void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
  {
 +	struct mm_struct *mm;
  	struct page *page;
  	unsigned long pfn;
  	unsigned long pg_flags;
 +	unsigned long mm_rss;
  
  	pfn = pte_pfn(pte);
  	if (pfn_valid(pfn) &&
@@ -270,6 +272,11 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
  
  		put_cpu();
  	}
 +
 +	mm = vma->vm_mm;
 +	mm_rss = get_mm_rss(mm);
 +	if (mm_rss >= mm->context.tsb_rss_limit)
 +		tsb_grow(mm, mm_rss, GFP_ATOMIC);
  }
  
  void flush_dcache_page(struct page *page)
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index dfe7144fcdf6..707af4b84a0e 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -10,6 +10,7 @@
  #include <asm/tlb.h>
  #include <asm/mmu_context.h>
  #include <asm/pgtable.h>
 +#include <asm/tsb.h>
  
  /* We use an 8K TSB for the whole kernel, this allows to
   * handle about 4MB of modules and vmalloc mappings without
@@ -146,6 +147,9 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
  		tte |= _PAGE_SZ4MB;
  		page_sz = 4 * 1024 * 1024;
  		break;
 +
 +	default:
 +		BUG();
  	};
  
  	tsb_reg |= base;
@@ -157,23 +161,158 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
  	mm->context.tsb_map_pte = tte;
  }
  
 +/* The page tables are locked against modifications while this
 + * runs.
 + *
 + * XXX do some prefetching...
 + */
 +static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
 +		     struct tsb *new_tsb, unsigned long new_size)
 +{
 +	unsigned long old_nentries = old_size / sizeof(struct tsb);
 +	unsigned long new_nentries = new_size / sizeof(struct tsb);
 +	unsigned long i;
 +
 +	for (i = 0; i < old_nentries; i++) {
 +		register unsigned long tag asm("o4");
 +		register unsigned long pte asm("o5");
 +		unsigned long v;
 +		unsigned int hash;
 +
 +		__asm__ __volatile__(
 +			"ldda [%2] %3, %0"
 +			: "=r" (tag), "=r" (pte)
 +			: "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD));
 +
 +		if (!tag || (tag & TSB_TAG_LOCK))
 +			continue;
 +
 +		/* We only put base page size PTEs into the TSB,
 +		 * but that might change in the future.  This code
 +		 * would need to be changed if we start putting larger
 +		 * page size PTEs into there.
 +		 */
 +		WARN_ON((pte & _PAGE_ALL_SZ_BITS) != _PAGE_SZBITS);
 +
 +		/* The tag holds bits 22 to 63 of the virtual address
 +		 * and the context.  Clear out the context, and shift
 +		 * up to make a virtual address.
 +		 */
 +		v = (tag & ((1UL << 42UL) - 1UL)) << 22UL;
 +
 +		/* The implied bits of the tag (bits 13 to 21) are
 +		 * determined by the TSB entry index, so fill that in.
 +		 */
 +		v |= (i & (512UL - 1UL)) << 13UL;
 +
 +		hash = tsb_hash(v, new_nentries);
 +		new_tsb[hash].tag = tag;
 +		new_tsb[hash].pte = pte;
 +	}
 +}
 +
 +/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
 + * update_mmu_cache() invokes this routine to try and grow the TSB.
 + * When we reach the maximum TSB size supported, we stick ~0UL into
 + * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
 + * will not trigger any longer.
 + *
 + * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
 + * of two.  The TSB must be aligned to it's size, so f.e. a 512K TSB
 + * must be 512K aligned.
 + *
 + * The idea here is to grow the TSB when the RSS of the process approaches
 + * the number of entries that the current TSB can hold at once.  Currently,
 + * we trigger when the RSS hits 3/4 of the TSB capacity.
 + */
 +void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
 +{
 +	unsigned long max_tsb_size = 1 * 1024 * 1024;
 +	unsigned long size, old_size;
 +	struct page *page;
 +	struct tsb *old_tsb;
 +
 +	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
 +		max_tsb_size = (PAGE_SIZE << MAX_ORDER);
 +
 +	for (size = PAGE_SIZE; size < max_tsb_size; size <<= 1UL) {
 +		unsigned long n_entries = size / sizeof(struct tsb);
 +
 +		n_entries = (n_entries * 3) / 4;
 +		if (n_entries > rss)
 +			break;
 +	}
 +
 +	page = alloc_pages(gfp_flags | __GFP_ZERO, get_order(size));
 +	if (unlikely(!page))
 +		return;
 +
 +	if (size == max_tsb_size)
 +		mm->context.tsb_rss_limit = ~0UL;
 +	else
 +		mm->context.tsb_rss_limit =
 +			((size / sizeof(struct tsb)) * 3) / 4;
 +
 +	old_tsb = mm->context.tsb;
 +	old_size = mm->context.tsb_nentries * sizeof(struct tsb);
 +
 +	if (old_tsb)
 +		copy_tsb(old_tsb, old_size, page_address(page), size);
 +
 +	mm->context.tsb = page_address(page);
 +	setup_tsb_params(mm, size);
 +
 +	/* If old_tsb is NULL, we're being invoked for the first time
 +	 * from init_new_context().
 +	 */
 +	if (old_tsb) {
 +		/* Now force all other processors to reload the new
 +		 * TSB state.
 +		 */
 +		smp_tsb_sync(mm);
 +
 +		/* Finally reload it on the local cpu.  No further
 +		 * references will remain to the old TSB and we can
 +		 * thus free it up.
 +		 */
 +		tsb_context_switch(mm);
 +
 +		free_pages((unsigned long) old_tsb, get_order(old_size));
 +	}
 +}
 +
  int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
  {
 -	unsigned long page = get_zeroed_page(GFP_KERNEL);
 +	unsigned long initial_rss;
  
  	mm->context.sparc64_ctx_val = 0UL;
 -	if (unlikely(!page))
 -		return -ENOMEM;
  
 -	mm->context.tsb = (struct tsb *) page;
 -	setup_tsb_params(mm, PAGE_SIZE);
 +	/* copy_mm() copies over the parent's mm_struct before calling
 +	 * us, so we need to zero out the TSB pointer or else tsb_grow()
 +	 * will be confused and think there is an older TSB to free up.
 +	 */
 +	mm->context.tsb = NULL;
 +
 +	/* If this is fork, inherit the parent's TSB size.  We would
 +	 * grow it to that size on the first page fault anyways.
 +	 */
 +	initial_rss = mm->context.tsb_nentries;
 +	if (initial_rss)
 +		initial_rss -= 1;
 +
 +	tsb_grow(mm, initial_rss, GFP_KERNEL);
 +
 +	if (unlikely(!mm->context.tsb))
 +		return -ENOMEM;
  
  	return 0;
  }
  
  void destroy_context(struct mm_struct *mm)
  {
 -	free_page((unsigned long) mm->context.tsb);
 +	unsigned long size = mm->context.tsb_nentries * sizeof(struct tsb);
 +
 +	free_pages((unsigned long) mm->context.tsb, get_order(size));
  
  	/* We can remove these later, but for now it's useful
  	 * to catch any bogus post-destroy_context() references
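The copy_tsb() routine in the hunk above rehashes every valid entry from the old table into the new one, rebuilding each entry's virtual address from its tag: the tag's low 42 bits hold VA bits 22-63 with the context cleared, and VA bits 13-21 are implied by the entry's slot in the old 512-entry table. The stand-alone sketch below reproduces only that arithmetic; the constants, the example tag and slot values, and the tag_to_vaddr()/rehash() helper names are assumptions for illustration, and rehash() assumes tsb_hash() indexes by page number modulo the table size.

#include <stdio.h>

#define PAGE_SHIFT	13UL	/* 8K base pages: VA bits 0..12 are the page offset */
#define OLD_NENTRIES	512UL	/* entries in an 8K TSB                             */

/* Rebuild the virtual address the way copy_tsb() does: the high bits come
 * from the tag, the implied bits 13..21 from the slot in the old table.
 */
static unsigned long tag_to_vaddr(unsigned long tag, unsigned long slot)
{
	unsigned long v;

	v  = (tag & ((1UL << 42UL) - 1UL)) << 22UL;		/* VA bits 22..63 */
	v |= (slot & (OLD_NENTRIES - 1UL)) << PAGE_SHIFT;	/* VA bits 13..21 */
	return v;
}

/* Pick the slot in the grown table, assuming tsb_hash() hashes by page
 * number modulo the number of entries.
 */
static unsigned long rehash(unsigned long vaddr, unsigned long new_nentries)
{
	return (vaddr >> PAGE_SHIFT) & (new_nentries - 1UL);
}

int main(void)
{
	unsigned long tag  = 0x2aaabUL;	/* hypothetical tag value           */
	unsigned long slot = 37UL;	/* hypothetical slot in the old TSB */
	unsigned long v    = tag_to_vaddr(tag, slot);

	printf("vaddr = 0x%016lx -> slot %lu of a 1024-entry TSB\n",
	       v, rehash(v, 1024UL));
	return 0;
}

Colliding entries simply overwrite each other during the copy, as they do in copy_tsb() itself; since the TSB is only a cache of the page tables, a lost entry costs a refill on the next miss rather than correctness.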
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 2effeba2476c..76008ff6a90b 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -100,6 +100,7 @@ struct tsb {
  typedef struct {
  	unsigned long	sparc64_ctx_val;
  	struct tsb	*tsb;
 +	unsigned long	tsb_rss_limit;
  	unsigned long	tsb_nentries;
  	unsigned long	tsb_reg_val;
  	unsigned long	tsb_map_vaddr;
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 0a950f151d2b..1d232678821d 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -32,6 +32,13 @@ static inline void tsb_context_switch(struct mm_struct *mm)
  			     mm->context.tsb_map_pte);
  }
  
 +extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss, gfp_t gfp_flags);
 +#ifdef CONFIG_SMP
 +extern void smp_tsb_sync(struct mm_struct *mm);
 +#else
 +#define smp_tsb_sync(__mm) do { } while (0)
 +#endif
 +
  /* Set MMU context in the actual hardware. */
  #define load_secondary_context(__mm) \
  	__asm__ __volatile__("stxa %0, [%1] %2\n\t" \