-rw-r--r--  arch/sparc64/kernel/smp.c           28
-rw-r--r--  arch/sparc64/mm/init.c               7
-rw-r--r--  arch/sparc64/mm/tsb.c              151
-rw-r--r--  include/asm-sparc64/mmu.h            1
-rw-r--r--  include/asm-sparc64/mmu_context.h    7
5 files changed, 184 insertions, 10 deletions
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 8c245859d212..3c14b549cf91 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -581,11 +581,11 @@ extern unsigned long xcall_call_function;
  * You must not call this function with disabled interrupts or from a
  * hardware interrupt handler or from a bottom half handler.
  */
-int smp_call_function(void (*func)(void *info), void *info,
-		      int nonatomic, int wait)
+static int smp_call_function_mask(void (*func)(void *info), void *info,
+				  int nonatomic, int wait, cpumask_t mask)
 {
 	struct call_data_struct data;
-	int cpus = num_online_cpus() - 1;
+	int cpus = cpus_weight(mask) - 1;
 	long timeout;
 
 	if (!cpus)
@@ -603,7 +603,7 @@ int smp_call_function(void (*func)(void *info), void *info,
 
 	call_data = &data;
 
-	smp_cross_call(&xcall_call_function, 0, 0, 0);
+	smp_cross_call_masked(&xcall_call_function, 0, 0, 0, mask);
 
 	/*
 	 * Wait for other cpus to complete function or at
@@ -629,6 +629,13 @@ out_timeout:
 	return 0;
 }
 
+int smp_call_function(void (*func)(void *info), void *info,
+		      int nonatomic, int wait)
+{
+	return smp_call_function_mask(func, info, nonatomic, wait,
+				      cpu_online_map);
+}
+
 void smp_call_function_client(int irq, struct pt_regs *regs)
 {
 	void (*func) (void *info) = call_data->func;
@@ -646,6 +653,19 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
 	}
 }
 
+static void tsb_sync(void *info)
+{
+	struct mm_struct *mm = info;
+
+	if (current->active_mm == mm)
+		tsb_context_switch(mm);
+}
+
+void smp_tsb_sync(struct mm_struct *mm)
+{
+	smp_call_function_mask(tsb_sync, mm, 0, 1, mm->cpu_vm_mask);
+}
+
 extern unsigned long xcall_flush_tlb_mm;
 extern unsigned long xcall_flush_tlb_pending;
 extern unsigned long xcall_flush_tlb_kernel_range;
diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
index 7c456afaa9a5..a8119cb4fa32 100644
--- a/arch/sparc64/mm/init.c
+++ b/arch/sparc64/mm/init.c
@@ -246,9 +246,11 @@ static __inline__ void clear_dcache_dirty_cpu(struct page *page, unsigned long c
 
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
 {
+	struct mm_struct *mm;
 	struct page *page;
 	unsigned long pfn;
 	unsigned long pg_flags;
+	unsigned long mm_rss;
 
 	pfn = pte_pfn(pte);
 	if (pfn_valid(pfn) &&
@@ -270,6 +272,11 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t p
 
 		put_cpu();
 	}
+
+	mm = vma->vm_mm;
+	mm_rss = get_mm_rss(mm);
+	if (mm_rss >= mm->context.tsb_rss_limit)
+		tsb_grow(mm, mm_rss, GFP_ATOMIC);
 }
 
 void flush_dcache_page(struct page *page)
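The hot-path check added to update_mmu_cache() above fires when the process RSS reaches mm->context.tsb_rss_limit, which tsb_grow() (added in mm/tsb.c below) sets to 3/4 of the current TSB's entry capacity. A minimal standalone sketch of that arithmetic, assuming the 16-byte struct tsb entries from asm-sparc64/mmu.h; the helper name is made up for illustration and is not part of the patch:

/* Illustrative only: the RSS threshold at which a TSB of 'tsb_bytes'
 * would be grown, i.e. 3/4 of its entry capacity.
 */
static unsigned long tsb_rss_limit_for(unsigned long tsb_bytes)
{
	unsigned long nentries = tsb_bytes / 16;	/* sizeof(struct tsb) */

	return (nentries * 3) / 4;
}

/* For the initial 8K TSB: 8192 / 16 = 512 entries, so the grow check
 * triggers at an RSS of 384 pages; a 16K TSB raises that to 768, and
 * so on up to the 1MB maximum, after which the limit is set to ~0UL
 * and the check never fires again.
 */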
diff --git a/arch/sparc64/mm/tsb.c b/arch/sparc64/mm/tsb.c
index dfe7144fcdf6..707af4b84a0e 100644
--- a/arch/sparc64/mm/tsb.c
+++ b/arch/sparc64/mm/tsb.c
@@ -10,6 +10,7 @@
 #include <asm/tlb.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
+#include <asm/tsb.h>
 
 /* We use an 8K TSB for the whole kernel, this allows to
  * handle about 4MB of modules and vmalloc mappings without
@@ -146,6 +147,9 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
 		tte |= _PAGE_SZ4MB;
 		page_sz = 4 * 1024 * 1024;
 		break;
+
+	default:
+		BUG();
 	};
 
 	tsb_reg |= base;
@@ -157,23 +161,158 @@ static void setup_tsb_params(struct mm_struct *mm, unsigned long tsb_bytes)
 	mm->context.tsb_map_pte = tte;
 }
 
+/* The page tables are locked against modifications while this
+ * runs.
+ *
+ * XXX do some prefetching...
+ */
+static void copy_tsb(struct tsb *old_tsb, unsigned long old_size,
+		     struct tsb *new_tsb, unsigned long new_size)
+{
+	unsigned long old_nentries = old_size / sizeof(struct tsb);
+	unsigned long new_nentries = new_size / sizeof(struct tsb);
+	unsigned long i;
+
+	for (i = 0; i < old_nentries; i++) {
+		register unsigned long tag asm("o4");
+		register unsigned long pte asm("o5");
+		unsigned long v;
+		unsigned int hash;
+
+		__asm__ __volatile__(
+			"ldda [%2] %3, %0"
+			: "=r" (tag), "=r" (pte)
+			: "r" (&old_tsb[i]), "i" (ASI_NUCLEUS_QUAD_LDD));
+
+		if (!tag || (tag & TSB_TAG_LOCK))
+			continue;
+
+		/* We only put base page size PTEs into the TSB,
+		 * but that might change in the future.  This code
+		 * would need to be changed if we start putting larger
+		 * page size PTEs into there.
+		 */
+		WARN_ON((pte & _PAGE_ALL_SZ_BITS) != _PAGE_SZBITS);
+
+		/* The tag holds bits 22 to 63 of the virtual address
+		 * and the context.  Clear out the context, and shift
+		 * up to make a virtual address.
+		 */
+		v = (tag & ((1UL << 42UL) - 1UL)) << 22UL;
+
+		/* The implied bits of the tag (bits 13 to 21) are
+		 * determined by the TSB entry index, so fill that in.
+		 */
+		v |= (i & (512UL - 1UL)) << 13UL;
+
+		hash = tsb_hash(v, new_nentries);
+		new_tsb[hash].tag = tag;
+		new_tsb[hash].pte = pte;
+	}
+}
+
+/* When the RSS of an address space exceeds mm->context.tsb_rss_limit,
+ * update_mmu_cache() invokes this routine to try and grow the TSB.
+ * When we reach the maximum TSB size supported, we stick ~0UL into
+ * mm->context.tsb_rss_limit so the grow checks in update_mmu_cache()
+ * will not trigger any longer.
+ *
+ * The TSB can be anywhere from 8K to 1MB in size, in increasing powers
+ * of two.  The TSB must be aligned to its size, so f.e. a 512K TSB
+ * must be 512K aligned.
+ *
+ * The idea here is to grow the TSB when the RSS of the process approaches
+ * the number of entries that the current TSB can hold at once.  Currently,
+ * we trigger when the RSS hits 3/4 of the TSB capacity.
+ */
+void tsb_grow(struct mm_struct *mm, unsigned long rss, gfp_t gfp_flags)
+{
+	unsigned long max_tsb_size = 1 * 1024 * 1024;
+	unsigned long size, old_size;
+	struct page *page;
+	struct tsb *old_tsb;
+
+	if (max_tsb_size > (PAGE_SIZE << MAX_ORDER))
+		max_tsb_size = (PAGE_SIZE << MAX_ORDER);
+
+	for (size = PAGE_SIZE; size < max_tsb_size; size <<= 1UL) {
+		unsigned long n_entries = size / sizeof(struct tsb);
+
+		n_entries = (n_entries * 3) / 4;
+		if (n_entries > rss)
+			break;
+	}
+
+	page = alloc_pages(gfp_flags | __GFP_ZERO, get_order(size));
+	if (unlikely(!page))
+		return;
+
+	if (size == max_tsb_size)
+		mm->context.tsb_rss_limit = ~0UL;
+	else
+		mm->context.tsb_rss_limit =
+			((size / sizeof(struct tsb)) * 3) / 4;
+
+	old_tsb = mm->context.tsb;
+	old_size = mm->context.tsb_nentries * sizeof(struct tsb);
+
+	if (old_tsb)
+		copy_tsb(old_tsb, old_size, page_address(page), size);
+
+	mm->context.tsb = page_address(page);
+	setup_tsb_params(mm, size);
+
+	/* If old_tsb is NULL, we're being invoked for the first time
+	 * from init_new_context().
+	 */
+	if (old_tsb) {
+		/* Now force all other processors to reload the new
+		 * TSB state.
+		 */
+		smp_tsb_sync(mm);
+
+		/* Finally reload it on the local cpu.  No further
+		 * references will remain to the old TSB and we can
+		 * thus free it up.
+		 */
+		tsb_context_switch(mm);
+
+		free_pages((unsigned long) old_tsb, get_order(old_size));
+	}
+}
+
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
 {
-	unsigned long page = get_zeroed_page(GFP_KERNEL);
+	unsigned long initial_rss;
 
 	mm->context.sparc64_ctx_val = 0UL;
-	if (unlikely(!page))
-		return -ENOMEM;
 
-	mm->context.tsb = (struct tsb *) page;
-	setup_tsb_params(mm, PAGE_SIZE);
+	/* copy_mm() copies over the parent's mm_struct before calling
+	 * us, so we need to zero out the TSB pointer or else tsb_grow()
+	 * will be confused and think there is an older TSB to free up.
+	 */
+	mm->context.tsb = NULL;
+
+	/* If this is fork, inherit the parent's TSB size.  We would
+	 * grow it to that size on the first page fault anyways.
+	 */
+	initial_rss = mm->context.tsb_nentries;
+	if (initial_rss)
+		initial_rss -= 1;
+
+	tsb_grow(mm, initial_rss, GFP_KERNEL);
+
+	if (unlikely(!mm->context.tsb))
+		return -ENOMEM;
 
 	return 0;
 }
 
 void destroy_context(struct mm_struct *mm)
 {
-	free_page((unsigned long) mm->context.tsb);
+	unsigned long size = mm->context.tsb_nentries * sizeof(struct tsb);
+
+	free_pages((unsigned long) mm->context.tsb, get_order(size));
 
 	/* We can remove these later, but for now it's useful
 	 * to catch any bogus post-destroy_context() references
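For reference, the virtual-address reconstruction that copy_tsb() performs when rehashing old entries into the new table can be followed with concrete numbers. A hedged standalone sketch, assuming the 8K base page size and the 42-bit tag layout described in the comments above; the helper name is made up for illustration and is not part of the patch:

/* Illustrative only: rebuild the hashed virtual address for entry 'i'
 * of an old 512-entry (8K) TSB, as copy_tsb() does above.
 */
static unsigned long tsb_entry_vaddr(unsigned long tag, unsigned long i)
{
	unsigned long v;

	/* Tag bits 0..41 hold VA bits 22..63; mask off the context
	 * bits stored above them and shift back into place.
	 */
	v = (tag & ((1UL << 42) - 1UL)) << 22;

	/* VA bits 13..21 are implied by the entry's index within a
	 * 512-entry table.
	 */
	v |= (i & 511UL) << 13;

	return v;
}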
diff --git a/include/asm-sparc64/mmu.h b/include/asm-sparc64/mmu.h
index 2effeba2476c..76008ff6a90b 100644
--- a/include/asm-sparc64/mmu.h
+++ b/include/asm-sparc64/mmu.h
@@ -100,6 +100,7 @@ struct tsb {
 typedef struct {
 	unsigned long sparc64_ctx_val;
 	struct tsb *tsb;
+	unsigned long tsb_rss_limit;
 	unsigned long tsb_nentries;
 	unsigned long tsb_reg_val;
 	unsigned long tsb_map_vaddr;
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 0a950f151d2b..1d232678821d 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -32,6 +32,13 @@ static inline void tsb_context_switch(struct mm_struct *mm)
 			     mm->context.tsb_map_pte);
 }
 
+extern void tsb_grow(struct mm_struct *mm, unsigned long mm_rss, gfp_t gfp_flags);
+#ifdef CONFIG_SMP
+extern void smp_tsb_sync(struct mm_struct *mm);
+#else
+#define smp_tsb_sync(__mm) do { } while (0)
+#endif
+
 /* Set MMU context in the actual hardware. */
 #define load_secondary_context(__mm) \
 	__asm__ __volatile__("stxa %0, [%1] %2\n\t" \