aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/ia64/kernel/setup.c1
-rw-r--r--arch/ia64/mm/tlb.c85
-rw-r--r--include/asm-ia64/mmu_context.h81
-rw-r--r--include/asm-ia64/tlbflush.h1
4 files changed, 92 insertions, 76 deletions
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 3af6de36a482..5add0bcf87a7 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -461,6 +461,7 @@ setup_arch (char **cmdline_p)
461#endif 461#endif
462 462
463 cpu_init(); /* initialize the bootstrap CPU */ 463 cpu_init(); /* initialize the bootstrap CPU */
464 mmu_context_init(); /* initialize context_id bitmap */
464 465
465#ifdef CONFIG_ACPI 466#ifdef CONFIG_ACPI
466 acpi_boot_init(); 467 acpi_boot_init();
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index c79a9b96d02b..41105d454423 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -8,6 +8,8 @@
8 * Modified RID allocation for SMP 8 * Modified RID allocation for SMP
9 * Goutham Rao <goutham.rao@intel.com> 9 * Goutham Rao <goutham.rao@intel.com>
10 * IPI based ptc implementation and A-step IPI implementation. 10 * IPI based ptc implementation and A-step IPI implementation.
11 * Rohit Seth <rohit.seth@intel.com>
12 * Ken Chen <kenneth.w.chen@intel.com>
11 */ 13 */
12#include <linux/config.h> 14#include <linux/config.h>
13#include <linux/module.h> 15#include <linux/module.h>
@@ -16,78 +18,75 @@
16#include <linux/sched.h> 18#include <linux/sched.h>
17#include <linux/smp.h> 19#include <linux/smp.h>
18#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/bootmem.h>
19 22
20#include <asm/delay.h> 23#include <asm/delay.h>
21#include <asm/mmu_context.h> 24#include <asm/mmu_context.h>
22#include <asm/pgalloc.h> 25#include <asm/pgalloc.h>
23#include <asm/pal.h> 26#include <asm/pal.h>
24#include <asm/tlbflush.h> 27#include <asm/tlbflush.h>
28#include <asm/dma.h>
25 29
26static struct { 30static struct {
27 unsigned long mask; /* mask of supported purge page-sizes */ 31 unsigned long mask; /* mask of supported purge page-sizes */
28 unsigned long max_bits; /* log2() of largest supported purge page-size */ 32 unsigned long max_bits; /* log2 of largest supported purge page-size */
29} purge; 33} purge;
30 34
31struct ia64_ctx ia64_ctx = { 35struct ia64_ctx ia64_ctx = {
32 .lock = SPIN_LOCK_UNLOCKED, 36 .lock = SPIN_LOCK_UNLOCKED,
33 .next = 1, 37 .next = 1,
34 .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
35 .max_ctx = ~0U 38 .max_ctx = ~0U
36}; 39};
37 40
38DEFINE_PER_CPU(u8, ia64_need_tlb_flush); 41DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
39 42
40/* 43/*
44 * Initializes the ia64_ctx.bitmap array based on max_ctx+1.
45 * Called after cpu_init() has setup ia64_ctx.max_ctx based on
46 * maximum RID that is supported by boot CPU.
47 */
48void __init
49mmu_context_init (void)
50{
51 ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
52 ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3);
53}
54
55/*
41 * Acquire the ia64_ctx.lock before calling this function! 56 * Acquire the ia64_ctx.lock before calling this function!
42 */ 57 */
43void 58void
44wrap_mmu_context (struct mm_struct *mm) 59wrap_mmu_context (struct mm_struct *mm)
45{ 60{
46 unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; 61 int i, cpu;
47 struct task_struct *tsk; 62 unsigned long flush_bit;
48 int i;
49 63
50 if (ia64_ctx.next > max_ctx) 64 for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) {
51 ia64_ctx.next = 300; /* skip daemons */ 65 flush_bit = xchg(&ia64_ctx.flushmap[i], 0);
52 ia64_ctx.limit = max_ctx + 1; 66 ia64_ctx.bitmap[i] ^= flush_bit;
67 }
68
69 /* use offset at 300 to skip daemons */
70 ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
71 ia64_ctx.max_ctx, 300);
72 ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
73 ia64_ctx.max_ctx, ia64_ctx.next);
53 74
54 /* 75 /*
55 * Scan all the task's mm->context and set proper safe range 76 * can't call flush_tlb_all() here because of race condition
77 * with O(1) scheduler [EF]
56 */ 78 */
57 79 cpu = get_cpu(); /* prevent preemption/migration */
58 read_lock(&tasklist_lock); 80 for_each_online_cpu(i)
59 repeat: 81 if (i != cpu)
60 for_each_process(tsk) { 82 per_cpu(ia64_need_tlb_flush, i) = 1;
61 if (!tsk->mm) 83 put_cpu();
62 continue;
63 tsk_context = tsk->mm->context;
64 if (tsk_context == ia64_ctx.next) {
65 if (++ia64_ctx.next >= ia64_ctx.limit) {
66 /* empty range: reset the range limit and start over */
67 if (ia64_ctx.next > max_ctx)
68 ia64_ctx.next = 300;
69 ia64_ctx.limit = max_ctx + 1;
70 goto repeat;
71 }
72 }
73 if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
74 ia64_ctx.limit = tsk_context;
75 }
76 read_unlock(&tasklist_lock);
77 /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
78 {
79 int cpu = get_cpu(); /* prevent preemption/migration */
80 for_each_online_cpu(i) {
81 if (i != cpu)
82 per_cpu(ia64_need_tlb_flush, i) = 1;
83 }
84 put_cpu();
85 }
86 local_flush_tlb_all(); 84 local_flush_tlb_all();
87} 85}
88 86
89void 87void
90ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) 88ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start,
89 unsigned long end, unsigned long nbits)
91{ 90{
92 static DEFINE_SPINLOCK(ptcg_lock); 91 static DEFINE_SPINLOCK(ptcg_lock);
93 92
@@ -135,7 +134,8 @@ local_flush_tlb_all (void)
135} 134}
136 135
137void 136void
138flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) 137flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
138 unsigned long end)
139{ 139{
140 struct mm_struct *mm = vma->vm_mm; 140 struct mm_struct *mm = vma->vm_mm;
141 unsigned long size = end - start; 141 unsigned long size = end - start;
@@ -149,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long
149#endif 149#endif
150 150
151 nbits = ia64_fls(size + 0xfff); 151 nbits = ia64_fls(size + 0xfff);
152 while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) 152 while (unlikely (((1UL << nbits) & purge.mask) == 0) &&
153 (nbits < purge.max_bits))
153 ++nbits; 154 ++nbits;
154 if (nbits > purge.max_bits) 155 if (nbits > purge.max_bits)
155 nbits = purge.max_bits; 156 nbits = purge.max_bits;
@@ -191,5 +192,5 @@ ia64_tlb_init (void)
191 local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; 192 local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
192 local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; 193 local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
193 194
194 local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ 195 local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
195} 196}
diff --git a/include/asm-ia64/mmu_context.h b/include/asm-ia64/mmu_context.h
index 8d6e72f7b08e..b5c65081a3aa 100644
--- a/include/asm-ia64/mmu_context.h
+++ b/include/asm-ia64/mmu_context.h
@@ -7,12 +7,13 @@
7 */ 7 */
8 8
9/* 9/*
10 * Routines to manage the allocation of task context numbers. Task context numbers are 10 * Routines to manage the allocation of task context numbers. Task context
11 * used to reduce or eliminate the need to perform TLB flushes due to context switches. 11 * numbers are used to reduce or eliminate the need to perform TLB flushes
12 * Context numbers are implemented using ia-64 region ids. Since the IA-64 TLB does not 12 * due to context switches. Context numbers are implemented using ia-64
13 * consider the region number when performing a TLB lookup, we need to assign a unique 13 * region ids. Since the IA-64 TLB does not consider the region number when
14 * region id to each region in a process. We use the least significant three bits in a 14 * performing a TLB lookup, we need to assign a unique region id to each
15 * region id for this purpose. 15 * region in a process. We use the least significant three bits in aregion
16 * id for this purpose.
16 */ 17 */
17 18
18#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ 19#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */
@@ -32,13 +33,17 @@
32struct ia64_ctx { 33struct ia64_ctx {
33 spinlock_t lock; 34 spinlock_t lock;
34 unsigned int next; /* next context number to use */ 35 unsigned int next; /* next context number to use */
35 unsigned int limit; /* next >= limit => must call wrap_mmu_context() */ 36 unsigned int limit; /* available free range */
36 unsigned int max_ctx; /* max. context value supported by all CPUs */ 37 unsigned int max_ctx; /* max. context value supported by all CPUs */
38 /* call wrap_mmu_context when next >= max */
39 unsigned long *bitmap; /* bitmap size is max_ctx+1 */
40 unsigned long *flushmap;/* pending rid to be flushed */
37}; 41};
38 42
39extern struct ia64_ctx ia64_ctx; 43extern struct ia64_ctx ia64_ctx;
40DECLARE_PER_CPU(u8, ia64_need_tlb_flush); 44DECLARE_PER_CPU(u8, ia64_need_tlb_flush);
41 45
46extern void mmu_context_init (void);
42extern void wrap_mmu_context (struct mm_struct *mm); 47extern void wrap_mmu_context (struct mm_struct *mm);
43 48
44static inline void 49static inline void
@@ -47,10 +52,10 @@ enter_lazy_tlb (struct mm_struct *mm, struct task_struct *tsk)
47} 52}
48 53
49/* 54/*
50 * When the context counter wraps around all TLBs need to be flushed because an old 55 * When the context counter wraps around all TLBs need to be flushed because
51 * context number might have been reused. This is signalled by the ia64_need_tlb_flush 56 * an old context number might have been reused. This is signalled by the
52 * per-CPU variable, which is checked in the routine below. Called by activate_mm(). 57 * ia64_need_tlb_flush per-CPU variable, which is checked in the routine
53 * <efocht@ess.nec.de> 58 * below. Called by activate_mm(). <efocht@ess.nec.de>
54 */ 59 */
55static inline void 60static inline void
56delayed_tlb_flush (void) 61delayed_tlb_flush (void)
@@ -60,11 +65,9 @@ delayed_tlb_flush (void)
60 65
61 if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { 66 if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) {
62 spin_lock_irqsave(&ia64_ctx.lock, flags); 67 spin_lock_irqsave(&ia64_ctx.lock, flags);
63 { 68 if (__ia64_per_cpu_var(ia64_need_tlb_flush)) {
64 if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { 69 local_flush_tlb_all();
65 local_flush_tlb_all(); 70 __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
66 __ia64_per_cpu_var(ia64_need_tlb_flush) = 0;
67 }
68 } 71 }
69 spin_unlock_irqrestore(&ia64_ctx.lock, flags); 72 spin_unlock_irqrestore(&ia64_ctx.lock, flags);
70 } 73 }
@@ -76,20 +79,27 @@ get_mmu_context (struct mm_struct *mm)
76 unsigned long flags; 79 unsigned long flags;
77 nv_mm_context_t context = mm->context; 80 nv_mm_context_t context = mm->context;
78 81
79 if (unlikely(!context)) { 82 if (likely(context))
80 spin_lock_irqsave(&ia64_ctx.lock, flags); 83 goto out;
81 { 84
82 /* re-check, now that we've got the lock: */ 85 spin_lock_irqsave(&ia64_ctx.lock, flags);
83 context = mm->context; 86 /* re-check, now that we've got the lock: */
84 if (context == 0) { 87 context = mm->context;
85 cpus_clear(mm->cpu_vm_mask); 88 if (context == 0) {
86 if (ia64_ctx.next >= ia64_ctx.limit) 89 cpus_clear(mm->cpu_vm_mask);
87 wrap_mmu_context(mm); 90 if (ia64_ctx.next >= ia64_ctx.limit) {
88 mm->context = context = ia64_ctx.next++; 91 ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap,
89 } 92 ia64_ctx.max_ctx, ia64_ctx.next);
93 ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap,
94 ia64_ctx.max_ctx, ia64_ctx.next);
95 if (ia64_ctx.next >= ia64_ctx.max_ctx)
96 wrap_mmu_context(mm);
90 } 97 }
91 spin_unlock_irqrestore(&ia64_ctx.lock, flags); 98 mm->context = context = ia64_ctx.next++;
99 __set_bit(context, ia64_ctx.bitmap);
92 } 100 }
101 spin_unlock_irqrestore(&ia64_ctx.lock, flags);
102out:
93 /* 103 /*
94 * Ensure we're not starting to use "context" before any old 104 * Ensure we're not starting to use "context" before any old
95 * uses of it are gone from our TLB. 105 * uses of it are gone from our TLB.
@@ -100,8 +110,8 @@ get_mmu_context (struct mm_struct *mm)
100} 110}
101 111
102/* 112/*
103 * Initialize context number to some sane value. MM is guaranteed to be a brand-new 113 * Initialize context number to some sane value. MM is guaranteed to be a
104 * address-space, so no TLB flushing is needed, ever. 114 * brand-new address-space, so no TLB flushing is needed, ever.
105 */ 115 */
106static inline int 116static inline int
107init_new_context (struct task_struct *p, struct mm_struct *mm) 117init_new_context (struct task_struct *p, struct mm_struct *mm)
@@ -162,7 +172,10 @@ activate_context (struct mm_struct *mm)
162 if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask)) 172 if (!cpu_isset(smp_processor_id(), mm->cpu_vm_mask))
163 cpu_set(smp_processor_id(), mm->cpu_vm_mask); 173 cpu_set(smp_processor_id(), mm->cpu_vm_mask);
164 reload_context(context); 174 reload_context(context);
165 /* in the unlikely event of a TLB-flush by another thread, redo the load: */ 175 /*
176 * in the unlikely event of a TLB-flush by another thread,
177 * redo the load.
178 */
166 } while (unlikely(context != mm->context)); 179 } while (unlikely(context != mm->context));
167} 180}
168 181
@@ -175,8 +188,8 @@ static inline void
175activate_mm (struct mm_struct *prev, struct mm_struct *next) 188activate_mm (struct mm_struct *prev, struct mm_struct *next)
176{ 189{
177 /* 190 /*
178 * We may get interrupts here, but that's OK because interrupt handlers cannot 191 * We may get interrupts here, but that's OK because interrupt
179 * touch user-space. 192 * handlers cannot touch user-space.
180 */ 193 */
181 ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); 194 ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd));
182 activate_context(next); 195 activate_context(next);
diff --git a/include/asm-ia64/tlbflush.h b/include/asm-ia64/tlbflush.h
index b65c62702724..a35b323bae4c 100644
--- a/include/asm-ia64/tlbflush.h
+++ b/include/asm-ia64/tlbflush.h
@@ -51,6 +51,7 @@ flush_tlb_mm (struct mm_struct *mm)
51 if (!mm) 51 if (!mm)
52 return; 52 return;
53 53
54 set_bit(mm->context, ia64_ctx.flushmap);
54 mm->context = 0; 55 mm->context = 0;
55 56
56 if (atomic_read(&mm->mm_users) == 0) 57 if (atomic_read(&mm->mm_users) == 0)