author     Chen, Kenneth W <kenneth.w.chen@intel.com>  2006-03-06 17:12:54 -0500
committer  Tony Luck <tony.luck@intel.com>             2006-03-27 13:20:03 -0500
commit     ce9eed5a98efacb896551d3470d9d46826caaee5 (patch)
tree       13752f8ae5e941b83f57ba815b1af4e7bbcff34d /arch
parent     5e48521e869de7d904bb5ffe9739258ffa026927 (diff)
[IA64] optimize flush_tlb_range on large numa box
It was reported by a field customer that the global spin lock ptcg_lock is causing a lot of grief for munmap performance on a large NUMA machine. The problem appears to come from flush_tlb_range(), which currently calls platform_global_tlb_purge() unconditionally. On some of the NUMA machines in existence today, this function maps to ia64_global_tlb_purge(), which holds the ptcg_lock spin lock while executing the ptc.ga instruction.

Here is a patch that attempts to avoid the global TLB purge whenever possible and uses the local TLB purge as much as possible, though the conditions under which the local purge can be used are fairly restrictive. One side effect of having a flush-tlb-range instruction on ia64 is that the kernel never gets a chance to clear out cpu_vm_mask: on ia64 this mask is sticky, and it accumulates as a process bounces between CPUs, diminishing the opportunities to use ptc.l.

Thoughts?

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Jack Steiner <steiner@sgi.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
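As an aid to reading the patch below, here is a minimal, self-contained user-space sketch of the decision it introduces. The names mm_state, flush_range, global_purge and local_purge are invented for illustration only; the kernel code instead tests mm against current->active_mm and cpus_weight(mm->cpu_vm_mask), and calls platform_global_tlb_purge() or loops over ia64_ptcl().

/* Illustrative sketch only -- not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct mm_state {
	bool is_current;   /* stand-in for: mm == current->active_mm */
	int  cpu_count;    /* stand-in for: cpus_weight(mm->cpu_vm_mask) */
};

static void global_purge(unsigned long start, unsigned long end,
			 unsigned long nbits)
{
	/* Models platform_global_tlb_purge(): a ptc.ga broadcast that
	 * must be serialized under ptcg_lock on these machines. */
	(void)nbits;
	printf("global purge (ptc.ga) %#lx-%#lx\n", start, end);
}

static void local_purge(unsigned long start, unsigned long end,
			unsigned long nbits)
{
	/* Models the ia64_ptcl() loop: purge one (1UL << nbits)-sized
	 * region per iteration, on the local CPU only. */
	do {
		printf("local purge (ptc.l) at %#lx\n", start);
		start += 1UL << nbits;
	} while (start < end);
}

static void flush_range(const struct mm_state *mm, unsigned long start,
			unsigned long end, unsigned long nbits)
{
	/* A local purge is only safe when this mm is the one running on
	 * the current CPU and has never been scheduled anywhere else;
	 * otherwise another CPU may still hold stale translations and the
	 * global broadcast is still required. */
	if (!mm->is_current || mm->cpu_count != 1) {
		global_purge(start, end, nbits);
		return;
	}
	local_purge(start, end, nbits);
}

int main(void)
{
	struct mm_state only_here = { .is_current = true, .cpu_count = 1 };
	struct mm_state bounced   = { .is_current = true, .cpu_count = 3 };

	flush_range(&only_here, 0x10000, 0x14000, 14); /* takes the ptc.l path */
	flush_range(&bounced,   0x10000, 0x14000, 14); /* takes the ptc.ga path */
	return 0;
}

Because cpu_vm_mask only ever accumulates on ia64, a process that has bounced across CPUs keeps a weight greater than one and falls back to the global purge, which is exactly the limitation the commit message notes.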
Diffstat (limited to 'arch')
-rw-r--r--  arch/ia64/mm/tlb.c | 12
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index 6a4eec9113e8..4dbbca0b5e9c 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -156,17 +156,19 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start,
 	nbits = purge.max_bits;
 	start &= ~((1UL << nbits) - 1);
 
-# ifdef CONFIG_SMP
-	platform_global_tlb_purge(mm, start, end, nbits);
-# else
 	preempt_disable();
+#ifdef CONFIG_SMP
+	if (mm != current->active_mm || cpus_weight(mm->cpu_vm_mask) != 1) {
+		platform_global_tlb_purge(mm, start, end, nbits);
+		preempt_enable();
+		return;
+	}
+#endif
 	do {
 		ia64_ptcl(start, (nbits<<2));
 		start += (1UL << nbits);
 	} while (start < end);
 	preempt_enable();
-# endif
-
 	ia64_srlz_i();			/* srlz.i implies srlz.d */
 }
 EXPORT_SYMBOL(flush_tlb_range);