diff options
author | Dean Roe <roe@sgi.com> | 2005-10-27 16:41:04 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-10-27 17:44:58 -0400 |
commit | c1902aae322952f8726469a6657df7b9d5c794fe (patch) | |
tree | 5c78f21c19597b64faf06e0faee7726ae01f7bbb /arch/ia64/mm | |
parent | 72ab373a5688a78cbdaf3bf96012e597d5399bb7 (diff) |
[IA64] - Avoid slow TLB purges on SGI Altix systems
flush_tlb_all() can be a scaling issue on large SGI Altix systems
since it uses the global call_lock and always executes on all cpus.
When a process enters flush_tlb_range() to purge TLBs for another
process, it is possible to avoid flush_tlb_all() and instead allow
sn2_global_tlb_purge() to purge TLBs only where necessary.
This patch modifies flush_tlb_range() so that this case can be handled
by platform TLB purge functions and updates ia64_global_tlb_purge()
accordingly. sn2_global_tlb_purge() now calculates the region register
value from the mm argument introduced with this patch.
Signed-off-by: Dean Roe <roe@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/mm')
-rw-r--r-- | arch/ia64/mm/tlb.c | 16 |
1 file changed, 9 insertions, 7 deletions
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 464557e4ed82..99ea8c70f408 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c | |||
@@ -86,10 +86,15 @@ wrap_mmu_context (struct mm_struct *mm) | |||
86 | } | 86 | } |
87 | 87 | ||
88 | void | 88 | void |
89 | ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits) | 89 | ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) |
90 | { | 90 | { |
91 | static DEFINE_SPINLOCK(ptcg_lock); | 91 | static DEFINE_SPINLOCK(ptcg_lock); |
92 | 92 | ||
93 | if (mm != current->active_mm) { | ||
94 | flush_tlb_all(); | ||
95 | return; | ||
96 | } | ||
97 | |||
93 | /* HW requires global serialization of ptc.ga. */ | 98 | /* HW requires global serialization of ptc.ga. */ |
94 | spin_lock(&ptcg_lock); | 99 | spin_lock(&ptcg_lock); |
95 | { | 100 | { |
@@ -135,15 +140,12 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long | |||
135 | unsigned long size = end - start; | 140 | unsigned long size = end - start; |
136 | unsigned long nbits; | 141 | unsigned long nbits; |
137 | 142 | ||
143 | #ifndef CONFIG_SMP | ||
138 | if (mm != current->active_mm) { | 144 | if (mm != current->active_mm) { |
139 | /* this does happen, but perhaps it's not worth optimizing for? */ | ||
140 | #ifdef CONFIG_SMP | ||
141 | flush_tlb_all(); | ||
142 | #else | ||
143 | mm->context = 0; | 145 | mm->context = 0; |
144 | #endif | ||
145 | return; | 146 | return; |
146 | } | 147 | } |
148 | #endif | ||
147 | 149 | ||
148 | nbits = ia64_fls(size + 0xfff); | 150 | nbits = ia64_fls(size + 0xfff); |
149 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) | 151 | while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) |
@@ -153,7 +155,7 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long | |||
153 | start &= ~((1UL << nbits) - 1); | 155 | start &= ~((1UL << nbits) - 1); |
154 | 156 | ||
155 | # ifdef CONFIG_SMP | 157 | # ifdef CONFIG_SMP |
156 | platform_global_tlb_purge(start, end, nbits); | 158 | platform_global_tlb_purge(mm, start, end, nbits); |
157 | # else | 159 | # else |
158 | do { | 160 | do { |
159 | ia64_ptcl(start, (nbits<<2)); | 161 | ia64_ptcl(start, (nbits<<2)); |