aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorAaro Koskinen <aaro.koskinen@nokia.com>2009-04-14 08:07:35 -0400
committerRussell King <rmk+kernel@arm.linux.org.uk>2009-04-15 05:01:02 -0400
commit7fccfc00c003c855936970facdbb667bae9dbe9a (patch)
tree73c486c5db1c042dba8d507655c5176742bbb9ca /arch
parent41609ff43005de11dadfb0ccadb344f0e2966829 (diff)
[ARM] 5450/1: Flush only the needed range when unmapping a VMA
When unmapping N pages (e.g. shared memory) the amount of TLB flushes done can be (N*PAGE_SIZE/ZAP_BLOCK_SIZE)*N although it should be N at maximum. With PREEMPT kernel ZAP_BLOCK_SIZE is 8 pages, so there is a noticeable performance penalty when unmapping a large VMA and the system is spending its time in flush_tlb_range(). The problem is that tlb_end_vma() is always flushing the full VMA range. The subrange that needs to be flushed can be calculated by tlb_remove_tlb_entry(). This approach was suggested by Hugh Dickins, and is also used by other arches. The speed increase is roughly 3x for 8M mappings and for larger mappings even more. Signed-off-by: Aaro Koskinen <Aaro.Koskinen@nokia.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/tlb.h25
1 files changed, 21 insertions, 4 deletions
diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 857f1dfac794..321c83e43a1e 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -36,6 +36,8 @@
36struct mmu_gather { 36struct mmu_gather {
37 struct mm_struct *mm; 37 struct mm_struct *mm;
38 unsigned int fullmm; 38 unsigned int fullmm;
39 unsigned long range_start;
40 unsigned long range_end;
39}; 41};
40 42
41DECLARE_PER_CPU(struct mmu_gather, mmu_gathers); 43DECLARE_PER_CPU(struct mmu_gather, mmu_gathers);
@@ -63,7 +65,19 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
63 put_cpu_var(mmu_gathers); 65 put_cpu_var(mmu_gathers);
64} 66}
65 67
66#define tlb_remove_tlb_entry(tlb,ptep,address) do { } while (0) 68/*
69 * Memorize the range for the TLB flush.
70 */
71static inline void
72tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep, unsigned long addr)
73{
74 if (!tlb->fullmm) {
75 if (addr < tlb->range_start)
76 tlb->range_start = addr;
77 if (addr + PAGE_SIZE > tlb->range_end)
78 tlb->range_end = addr + PAGE_SIZE;
79 }
80}
67 81
68/* 82/*
69 * In the case of tlb vma handling, we can optimise these away in the 83 * In the case of tlb vma handling, we can optimise these away in the
@@ -73,15 +87,18 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
73static inline void 87static inline void
74tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) 88tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
75{ 89{
76 if (!tlb->fullmm) 90 if (!tlb->fullmm) {
77 flush_cache_range(vma, vma->vm_start, vma->vm_end); 91 flush_cache_range(vma, vma->vm_start, vma->vm_end);
92 tlb->range_start = TASK_SIZE;
93 tlb->range_end = 0;
94 }
78} 95}
79 96
80static inline void 97static inline void
81tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) 98tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
82{ 99{
83 if (!tlb->fullmm) 100 if (!tlb->fullmm && tlb->range_end > 0)
84 flush_tlb_range(vma, vma->vm_start, vma->vm_end); 101 flush_tlb_range(vma, tlb->range_start, tlb->range_end);
85} 102}
86 103
87#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page) 104#define tlb_remove_page(tlb,page) free_page_and_swap_cache(page)