author     Andrea Arcangeli <aarcange@redhat.com>          2011-10-31 20:08:26 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2011-10-31 20:30:48 -0400
commit     7b6efc2bc4f19952b25ebf9b236e5ac43cd386c2 (patch)
tree       bae674bd95329a498a5f2cc5d9c23bf5a4a54305  /mm/mremap.c
parent     ebed48460be5abd86d9a24fa7c66378e58109f30 (diff)
mremap: avoid sending one IPI per page
This replaces ptep_clear_flush() with ptep_get_and_clear() and a single
flush_tlb_range() at the end of the loop, to avoid sending one IPI for
each page.

The mmu_notifier_invalidate_range_start/end section is enlarged
accordingly, but this is not going to fundamentally change things.  It
was more by accident that the region under mremap was for the most part
still available to secondary MMUs: the primary MMU was never allowed to
reliably access that region for the duration of the mremap (modulo
trapping SIGSEGV on the old address range, which sounds impractical and
flaky).  If users want secondary MMUs not to lose access to a large
region under mremap, they should reduce the mremap size accordingly in
userland and run multiple calls.  Overall this will run faster, so it
actually reduces the time the region is under mremap for the primary
MMU, which should provide a net benefit to apps.

For KVM this is a noop, because guest physical memory is never
mremapped; there is simply no point in moving it while the guest runs.
One target of this optimization is JVM GC (so unrelated to the mmu
notifier logic).

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Johannes Weiner <jweiner@redhat.com>
Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
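For readers who want the TLB-flush change in isolation, here is a minimal
sketch of the before/after pattern.  The function names below
(move_ptes_per_page_flush, move_ptes_batched_flush) are illustrative only:
the page-table locking, i_mmap handling, move_pte() and mmu notifier calls
of the real move_ptes()/move_page_tables() are omitted, and in the actual
patch the single flush_tlb_range() is issued from move_page_tables() over
the whole mremap range, not per move_ptes() call as shown here.

/* Illustrative sketch only -- not the actual mm/mremap.c code. */
#include <linux/mm.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>

/* Old scheme: ptep_clear_flush() flushes (and IPIs) once per present PTE. */
static void move_ptes_per_page_flush(struct vm_area_struct *vma,
                                     struct mm_struct *mm,
                                     pte_t *old_pte, pte_t *new_pte,
                                     unsigned long old_addr,
                                     unsigned long old_end,
                                     unsigned long new_addr)
{
        for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
                                   new_pte++, new_addr += PAGE_SIZE) {
                pte_t pte;

                if (pte_none(*old_pte))
                        continue;
                /* clear + per-page TLB flush: one IPI round per page */
                pte = ptep_clear_flush(vma, old_addr, old_pte);
                set_pte_at(mm, new_addr, new_pte, pte);
        }
}

/* New scheme: clear without flushing, then flush the whole range once. */
static void move_ptes_batched_flush(struct vm_area_struct *vma,
                                    struct mm_struct *mm,
                                    pte_t *old_pte, pte_t *new_pte,
                                    unsigned long old_addr,
                                    unsigned long old_end,
                                    unsigned long new_addr)
{
        unsigned long old_start = old_addr;
        bool need_flush = false;

        for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
                                   new_pte++, new_addr += PAGE_SIZE) {
                pte_t pte;

                if (pte_none(*old_pte))
                        continue;
                /* clear the PTE but leave any stale TLB entries in place */
                pte = ptep_get_and_clear(mm, old_addr, old_pte);
                set_pte_at(mm, new_addr, new_pte, pte);
                need_flush = true;
        }
        if (need_flush)
                /* a single range flush (one IPI round) covers every page */
                flush_tlb_range(vma, old_start, old_end);
}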
Diffstat (limited to 'mm/mremap.c')
-rw-r--r--  mm/mremap.c  15
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/mm/mremap.c b/mm/mremap.c
index 195e866568e0..a184f3732e1e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -80,11 +80,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         struct mm_struct *mm = vma->vm_mm;
         pte_t *old_pte, *new_pte, pte;
         spinlock_t *old_ptl, *new_ptl;
-        unsigned long old_start;
 
-        old_start = old_addr;
-        mmu_notifier_invalidate_range_start(vma->vm_mm,
-                                            old_start, old_end);
         if (vma->vm_file) {
                 /*
                  * Subtle point from Rajesh Venkatasubramanian: before
@@ -111,7 +107,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
                                    new_pte++, new_addr += PAGE_SIZE) {
                 if (pte_none(*old_pte))
                         continue;
-                pte = ptep_clear_flush(vma, old_addr, old_pte);
+                pte = ptep_get_and_clear(mm, old_addr, old_pte);
                 pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
                 set_pte_at(mm, new_addr, new_pte, pte);
         }
@@ -123,7 +119,6 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         pte_unmap_unlock(old_pte - 1, old_ptl);
         if (mapping)
                 mutex_unlock(&mapping->i_mmap_mutex);
-        mmu_notifier_invalidate_range_end(vma->vm_mm, old_start, old_end);
 }
 
 #define LATENCY_LIMIT   (64 * PAGE_SIZE)
@@ -134,10 +129,13 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 {
         unsigned long extent, next, old_end;
         pmd_t *old_pmd, *new_pmd;
+        bool need_flush = false;
 
         old_end = old_addr + len;
         flush_cache_range(vma, old_addr, old_end);
 
+        mmu_notifier_invalidate_range_start(vma->vm_mm, old_addr, old_end);
+
         for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
                 cond_resched();
                 next = (old_addr + PMD_SIZE) & PMD_MASK;
@@ -158,7 +156,12 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
                         extent = LATENCY_LIMIT;
                 move_ptes(vma, old_pmd, old_addr, old_addr + extent,
                                 new_vma, new_pmd, new_addr);
+                need_flush = true;
         }
+        if (likely(need_flush))
+                flush_tlb_range(vma, old_end-len, old_addr);
+
+        mmu_notifier_invalidate_range_end(vma->vm_mm, old_end-len, old_end);
 
         return len + old_addr - old_end;        /* how much done */
 }