author	Hugh Dickins <hugh@veritas.com>	2005-10-29 21:16:00 -0400
committer	Linus Torvalds <torvalds@g5.osdl.org>	2005-10-30 00:40:37 -0400
commit	7be7a546994f1222b2312fd348da14e16b6b7b42 (patch)
tree	e1f8dae8783274372a0f136be6eb64102877e9f6
parent	65500d234e74fc4e8f18e1a429bc24e51e75de4a (diff)
[PATCH] mm: move_page_tables by extents
Speeding up mremap's moving of ptes has never been a priority, but the locking will get more complicated shortly, and is already too baroque.

Scrap the current one-by-one moving, do an extent at a time: curtailed by end of src and dst pmds (have to use PMD_SIZE: the way pmd_addr_end gets elided doesn't match this usage), and by latency considerations.

One nice property of the old method is lost: it never allocated a page table unless absolutely necessary, so you could free empty page tables by mremapping to and fro. Whereas this way, it allocates a dst table wherever there was a src table. I keep diving in to reinstate the old behaviour, then come out preferring not to clutter how it now is.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--	mm/mremap.c	168
1 file changed, 72 insertions, 96 deletions
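As a rough illustration of the extent clamping described in the commit message, here is a small userspace sketch, not kernel code: clamp_extent() and the trailing-underscore constants (PAGE_SIZE_, PMD_SIZE_, PMD_MASK_, LATENCY_LIMIT_) are stand-ins invented for this demo, while the real loop in move_page_tables() below operates on live page tables with the architecture's own definitions.

/*
 * Illustrative userspace sketch (not kernel code): how each iteration of
 * the new move_page_tables() loop clamps its working extent.  The
 * trailing-underscore constants are stand-ins for this demo; the real
 * values come from the architecture's page-table headers.
 */
#include <stdio.h>

#define PAGE_SIZE_	4096UL
#define PMD_SIZE_	(512 * PAGE_SIZE_)	/* 2MB with 4K pages */
#define PMD_MASK_	(~(PMD_SIZE_ - 1))
#define LATENCY_LIMIT_	(64 * PAGE_SIZE_)

/* Clamp the extent to one src pmd, one dst pmd, and the latency cap. */
static unsigned long clamp_extent(unsigned long old_addr,
				  unsigned long old_end,
				  unsigned long new_addr)
{
	unsigned long next, extent;

	next = (old_addr + PMD_SIZE_) & PMD_MASK_;	/* end of src pmd */
	if (next - 1 > old_end)
		next = old_end;
	extent = next - old_addr;

	next = (new_addr + PMD_SIZE_) & PMD_MASK_;	/* end of dst pmd */
	if (extent > next - new_addr)
		extent = next - new_addr;
	if (extent > LATENCY_LIMIT_)
		extent = LATENCY_LIMIT_;
	return extent;
}

int main(void)
{
	/* src and dst deliberately misaligned relative to each other */
	unsigned long old_addr = 0x200000UL - 3 * PAGE_SIZE_;
	unsigned long new_addr = 0x400000UL - 5 * PAGE_SIZE_;
	unsigned long old_end = old_addr + 32 * PAGE_SIZE_;
	unsigned long extent;

	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
		extent = clamp_extent(old_addr, old_end, new_addr);
		printf("move %2lu pages: old %#lx -> new %#lx\n",
		       extent / PAGE_SIZE_, old_addr, new_addr);
	}
	return 0;
}

Each pass moves at most one source pmd's worth, at most one destination pmd's worth, and at most LATENCY_LIMIT bytes; src/dst misalignment is what produces the short leading extents in the demo output.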
diff --git a/mm/mremap.c b/mm/mremap.c
index 55df8f53e84d..f4e562098500 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -22,40 +22,15 @@
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
-static pte_t *get_one_pte_map_nested(struct mm_struct *mm, unsigned long addr)
-{
-	pgd_t *pgd;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte = NULL;
-
-	pgd = pgd_offset(mm, addr);
-	if (pgd_none_or_clear_bad(pgd))
-		goto end;
-
-	pud = pud_offset(pgd, addr);
-	if (pud_none_or_clear_bad(pud))
-		goto end;
-
-	pmd = pmd_offset(pud, addr);
-	if (pmd_none_or_clear_bad(pmd))
-		goto end;
-
-	pte = pte_offset_map_nested(pmd, addr);
-	if (pte_none(*pte)) {
-		pte_unmap_nested(pte);
-		pte = NULL;
-	}
-end:
-	return pte;
-}
-
-static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr)
+static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 
+	/*
+	 * We don't need page_table_lock: we have mmap_sem exclusively.
+	 */
 	pgd = pgd_offset(mm, addr);
 	if (pgd_none_or_clear_bad(pgd))
 		return NULL;
@@ -68,35 +43,48 @@ static pte_t *get_one_pte_map(struct mm_struct *mm, unsigned long addr)
 	if (pmd_none_or_clear_bad(pmd))
 		return NULL;
 
-	return pte_offset_map(pmd, addr);
+	return pmd;
 }
 
-static inline pte_t *alloc_one_pte_map(struct mm_struct *mm, unsigned long addr)
+static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr)
 {
 	pgd_t *pgd;
 	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte = NULL;
+	pmd_t *pmd = NULL;
+	pte_t *pte;
 
+	/*
+	 * We do need page_table_lock: because allocators expect that.
+	 */
+	spin_lock(&mm->page_table_lock);
 	pgd = pgd_offset(mm, addr);
-
 	pud = pud_alloc(mm, pgd, addr);
 	if (!pud)
-		return NULL;
+		goto out;
+
 	pmd = pmd_alloc(mm, pud, addr);
-	if (pmd)
-		pte = pte_alloc_map(mm, pmd, addr);
-	return pte;
+	if (!pmd)
+		goto out;
+
+	pte = pte_alloc_map(mm, pmd, addr);
+	if (!pte) {
+		pmd = NULL;
+		goto out;
+	}
+	pte_unmap(pte);
+out:
+	spin_unlock(&mm->page_table_lock);
+	return pmd;
 }
 
-static int
-move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
-		struct vm_area_struct *new_vma, unsigned long new_addr)
+static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
+		unsigned long old_addr, unsigned long old_end,
+		struct vm_area_struct *new_vma, pmd_t *new_pmd,
+		unsigned long new_addr)
 {
 	struct address_space *mapping = NULL;
 	struct mm_struct *mm = vma->vm_mm;
-	int error = 0;
-	pte_t *src, *dst;
+	pte_t *old_pte, *new_pte, pte;
 
 	if (vma->vm_file) {
 		/*
@@ -111,74 +99,62 @@ move_one_page(struct vm_area_struct *vma, unsigned long old_addr,
 		    new_vma->vm_truncate_count != vma->vm_truncate_count)
 			new_vma->vm_truncate_count = 0;
 	}
-	spin_lock(&mm->page_table_lock);
 
-	src = get_one_pte_map_nested(mm, old_addr);
-	if (src) {
-		/*
-		 * Look to see whether alloc_one_pte_map needs to perform a
-		 * memory allocation. If it does then we need to drop the
-		 * atomic kmap
-		 */
-		dst = get_one_pte_map(mm, new_addr);
-		if (unlikely(!dst)) {
-			pte_unmap_nested(src);
-			if (mapping)
-				spin_unlock(&mapping->i_mmap_lock);
-			dst = alloc_one_pte_map(mm, new_addr);
-			if (mapping && !spin_trylock(&mapping->i_mmap_lock)) {
-				spin_unlock(&mm->page_table_lock);
-				spin_lock(&mapping->i_mmap_lock);
-				spin_lock(&mm->page_table_lock);
-			}
-			src = get_one_pte_map_nested(mm, old_addr);
-		}
-		/*
-		 * Since alloc_one_pte_map can drop and re-acquire
-		 * page_table_lock, we should re-check the src entry...
-		 */
-		if (src) {
-			if (dst) {
-				pte_t pte;
-				pte = ptep_clear_flush(vma, old_addr, src);
-
-				/* ZERO_PAGE can be dependant on virtual addr */
-				pte = move_pte(pte, new_vma->vm_page_prot,
-							old_addr, new_addr);
-				set_pte_at(mm, new_addr, dst, pte);
-			} else
-				error = -ENOMEM;
-			pte_unmap_nested(src);
-		}
-		if (dst)
-			pte_unmap(dst);
+	spin_lock(&mm->page_table_lock);
+	old_pte = pte_offset_map(old_pmd, old_addr);
+	new_pte = pte_offset_map_nested(new_pmd, new_addr);
+
+	for (; old_addr < old_end; old_pte++, old_addr += PAGE_SIZE,
+				   new_pte++, new_addr += PAGE_SIZE) {
+		if (pte_none(*old_pte))
+			continue;
+		pte = ptep_clear_flush(vma, old_addr, old_pte);
+		/* ZERO_PAGE can be dependant on virtual addr */
+		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
+		set_pte_at(mm, new_addr, new_pte, pte);
 	}
+
+	pte_unmap_nested(new_pte - 1);
+	pte_unmap(old_pte - 1);
 	spin_unlock(&mm->page_table_lock);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
-	return error;
 }
 
+#define LATENCY_LIMIT	(64 * PAGE_SIZE)
+
 static unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
 		unsigned long new_addr, unsigned long len)
 {
-	unsigned long offset;
+	unsigned long extent, next, old_end;
+	pmd_t *old_pmd, *new_pmd;
 
-	flush_cache_range(vma, old_addr, old_addr + len);
+	old_end = old_addr + len;
+	flush_cache_range(vma, old_addr, old_end);
 
-	/*
-	 * This is not the clever way to do this, but we're taking the
-	 * easy way out on the assumption that most remappings will be
-	 * only a few pages.. This also makes error recovery easier.
-	 */
-	for (offset = 0; offset < len; offset += PAGE_SIZE) {
-		if (move_one_page(vma, old_addr + offset,
-				new_vma, new_addr + offset) < 0)
-			break;
+	for (; old_addr < old_end; old_addr += extent, new_addr += extent) {
 		cond_resched();
+		next = (old_addr + PMD_SIZE) & PMD_MASK;
+		if (next - 1 > old_end)
+			next = old_end;
+		extent = next - old_addr;
+		old_pmd = get_old_pmd(vma->vm_mm, old_addr);
+		if (!old_pmd)
+			continue;
+		new_pmd = alloc_new_pmd(vma->vm_mm, new_addr);
+		if (!new_pmd)
+			break;
+		next = (new_addr + PMD_SIZE) & PMD_MASK;
+		if (extent > next - new_addr)
+			extent = next - new_addr;
+		if (extent > LATENCY_LIMIT)
+			extent = LATENCY_LIMIT;
+		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
+				new_vma, new_pmd, new_addr);
 	}
-	return offset;
+
+	return len + old_addr - old_end;	/* how much done */
 }
 
 static unsigned long move_vma(struct vm_area_struct *vma,