aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Wolfgang Wander <wwc@rentec.com> 2005-06-21 20:14:49 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-21 21:46:16 -0400
commit 1363c3cd8603a913a27e2995dccbd70d5312d8e6 (patch)
tree 405e7fc1ef44678f3ca0a54c536d0457e6e80f45
parent e7c8d5c9955a4d2e88e36b640563f5d6d5aba48a (diff)
[PATCH] Avoiding mmap fragmentation
Ingo recently introduced a great speedup for allocating new mmaps using the free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and causes huge performance increases in thread creation. The downside of this patch is that it does lead to fragmentation in the mmap-ed areas (visible via /proc/self/maps), such that some applications that work fine under 2.4 kernels quickly run out of memory on any 2.6 kernel. The problem is twofold: 1) the free_area_cache is used to continue a search for memory where the last search ended. Before the change new areas were always searched from the base address on. So now new small areas are cluttering holes of all sizes throughout the whole mmap-able region, whereas before small areas tended to fill holes near the base, leaving holes far from the base large and available for larger requests. 2) the free_area_cache also is set to the location of the last munmap-ed area, so in scenarios where we allocate e.g. five regions of 1K each, then free regions 4, 2, 3 in this order, the next request for 1K will be placed in the position of the old region 3, whereas before we appended it to the still active region 1, placing it at the location of the old region 2. Before we had 1 free region of 2K, now we only get two free regions of 1K -> fragmentation. The patch addresses these issues by introducing yet another cache descriptor cached_hole_size that contains the largest known hole size below the current free_area_cache. If a new request comes in, its size is compared against the cached_hole_size and if the request can be filled with a hole below free_area_cache the search is started from the base instead. The results look promising: Whereas 2.6.12-rc4 fragments quickly and my (earlier posted) leakme.c test program terminates after 50000+ iterations with 96 distinct and fragmented maps in /proc/self/maps, it performs nicely (as expected) with thread creation: Ingo's test_str02 with 20000 threads requires 0.7s system time. 
Taking out Ingo's patch (un-patch available per request) by basically deleting all mentions of free_area_cache from the kernel and starting the search for new memory always at the respective bases we observe: leakme terminates successfully with 11 distinct, hardly fragmented areas in /proc/self/maps but thread creation is grindingly slow: 30+s(!) system time for Ingo's test_str02 with 20000 threads. Now - drumroll ;-) the appended patch works fine with leakme: it ends with only 7 distinct areas in /proc/self/maps and also thread creation seems sufficiently fast with 0.71s for 20000 threads. Signed-off-by: Wolfgang Wander <wwc@rentec.com> Credit-to: "Richard Purdie" <rpurdie@rpsys.net> Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Acked-by: Ingo Molnar <mingo@elte.hu> (partly) Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--arch/arm/mm/mmap.c10
-rw-r--r--arch/i386/mm/hugetlbpage.c34
-rw-r--r--arch/ppc64/mm/hugetlbpage.c34
-rw-r--r--arch/sh/kernel/sys_sh.c8
-rw-r--r--arch/sparc64/kernel/sys_sparc.c8
-rw-r--r--arch/x86_64/ia32/ia32_aout.c1
-rw-r--r--arch/x86_64/kernel/sys_x86_64.c9
-rw-r--r--fs/binfmt_aout.c1
-rw-r--r--fs/binfmt_elf.c1
-rw-r--r--fs/hugetlbfs/inode.c3
-rw-r--r--include/linux/sched.h11
-rw-r--r--kernel/fork.c2
-rw-r--r--mm/mmap.c53
-rw-r--r--mm/nommu.c2
14 files changed, 147 insertions, 30 deletions
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 32c4b0e35b37..3de7f84b53c2 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -73,7 +73,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
73 (!vma || addr + len <= vma->vm_start)) 73 (!vma || addr + len <= vma->vm_start))
74 return addr; 74 return addr;
75 } 75 }
76 start_addr = addr = mm->free_area_cache; 76 if (len > mm->cached_hole_size) {
77 start_addr = addr = mm->free_area_cache;
78 } else {
79 start_addr = addr = TASK_UNMAPPED_BASE;
80 mm->cached_hole_size = 0;
81 }
77 82
78full_search: 83full_search:
79 if (do_align) 84 if (do_align)
@@ -90,6 +95,7 @@ full_search:
90 */ 95 */
91 if (start_addr != TASK_UNMAPPED_BASE) { 96 if (start_addr != TASK_UNMAPPED_BASE) {
92 start_addr = addr = TASK_UNMAPPED_BASE; 97 start_addr = addr = TASK_UNMAPPED_BASE;
98 mm->cached_hole_size = 0;
93 goto full_search; 99 goto full_search;
94 } 100 }
95 return -ENOMEM; 101 return -ENOMEM;
@@ -101,6 +107,8 @@ full_search:
101 mm->free_area_cache = addr + len; 107 mm->free_area_cache = addr + len;
102 return addr; 108 return addr;
103 } 109 }
110 if (addr + mm->cached_hole_size < vma->vm_start)
111 mm->cached_hole_size = vma->vm_start - addr;
104 addr = vma->vm_end; 112 addr = vma->vm_end;
105 if (do_align) 113 if (do_align)
106 addr = COLOUR_ALIGN(addr, pgoff); 114 addr = COLOUR_ALIGN(addr, pgoff);
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 5aa06001a4bd..3b099f32b948 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -140,7 +140,12 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
140 struct vm_area_struct *vma; 140 struct vm_area_struct *vma;
141 unsigned long start_addr; 141 unsigned long start_addr;
142 142
143 start_addr = mm->free_area_cache; 143 if (len > mm->cached_hole_size) {
144 start_addr = mm->free_area_cache;
145 } else {
146 start_addr = TASK_UNMAPPED_BASE;
147 mm->cached_hole_size = 0;
148 }
144 149
145full_search: 150full_search:
146 addr = ALIGN(start_addr, HPAGE_SIZE); 151 addr = ALIGN(start_addr, HPAGE_SIZE);
@@ -154,6 +159,7 @@ full_search:
154 */ 159 */
155 if (start_addr != TASK_UNMAPPED_BASE) { 160 if (start_addr != TASK_UNMAPPED_BASE) {
156 start_addr = TASK_UNMAPPED_BASE; 161 start_addr = TASK_UNMAPPED_BASE;
162 mm->cached_hole_size = 0;
157 goto full_search; 163 goto full_search;
158 } 164 }
159 return -ENOMEM; 165 return -ENOMEM;
@@ -162,6 +168,8 @@ full_search:
162 mm->free_area_cache = addr + len; 168 mm->free_area_cache = addr + len;
163 return addr; 169 return addr;
164 } 170 }
171 if (addr + mm->cached_hole_size < vma->vm_start)
172 mm->cached_hole_size = vma->vm_start - addr;
165 addr = ALIGN(vma->vm_end, HPAGE_SIZE); 173 addr = ALIGN(vma->vm_end, HPAGE_SIZE);
166 } 174 }
167} 175}
@@ -173,12 +181,17 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
173 struct mm_struct *mm = current->mm; 181 struct mm_struct *mm = current->mm;
174 struct vm_area_struct *vma, *prev_vma; 182 struct vm_area_struct *vma, *prev_vma;
175 unsigned long base = mm->mmap_base, addr = addr0; 183 unsigned long base = mm->mmap_base, addr = addr0;
184 unsigned long largest_hole = mm->cached_hole_size;
176 int first_time = 1; 185 int first_time = 1;
177 186
178 /* don't allow allocations above current base */ 187 /* don't allow allocations above current base */
179 if (mm->free_area_cache > base) 188 if (mm->free_area_cache > base)
180 mm->free_area_cache = base; 189 mm->free_area_cache = base;
181 190
191 if (len <= largest_hole) {
192 largest_hole = 0;
193 mm->free_area_cache = base;
194 }
182try_again: 195try_again:
183 /* make sure it can fit in the remaining address space */ 196 /* make sure it can fit in the remaining address space */
184 if (mm->free_area_cache < len) 197 if (mm->free_area_cache < len)
@@ -199,13 +212,21 @@ try_again:
199 * vma->vm_start, use it: 212 * vma->vm_start, use it:
200 */ 213 */
201 if (addr + len <= vma->vm_start && 214 if (addr + len <= vma->vm_start &&
202 (!prev_vma || (addr >= prev_vma->vm_end))) 215 (!prev_vma || (addr >= prev_vma->vm_end))) {
203 /* remember the address as a hint for next time */ 216 /* remember the address as a hint for next time */
204 return (mm->free_area_cache = addr); 217 mm->cached_hole_size = largest_hole;
205 else 218 return (mm->free_area_cache = addr);
219 } else {
206 /* pull free_area_cache down to the first hole */ 220 /* pull free_area_cache down to the first hole */
207 if (mm->free_area_cache == vma->vm_end) 221 if (mm->free_area_cache == vma->vm_end) {
208 mm->free_area_cache = vma->vm_start; 222 mm->free_area_cache = vma->vm_start;
223 mm->cached_hole_size = largest_hole;
224 }
225 }
226
227 /* remember the largest hole we saw so far */
228 if (addr + largest_hole < vma->vm_start)
229 largest_hole = vma->vm_start - addr;
209 230
210 /* try just below the current vma->vm_start */ 231 /* try just below the current vma->vm_start */
211 addr = (vma->vm_start - len) & HPAGE_MASK; 232 addr = (vma->vm_start - len) & HPAGE_MASK;
@@ -218,6 +239,7 @@ fail:
218 */ 239 */
219 if (first_time) { 240 if (first_time) {
220 mm->free_area_cache = base; 241 mm->free_area_cache = base;
242 largest_hole = 0;
221 first_time = 0; 243 first_time = 0;
222 goto try_again; 244 goto try_again;
223 } 245 }
@@ -228,6 +250,7 @@ fail:
228 * allocations. 250 * allocations.
229 */ 251 */
230 mm->free_area_cache = TASK_UNMAPPED_BASE; 252 mm->free_area_cache = TASK_UNMAPPED_BASE;
253 mm->cached_hole_size = ~0UL;
231 addr = hugetlb_get_unmapped_area_bottomup(file, addr0, 254 addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
232 len, pgoff, flags); 255 len, pgoff, flags);
233 256
@@ -235,6 +258,7 @@ fail:
235 * Restore the topdown base: 258 * Restore the topdown base:
236 */ 259 */
237 mm->free_area_cache = base; 260 mm->free_area_cache = base;
261 mm->cached_hole_size = ~0UL;
238 262
239 return addr; 263 return addr;
240} 264}
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index b4ab766f5980..fdcfe97c75c1 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -292,7 +292,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
292 && !is_hugepage_only_range(mm, addr,len)) 292 && !is_hugepage_only_range(mm, addr,len))
293 return addr; 293 return addr;
294 } 294 }
295 start_addr = addr = mm->free_area_cache; 295 if (len > mm->cached_hole_size) {
296 start_addr = addr = mm->free_area_cache;
297 } else {
298 start_addr = addr = TASK_UNMAPPED_BASE;
299 mm->cached_hole_size = 0;
300 }
296 301
297full_search: 302full_search:
298 vma = find_vma(mm, addr); 303 vma = find_vma(mm, addr);
@@ -316,6 +321,8 @@ full_search:
316 mm->free_area_cache = addr + len; 321 mm->free_area_cache = addr + len;
317 return addr; 322 return addr;
318 } 323 }
324 if (addr + mm->cached_hole_size < vma->vm_start)
325 mm->cached_hole_size = vma->vm_start - addr;
319 addr = vma->vm_end; 326 addr = vma->vm_end;
320 vma = vma->vm_next; 327 vma = vma->vm_next;
321 } 328 }
@@ -323,6 +330,7 @@ full_search:
323 /* Make sure we didn't miss any holes */ 330 /* Make sure we didn't miss any holes */
324 if (start_addr != TASK_UNMAPPED_BASE) { 331 if (start_addr != TASK_UNMAPPED_BASE) {
325 start_addr = addr = TASK_UNMAPPED_BASE; 332 start_addr = addr = TASK_UNMAPPED_BASE;
333 mm->cached_hole_size = 0;
326 goto full_search; 334 goto full_search;
327 } 335 }
328 return -ENOMEM; 336 return -ENOMEM;
@@ -344,6 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
344 struct vm_area_struct *vma, *prev_vma; 352 struct vm_area_struct *vma, *prev_vma;
345 struct mm_struct *mm = current->mm; 353 struct mm_struct *mm = current->mm;
346 unsigned long base = mm->mmap_base, addr = addr0; 354 unsigned long base = mm->mmap_base, addr = addr0;
355 unsigned long largest_hole = mm->cached_hole_size;
347 int first_time = 1; 356 int first_time = 1;
348 357
349 /* requested length too big for entire address space */ 358 /* requested length too big for entire address space */
@@ -364,6 +373,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
364 return addr; 373 return addr;
365 } 374 }
366 375
376 if (len <= largest_hole) {
377 largest_hole = 0;
378 mm->free_area_cache = base;
379 }
367try_again: 380try_again:
368 /* make sure it can fit in the remaining address space */ 381 /* make sure it can fit in the remaining address space */
369 if (mm->free_area_cache < len) 382 if (mm->free_area_cache < len)
@@ -392,13 +405,21 @@ hugepage_recheck:
392 * vma->vm_start, use it: 405 * vma->vm_start, use it:
393 */ 406 */
394 if (addr+len <= vma->vm_start && 407 if (addr+len <= vma->vm_start &&
395 (!prev_vma || (addr >= prev_vma->vm_end))) 408 (!prev_vma || (addr >= prev_vma->vm_end))) {
396 /* remember the address as a hint for next time */ 409 /* remember the address as a hint for next time */
397 return (mm->free_area_cache = addr); 410 mm->cached_hole_size = largest_hole;
398 else 411 return (mm->free_area_cache = addr);
412 } else {
399 /* pull free_area_cache down to the first hole */ 413 /* pull free_area_cache down to the first hole */
400 if (mm->free_area_cache == vma->vm_end) 414 if (mm->free_area_cache == vma->vm_end) {
401 mm->free_area_cache = vma->vm_start; 415 mm->free_area_cache = vma->vm_start;
416 mm->cached_hole_size = largest_hole;
417 }
418 }
419
420 /* remember the largest hole we saw so far */
421 if (addr + largest_hole < vma->vm_start)
422 largest_hole = vma->vm_start - addr;
402 423
403 /* try just below the current vma->vm_start */ 424 /* try just below the current vma->vm_start */
404 addr = vma->vm_start-len; 425 addr = vma->vm_start-len;
@@ -411,6 +432,7 @@ fail:
411 */ 432 */
412 if (first_time) { 433 if (first_time) {
413 mm->free_area_cache = base; 434 mm->free_area_cache = base;
435 largest_hole = 0;
414 first_time = 0; 436 first_time = 0;
415 goto try_again; 437 goto try_again;
416 } 438 }
@@ -421,11 +443,13 @@ fail:
421 * allocations. 443 * allocations.
422 */ 444 */
423 mm->free_area_cache = TASK_UNMAPPED_BASE; 445 mm->free_area_cache = TASK_UNMAPPED_BASE;
446 mm->cached_hole_size = ~0UL;
424 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); 447 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
425 /* 448 /*
426 * Restore the topdown base: 449 * Restore the topdown base:
427 */ 450 */
428 mm->free_area_cache = base; 451 mm->free_area_cache = base;
452 mm->cached_hole_size = ~0UL;
429 453
430 return addr; 454 return addr;
431} 455}
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c
index df5ac294c379..917b2f32f260 100644
--- a/arch/sh/kernel/sys_sh.c
+++ b/arch/sh/kernel/sys_sh.c
@@ -79,6 +79,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
79 (!vma || addr + len <= vma->vm_start)) 79 (!vma || addr + len <= vma->vm_start))
80 return addr; 80 return addr;
81 } 81 }
82 if (len <= mm->cached_hole_size) {
83 mm->cached_hole_size = 0;
84 mm->free_area_cache = TASK_UNMAPPED_BASE;
85 }
82 if (flags & MAP_PRIVATE) 86 if (flags & MAP_PRIVATE)
83 addr = PAGE_ALIGN(mm->free_area_cache); 87 addr = PAGE_ALIGN(mm->free_area_cache);
84 else 88 else
@@ -95,6 +99,7 @@ full_search:
95 */ 99 */
96 if (start_addr != TASK_UNMAPPED_BASE) { 100 if (start_addr != TASK_UNMAPPED_BASE) {
97 start_addr = addr = TASK_UNMAPPED_BASE; 101 start_addr = addr = TASK_UNMAPPED_BASE;
102 mm->cached_hole_size = 0;
98 goto full_search; 103 goto full_search;
99 } 104 }
100 return -ENOMEM; 105 return -ENOMEM;
@@ -106,6 +111,9 @@ full_search:
106 mm->free_area_cache = addr + len; 111 mm->free_area_cache = addr + len;
107 return addr; 112 return addr;
108 } 113 }
114 if (addr + mm->cached_hole_size < vma->vm_start)
115 mm->cached_hole_size = vma->vm_start - addr;
116
109 addr = vma->vm_end; 117 addr = vma->vm_end;
110 if (!(flags & MAP_PRIVATE)) 118 if (!(flags & MAP_PRIVATE))
111 addr = COLOUR_ALIGN(addr); 119 addr = COLOUR_ALIGN(addr);
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
index 0077f02f4b37..5f8c822a2b4a 100644
--- a/arch/sparc64/kernel/sys_sparc.c
+++ b/arch/sparc64/kernel/sys_sparc.c
@@ -84,6 +84,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
84 return addr; 84 return addr;
85 } 85 }
86 86
87 if (len <= mm->cached_hole_size) {
88 mm->cached_hole_size = 0;
89 mm->free_area_cache = TASK_UNMAPPED_BASE;
90 }
87 start_addr = addr = mm->free_area_cache; 91 start_addr = addr = mm->free_area_cache;
88 92
89 task_size -= len; 93 task_size -= len;
@@ -103,6 +107,7 @@ full_search:
103 if (task_size < addr) { 107 if (task_size < addr) {
104 if (start_addr != TASK_UNMAPPED_BASE) { 108 if (start_addr != TASK_UNMAPPED_BASE) {
105 start_addr = addr = TASK_UNMAPPED_BASE; 109 start_addr = addr = TASK_UNMAPPED_BASE;
110 mm->cached_hole_size = 0;
106 goto full_search; 111 goto full_search;
107 } 112 }
108 return -ENOMEM; 113 return -ENOMEM;
@@ -114,6 +119,9 @@ full_search:
114 mm->free_area_cache = addr + len; 119 mm->free_area_cache = addr + len;
115 return addr; 120 return addr;
116 } 121 }
122 if (addr + mm->cached_hole_size < vma->vm_start)
123 mm->cached_hole_size = vma->vm_start - addr;
124
117 addr = vma->vm_end; 125 addr = vma->vm_end;
118 if (do_color_align) 126 if (do_color_align)
119 addr = COLOUR_ALIGN(addr, pgoff); 127 addr = COLOUR_ALIGN(addr, pgoff);
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 1965efc974dc..c12edf5d97f0 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -312,6 +312,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
312 current->mm->brk = ex.a_bss + 312 current->mm->brk = ex.a_bss +
313 (current->mm->start_brk = N_BSSADDR(ex)); 313 (current->mm->start_brk = N_BSSADDR(ex));
314 current->mm->free_area_cache = TASK_UNMAPPED_BASE; 314 current->mm->free_area_cache = TASK_UNMAPPED_BASE;
315 current->mm->cached_hole_size = 0;
315 316
316 set_mm_counter(current->mm, rss, 0); 317 set_mm_counter(current->mm, rss, 0);
317 current->mm->mmap = NULL; 318 current->mm->mmap = NULL;
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c
index d9798dd433fc..cc7821c68851 100644
--- a/arch/x86_64/kernel/sys_x86_64.c
+++ b/arch/x86_64/kernel/sys_x86_64.c
@@ -105,6 +105,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
105 (!vma || addr + len <= vma->vm_start)) 105 (!vma || addr + len <= vma->vm_start))
106 return addr; 106 return addr;
107 } 107 }
108 if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32))
109 && len <= mm->cached_hole_size) {
110 mm->cached_hole_size = 0;
111 mm->free_area_cache = begin;
112 }
108 addr = mm->free_area_cache; 113 addr = mm->free_area_cache;
109 if (addr < begin) 114 if (addr < begin)
110 addr = begin; 115 addr = begin;
@@ -120,6 +125,7 @@ full_search:
120 */ 125 */
121 if (start_addr != begin) { 126 if (start_addr != begin) {
122 start_addr = addr = begin; 127 start_addr = addr = begin;
128 mm->cached_hole_size = 0;
123 goto full_search; 129 goto full_search;
124 } 130 }
125 return -ENOMEM; 131 return -ENOMEM;
@@ -131,6 +137,9 @@ full_search:
131 mm->free_area_cache = addr + len; 137 mm->free_area_cache = addr + len;
132 return addr; 138 return addr;
133 } 139 }
140 if (addr + mm->cached_hole_size < vma->vm_start)
141 mm->cached_hole_size = vma->vm_start - addr;
142
134 addr = vma->vm_end; 143 addr = vma->vm_end;
135 } 144 }
136} 145}
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index 009b8920c1ff..dd9baabaf016 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -316,6 +316,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
316 current->mm->brk = ex.a_bss + 316 current->mm->brk = ex.a_bss +
317 (current->mm->start_brk = N_BSSADDR(ex)); 317 (current->mm->start_brk = N_BSSADDR(ex));
318 current->mm->free_area_cache = current->mm->mmap_base; 318 current->mm->free_area_cache = current->mm->mmap_base;
319 current->mm->cached_hole_size = 0;
319 320
320 set_mm_counter(current->mm, rss, 0); 321 set_mm_counter(current->mm, rss, 0);
321 current->mm->mmap = NULL; 322 current->mm->mmap = NULL;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8f6b6b76179..7976a238f0a3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -775,6 +775,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs)
775 change some of these later */ 775 change some of these later */
776 set_mm_counter(current->mm, rss, 0); 776 set_mm_counter(current->mm, rss, 0);
777 current->mm->free_area_cache = current->mm->mmap_base; 777 current->mm->free_area_cache = current->mm->mmap_base;
778 current->mm->cached_hole_size = 0;
778 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), 779 retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
779 executable_stack); 780 executable_stack);
780 if (retval < 0) { 781 if (retval < 0) {
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2af3338f891b..3a9b6d179cbd 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -122,6 +122,9 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
122 122
123 start_addr = mm->free_area_cache; 123 start_addr = mm->free_area_cache;
124 124
125 if (len <= mm->cached_hole_size)
126 start_addr = TASK_UNMAPPED_BASE;
127
125full_search: 128full_search:
126 addr = ALIGN(start_addr, HPAGE_SIZE); 129 addr = ALIGN(start_addr, HPAGE_SIZE);
127 130
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4dbb109022f3..b58afd97a180 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -201,8 +201,8 @@ extern unsigned long
201arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, 201arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
202 unsigned long len, unsigned long pgoff, 202 unsigned long len, unsigned long pgoff,
203 unsigned long flags); 203 unsigned long flags);
204extern void arch_unmap_area(struct vm_area_struct *area); 204extern void arch_unmap_area(struct mm_struct *, unsigned long);
205extern void arch_unmap_area_topdown(struct vm_area_struct *area); 205extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
206 206
207#define set_mm_counter(mm, member, value) (mm)->_##member = (value) 207#define set_mm_counter(mm, member, value) (mm)->_##member = (value)
208#define get_mm_counter(mm, member) ((mm)->_##member) 208#define get_mm_counter(mm, member) ((mm)->_##member)
@@ -218,9 +218,10 @@ struct mm_struct {
218 unsigned long (*get_unmapped_area) (struct file *filp, 218 unsigned long (*get_unmapped_area) (struct file *filp,
219 unsigned long addr, unsigned long len, 219 unsigned long addr, unsigned long len,
220 unsigned long pgoff, unsigned long flags); 220 unsigned long pgoff, unsigned long flags);
221 void (*unmap_area) (struct vm_area_struct *area); 221 void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
222 unsigned long mmap_base; /* base of mmap area */ 222 unsigned long mmap_base; /* base of mmap area */
223 unsigned long free_area_cache; /* first hole */ 223 unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */
224 unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */
224 pgd_t * pgd; 225 pgd_t * pgd;
225 atomic_t mm_users; /* How many users with user space? */ 226 atomic_t mm_users; /* How many users with user space? */
226 atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ 227 atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */
diff --git a/kernel/fork.c b/kernel/fork.c
index f42a17f88699..876b31cd822d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -194,6 +194,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
194 mm->mmap = NULL; 194 mm->mmap = NULL;
195 mm->mmap_cache = NULL; 195 mm->mmap_cache = NULL;
196 mm->free_area_cache = oldmm->mmap_base; 196 mm->free_area_cache = oldmm->mmap_base;
197 mm->cached_hole_size = ~0UL;
197 mm->map_count = 0; 198 mm->map_count = 0;
198 set_mm_counter(mm, rss, 0); 199 set_mm_counter(mm, rss, 0);
199 set_mm_counter(mm, anon_rss, 0); 200 set_mm_counter(mm, anon_rss, 0);
@@ -322,6 +323,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm)
322 mm->ioctx_list = NULL; 323 mm->ioctx_list = NULL;
323 mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); 324 mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm);
324 mm->free_area_cache = TASK_UNMAPPED_BASE; 325 mm->free_area_cache = TASK_UNMAPPED_BASE;
326 mm->cached_hole_size = ~0UL;
325 327
326 if (likely(!mm_alloc_pgd(mm))) { 328 if (likely(!mm_alloc_pgd(mm))) {
327 mm->def_flags = 0; 329 mm->def_flags = 0;
diff --git a/mm/mmap.c b/mm/mmap.c
index de54acd9942f..9da23c1ef9dc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1175,7 +1175,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
1175 (!vma || addr + len <= vma->vm_start)) 1175 (!vma || addr + len <= vma->vm_start))
1176 return addr; 1176 return addr;
1177 } 1177 }
1178 start_addr = addr = mm->free_area_cache; 1178 if (len > mm->cached_hole_size) {
1179 start_addr = addr = mm->free_area_cache;
1180 } else {
1181 start_addr = addr = TASK_UNMAPPED_BASE;
1182 mm->cached_hole_size = 0;
1183 }
1179 1184
1180full_search: 1185full_search:
1181 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { 1186 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
@@ -1186,7 +1191,9 @@ full_search:
1186 * some holes. 1191 * some holes.
1187 */ 1192 */
1188 if (start_addr != TASK_UNMAPPED_BASE) { 1193 if (start_addr != TASK_UNMAPPED_BASE) {
1189 start_addr = addr = TASK_UNMAPPED_BASE; 1194 addr = TASK_UNMAPPED_BASE;
1195 start_addr = addr;
1196 mm->cached_hole_size = 0;
1190 goto full_search; 1197 goto full_search;
1191 } 1198 }
1192 return -ENOMEM; 1199 return -ENOMEM;
@@ -1198,19 +1205,22 @@ full_search:
1198 mm->free_area_cache = addr + len; 1205 mm->free_area_cache = addr + len;
1199 return addr; 1206 return addr;
1200 } 1207 }
1208 if (addr + mm->cached_hole_size < vma->vm_start)
1209 mm->cached_hole_size = vma->vm_start - addr;
1201 addr = vma->vm_end; 1210 addr = vma->vm_end;
1202 } 1211 }
1203} 1212}
1204#endif 1213#endif
1205 1214
1206void arch_unmap_area(struct vm_area_struct *area) 1215void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1207{ 1216{
1208 /* 1217 /*
1209 * Is this a new hole at the lowest possible address? 1218 * Is this a new hole at the lowest possible address?
1210 */ 1219 */
1211 if (area->vm_start >= TASK_UNMAPPED_BASE && 1220 if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
1212 area->vm_start < area->vm_mm->free_area_cache) 1221 mm->free_area_cache = addr;
1213 area->vm_mm->free_area_cache = area->vm_start; 1222 mm->cached_hole_size = ~0UL;
1223 }
1214} 1224}
1215 1225
1216/* 1226/*
@@ -1240,6 +1250,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1240 return addr; 1250 return addr;
1241 } 1251 }
1242 1252
1253 /* check if free_area_cache is useful for us */
1254 if (len <= mm->cached_hole_size) {
1255 mm->cached_hole_size = 0;
1256 mm->free_area_cache = mm->mmap_base;
1257 }
1258
1243 /* either no address requested or can't fit in requested address hole */ 1259 /* either no address requested or can't fit in requested address hole */
1244 addr = mm->free_area_cache; 1260 addr = mm->free_area_cache;
1245 1261
@@ -1264,6 +1280,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1264 /* remember the address as a hint for next time */ 1280 /* remember the address as a hint for next time */
1265 return (mm->free_area_cache = addr); 1281 return (mm->free_area_cache = addr);
1266 1282
1283 /* remember the largest hole we saw so far */
1284 if (addr + mm->cached_hole_size < vma->vm_start)
1285 mm->cached_hole_size = vma->vm_start - addr;
1286
1267 /* try just below the current vma->vm_start */ 1287 /* try just below the current vma->vm_start */
1268 addr = vma->vm_start-len; 1288 addr = vma->vm_start-len;
1269 } while (len < vma->vm_start); 1289 } while (len < vma->vm_start);
@@ -1274,28 +1294,30 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
1274 * can happen with large stack limits and large mmap() 1294 * can happen with large stack limits and large mmap()
1275 * allocations. 1295 * allocations.
1276 */ 1296 */
1277 mm->free_area_cache = TASK_UNMAPPED_BASE; 1297 mm->cached_hole_size = ~0UL;
1298 mm->free_area_cache = TASK_UNMAPPED_BASE;
1278 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); 1299 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
1279 /* 1300 /*
1280 * Restore the topdown base: 1301 * Restore the topdown base:
1281 */ 1302 */
1282 mm->free_area_cache = mm->mmap_base; 1303 mm->free_area_cache = mm->mmap_base;
1304 mm->cached_hole_size = ~0UL;
1283 1305
1284 return addr; 1306 return addr;
1285} 1307}
1286#endif 1308#endif
1287 1309
1288void arch_unmap_area_topdown(struct vm_area_struct *area) 1310void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
1289{ 1311{
1290 /* 1312 /*
1291 * Is this a new hole at the highest possible address? 1313 * Is this a new hole at the highest possible address?
1292 */ 1314 */
1293 if (area->vm_end > area->vm_mm->free_area_cache) 1315 if (addr > mm->free_area_cache)
1294 area->vm_mm->free_area_cache = area->vm_end; 1316 mm->free_area_cache = addr;
1295 1317
1296 /* dont allow allocations above current base */ 1318 /* dont allow allocations above current base */
1297 if (area->vm_mm->free_area_cache > area->vm_mm->mmap_base) 1319 if (mm->free_area_cache > mm->mmap_base)
1298 area->vm_mm->free_area_cache = area->vm_mm->mmap_base; 1320 mm->free_area_cache = mm->mmap_base;
1299} 1321}
1300 1322
1301unsigned long 1323unsigned long
@@ -1595,7 +1617,6 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
1595 if (area->vm_flags & VM_LOCKED) 1617 if (area->vm_flags & VM_LOCKED)
1596 area->vm_mm->locked_vm -= len >> PAGE_SHIFT; 1618 area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
1597 vm_stat_unaccount(area); 1619 vm_stat_unaccount(area);
1598 area->vm_mm->unmap_area(area);
1599 remove_vm_struct(area); 1620 remove_vm_struct(area);
1600} 1621}
1601 1622
@@ -1649,6 +1670,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1649{ 1670{
1650 struct vm_area_struct **insertion_point; 1671 struct vm_area_struct **insertion_point;
1651 struct vm_area_struct *tail_vma = NULL; 1672 struct vm_area_struct *tail_vma = NULL;
1673 unsigned long addr;
1652 1674
1653 insertion_point = (prev ? &prev->vm_next : &mm->mmap); 1675 insertion_point = (prev ? &prev->vm_next : &mm->mmap);
1654 do { 1676 do {
@@ -1659,6 +1681,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
1659 } while (vma && vma->vm_start < end); 1681 } while (vma && vma->vm_start < end);
1660 *insertion_point = vma; 1682 *insertion_point = vma;
1661 tail_vma->vm_next = NULL; 1683 tail_vma->vm_next = NULL;
1684 if (mm->unmap_area == arch_unmap_area)
1685 addr = prev ? prev->vm_end : mm->mmap_base;
1686 else
1687 addr = vma ? vma->vm_start : mm->mmap_base;
1688 mm->unmap_area(mm, addr);
1662 mm->mmap_cache = NULL; /* Kill the cache. */ 1689 mm->mmap_cache = NULL; /* Kill the cache. */
1663} 1690}
1664 1691
diff --git a/mm/nommu.c b/mm/nommu.c
index c53e9c8f6b4a..ce74452c02d9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1067,7 +1067,7 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
1067 return -ENOMEM; 1067 return -ENOMEM;
1068} 1068}
1069 1069
1070void arch_unmap_area(struct vm_area_struct *area) 1070void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
1071{ 1071{
1072} 1072}
1073 1073