diff options
author | Wolfgang Wander <wwc@rentec.com> | 2005-06-21 20:14:49 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-06-21 21:46:16 -0400 |
commit | 1363c3cd8603a913a27e2995dccbd70d5312d8e6 (patch) | |
tree | 405e7fc1ef44678f3ca0a54c536d0457e6e80f45 | |
parent | e7c8d5c9955a4d2e88e36b640563f5d6d5aba48a (diff) |
[PATCH] Avoiding mmap fragmentation
Ingo recently introduced a great speedup for allocating new mmaps using the
free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and
causes huge performance increases in thread creation.
The downside of this patch is that it does lead to fragmentation in the
mmap-ed areas (visible via /proc/self/maps), such that some applications
that work fine under 2.4 kernels quickly run out of memory on any 2.6
kernel.
The problem is twofold:
1) the free_area_cache is used to continue a search for memory where
the last search ended. Before the change new areas were always
searched from the base address on.
So now new small areas are cluttering holes of all sizes
throughout the whole mmap-able region whereas before small areas
tended to close holes near the base leaving holes far from the base
large and available for larger requests.
2) the free_area_cache also is set to the location of the last
munmap-ed area so in scenarios where we allocate e.g. five regions of
1K each, then free regions 4 2 3 in this order the next request for 1K
will be placed in the position of the old region 3, whereas before we
appended it to the still active region 1, placing it at the location
of the old region 2. Before we had 1 free region of 2K, now we only
get two free regions of 1K -> fragmentation.
The patch addresses these issues by introducing yet another cache descriptor
cached_hole_size that contains the largest known hole size below the
current free_area_cache. If a new request comes in the size is compared
against the cached_hole_size and if the request can be filled with a hole
below free_area_cache the search is started from the base instead.
The results look promising: Whereas 2.6.12-rc4 fragments quickly and my
(earlier posted) leakme.c test program terminates after 50000+ iterations
with 96 distinct and fragmented maps in /proc/self/maps, it performs nicely
(as expected) with thread creation, Ingo's test_str02 with 20000 threads
requires 0.7s system time.
Taking out Ingo's patch (un-patch available per request) by basically
deleting all mentions of free_area_cache from the kernel and starting the
search for new memory always at the respective bases we observe: leakme
terminates successfully with 11 distinct, hardly fragmented areas in
/proc/self/maps but thread creation is grindingly slow: 30+s(!) system
time for Ingo's test_str02 with 20000 threads.
Now - drumroll ;-) the appended patch works fine with leakme: it ends with
only 7 distinct areas in /proc/self/maps and also thread creation seems
sufficiently fast with 0.71s for 20000 threads.
Signed-off-by: Wolfgang Wander <wwc@rentec.com>
Credit-to: "Richard Purdie" <rpurdie@rpsys.net>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu> (partly)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | arch/arm/mm/mmap.c | 10 | ||||
-rw-r--r-- | arch/i386/mm/hugetlbpage.c | 34 | ||||
-rw-r--r-- | arch/ppc64/mm/hugetlbpage.c | 34 | ||||
-rw-r--r-- | arch/sh/kernel/sys_sh.c | 8 | ||||
-rw-r--r-- | arch/sparc64/kernel/sys_sparc.c | 8 | ||||
-rw-r--r-- | arch/x86_64/ia32/ia32_aout.c | 1 | ||||
-rw-r--r-- | arch/x86_64/kernel/sys_x86_64.c | 9 | ||||
-rw-r--r-- | fs/binfmt_aout.c | 1 | ||||
-rw-r--r-- | fs/binfmt_elf.c | 1 | ||||
-rw-r--r-- | fs/hugetlbfs/inode.c | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 11 | ||||
-rw-r--r-- | kernel/fork.c | 2 | ||||
-rw-r--r-- | mm/mmap.c | 53 | ||||
-rw-r--r-- | mm/nommu.c | 2 |
14 files changed, 147 insertions, 30 deletions
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 32c4b0e35b37..3de7f84b53c2 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c | |||
@@ -73,7 +73,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
73 | (!vma || addr + len <= vma->vm_start)) | 73 | (!vma || addr + len <= vma->vm_start)) |
74 | return addr; | 74 | return addr; |
75 | } | 75 | } |
76 | start_addr = addr = mm->free_area_cache; | 76 | if (len > mm->cached_hole_size) { |
77 | start_addr = addr = mm->free_area_cache; | ||
78 | } else { | ||
79 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
80 | mm->cached_hole_size = 0; | ||
81 | } | ||
77 | 82 | ||
78 | full_search: | 83 | full_search: |
79 | if (do_align) | 84 | if (do_align) |
@@ -90,6 +95,7 @@ full_search: | |||
90 | */ | 95 | */ |
91 | if (start_addr != TASK_UNMAPPED_BASE) { | 96 | if (start_addr != TASK_UNMAPPED_BASE) { |
92 | start_addr = addr = TASK_UNMAPPED_BASE; | 97 | start_addr = addr = TASK_UNMAPPED_BASE; |
98 | mm->cached_hole_size = 0; | ||
93 | goto full_search; | 99 | goto full_search; |
94 | } | 100 | } |
95 | return -ENOMEM; | 101 | return -ENOMEM; |
@@ -101,6 +107,8 @@ full_search: | |||
101 | mm->free_area_cache = addr + len; | 107 | mm->free_area_cache = addr + len; |
102 | return addr; | 108 | return addr; |
103 | } | 109 | } |
110 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
111 | mm->cached_hole_size = vma->vm_start - addr; | ||
104 | addr = vma->vm_end; | 112 | addr = vma->vm_end; |
105 | if (do_align) | 113 | if (do_align) |
106 | addr = COLOUR_ALIGN(addr, pgoff); | 114 | addr = COLOUR_ALIGN(addr, pgoff); |
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c index 5aa06001a4bd..3b099f32b948 100644 --- a/arch/i386/mm/hugetlbpage.c +++ b/arch/i386/mm/hugetlbpage.c | |||
@@ -140,7 +140,12 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
140 | struct vm_area_struct *vma; | 140 | struct vm_area_struct *vma; |
141 | unsigned long start_addr; | 141 | unsigned long start_addr; |
142 | 142 | ||
143 | start_addr = mm->free_area_cache; | 143 | if (len > mm->cached_hole_size) { |
144 | start_addr = mm->free_area_cache; | ||
145 | } else { | ||
146 | start_addr = TASK_UNMAPPED_BASE; | ||
147 | mm->cached_hole_size = 0; | ||
148 | } | ||
144 | 149 | ||
145 | full_search: | 150 | full_search: |
146 | addr = ALIGN(start_addr, HPAGE_SIZE); | 151 | addr = ALIGN(start_addr, HPAGE_SIZE); |
@@ -154,6 +159,7 @@ full_search: | |||
154 | */ | 159 | */ |
155 | if (start_addr != TASK_UNMAPPED_BASE) { | 160 | if (start_addr != TASK_UNMAPPED_BASE) { |
156 | start_addr = TASK_UNMAPPED_BASE; | 161 | start_addr = TASK_UNMAPPED_BASE; |
162 | mm->cached_hole_size = 0; | ||
157 | goto full_search; | 163 | goto full_search; |
158 | } | 164 | } |
159 | return -ENOMEM; | 165 | return -ENOMEM; |
@@ -162,6 +168,8 @@ full_search: | |||
162 | mm->free_area_cache = addr + len; | 168 | mm->free_area_cache = addr + len; |
163 | return addr; | 169 | return addr; |
164 | } | 170 | } |
171 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
172 | mm->cached_hole_size = vma->vm_start - addr; | ||
165 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | 173 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); |
166 | } | 174 | } |
167 | } | 175 | } |
@@ -173,12 +181,17 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, | |||
173 | struct mm_struct *mm = current->mm; | 181 | struct mm_struct *mm = current->mm; |
174 | struct vm_area_struct *vma, *prev_vma; | 182 | struct vm_area_struct *vma, *prev_vma; |
175 | unsigned long base = mm->mmap_base, addr = addr0; | 183 | unsigned long base = mm->mmap_base, addr = addr0; |
184 | unsigned long largest_hole = mm->cached_hole_size; | ||
176 | int first_time = 1; | 185 | int first_time = 1; |
177 | 186 | ||
178 | /* don't allow allocations above current base */ | 187 | /* don't allow allocations above current base */ |
179 | if (mm->free_area_cache > base) | 188 | if (mm->free_area_cache > base) |
180 | mm->free_area_cache = base; | 189 | mm->free_area_cache = base; |
181 | 190 | ||
191 | if (len <= largest_hole) { | ||
192 | largest_hole = 0; | ||
193 | mm->free_area_cache = base; | ||
194 | } | ||
182 | try_again: | 195 | try_again: |
183 | /* make sure it can fit in the remaining address space */ | 196 | /* make sure it can fit in the remaining address space */ |
184 | if (mm->free_area_cache < len) | 197 | if (mm->free_area_cache < len) |
@@ -199,13 +212,21 @@ try_again: | |||
199 | * vma->vm_start, use it: | 212 | * vma->vm_start, use it: |
200 | */ | 213 | */ |
201 | if (addr + len <= vma->vm_start && | 214 | if (addr + len <= vma->vm_start && |
202 | (!prev_vma || (addr >= prev_vma->vm_end))) | 215 | (!prev_vma || (addr >= prev_vma->vm_end))) { |
203 | /* remember the address as a hint for next time */ | 216 | /* remember the address as a hint for next time */ |
204 | return (mm->free_area_cache = addr); | 217 | mm->cached_hole_size = largest_hole; |
205 | else | 218 | return (mm->free_area_cache = addr); |
219 | } else { | ||
206 | /* pull free_area_cache down to the first hole */ | 220 | /* pull free_area_cache down to the first hole */ |
207 | if (mm->free_area_cache == vma->vm_end) | 221 | if (mm->free_area_cache == vma->vm_end) { |
208 | mm->free_area_cache = vma->vm_start; | 222 | mm->free_area_cache = vma->vm_start; |
223 | mm->cached_hole_size = largest_hole; | ||
224 | } | ||
225 | } | ||
226 | |||
227 | /* remember the largest hole we saw so far */ | ||
228 | if (addr + largest_hole < vma->vm_start) | ||
229 | largest_hole = vma->vm_start - addr; | ||
209 | 230 | ||
210 | /* try just below the current vma->vm_start */ | 231 | /* try just below the current vma->vm_start */ |
211 | addr = (vma->vm_start - len) & HPAGE_MASK; | 232 | addr = (vma->vm_start - len) & HPAGE_MASK; |
@@ -218,6 +239,7 @@ fail: | |||
218 | */ | 239 | */ |
219 | if (first_time) { | 240 | if (first_time) { |
220 | mm->free_area_cache = base; | 241 | mm->free_area_cache = base; |
242 | largest_hole = 0; | ||
221 | first_time = 0; | 243 | first_time = 0; |
222 | goto try_again; | 244 | goto try_again; |
223 | } | 245 | } |
@@ -228,6 +250,7 @@ fail: | |||
228 | * allocations. | 250 | * allocations. |
229 | */ | 251 | */ |
230 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 252 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
253 | mm->cached_hole_size = ~0UL; | ||
231 | addr = hugetlb_get_unmapped_area_bottomup(file, addr0, | 254 | addr = hugetlb_get_unmapped_area_bottomup(file, addr0, |
232 | len, pgoff, flags); | 255 | len, pgoff, flags); |
233 | 256 | ||
@@ -235,6 +258,7 @@ fail: | |||
235 | * Restore the topdown base: | 258 | * Restore the topdown base: |
236 | */ | 259 | */ |
237 | mm->free_area_cache = base; | 260 | mm->free_area_cache = base; |
261 | mm->cached_hole_size = ~0UL; | ||
238 | 262 | ||
239 | return addr; | 263 | return addr; |
240 | } | 264 | } |
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index b4ab766f5980..fdcfe97c75c1 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c | |||
@@ -292,7 +292,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
292 | && !is_hugepage_only_range(mm, addr,len)) | 292 | && !is_hugepage_only_range(mm, addr,len)) |
293 | return addr; | 293 | return addr; |
294 | } | 294 | } |
295 | start_addr = addr = mm->free_area_cache; | 295 | if (len > mm->cached_hole_size) { |
296 | start_addr = addr = mm->free_area_cache; | ||
297 | } else { | ||
298 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
299 | mm->cached_hole_size = 0; | ||
300 | } | ||
296 | 301 | ||
297 | full_search: | 302 | full_search: |
298 | vma = find_vma(mm, addr); | 303 | vma = find_vma(mm, addr); |
@@ -316,6 +321,8 @@ full_search: | |||
316 | mm->free_area_cache = addr + len; | 321 | mm->free_area_cache = addr + len; |
317 | return addr; | 322 | return addr; |
318 | } | 323 | } |
324 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
325 | mm->cached_hole_size = vma->vm_start - addr; | ||
319 | addr = vma->vm_end; | 326 | addr = vma->vm_end; |
320 | vma = vma->vm_next; | 327 | vma = vma->vm_next; |
321 | } | 328 | } |
@@ -323,6 +330,7 @@ full_search: | |||
323 | /* Make sure we didn't miss any holes */ | 330 | /* Make sure we didn't miss any holes */ |
324 | if (start_addr != TASK_UNMAPPED_BASE) { | 331 | if (start_addr != TASK_UNMAPPED_BASE) { |
325 | start_addr = addr = TASK_UNMAPPED_BASE; | 332 | start_addr = addr = TASK_UNMAPPED_BASE; |
333 | mm->cached_hole_size = 0; | ||
326 | goto full_search; | 334 | goto full_search; |
327 | } | 335 | } |
328 | return -ENOMEM; | 336 | return -ENOMEM; |
@@ -344,6 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
344 | struct vm_area_struct *vma, *prev_vma; | 352 | struct vm_area_struct *vma, *prev_vma; |
345 | struct mm_struct *mm = current->mm; | 353 | struct mm_struct *mm = current->mm; |
346 | unsigned long base = mm->mmap_base, addr = addr0; | 354 | unsigned long base = mm->mmap_base, addr = addr0; |
355 | unsigned long largest_hole = mm->cached_hole_size; | ||
347 | int first_time = 1; | 356 | int first_time = 1; |
348 | 357 | ||
349 | /* requested length too big for entire address space */ | 358 | /* requested length too big for entire address space */ |
@@ -364,6 +373,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
364 | return addr; | 373 | return addr; |
365 | } | 374 | } |
366 | 375 | ||
376 | if (len <= largest_hole) { | ||
377 | largest_hole = 0; | ||
378 | mm->free_area_cache = base; | ||
379 | } | ||
367 | try_again: | 380 | try_again: |
368 | /* make sure it can fit in the remaining address space */ | 381 | /* make sure it can fit in the remaining address space */ |
369 | if (mm->free_area_cache < len) | 382 | if (mm->free_area_cache < len) |
@@ -392,13 +405,21 @@ hugepage_recheck: | |||
392 | * vma->vm_start, use it: | 405 | * vma->vm_start, use it: |
393 | */ | 406 | */ |
394 | if (addr+len <= vma->vm_start && | 407 | if (addr+len <= vma->vm_start && |
395 | (!prev_vma || (addr >= prev_vma->vm_end))) | 408 | (!prev_vma || (addr >= prev_vma->vm_end))) { |
396 | /* remember the address as a hint for next time */ | 409 | /* remember the address as a hint for next time */ |
397 | return (mm->free_area_cache = addr); | 410 | mm->cached_hole_size = largest_hole; |
398 | else | 411 | return (mm->free_area_cache = addr); |
412 | } else { | ||
399 | /* pull free_area_cache down to the first hole */ | 413 | /* pull free_area_cache down to the first hole */ |
400 | if (mm->free_area_cache == vma->vm_end) | 414 | if (mm->free_area_cache == vma->vm_end) { |
401 | mm->free_area_cache = vma->vm_start; | 415 | mm->free_area_cache = vma->vm_start; |
416 | mm->cached_hole_size = largest_hole; | ||
417 | } | ||
418 | } | ||
419 | |||
420 | /* remember the largest hole we saw so far */ | ||
421 | if (addr + largest_hole < vma->vm_start) | ||
422 | largest_hole = vma->vm_start - addr; | ||
402 | 423 | ||
403 | /* try just below the current vma->vm_start */ | 424 | /* try just below the current vma->vm_start */ |
404 | addr = vma->vm_start-len; | 425 | addr = vma->vm_start-len; |
@@ -411,6 +432,7 @@ fail: | |||
411 | */ | 432 | */ |
412 | if (first_time) { | 433 | if (first_time) { |
413 | mm->free_area_cache = base; | 434 | mm->free_area_cache = base; |
435 | largest_hole = 0; | ||
414 | first_time = 0; | 436 | first_time = 0; |
415 | goto try_again; | 437 | goto try_again; |
416 | } | 438 | } |
@@ -421,11 +443,13 @@ fail: | |||
421 | * allocations. | 443 | * allocations. |
422 | */ | 444 | */ |
423 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 445 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
446 | mm->cached_hole_size = ~0UL; | ||
424 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); | 447 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); |
425 | /* | 448 | /* |
426 | * Restore the topdown base: | 449 | * Restore the topdown base: |
427 | */ | 450 | */ |
428 | mm->free_area_cache = base; | 451 | mm->free_area_cache = base; |
452 | mm->cached_hole_size = ~0UL; | ||
429 | 453 | ||
430 | return addr; | 454 | return addr; |
431 | } | 455 | } |
diff --git a/arch/sh/kernel/sys_sh.c b/arch/sh/kernel/sys_sh.c index df5ac294c379..917b2f32f260 100644 --- a/arch/sh/kernel/sys_sh.c +++ b/arch/sh/kernel/sys_sh.c | |||
@@ -79,6 +79,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
79 | (!vma || addr + len <= vma->vm_start)) | 79 | (!vma || addr + len <= vma->vm_start)) |
80 | return addr; | 80 | return addr; |
81 | } | 81 | } |
82 | if (len <= mm->cached_hole_size) { | ||
83 | mm->cached_hole_size = 0; | ||
84 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
85 | } | ||
82 | if (flags & MAP_PRIVATE) | 86 | if (flags & MAP_PRIVATE) |
83 | addr = PAGE_ALIGN(mm->free_area_cache); | 87 | addr = PAGE_ALIGN(mm->free_area_cache); |
84 | else | 88 | else |
@@ -95,6 +99,7 @@ full_search: | |||
95 | */ | 99 | */ |
96 | if (start_addr != TASK_UNMAPPED_BASE) { | 100 | if (start_addr != TASK_UNMAPPED_BASE) { |
97 | start_addr = addr = TASK_UNMAPPED_BASE; | 101 | start_addr = addr = TASK_UNMAPPED_BASE; |
102 | mm->cached_hole_size = 0; | ||
98 | goto full_search; | 103 | goto full_search; |
99 | } | 104 | } |
100 | return -ENOMEM; | 105 | return -ENOMEM; |
@@ -106,6 +111,9 @@ full_search: | |||
106 | mm->free_area_cache = addr + len; | 111 | mm->free_area_cache = addr + len; |
107 | return addr; | 112 | return addr; |
108 | } | 113 | } |
114 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
115 | mm->cached_hole_size = vma->vm_start - addr; | ||
116 | |||
109 | addr = vma->vm_end; | 117 | addr = vma->vm_end; |
110 | if (!(flags & MAP_PRIVATE)) | 118 | if (!(flags & MAP_PRIVATE)) |
111 | addr = COLOUR_ALIGN(addr); | 119 | addr = COLOUR_ALIGN(addr); |
diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c index 0077f02f4b37..5f8c822a2b4a 100644 --- a/arch/sparc64/kernel/sys_sparc.c +++ b/arch/sparc64/kernel/sys_sparc.c | |||
@@ -84,6 +84,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi | |||
84 | return addr; | 84 | return addr; |
85 | } | 85 | } |
86 | 86 | ||
87 | if (len <= mm->cached_hole_size) { | ||
88 | mm->cached_hole_size = 0; | ||
89 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
90 | } | ||
87 | start_addr = addr = mm->free_area_cache; | 91 | start_addr = addr = mm->free_area_cache; |
88 | 92 | ||
89 | task_size -= len; | 93 | task_size -= len; |
@@ -103,6 +107,7 @@ full_search: | |||
103 | if (task_size < addr) { | 107 | if (task_size < addr) { |
104 | if (start_addr != TASK_UNMAPPED_BASE) { | 108 | if (start_addr != TASK_UNMAPPED_BASE) { |
105 | start_addr = addr = TASK_UNMAPPED_BASE; | 109 | start_addr = addr = TASK_UNMAPPED_BASE; |
110 | mm->cached_hole_size = 0; | ||
106 | goto full_search; | 111 | goto full_search; |
107 | } | 112 | } |
108 | return -ENOMEM; | 113 | return -ENOMEM; |
@@ -114,6 +119,9 @@ full_search: | |||
114 | mm->free_area_cache = addr + len; | 119 | mm->free_area_cache = addr + len; |
115 | return addr; | 120 | return addr; |
116 | } | 121 | } |
122 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
123 | mm->cached_hole_size = vma->vm_start - addr; | ||
124 | |||
117 | addr = vma->vm_end; | 125 | addr = vma->vm_end; |
118 | if (do_color_align) | 126 | if (do_color_align) |
119 | addr = COLOUR_ALIGN(addr, pgoff); | 127 | addr = COLOUR_ALIGN(addr, pgoff); |
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index 1965efc974dc..c12edf5d97f0 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c | |||
@@ -312,6 +312,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
312 | current->mm->brk = ex.a_bss + | 312 | current->mm->brk = ex.a_bss + |
313 | (current->mm->start_brk = N_BSSADDR(ex)); | 313 | (current->mm->start_brk = N_BSSADDR(ex)); |
314 | current->mm->free_area_cache = TASK_UNMAPPED_BASE; | 314 | current->mm->free_area_cache = TASK_UNMAPPED_BASE; |
315 | current->mm->cached_hole_size = 0; | ||
315 | 316 | ||
316 | set_mm_counter(current->mm, rss, 0); | 317 | set_mm_counter(current->mm, rss, 0); |
317 | current->mm->mmap = NULL; | 318 | current->mm->mmap = NULL; |
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index d9798dd433fc..cc7821c68851 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c | |||
@@ -105,6 +105,11 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
105 | (!vma || addr + len <= vma->vm_start)) | 105 | (!vma || addr + len <= vma->vm_start)) |
106 | return addr; | 106 | return addr; |
107 | } | 107 | } |
108 | if (((flags & MAP_32BIT) || test_thread_flag(TIF_IA32)) | ||
109 | && len <= mm->cached_hole_size) { | ||
110 | mm->cached_hole_size = 0; | ||
111 | mm->free_area_cache = begin; | ||
112 | } | ||
108 | addr = mm->free_area_cache; | 113 | addr = mm->free_area_cache; |
109 | if (addr < begin) | 114 | if (addr < begin) |
110 | addr = begin; | 115 | addr = begin; |
@@ -120,6 +125,7 @@ full_search: | |||
120 | */ | 125 | */ |
121 | if (start_addr != begin) { | 126 | if (start_addr != begin) { |
122 | start_addr = addr = begin; | 127 | start_addr = addr = begin; |
128 | mm->cached_hole_size = 0; | ||
123 | goto full_search; | 129 | goto full_search; |
124 | } | 130 | } |
125 | return -ENOMEM; | 131 | return -ENOMEM; |
@@ -131,6 +137,9 @@ full_search: | |||
131 | mm->free_area_cache = addr + len; | 137 | mm->free_area_cache = addr + len; |
132 | return addr; | 138 | return addr; |
133 | } | 139 | } |
140 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
141 | mm->cached_hole_size = vma->vm_start - addr; | ||
142 | |||
134 | addr = vma->vm_end; | 143 | addr = vma->vm_end; |
135 | } | 144 | } |
136 | } | 145 | } |
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 009b8920c1ff..dd9baabaf016 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -316,6 +316,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
316 | current->mm->brk = ex.a_bss + | 316 | current->mm->brk = ex.a_bss + |
317 | (current->mm->start_brk = N_BSSADDR(ex)); | 317 | (current->mm->start_brk = N_BSSADDR(ex)); |
318 | current->mm->free_area_cache = current->mm->mmap_base; | 318 | current->mm->free_area_cache = current->mm->mmap_base; |
319 | current->mm->cached_hole_size = 0; | ||
319 | 320 | ||
320 | set_mm_counter(current->mm, rss, 0); | 321 | set_mm_counter(current->mm, rss, 0); |
321 | current->mm->mmap = NULL; | 322 | current->mm->mmap = NULL; |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f8f6b6b76179..7976a238f0a3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -775,6 +775,7 @@ static int load_elf_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
775 | change some of these later */ | 775 | change some of these later */ |
776 | set_mm_counter(current->mm, rss, 0); | 776 | set_mm_counter(current->mm, rss, 0); |
777 | current->mm->free_area_cache = current->mm->mmap_base; | 777 | current->mm->free_area_cache = current->mm->mmap_base; |
778 | current->mm->cached_hole_size = 0; | ||
778 | retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), | 779 | retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP), |
779 | executable_stack); | 780 | executable_stack); |
780 | if (retval < 0) { | 781 | if (retval < 0) { |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 2af3338f891b..3a9b6d179cbd 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -122,6 +122,9 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
122 | 122 | ||
123 | start_addr = mm->free_area_cache; | 123 | start_addr = mm->free_area_cache; |
124 | 124 | ||
125 | if (len <= mm->cached_hole_size) | ||
126 | start_addr = TASK_UNMAPPED_BASE; | ||
127 | |||
125 | full_search: | 128 | full_search: |
126 | addr = ALIGN(start_addr, HPAGE_SIZE); | 129 | addr = ALIGN(start_addr, HPAGE_SIZE); |
127 | 130 | ||
diff --git a/include/linux/sched.h b/include/linux/sched.h index 4dbb109022f3..b58afd97a180 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h | |||
@@ -201,8 +201,8 @@ extern unsigned long | |||
201 | arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, | 201 | arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, |
202 | unsigned long len, unsigned long pgoff, | 202 | unsigned long len, unsigned long pgoff, |
203 | unsigned long flags); | 203 | unsigned long flags); |
204 | extern void arch_unmap_area(struct vm_area_struct *area); | 204 | extern void arch_unmap_area(struct mm_struct *, unsigned long); |
205 | extern void arch_unmap_area_topdown(struct vm_area_struct *area); | 205 | extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); |
206 | 206 | ||
207 | #define set_mm_counter(mm, member, value) (mm)->_##member = (value) | 207 | #define set_mm_counter(mm, member, value) (mm)->_##member = (value) |
208 | #define get_mm_counter(mm, member) ((mm)->_##member) | 208 | #define get_mm_counter(mm, member) ((mm)->_##member) |
@@ -218,9 +218,10 @@ struct mm_struct { | |||
218 | unsigned long (*get_unmapped_area) (struct file *filp, | 218 | unsigned long (*get_unmapped_area) (struct file *filp, |
219 | unsigned long addr, unsigned long len, | 219 | unsigned long addr, unsigned long len, |
220 | unsigned long pgoff, unsigned long flags); | 220 | unsigned long pgoff, unsigned long flags); |
221 | void (*unmap_area) (struct vm_area_struct *area); | 221 | void (*unmap_area) (struct mm_struct *mm, unsigned long addr); |
222 | unsigned long mmap_base; /* base of mmap area */ | 222 | unsigned long mmap_base; /* base of mmap area */ |
223 | unsigned long free_area_cache; /* first hole */ | 223 | unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ |
224 | unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */ | ||
224 | pgd_t * pgd; | 225 | pgd_t * pgd; |
225 | atomic_t mm_users; /* How many users with user space? */ | 226 | atomic_t mm_users; /* How many users with user space? */ |
226 | atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ | 227 | atomic_t mm_count; /* How many references to "struct mm_struct" (users count as 1) */ |
diff --git a/kernel/fork.c b/kernel/fork.c index f42a17f88699..876b31cd822d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -194,6 +194,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm) | |||
194 | mm->mmap = NULL; | 194 | mm->mmap = NULL; |
195 | mm->mmap_cache = NULL; | 195 | mm->mmap_cache = NULL; |
196 | mm->free_area_cache = oldmm->mmap_base; | 196 | mm->free_area_cache = oldmm->mmap_base; |
197 | mm->cached_hole_size = ~0UL; | ||
197 | mm->map_count = 0; | 198 | mm->map_count = 0; |
198 | set_mm_counter(mm, rss, 0); | 199 | set_mm_counter(mm, rss, 0); |
199 | set_mm_counter(mm, anon_rss, 0); | 200 | set_mm_counter(mm, anon_rss, 0); |
@@ -322,6 +323,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm) | |||
322 | mm->ioctx_list = NULL; | 323 | mm->ioctx_list = NULL; |
323 | mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); | 324 | mm->default_kioctx = (struct kioctx)INIT_KIOCTX(mm->default_kioctx, *mm); |
324 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 325 | mm->free_area_cache = TASK_UNMAPPED_BASE; |
326 | mm->cached_hole_size = ~0UL; | ||
325 | 327 | ||
326 | if (likely(!mm_alloc_pgd(mm))) { | 328 | if (likely(!mm_alloc_pgd(mm))) { |
327 | mm->def_flags = 0; | 329 | mm->def_flags = 0; |
@@ -1175,7 +1175,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, | |||
1175 | (!vma || addr + len <= vma->vm_start)) | 1175 | (!vma || addr + len <= vma->vm_start)) |
1176 | return addr; | 1176 | return addr; |
1177 | } | 1177 | } |
1178 | start_addr = addr = mm->free_area_cache; | 1178 | if (len > mm->cached_hole_size) { |
1179 | start_addr = addr = mm->free_area_cache; | ||
1180 | } else { | ||
1181 | start_addr = addr = TASK_UNMAPPED_BASE; | ||
1182 | mm->cached_hole_size = 0; | ||
1183 | } | ||
1179 | 1184 | ||
1180 | full_search: | 1185 | full_search: |
1181 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | 1186 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { |
@@ -1186,7 +1191,9 @@ full_search: | |||
1186 | * some holes. | 1191 | * some holes. |
1187 | */ | 1192 | */ |
1188 | if (start_addr != TASK_UNMAPPED_BASE) { | 1193 | if (start_addr != TASK_UNMAPPED_BASE) { |
1189 | start_addr = addr = TASK_UNMAPPED_BASE; | 1194 | addr = TASK_UNMAPPED_BASE; |
1195 | start_addr = addr; | ||
1196 | mm->cached_hole_size = 0; | ||
1190 | goto full_search; | 1197 | goto full_search; |
1191 | } | 1198 | } |
1192 | return -ENOMEM; | 1199 | return -ENOMEM; |
@@ -1198,19 +1205,22 @@ full_search: | |||
1198 | mm->free_area_cache = addr + len; | 1205 | mm->free_area_cache = addr + len; |
1199 | return addr; | 1206 | return addr; |
1200 | } | 1207 | } |
1208 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
1209 | mm->cached_hole_size = vma->vm_start - addr; | ||
1201 | addr = vma->vm_end; | 1210 | addr = vma->vm_end; |
1202 | } | 1211 | } |
1203 | } | 1212 | } |
1204 | #endif | 1213 | #endif |
1205 | 1214 | ||
1206 | void arch_unmap_area(struct vm_area_struct *area) | 1215 | void arch_unmap_area(struct mm_struct *mm, unsigned long addr) |
1207 | { | 1216 | { |
1208 | /* | 1217 | /* |
1209 | * Is this a new hole at the lowest possible address? | 1218 | * Is this a new hole at the lowest possible address? |
1210 | */ | 1219 | */ |
1211 | if (area->vm_start >= TASK_UNMAPPED_BASE && | 1220 | if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) { |
1212 | area->vm_start < area->vm_mm->free_area_cache) | 1221 | mm->free_area_cache = addr; |
1213 | area->vm_mm->free_area_cache = area->vm_start; | 1222 | mm->cached_hole_size = ~0UL; |
1223 | } | ||
1214 | } | 1224 | } |
1215 | 1225 | ||
1216 | /* | 1226 | /* |
@@ -1240,6 +1250,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
1240 | return addr; | 1250 | return addr; |
1241 | } | 1251 | } |
1242 | 1252 | ||
1253 | /* check if free_area_cache is useful for us */ | ||
1254 | if (len <= mm->cached_hole_size) { | ||
1255 | mm->cached_hole_size = 0; | ||
1256 | mm->free_area_cache = mm->mmap_base; | ||
1257 | } | ||
1258 | |||
1243 | /* either no address requested or can't fit in requested address hole */ | 1259 | /* either no address requested or can't fit in requested address hole */ |
1244 | addr = mm->free_area_cache; | 1260 | addr = mm->free_area_cache; |
1245 | 1261 | ||
@@ -1264,6 +1280,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
1264 | /* remember the address as a hint for next time */ | 1280 | /* remember the address as a hint for next time */ |
1265 | return (mm->free_area_cache = addr); | 1281 | return (mm->free_area_cache = addr); |
1266 | 1282 | ||
1283 | /* remember the largest hole we saw so far */ | ||
1284 | if (addr + mm->cached_hole_size < vma->vm_start) | ||
1285 | mm->cached_hole_size = vma->vm_start - addr; | ||
1286 | |||
1267 | /* try just below the current vma->vm_start */ | 1287 | /* try just below the current vma->vm_start */ |
1268 | addr = vma->vm_start-len; | 1288 | addr = vma->vm_start-len; |
1269 | } while (len < vma->vm_start); | 1289 | } while (len < vma->vm_start); |
@@ -1274,28 +1294,30 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, | |||
1274 | * can happen with large stack limits and large mmap() | 1294 | * can happen with large stack limits and large mmap() |
1275 | * allocations. | 1295 | * allocations. |
1276 | */ | 1296 | */ |
1277 | mm->free_area_cache = TASK_UNMAPPED_BASE; | 1297 | mm->cached_hole_size = ~0UL; |
1298 | mm->free_area_cache = TASK_UNMAPPED_BASE; | ||
1278 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); | 1299 | addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); |
1279 | /* | 1300 | /* |
1280 | * Restore the topdown base: | 1301 | * Restore the topdown base: |
1281 | */ | 1302 | */ |
1282 | mm->free_area_cache = mm->mmap_base; | 1303 | mm->free_area_cache = mm->mmap_base; |
1304 | mm->cached_hole_size = ~0UL; | ||
1283 | 1305 | ||
1284 | return addr; | 1306 | return addr; |
1285 | } | 1307 | } |
1286 | #endif | 1308 | #endif |
1287 | 1309 | ||
1288 | void arch_unmap_area_topdown(struct vm_area_struct *area) | 1310 | void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr) |
1289 | { | 1311 | { |
1290 | /* | 1312 | /* |
1291 | * Is this a new hole at the highest possible address? | 1313 | * Is this a new hole at the highest possible address? |
1292 | */ | 1314 | */ |
1293 | if (area->vm_end > area->vm_mm->free_area_cache) | 1315 | if (addr > mm->free_area_cache) |
1294 | area->vm_mm->free_area_cache = area->vm_end; | 1316 | mm->free_area_cache = addr; |
1295 | 1317 | ||
1296 | /* dont allow allocations above current base */ | 1318 | /* dont allow allocations above current base */ |
1297 | if (area->vm_mm->free_area_cache > area->vm_mm->mmap_base) | 1319 | if (mm->free_area_cache > mm->mmap_base) |
1298 | area->vm_mm->free_area_cache = area->vm_mm->mmap_base; | 1320 | mm->free_area_cache = mm->mmap_base; |
1299 | } | 1321 | } |
1300 | 1322 | ||
1301 | unsigned long | 1323 | unsigned long |
@@ -1595,7 +1617,6 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area) | |||
1595 | if (area->vm_flags & VM_LOCKED) | 1617 | if (area->vm_flags & VM_LOCKED) |
1596 | area->vm_mm->locked_vm -= len >> PAGE_SHIFT; | 1618 | area->vm_mm->locked_vm -= len >> PAGE_SHIFT; |
1597 | vm_stat_unaccount(area); | 1619 | vm_stat_unaccount(area); |
1598 | area->vm_mm->unmap_area(area); | ||
1599 | remove_vm_struct(area); | 1620 | remove_vm_struct(area); |
1600 | } | 1621 | } |
1601 | 1622 | ||
@@ -1649,6 +1670,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1649 | { | 1670 | { |
1650 | struct vm_area_struct **insertion_point; | 1671 | struct vm_area_struct **insertion_point; |
1651 | struct vm_area_struct *tail_vma = NULL; | 1672 | struct vm_area_struct *tail_vma = NULL; |
1673 | unsigned long addr; | ||
1652 | 1674 | ||
1653 | insertion_point = (prev ? &prev->vm_next : &mm->mmap); | 1675 | insertion_point = (prev ? &prev->vm_next : &mm->mmap); |
1654 | do { | 1676 | do { |
@@ -1659,6 +1681,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma, | |||
1659 | } while (vma && vma->vm_start < end); | 1681 | } while (vma && vma->vm_start < end); |
1660 | *insertion_point = vma; | 1682 | *insertion_point = vma; |
1661 | tail_vma->vm_next = NULL; | 1683 | tail_vma->vm_next = NULL; |
1684 | if (mm->unmap_area == arch_unmap_area) | ||
1685 | addr = prev ? prev->vm_end : mm->mmap_base; | ||
1686 | else | ||
1687 | addr = vma ? vma->vm_start : mm->mmap_base; | ||
1688 | mm->unmap_area(mm, addr); | ||
1662 | mm->mmap_cache = NULL; /* Kill the cache. */ | 1689 | mm->mmap_cache = NULL; /* Kill the cache. */ |
1663 | } | 1690 | } |
1664 | 1691 | ||
diff --git a/mm/nommu.c b/mm/nommu.c index c53e9c8f6b4a..ce74452c02d9 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1067,7 +1067,7 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr, | |||
1067 | return -ENOMEM; | 1067 | return -ENOMEM; |
1068 | } | 1068 | } |
1069 | 1069 | ||
1070 | void arch_unmap_area(struct vm_area_struct *area) | 1070 | void arch_unmap_area(struct mm_struct *mm, unsigned long addr) |
1071 | { | 1071 | { |
1072 | } | 1072 | } |
1073 | 1073 | ||