aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ppc64
diff options
context:
space:
mode:
authorWolfgang Wander <wwc@rentec.com>2005-06-21 20:14:49 -0400
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-21 21:46:16 -0400
commit1363c3cd8603a913a27e2995dccbd70d5312d8e6 (patch)
tree405e7fc1ef44678f3ca0a54c536d0457e6e80f45 /arch/ppc64
parente7c8d5c9955a4d2e88e36b640563f5d6d5aba48a (diff)
[PATCH] Avoiding mmap fragmentation
Ingo recently introduced a great speedup for allocating new mmaps using the free_area_cache pointer which boosts the specweb SSL benchmark by 4-5% and causes huge performance increases in thread creation. The downside of this patch is that it does lead to fragmentation in the mmap-ed areas (visible via /proc/self/maps), such that some applications that work fine under 2.4 kernels quickly run out of memory on any 2.6 kernel. The problem is twofold: 1) the free_area_cache is used to continue a search for memory where the last search ended. Before the change new areas were always searched from the base address on. So now new small areas are cluttering holes of all sizes throughout the whole mmap-able region, whereas before small areas tended to fill holes near the base, leaving holes far from the base large and available for larger requests. 2) the free_area_cache is also set to the location of the last munmap-ed area, so in scenarios where we allocate e.g. five regions of 1K each, then free regions 4, 2, 3 in this order, the next request for 1K will be placed in the position of the old region 3, whereas before we appended it to the still active region 1, placing it at the location of the old region 2. Before we had 1 free region of 2K, now we only get two free regions of 1K -> fragmentation. The patch addresses these issues by introducing yet another cache descriptor, cached_hole_size, that contains the largest known hole size below the current free_area_cache. If a new request comes in, the size is compared against the cached_hole_size, and if the request can be filled with a hole below free_area_cache the search is started from the base instead. The results look promising: Whereas 2.6.12-rc4 fragments quickly and my (earlier posted) leakme.c test program terminates after 50000+ iterations with 96 distinct and fragmented maps in /proc/self/maps, it performs nicely (as expected) with thread creation: Ingo's test_str02 with 20000 threads requires 0.7s system time. 
Taking out Ingo's patch (un-patch available on request) by basically deleting all mentions of free_area_cache from the kernel and starting the search for new memory always at the respective bases, we observe: leakme terminates successfully with 11 distinct, hardly fragmented areas in /proc/self/maps, but thread creation is grindingly slow: 30+s(!) system time for Ingo's test_str02 with 20000 threads. Now - drumroll ;-) the appended patch works fine with leakme: it ends with only 7 distinct areas in /proc/self/maps, and thread creation also seems sufficiently fast with 0.71s for 20000 threads. Signed-off-by: Wolfgang Wander <wwc@rentec.com> Credit-to: "Richard Purdie" <rpurdie@rpsys.net> Signed-off-by: Ken Chen <kenneth.w.chen@intel.com> Acked-by: Ingo Molnar <mingo@elte.hu> (partly) Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'arch/ppc64')
-rw-r--r--arch/ppc64/mm/hugetlbpage.c34
1 files changed, 29 insertions, 5 deletions
diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c
index b4ab766f5980..fdcfe97c75c1 100644
--- a/arch/ppc64/mm/hugetlbpage.c
+++ b/arch/ppc64/mm/hugetlbpage.c
@@ -292,7 +292,12 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
292 && !is_hugepage_only_range(mm, addr,len)) 292 && !is_hugepage_only_range(mm, addr,len))
293 return addr; 293 return addr;
294 } 294 }
295 start_addr = addr = mm->free_area_cache; 295 if (len > mm->cached_hole_size) {
296 start_addr = addr = mm->free_area_cache;
297 } else {
298 start_addr = addr = TASK_UNMAPPED_BASE;
299 mm->cached_hole_size = 0;
300 }
296 301
297full_search: 302full_search:
298 vma = find_vma(mm, addr); 303 vma = find_vma(mm, addr);
@@ -316,6 +321,8 @@ full_search:
316 mm->free_area_cache = addr + len; 321 mm->free_area_cache = addr + len;
317 return addr; 322 return addr;
318 } 323 }
324 if (addr + mm->cached_hole_size < vma->vm_start)
325 mm->cached_hole_size = vma->vm_start - addr;
319 addr = vma->vm_end; 326 addr = vma->vm_end;
320 vma = vma->vm_next; 327 vma = vma->vm_next;
321 } 328 }
@@ -323,6 +330,7 @@ full_search:
323 /* Make sure we didn't miss any holes */ 330 /* Make sure we didn't miss any holes */
324 if (start_addr != TASK_UNMAPPED_BASE) { 331 if (start_addr != TASK_UNMAPPED_BASE) {
325 start_addr = addr = TASK_UNMAPPED_BASE; 332 start_addr = addr = TASK_UNMAPPED_BASE;
333 mm->cached_hole_size = 0;
326 goto full_search; 334 goto full_search;
327 } 335 }
328 return -ENOMEM; 336 return -ENOMEM;
@@ -344,6 +352,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
344 struct vm_area_struct *vma, *prev_vma; 352 struct vm_area_struct *vma, *prev_vma;
345 struct mm_struct *mm = current->mm; 353 struct mm_struct *mm = current->mm;
346 unsigned long base = mm->mmap_base, addr = addr0; 354 unsigned long base = mm->mmap_base, addr = addr0;
355 unsigned long largest_hole = mm->cached_hole_size;
347 int first_time = 1; 356 int first_time = 1;
348 357
349 /* requested length too big for entire address space */ 358 /* requested length too big for entire address space */
@@ -364,6 +373,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
364 return addr; 373 return addr;
365 } 374 }
366 375
376 if (len <= largest_hole) {
377 largest_hole = 0;
378 mm->free_area_cache = base;
379 }
367try_again: 380try_again:
368 /* make sure it can fit in the remaining address space */ 381 /* make sure it can fit in the remaining address space */
369 if (mm->free_area_cache < len) 382 if (mm->free_area_cache < len)
@@ -392,13 +405,21 @@ hugepage_recheck:
392 * vma->vm_start, use it: 405 * vma->vm_start, use it:
393 */ 406 */
394 if (addr+len <= vma->vm_start && 407 if (addr+len <= vma->vm_start &&
395 (!prev_vma || (addr >= prev_vma->vm_end))) 408 (!prev_vma || (addr >= prev_vma->vm_end))) {
396 /* remember the address as a hint for next time */ 409 /* remember the address as a hint for next time */
397 return (mm->free_area_cache = addr); 410 mm->cached_hole_size = largest_hole;
398 else 411 return (mm->free_area_cache = addr);
412 } else {
399 /* pull free_area_cache down to the first hole */ 413 /* pull free_area_cache down to the first hole */
400 if (mm->free_area_cache == vma->vm_end) 414 if (mm->free_area_cache == vma->vm_end) {
401 mm->free_area_cache = vma->vm_start; 415 mm->free_area_cache = vma->vm_start;
416 mm->cached_hole_size = largest_hole;
417 }
418 }
419
420 /* remember the largest hole we saw so far */
421 if (addr + largest_hole < vma->vm_start)
422 largest_hole = vma->vm_start - addr;
402 423
403 /* try just below the current vma->vm_start */ 424 /* try just below the current vma->vm_start */
404 addr = vma->vm_start-len; 425 addr = vma->vm_start-len;
@@ -411,6 +432,7 @@ fail:
411 */ 432 */
412 if (first_time) { 433 if (first_time) {
413 mm->free_area_cache = base; 434 mm->free_area_cache = base;
435 largest_hole = 0;
414 first_time = 0; 436 first_time = 0;
415 goto try_again; 437 goto try_again;
416 } 438 }
@@ -421,11 +443,13 @@ fail:
421 * allocations. 443 * allocations.
422 */ 444 */
423 mm->free_area_cache = TASK_UNMAPPED_BASE; 445 mm->free_area_cache = TASK_UNMAPPED_BASE;
446 mm->cached_hole_size = ~0UL;
424 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags); 447 addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
425 /* 448 /*
426 * Restore the topdown base: 449 * Restore the topdown base:
427 */ 450 */
428 mm->free_area_cache = base; 451 mm->free_area_cache = base;
452 mm->cached_hole_size = ~0UL;
429 453
430 return addr; 454 return addr;
431} 455}