author     Wolfgang Wander <wwc@rentec.com>          2005-06-21 20:14:49 -0400
committer  Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-21 21:46:16 -0400
commit     1363c3cd8603a913a27e2995dccbd70d5312d8e6 (patch)
tree       405e7fc1ef44678f3ca0a54c536d0457e6e80f45 /mm/mmap.c
parent     e7c8d5c9955a4d2e88e36b640563f5d6d5aba48a (diff)
[PATCH] Avoiding mmap fragmentation
Ingo recently introduced a great speedup for allocating new mmaps using the free_area_cache pointer, which boosts the specweb SSL benchmark by 4-5% and causes huge performance increases in thread creation.

The downside of this patch is that it does lead to fragmentation in the mmap-ed areas (visible via /proc/self/maps), such that some applications that work fine under 2.4 kernels quickly run out of memory on any 2.6 kernel.

The problem is twofold:

1) the free_area_cache is used to continue a search for memory where the last search ended. Before the change, new areas were always searched for from the base address on. So now new small areas are cluttering holes of all sizes throughout the whole mmap-able region, whereas before small requests tended to close holes near the base, leaving holes far from the base large and available for larger requests.

2) the free_area_cache is also set to the location of the last munmap-ed area, so in scenarios where we allocate e.g. five regions of 1K each, then free regions 4, 2, 3 in this order, the next request for 1K will be placed in the position of the old region 3, whereas before we appended it to the still active region 1, placing it at the location of the old region 2. Before we had 1 free region of 2K, now we only get two free regions of 1K -> fragmentation.

The patch addresses these issues by introducing yet another cache descriptor, cached_hole_size, that contains the largest known hole size below the current free_area_cache. If a new request comes in, its size is compared against the cached_hole_size, and if the request can be filled with a hole below free_area_cache the search is started from the base instead.

The results look promising: whereas 2.6.12-rc4 fragments quickly and my (earlier posted) leakme.c test program terminates after 50000+ iterations with 96 distinct and fragmented maps in /proc/self/maps, it performs nicely (as expected) with thread creation: Ingo's test_str02 with 20000 threads requires 0.7s system time.

Taking out Ingo's patch (un-patch available per request) by basically deleting all mentions of free_area_cache from the kernel and starting the search for new memory always at the respective bases, we observe: leakme terminates successfully with 11 distinct, hardly fragmented areas in /proc/self/maps, but thread creation is grindingly slow: 30+s(!) system time for Ingo's test_str02 with 20000 threads.

Now - drumroll ;-) - the appended patch works fine with leakme: it ends with only 7 distinct areas in /proc/self/maps, and thread creation also seems sufficiently fast with 0.71s for 20000 threads.

Signed-off-by: Wolfgang Wander <wwc@rentec.com>
Credit-to: "Richard Purdie" <rpurdie@rpsys.net>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu> (partly)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
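The core heuristic is small enough to model outside the kernel. The following is a minimal user-space sketch of the decision described above, assuming an illustrative stand-in struct (mm_model), helper (search_start) and constants that are not kernel interfaces: it shows how cached_hole_size decides whether the next bottom-up search resumes at free_area_cache or restarts at the base, which is what keeps small requests from scattering across high holes.

/*
 * Illustrative user-space sketch only; mm_model and search_start() are
 * hypothetical stand-ins, not kernel code.
 */
#include <stdio.h>

#define TASK_UNMAPPED_BASE 0x40000000UL   /* illustrative base address */

struct mm_model {
        unsigned long free_area_cache;    /* where the previous search ended */
        unsigned long cached_hole_size;   /* largest known hole below the cache */
};

/* Decide where the linear search for a free area of 'len' bytes should begin. */
static unsigned long search_start(struct mm_model *mm, unsigned long len)
{
        if (len > mm->cached_hole_size)
                return mm->free_area_cache;   /* no low hole fits: resume where we left off */
        mm->cached_hole_size = 0;             /* a low hole may fit: rescan from the base */
        return TASK_UNMAPPED_BASE;
}

int main(void)
{
        struct mm_model mm = {
                .free_area_cache  = TASK_UNMAPPED_BASE + 0x100000,
                .cached_hole_size = 0x2000,   /* an 8K hole is known below the cache */
        };

        /* 64K cannot fit in the 8K hole, so the search resumes at the cache. */
        printf("64K request starts at %#lx\n", search_start(&mm, 0x10000));
        /* 4K can, so the search restarts at the base and fills low holes first. */
        printf("4K request starts at %#lx\n", search_start(&mm, 0x1000));
        return 0;
}

The top-down allocator in the patch below applies the same test in mirror image: if len <= cached_hole_size, it resets free_area_cache to mmap_base before searching downwards.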
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--   mm/mmap.c   53
1 file changed, 40 insertions, 13 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index de54acd9942f..9da23c1ef9dc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1175,7 +1175,12 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
                     (!vma || addr + len <= vma->vm_start))
                         return addr;
         }
-        start_addr = addr = mm->free_area_cache;
+        if (len > mm->cached_hole_size) {
+                start_addr = addr = mm->free_area_cache;
+        } else {
+                start_addr = addr = TASK_UNMAPPED_BASE;
+                mm->cached_hole_size = 0;
+        }
 
 full_search:
         for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
@@ -1186,7 +1191,9 @@ full_search:
                          * some holes.
                          */
                         if (start_addr != TASK_UNMAPPED_BASE) {
-                                start_addr = addr = TASK_UNMAPPED_BASE;
+                                addr = TASK_UNMAPPED_BASE;
+                                start_addr = addr;
+                                mm->cached_hole_size = 0;
                                 goto full_search;
                         }
                         return -ENOMEM;
@@ -1198,19 +1205,22 @@ full_search:
                         mm->free_area_cache = addr + len;
                         return addr;
                 }
+                if (addr + mm->cached_hole_size < vma->vm_start)
+                        mm->cached_hole_size = vma->vm_start - addr;
                 addr = vma->vm_end;
         }
 }
 #endif
 
-void arch_unmap_area(struct vm_area_struct *area)
+void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
 {
         /*
          * Is this a new hole at the lowest possible address?
          */
-        if (area->vm_start >= TASK_UNMAPPED_BASE &&
-                        area->vm_start < area->vm_mm->free_area_cache)
-                area->vm_mm->free_area_cache = area->vm_start;
+        if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
+                mm->free_area_cache = addr;
+                mm->cached_hole_size = ~0UL;
+        }
 }
 
 /*
@@ -1240,6 +1250,12 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                 return addr;
         }
 
+        /* check if free_area_cache is useful for us */
+        if (len <= mm->cached_hole_size) {
+                mm->cached_hole_size = 0;
+                mm->free_area_cache = mm->mmap_base;
+        }
+
         /* either no address requested or can't fit in requested address hole */
         addr = mm->free_area_cache;
 
@@ -1264,6 +1280,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                         /* remember the address as a hint for next time */
                         return (mm->free_area_cache = addr);
 
+                /* remember the largest hole we saw so far */
+                if (addr + mm->cached_hole_size < vma->vm_start)
+                        mm->cached_hole_size = vma->vm_start - addr;
+
                 /* try just below the current vma->vm_start */
                 addr = vma->vm_start-len;
         } while (len < vma->vm_start);
@@ -1274,28 +1294,30 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
          * can happen with large stack limits and large mmap()
          * allocations.
          */
+        mm->cached_hole_size = ~0UL;
         mm->free_area_cache = TASK_UNMAPPED_BASE;
         addr = arch_get_unmapped_area(filp, addr0, len, pgoff, flags);
         /*
          * Restore the topdown base:
          */
         mm->free_area_cache = mm->mmap_base;
+        mm->cached_hole_size = ~0UL;
 
         return addr;
 }
 #endif
 
-void arch_unmap_area_topdown(struct vm_area_struct *area)
+void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
 {
         /*
          * Is this a new hole at the highest possible address?
          */
-        if (area->vm_end > area->vm_mm->free_area_cache)
-                area->vm_mm->free_area_cache = area->vm_end;
+        if (addr > mm->free_area_cache)
+                mm->free_area_cache = addr;
 
         /* dont allow allocations above current base */
-        if (area->vm_mm->free_area_cache > area->vm_mm->mmap_base)
-                area->vm_mm->free_area_cache = area->vm_mm->mmap_base;
+        if (mm->free_area_cache > mm->mmap_base)
+                mm->free_area_cache = mm->mmap_base;
 }
 
 unsigned long
@@ -1595,7 +1617,6 @@ static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
         if (area->vm_flags & VM_LOCKED)
                 area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
         vm_stat_unaccount(area);
-        area->vm_mm->unmap_area(area);
         remove_vm_struct(area);
 }
 
@@ -1649,6 +1670,7 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 {
         struct vm_area_struct **insertion_point;
         struct vm_area_struct *tail_vma = NULL;
+        unsigned long addr;
 
         insertion_point = (prev ? &prev->vm_next : &mm->mmap);
         do {
@@ -1659,6 +1681,11 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
         } while (vma && vma->vm_start < end);
         *insertion_point = vma;
         tail_vma->vm_next = NULL;
+        if (mm->unmap_area == arch_unmap_area)
+                addr = prev ? prev->vm_end : mm->mmap_base;
+        else
+                addr = vma ? vma->vm_start : mm->mmap_base;
+        mm->unmap_area(mm, addr);
         mm->mmap_cache = NULL;          /* Kill the cache. */
 }
 