author		Wolfgang Wander <wwc@rentec.com>	2005-06-21 20:14:49 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-06-21 21:46:16 -0400
commit		1363c3cd8603a913a27e2995dccbd70d5312d8e6
tree		405e7fc1ef44678f3ca0a54c536d0457e6e80f45 /arch/i386/mm
parent		e7c8d5c9955a4d2e88e36b640563f5d6d5aba48a
[PATCH] Avoiding mmap fragmentation
Ingo recently introduced a great speedup for allocating new mmaps using the free_area_cache pointer, which boosts the specweb SSL benchmark by 4-5% and gives huge performance increases in thread creation.

The downside of this patch is that it leads to fragmentation in the mmap-ed areas (visible via /proc/self/maps), such that some applications that work fine under 2.4 kernels quickly run out of memory on any 2.6 kernel.

The problem is twofold:

1) The free_area_cache is used to continue a search for memory where the last search ended. Before the change, new areas were always searched for from the base address on. So now new small areas clutter holes of all sizes throughout the whole mmap-able region, whereas before small requests tended to close holes near the base, leaving holes far from the base large and available for larger requests.

2) The free_area_cache is also set to the location of the last munmap-ed area, so in a scenario where we allocate e.g. five regions of 1K each and then free regions 4, 2, 3 in this order, the next request for 1K will be placed in the position of the old region 3, whereas before we appended it to the still active region 1, placing it at the location of the old region 2. Before we had one free region of 2K, now we only get two free regions of 1K -> fragmentation.

The patch addresses these issues by introducing yet another cache descriptor, cached_hole_size, which holds the largest known hole size below the current free_area_cache. When a new request comes in, its size is compared against cached_hole_size; if the request could be satisfied by a hole below free_area_cache, the search is started from the base instead.

The results look promising: whereas 2.6.12-rc4 fragments quickly and my (earlier posted) leakme.c test program terminates after 50000+ iterations with 96 distinct and fragmented maps in /proc/self/maps, it performs nicely (as expected) with thread creation: Ingo's test_str02 with 20000 threads requires 0.7s system time.

Taking out Ingo's patch (un-patch available per request) by basically deleting all mentions of free_area_cache from the kernel and always starting the search for new memory at the respective bases, we observe: leakme terminates successfully with 11 distinct, hardly fragmented areas in /proc/self/maps, but thread creation is grindingly slow: 30+s(!) system time for Ingo's test_str02 with 20000 threads.

Now - drumroll ;-) - the appended patch works fine with leakme: it ends with only 7 distinct areas in /proc/self/maps, and thread creation also remains sufficiently fast, at 0.71s for 20000 threads.

Signed-off-by: Wolfgang Wander <wwc@rentec.com>
Credit-to: "Richard Purdie" <rpurdie@rpsys.net>
Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu> (partly)
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
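The decision the patch adds is easier to see in isolation than in the hunks below. The following is a minimal userspace sketch of the bottom-up case only: the regions array, BASE, LIMIT, and the helper name get_unmapped_area are invented for this illustration and stand in for the kernel's vma list, TASK_UNMAPPED_BASE, and the top of the mmap range; only the free_area_cache / cached_hole_size bookkeeping mirrors what the commit message describes. It is a model of the idea, not the kernel implementation.

/*
 * Illustrative sketch, not kernel code: a first-fit search that resumes from
 * free_area_cache only when no hole known to lie below it could satisfy the
 * request, and that remembers the largest hole it steps over.
 */
#include <stddef.h>
#include <stdio.h>

#define BASE   0x1000UL     /* stand-in for TASK_UNMAPPED_BASE        */
#define LIMIT  0x100000UL   /* stand-in for the top of the mmap range */

struct region { unsigned long start, end; };   /* busy regions, sorted */

static unsigned long free_area_cache = BASE;
static unsigned long cached_hole_size = ~0UL;  /* "nothing cached yet" */

static unsigned long get_unmapped_area(const struct region *r, size_t n,
                                       unsigned long len)
{
        unsigned long addr;
        size_t i;

        if (len > cached_hole_size) {
                addr = free_area_cache;   /* no known hole fits: resume search */
        } else {
                addr = BASE;              /* a hole below the cache may fit    */
                cached_hole_size = 0;
        }

        for (i = 0; i < n; i++) {
                if (r[i].end <= addr)
                        continue;                 /* region entirely below addr */
                if (addr + len <= r[i].start)
                        break;                    /* hole in front of r[i] fits */
                /* remember the largest hole we stepped over */
                if (addr + cached_hole_size < r[i].start)
                        cached_hole_size = r[i].start - addr;
                addr = r[i].end;
        }

        if (addr + len > LIMIT)
                return 0;                         /* out of space */

        free_area_cache = addr + len;
        return addr;
}

int main(void)
{
        const struct region used[] = { { 0x2000, 0x3000 }, { 0x8000, 0x9000 } };

        printf("first request lands at %#lx\n", get_unmapped_area(used, 2, 0x1000));
        printf("second request lands at %#lx\n", get_unmapped_area(used, 2, 0x1000));
        return 0;
}

The hunks below add exactly this bookkeeping to the i386 hugetlb allocator, in both the bottom-up and the top-down (largest_hole) search paths.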
Diffstat (limited to 'arch/i386/mm')
-rw-r--r--  arch/i386/mm/hugetlbpage.c | 34
1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/arch/i386/mm/hugetlbpage.c b/arch/i386/mm/hugetlbpage.c
index 5aa06001a4bd..3b099f32b948 100644
--- a/arch/i386/mm/hugetlbpage.c
+++ b/arch/i386/mm/hugetlbpage.c
@@ -140,7 +140,12 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
 	struct vm_area_struct *vma;
 	unsigned long start_addr;
 
-	start_addr = mm->free_area_cache;
+	if (len > mm->cached_hole_size) {
+		start_addr = mm->free_area_cache;
+	} else {
+		start_addr = TASK_UNMAPPED_BASE;
+		mm->cached_hole_size = 0;
+	}
 
 full_search:
 	addr = ALIGN(start_addr, HPAGE_SIZE);
@@ -154,6 +159,7 @@ full_search:
 			 */
 			if (start_addr != TASK_UNMAPPED_BASE) {
 				start_addr = TASK_UNMAPPED_BASE;
+				mm->cached_hole_size = 0;
 				goto full_search;
 			}
 			return -ENOMEM;
@@ -162,6 +168,8 @@ full_search:
 			mm->free_area_cache = addr + len;
 			return addr;
 		}
+		if (addr + mm->cached_hole_size < vma->vm_start)
+			mm->cached_hole_size = vma->vm_start - addr;
 		addr = ALIGN(vma->vm_end, HPAGE_SIZE);
 	}
 }
@@ -173,12 +181,17 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev_vma;
 	unsigned long base = mm->mmap_base, addr = addr0;
+	unsigned long largest_hole = mm->cached_hole_size;
 	int first_time = 1;
 
 	/* don't allow allocations above current base */
 	if (mm->free_area_cache > base)
 		mm->free_area_cache = base;
 
+	if (len <= largest_hole) {
+		largest_hole = 0;
+		mm->free_area_cache = base;
+	}
 try_again:
 	/* make sure it can fit in the remaining address space */
 	if (mm->free_area_cache < len)
@@ -199,13 +212,21 @@ try_again:
 		 * vma->vm_start, use it:
 		 */
 		if (addr + len <= vma->vm_start &&
-				(!prev_vma || (addr >= prev_vma->vm_end)))
+				(!prev_vma || (addr >= prev_vma->vm_end))) {
 			/* remember the address as a hint for next time */
-			return (mm->free_area_cache = addr);
-		else
+			mm->cached_hole_size = largest_hole;
+			return (mm->free_area_cache = addr);
+		} else {
 			/* pull free_area_cache down to the first hole */
-			if (mm->free_area_cache == vma->vm_end)
+			if (mm->free_area_cache == vma->vm_end) {
 				mm->free_area_cache = vma->vm_start;
+				mm->cached_hole_size = largest_hole;
+			}
+		}
+
+		/* remember the largest hole we saw so far */
+		if (addr + largest_hole < vma->vm_start)
+			largest_hole = vma->vm_start - addr;
 
 		/* try just below the current vma->vm_start */
 		addr = (vma->vm_start - len) & HPAGE_MASK;
@@ -218,6 +239,7 @@ fail:
 	 */
 	if (first_time) {
 		mm->free_area_cache = base;
+		largest_hole = 0;
 		first_time = 0;
 		goto try_again;
 	}
@@ -228,6 +250,7 @@ fail:
 	 * allocations.
 	 */
 	mm->free_area_cache = TASK_UNMAPPED_BASE;
+	mm->cached_hole_size = ~0UL;
 	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
 			len, pgoff, flags);
 
@@ -235,6 +258,7 @@ fail:
 	 * Restore the topdown base:
 	 */
 	mm->free_area_cache = base;
+	mm->cached_hole_size = ~0UL;
 
 	return addr;
 }