author     Adam Litke <agl@us.ibm.com>  2007-10-16 04:26:19 -0400
committer  Linus Torvalds <torvalds@woody.linux-foundation.org>  2007-10-16 12:43:02 -0400
commit     e4e574b767ba63101cfda2b42d72f38546319297
tree       084b94d01c71ccd898f8df0ec441e6726e657e75
parent     7893d1d505d59db9d4f35165c8b6d3c6dff40a32
hugetlb: Try to grow hugetlb pool for MAP_SHARED mappings
Shared mappings require special handling because the huge pages needed to
fully populate the VMA must be reserved at mmap time.  If not enough pages
are available when making the reservation, allocate all of the shortfall at
once from the buddy allocator and add the pages directly to the hugetlb pool.
If they cannot be allocated, then fail the mapping.  The page surplus is
accounted for in the same way as for private mappings; faulted surplus pages
will be freed at unmap time.  Reserved, surplus pages that have not been used
must be freed separately when their reservation has been released.

Signed-off-by: Adam Litke <agl@us.ibm.com>
Acked-by: Andy Whitcroft <apw@shadowen.org>
Acked-by: Dave McCracken <dave.mccracken@oracle.com>
Cc: William Irwin <bill.irwin@oracle.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Cc: Ken Chen <kenchen@google.com>
Cc: Badari Pulavarty <pbadari@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
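For context, the reservation described above is taken when a hugetlbfs file is
mapped with MAP_SHARED; the minimal userspace sketch below shows the scenario
this patch changes.  It is an illustration only, not part of the patch, and the
hugetlbfs mount point (/dev/hugepages), the file name, and the 2 MB huge page
size are assumptions.

/*
 * Illustration only -- not part of this patch.  A MAP_SHARED mapping of a
 * hugetlbfs file is what forces the mmap-time reservation described above.
 * The mount point, file name, and huge page size are assumptions.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

#define EXAMPLE_HPAGE_SIZE (2UL * 1024 * 1024) /* assumed huge page size */

int main(void)
{
        size_t len = 4 * EXAMPLE_HPAGE_SIZE;
        void *addr;
        int fd;

        /* assumed hugetlbfs mount point */
        fd = open("/dev/hugepages/example", O_CREAT | O_RDWR, 0600);
        if (fd < 0) {
                perror("open");
                return EXIT_FAILURE;
        }

        /*
         * With MAP_SHARED the kernel reserves the huge pages backing the
         * whole range at mmap time (hugetlb_reserve_pages).  With this
         * patch a shortfall is made up from the buddy allocator; without
         * it the mmap fails whenever the static pool is too small.
         */
        addr = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (addr == MAP_FAILED) {
                perror("mmap"); /* reservation could not be satisfied */
                close(fd);
                return EXIT_FAILURE;
        }

        munmap(addr, len);
        close(fd);
        return EXIT_SUCCESS;
}

Before this patch such an mmap fails outright when free_huge_pages cannot
cover the reservation; with it, gather_surplus_pages() first tops the pool up
from the buddy allocator.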
-rw-r--r--  mm/hugetlb.c | 155
 1 file changed, 132 insertions(+), 23 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8768e5250323..31bbca6b2c90 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -87,6 +87,8 @@ static struct page *dequeue_huge_page(struct vm_area_struct *vma,
                         list_del(&page->lru);
                         free_huge_pages--;
                         free_huge_pages_node[nid]--;
+                        if (vma && vma->vm_flags & VM_MAYSHARE)
+                                resv_huge_pages--;
                         break;
                 }
         }
@@ -214,15 +216,116 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
         return page;
 }
 
+/*
+ * Increase the hugetlb pool such that it can accomodate a reservation
+ * of size 'delta'.
+ */
+static int gather_surplus_pages(int delta)
+{
+        struct list_head surplus_list;
+        struct page *page, *tmp;
+        int ret, i;
+        int needed, allocated;
+
+        needed = (resv_huge_pages + delta) - free_huge_pages;
+        if (needed <= 0)
+                return 0;
+
+        allocated = 0;
+        INIT_LIST_HEAD(&surplus_list);
+
+        ret = -ENOMEM;
+retry:
+        spin_unlock(&hugetlb_lock);
+        for (i = 0; i < needed; i++) {
+                page = alloc_buddy_huge_page(NULL, 0);
+                if (!page) {
+                        /*
+                         * We were not able to allocate enough pages to
+                         * satisfy the entire reservation so we free what
+                         * we've allocated so far.
+                         */
+                        spin_lock(&hugetlb_lock);
+                        needed = 0;
+                        goto free;
+                }
+
+                list_add(&page->lru, &surplus_list);
+        }
+        allocated += needed;
+
+        /*
+         * After retaking hugetlb_lock, we need to recalculate 'needed'
+         * because either resv_huge_pages or free_huge_pages may have changed.
+         */
+        spin_lock(&hugetlb_lock);
+        needed = (resv_huge_pages + delta) - (free_huge_pages + allocated);
+        if (needed > 0)
+                goto retry;
+
+        /*
+         * The surplus_list now contains _at_least_ the number of extra pages
+         * needed to accomodate the reservation. Add the appropriate number
+         * of pages to the hugetlb pool and free the extras back to the buddy
+         * allocator.
+         */
+        needed += allocated;
+        ret = 0;
+free:
+        list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+                list_del(&page->lru);
+                if ((--needed) >= 0)
+                        enqueue_huge_page(page);
+                else
+                        update_and_free_page(page);
+        }
+
+        return ret;
+}
+
+/*
+ * When releasing a hugetlb pool reservation, any surplus pages that were
+ * allocated to satisfy the reservation must be explicitly freed if they were
+ * never used.
+ */
+void return_unused_surplus_pages(unsigned long unused_resv_pages)
+{
+        static int nid = -1;
+        struct page *page;
+        unsigned long nr_pages;
+
+        nr_pages = min(unused_resv_pages, surplus_huge_pages);
+
+        while (nr_pages) {
+                nid = next_node(nid, node_online_map);
+                if (nid == MAX_NUMNODES)
+                        nid = first_node(node_online_map);
+
+                if (!surplus_huge_pages_node[nid])
+                        continue;
+
+                if (!list_empty(&hugepage_freelists[nid])) {
+                        page = list_entry(hugepage_freelists[nid].next,
+                                          struct page, lru);
+                        list_del(&page->lru);
+                        update_and_free_page(page);
+                        free_huge_pages--;
+                        free_huge_pages_node[nid]--;
+                        surplus_huge_pages--;
+                        surplus_huge_pages_node[nid]--;
+                        nr_pages--;
+                }
+        }
+}
+
 static struct page *alloc_huge_page(struct vm_area_struct *vma,
                                     unsigned long addr)
 {
         struct page *page = NULL;
+        int use_reserved_page = vma->vm_flags & VM_MAYSHARE;
 
         spin_lock(&hugetlb_lock);
-        if (vma->vm_flags & VM_MAYSHARE)
-                resv_huge_pages--;
-        else if (free_huge_pages <= resv_huge_pages)
+        if (!use_reserved_page && (free_huge_pages <= resv_huge_pages))
                 goto fail;
 
         page = dequeue_huge_page(vma, addr);
@@ -234,8 +337,6 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
         return page;
 
 fail:
-        if (vma->vm_flags & VM_MAYSHARE)
-                resv_huge_pages++;
         spin_unlock(&hugetlb_lock);
 
         /*
@@ -243,7 +344,7 @@ fail:
          * may have failed due to an undersized hugetlb pool. Try to grab a
          * surplus huge page from the buddy allocator.
          */
-        if (!(vma->vm_flags & VM_MAYSHARE))
+        if (!use_reserved_page)
                 page = alloc_buddy_huge_page(vma, addr);
 
         return page;
@@ -952,21 +1053,6 @@ static int hugetlb_acct_memory(long delta)
         int ret = -ENOMEM;
 
         spin_lock(&hugetlb_lock);
-        if ((delta + resv_huge_pages) <= free_huge_pages) {
-                resv_huge_pages += delta;
-                ret = 0;
-        }
-        spin_unlock(&hugetlb_lock);
-        return ret;
-}
-
-int hugetlb_reserve_pages(struct inode *inode, long from, long to)
-{
-        long ret, chg;
-
-        chg = region_chg(&inode->i_mapping->private_list, from, to);
-        if (chg < 0)
-                return chg;
         /*
          * When cpuset is configured, it breaks the strict hugetlb page
          * reservation as the accounting is done on a global variable. Such
@@ -984,8 +1070,31 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to)
          * a best attempt and hopefully to minimize the impact of changing
          * semantics that cpuset has.
          */
-        if (chg > cpuset_mems_nr(free_huge_pages_node))
-                return -ENOMEM;
+        if (delta > 0) {
+                if (gather_surplus_pages(delta) < 0)
+                        goto out;
+
+                if (delta > cpuset_mems_nr(free_huge_pages_node))
+                        goto out;
+        }
+
+        ret = 0;
+        resv_huge_pages += delta;
+        if (delta < 0)
+                return_unused_surplus_pages((unsigned long) -delta);
+
+out:
+        spin_unlock(&hugetlb_lock);
+        return ret;
+}
+
+int hugetlb_reserve_pages(struct inode *inode, long from, long to)
+{
+        long ret, chg;
+
+        chg = region_chg(&inode->i_mapping->private_list, from, to);
+        if (chg < 0)
+                return chg;
 
         ret = hugetlb_acct_memory(chg);
         if (ret < 0)