Diffstat (limited to 'mm/hugetlb.c')
 -rw-r--r--   mm/hugetlb.c   56
 1 file changed, 45 insertions(+), 11 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 89e6286a7f57..74c1b6b0b37b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -71,7 +71,25 @@ static void enqueue_huge_page(struct page *page)
 	free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+static struct page *dequeue_huge_page(void)
+{
+	int nid;
+	struct page *page = NULL;
+
+	for (nid = 0; nid < MAX_NUMNODES; ++nid) {
+		if (!list_empty(&hugepage_freelists[nid])) {
+			page = list_entry(hugepage_freelists[nid].next,
+					  struct page, lru);
+			list_del(&page->lru);
+			free_huge_pages--;
+			free_huge_pages_node[nid]--;
+			break;
+		}
+	}
+	return page;
+}
+
+static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 				unsigned long address)
 {
 	int nid;
@@ -268,6 +286,12 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
+		/*
+		 * This page is now managed by the hugetlb allocator and has
+		 * no users -- drop the buddy allocator's reference.
+		 */
+		put_page_testzero(page);
+		VM_BUG_ON(page_count(page));
 		nid = page_to_nid(page);
 		set_compound_page_dtor(page, free_huge_page);
 		/*
@@ -296,8 +320,10 @@ static int gather_surplus_pages(int delta)
 	int needed, allocated;
 
 	needed = (resv_huge_pages + delta) - free_huge_pages;
-	if (needed <= 0)
+	if (needed <= 0) {
+		resv_huge_pages += delta;
 		return 0;
+	}
 
 	allocated = 0;
 	INIT_LIST_HEAD(&surplus_list);
@@ -335,9 +361,12 @@ retry:
 	 * The surplus_list now contains _at_least_ the number of extra pages
 	 * needed to accomodate the reservation. Add the appropriate number
 	 * of pages to the hugetlb pool and free the extras back to the buddy
-	 * allocator.
+	 * allocator. Commit the entire reservation here to prevent another
+	 * process from stealing the pages as they are added to the pool but
+	 * before they are reserved.
 	 */
 	needed += allocated;
+	resv_huge_pages += delta;
 	ret = 0;
 free:
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
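Both gather_surplus_pages() changes serve the same purpose: resv_huge_pages is updated while hugetlb_lock is still held, either in the early-return case (the pool already covers the request) or in the same critical section that enqueues the freshly allocated surplus pages, so a concurrent allocator cannot dequeue those pages after they are added to the pool but before they are reserved. The accounting can be sanity-checked with a toy model; the numbers and the lumped-together bookkeeping below are illustrative only, not the kernel logic:

/* Toy model of the surplus/reservation accounting; not kernel code. */
#include <stdio.h>

static long free_huge_pages;	/* pages sitting in the pool           */
static long resv_huge_pages;	/* pages promised to existing mappings */

static int toy_gather_surplus_pages(long delta)
{
	long needed = (resv_huge_pages + delta) - free_huge_pages;

	if (needed <= 0) {
		resv_huge_pages += delta;	/* pool already covers it */
		return 0;
	}
	/* ...allocate 'needed' surplus pages from the buddy allocator... */
	free_huge_pages += needed;	/* pages join the pool, and        */
	resv_huge_pages += delta;	/* the reservation is committed in */
	return 0;			/* the same "locked" section       */
}

int main(void)
{
	free_huge_pages = 1;
	resv_huge_pages = 1;
	toy_gather_surplus_pages(2);
	printf("free=%ld resv=%ld\n", free_huge_pages, resv_huge_pages);
	return 0;			/* prints free=3 resv=3 */
}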
@@ -346,13 +375,14 @@ free:
 			enqueue_huge_page(page);
 		else {
 			/*
-			 * Decrement the refcount and free the page using its
-			 * destructor. This must be done with hugetlb_lock
+			 * The page has a reference count of zero already, so
+			 * call free_huge_page directly instead of using
+			 * put_page. This must be done with hugetlb_lock
 			 * unlocked which is safe because free_huge_page takes
 			 * hugetlb_lock before deciding how to free the page.
 			 */
 			spin_unlock(&hugetlb_lock);
-			put_page(page);
+			free_huge_page(page);
 			spin_lock(&hugetlb_lock);
 		}
 	}
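This hunk pairs with the put_page_testzero() change earlier in the diff: a surplus page that turns out not to be needed already has a reference count of zero, so put_page(), which decrements the count before testing it, would drop it below zero; instead the page's compound destructor, free_huge_page(), is invoked directly, and hugetlb_lock is released first because, as the comment notes, free_huge_page() takes that lock itself. A rough userspace sketch of the distinction; the fake_* names are invented for illustration:

/* Contrast of put_page() vs. calling the destructor directly; not kernel code. */
#include <assert.h>

struct fake_page { int count; };

static void fake_free_huge_page(struct fake_page *p)
{
	(void)p;	/* ...would take hugetlb_lock and give the page back... */
}

static void fake_put_page(struct fake_page *p)
{
	assert(p->count > 0);		/* putting a zero-count page is a bug */
	if (--p->count == 0)
		fake_free_huge_page(p);
}

int main(void)
{
	struct fake_page surplus = { .count = 0 };	/* buddy ref already dropped */

	(void)fake_put_page;	/* shown only for contrast: calling it here would
				 * trip the assert, so the destructor is called
				 * directly, mirroring free_huge_page(page) above. */
	fake_free_huge_page(&surplus);
	return 0;
}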
@@ -371,6 +401,9 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
 	struct page *page;
 	unsigned long nr_pages;
 
+	/* Uncommit the reservation */
+	resv_huge_pages -= unused_resv_pages;
+
 	nr_pages = min(unused_resv_pages, surplus_huge_pages);
 
 	while (nr_pages) {
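With gather_surplus_pages() committing the whole reservation up front, return_unused_surplus_pages() becomes the place where an unneeded reservation is given back: it uncommits resv_huge_pages first and then frees at most surplus_huge_pages pages from the pool, as the min() on the context line above shows. A toy run of that arithmetic with made-up numbers and simplified bookkeeping:

/* Toy model of uncommitting an unused reservation; not kernel code. */
#include <stdio.h>

static long free_huge_pages    = 3;
static long resv_huge_pages    = 3;
static long surplus_huge_pages = 2;

static void toy_return_unused_surplus_pages(long unused)
{
	long nr_pages;

	resv_huge_pages -= unused;	/* uncommit the reservation          */
	nr_pages = unused < surplus_huge_pages ? unused : surplus_huge_pages;
	free_huge_pages    -= nr_pages;	/* release at most the surplus pages */
	surplus_huge_pages -= nr_pages;	/* back to the buddy allocator       */
}

int main(void)
{
	toy_return_unused_surplus_pages(3);
	printf("free=%ld resv=%ld surplus=%ld\n",
	       free_huge_pages, resv_huge_pages, surplus_huge_pages);
	return 0;			/* prints free=1 resv=0 surplus=0 */
}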
@@ -402,7 +435,7 @@ static struct page *alloc_huge_page_shared(struct vm_area_struct *vma,
 	struct page *page;
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page(vma, addr);
+	page = dequeue_huge_page_vma(vma, addr);
 	spin_unlock(&hugetlb_lock);
 	return page ? page : ERR_PTR(-VM_FAULT_OOM);
 }
@@ -417,7 +450,7 @@ static struct page *alloc_huge_page_private(struct vm_area_struct *vma,
 
 	spin_lock(&hugetlb_lock);
 	if (free_huge_pages > resv_huge_pages)
-		page = dequeue_huge_page(vma, addr);
+		page = dequeue_huge_page_vma(vma, addr);
 	spin_unlock(&hugetlb_lock);
 	if (!page) {
 		page = alloc_buddy_huge_page(vma, addr);
@@ -570,7 +603,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	min_count = max(count, min_count);
 	try_to_free_low(min_count);
 	while (min_count < persistent_huge_pages) {
-		struct page *page = dequeue_huge_page(NULL, 0);
+		struct page *page = dequeue_huge_page();
 		if (!page)
 			break;
 		update_and_free_page(page);
@@ -1205,12 +1238,13 @@ static int hugetlb_acct_memory(long delta)
 		if (gather_surplus_pages(delta) < 0)
 			goto out;
 
-		if (delta > cpuset_mems_nr(free_huge_pages_node))
+		if (delta > cpuset_mems_nr(free_huge_pages_node)) {
+			return_unused_surplus_pages(delta);
 			goto out;
+		}
 	}
 
 	ret = 0;
-	resv_huge_pages += delta;
 	if (delta < 0)
 		return_unused_surplus_pages((unsigned long) -delta);
 
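The removed resv_huge_pages += delta closes the loop: the reservation is now committed inside gather_surplus_pages() rather than here, so if the cpuset_mems_nr() check fails afterwards the already-committed reservation has to be undone, which is what the new return_unused_surplus_pages(delta) call does. A hedged sketch of the resulting control flow, with the helpers reduced to their effect on resv_huge_pages; the toy_* names are invented and the cpuset check is simply forced to fail:

/* Toy sketch of the corrected accounting flow; not the kernel function. */
#include <stdio.h>

static long resv_huge_pages;

static int  toy_gather_surplus_pages(long delta)        { resv_huge_pages += delta; return 0; }
static void toy_return_unused_surplus_pages(long delta) { resv_huge_pages -= delta; }
static int  toy_cpuset_has_enough(long delta)           { (void)delta; return 0; /* pretend it fails */ }

static int toy_hugetlb_acct_memory(long delta)
{
	if (delta > 0) {
		if (toy_gather_surplus_pages(delta) < 0)
			return -1;
		if (!toy_cpuset_has_enough(delta)) {
			/* reservation was committed above, so give it back */
			toy_return_unused_surplus_pages(delta);
			return -1;
		}
	}
	if (delta < 0)
		toy_return_unused_surplus_pages(-delta);
	return 0;
}

int main(void)
{
	int ret = toy_hugetlb_acct_memory(4);

	printf("ret=%d resv=%ld\n", ret, resv_huge_pages);	/* ret=-1 resv=0 */
	return 0;
}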