aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNishanth Aravamudan <nacc@us.ibm.com>2008-03-26 17:40:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-03-26 18:01:33 -0400
commit11320d17ce4ecf8002dc8f9b6f1e49cd18e45a94 (patch)
tree815d7f4f9a10173b30fc7d5eb8b53b50b71abb53
parenta1de09195b294c6a4c5dec8c8defd0a2688d3f75 (diff)
hugetlb: fix potential livelock in return_unused_surplus_hugepages()
Running the counters testcase from libhugetlbfs results in on 2.6.25-rc5 and 2.6.25-rc5-mm1: BUG: soft lockup - CPU#3 stuck for 61s! [counters:10531] NIP: c0000000000d1f3c LR: c0000000000d1f2c CTR: c0000000001b5088 REGS: c000005db12cb360 TRAP: 0901 Not tainted (2.6.25-rc5-autokern1) MSR: 8000000000009032 <EE,ME,IR,DR> CR: 48008448 XER: 20000000 TASK = c000005dbf3d6000[10531] 'counters' THREAD: c000005db12c8000 CPU: 3 GPR00: 0000000000000004 c000005db12cb5e0 c000000000879228 0000000000000004 GPR04: 0000000000000010 0000000000000000 0000000000200200 0000000000100100 GPR08: c0000000008aba10 000000000000ffff 0000000000000004 0000000000000000 GPR12: 0000000028000442 c000000000770080 NIP [c0000000000d1f3c] .return_unused_surplus_pages+0x84/0x18c LR [c0000000000d1f2c] .return_unused_surplus_pages+0x74/0x18c Call Trace: [c000005db12cb5e0] [c000005db12cb670] 0xc000005db12cb670 (unreliable) [c000005db12cb670] [c0000000000d24c4] .hugetlb_acct_memory+0x2e0/0x354 [c000005db12cb740] [c0000000001b5048] .truncate_hugepages+0x1d4/0x214 [c000005db12cb890] [c0000000001b50a4] .hugetlbfs_delete_inode+0x1c/0x3c [c000005db12cb920] [c000000000103fd8] .generic_delete_inode+0xf8/0x1c0 [c000005db12cb9b0] [c0000000001b5100] .hugetlbfs_drop_inode+0x3c/0x24c [c000005db12cba50] [c00000000010287c] .iput+0xdc/0xf8 [c000005db12cbad0] [c0000000000fee54] .dentry_iput+0x12c/0x194 [c000005db12cbb60] [c0000000000ff050] .d_kill+0x6c/0xa4 [c000005db12cbbf0] [c0000000000ffb74] .dput+0x18c/0x1b0 [c000005db12cbc70] [c0000000000e9e98] .__fput+0x1a4/0x1e8 [c000005db12cbd10] [c0000000000e61ec] .filp_close+0xb8/0xe0 [c000005db12cbda0] [c0000000000e62d0] .sys_close+0xbc/0x134 [c000005db12cbe30] [c00000000000872c] syscall_exit+0x0/0x40 Instruction dump: ebbe8038 38800010 e8bf0002 3bbd0008 7fa3eb78 38a50001 7ca507b4 4818df25 60000000 38800010 38a00000 7c601b78 <7fa3eb78> 2f800010 409d0008 38000010 This was tracked down to a potential livelock in return_unused_surplus_hugepages(). In the case where we have surplus pages on some node, but no free pages on the same node, we may never break out of the loop. To avoid this livelock, terminate the search if we iterate a number of times equal to the number of online nodes without freeing a page. Thanks to Andy Whitcroft and Adam Litke for helping with debugging and the patch. Signed-off-by: Nishanth Aravamudan <nacc@us.ibm.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/hugetlb.c11
1 files changed, 10 insertions, 1 deletions
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 40d841cb5126..51c9e2c01640 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -401,12 +401,20 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
401 struct page *page; 401 struct page *page;
402 unsigned long nr_pages; 402 unsigned long nr_pages;
403 403
404 /*
405 * We want to release as many surplus pages as possible, spread
406 * evenly across all nodes. Iterate across all nodes until we
407 * can no longer free unreserved surplus pages. This occurs when
408 * the nodes with surplus pages have no free pages.
409 */
410 unsigned long remaining_iterations = num_online_nodes();
411
404 /* Uncommit the reservation */ 412 /* Uncommit the reservation */
405 resv_huge_pages -= unused_resv_pages; 413 resv_huge_pages -= unused_resv_pages;
406 414
407 nr_pages = min(unused_resv_pages, surplus_huge_pages); 415 nr_pages = min(unused_resv_pages, surplus_huge_pages);
408 416
409 while (nr_pages) { 417 while (remaining_iterations-- && nr_pages) {
410 nid = next_node(nid, node_online_map); 418 nid = next_node(nid, node_online_map);
411 if (nid == MAX_NUMNODES) 419 if (nid == MAX_NUMNODES)
412 nid = first_node(node_online_map); 420 nid = first_node(node_online_map);
@@ -424,6 +432,7 @@ static void return_unused_surplus_pages(unsigned long unused_resv_pages)
424 surplus_huge_pages--; 432 surplus_huge_pages--;
425 surplus_huge_pages_node[nid]--; 433 surplus_huge_pages_node[nid]--;
426 nr_pages--; 434 nr_pages--;
435 remaining_iterations = num_online_nodes();
427 } 436 }
428 } 437 }
429} 438}