| author | Naoya Horiguchi <n-horiguchi@ah.jp.nec.com> | 2010-09-07 21:19:37 -0400 |
|---|---|---|
| committer | Andi Kleen <ak@linux.intel.com> | 2010-10-08 03:32:45 -0400 |
| commit | a9869b837c098732bad84939015c0eb391b23e41 | |
| tree | 00ba63997470724e2704c9b7c995c91672b53c7b /mm/hugetlb.c | |
| parent | 6de2b1aab94355482bd2accdc115666509667458 | |
hugetlb: move refcounting in hugepage allocation inside hugetlb_lock
Currently alloc_huge_page() raises the page refcount outside hugetlb_lock,
but this causes a race when dequeue_hwpoison_huge_page() runs concurrently
with alloc_huge_page().
To avoid the race, this patch moves set_page_refcounted() inside hugetlb_lock.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Wu Fengguang <fengguang.wu@intel.com>
Acked-by: Mel Gorman <mel@csn.ul.ie>
Reviewed-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
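For context, here is a minimal sketch of dequeue_huge_page_node() as it looks with this patch applied, condensed from the first hunk below. The list_empty() guard at the top is paraphrased from the surrounding code and is not part of the hunk; the essential point is that the 0 -> 1 refcount transition now happens while the caller still holds hugetlb_lock, so dequeue_hwpoison_huge_page() can no longer race with it.

```c
/* Sketch only -- simplified from mm/hugetlb.c as it looks after this patch. */
static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
{
	struct page *page;

	/* Guard paraphrased from surrounding code (not part of the hunk below). */
	if (list_empty(&h->hugepage_freelists[nid]))
		return NULL;
	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
	list_del(&page->lru);
	set_page_refcounted(page);	/* moved inside hugetlb_lock by this patch */
	h->free_huge_pages--;
	h->free_huge_pages_node[nid]--;
	return page;
}
```

Callers reach this function with hugetlb_lock already held, which is what makes raising the refcount here safe against a concurrent dequeue_hwpoison_huge_page().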
Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r-- | mm/hugetlb.c | 35 |
1 file changed, 13 insertions(+), 22 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index deb7bebefe68..636be5d6aadd 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -509,6 +509,7 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 		return NULL;
 	page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
 	list_del(&page->lru);
+	set_page_refcounted(page);
 	h->free_huge_pages--;
 	h->free_huge_pages_node[nid]--;
 	return page;
@@ -868,12 +869,6 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 
 	spin_lock(&hugetlb_lock);
 	if (page) {
-		/*
-		 * This page is now managed by the hugetlb allocator and has
-		 * no users -- drop the buddy allocator's reference.
-		 */
-		put_page_testzero(page);
-		VM_BUG_ON(page_count(page));
 		r_nid = page_to_nid(page);
 		set_compound_page_dtor(page, free_huge_page);
 		/*
@@ -936,16 +931,13 @@ retry:
 	spin_unlock(&hugetlb_lock);
 	for (i = 0; i < needed; i++) {
 		page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
-		if (!page) {
+		if (!page)
 			/*
 			 * We were not able to allocate enough pages to
 			 * satisfy the entire reservation so we free what
 			 * we've allocated so far.
 			 */
-			spin_lock(&hugetlb_lock);
-			needed = 0;
 			goto free;
-		}
 
 		list_add(&page->lru, &surplus_list);
 	}
@@ -972,31 +964,31 @@ retry:
 	needed += allocated;
 	h->resv_huge_pages += delta;
 	ret = 0;
-free:
+
+	spin_unlock(&hugetlb_lock);
 	/* Free the needed pages to the hugetlb pool */
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
 		if ((--needed) < 0)
 			break;
 		list_del(&page->lru);
+		/*
+		 * This page is now managed by the hugetlb allocator and has
+		 * no users -- drop the buddy allocator's reference.
+		 */
+		put_page_testzero(page);
+		VM_BUG_ON(page_count(page));
 		enqueue_huge_page(h, page);
 	}
 
 	/* Free unnecessary surplus pages to the buddy allocator */
+free:
 	if (!list_empty(&surplus_list)) {
-		spin_unlock(&hugetlb_lock);
 		list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
 			list_del(&page->lru);
-			/*
-			 * The page has a reference count of zero already, so
-			 * call free_huge_page directly instead of using
-			 * put_page. This must be done with hugetlb_lock
-			 * unlocked which is safe because free_huge_page takes
-			 * hugetlb_lock before deciding how to free the page.
-			 */
-			free_huge_page(page);
+			put_page(page);
 		}
-		spin_lock(&hugetlb_lock);
 	}
+	spin_lock(&hugetlb_lock);
 
 	return ret;
 }
@@ -1123,7 +1115,6 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
 	}
 	}
 
-	set_page_refcounted(page);
 	set_page_private(page, (unsigned long) mapping);
 
 	vma_commit_reservation(h, vma, addr);