Diffstat (limited to 'mm/hugetlb.c')

-rw-r--r--  mm/hugetlb.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------
 1 file changed, 51 insertions(+), 27 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 51c9e2c01640..df28c1773fb2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -95,13 +95,16 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	int nid;
 	struct page *page = NULL;
 	struct mempolicy *mpol;
+	nodemask_t *nodemask;
 	struct zonelist *zonelist = huge_zonelist(vma, address,
-					htlb_alloc_mask, &mpol);
-	struct zone **z;
+					htlb_alloc_mask, &mpol, &nodemask);
+	struct zone *zone;
+	struct zoneref *z;
 
-	for (z = zonelist->zones; *z; z++) {
-		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+						MAX_NR_ZONES - 1, nodemask) {
+		nid = zone_to_nid(zone);
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&hugepage_freelists[nid])) {
 			page = list_entry(hugepage_freelists[nid].next,
 					  struct page, lru);
@@ -113,7 +116,7 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			break;
 		}
 	}
-	mpol_free(mpol);	/* unref if mpol !NULL */
+	mpol_cond_put(mpol);
 	return page;
 }
 
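Note: dequeue_huge_page_vma() now iterates with the for_each_zone_zonelist_nodemask() helper instead of open-coding a walk over zonelist->zones, so zones on nodes excluded by the mempolicy's nodemask are skipped centrally, and mpol_cond_put() drops the policy reference only if the policy is actually refcounted (replacing the unconditional mpol_free()). Roughly, the helper expands along these lines (a simplified sketch of the include/linux/mmzone.h macro of this era, not the literal definition):

	struct zoneref *z;
	struct zone *zone;

	for (z = first_zones_zonelist(zonelist, highidx, nodemask, &zone);
	     zone;
	     z = next_zones_zonelist(++z, highidx, nodemask, &zone)) {
		/* body runs only for zones <= highidx on allowed nodes */
	}
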
@@ -129,6 +132,7 @@ static void update_and_free_page(struct page *page)
 	}
 	set_compound_page_dtor(page, NULL);
 	set_page_refcounted(page);
+	arch_release_hugepage(page);
 	__free_pages(page, HUGETLB_PAGE_ORDER);
 }
 
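Note: arch_release_hugepage() is a new hook letting an architecture undo whatever arch_prepare_hugepage() set up before the page returns to the buddy allocator (the hook pair was introduced for s390's segment-table handling). On architectures with nothing to do, it is just an empty inline, along the lines of:

	/* sketch of the no-op variant an architecture may provide */
	static inline void arch_release_hugepage(struct page *page)
	{
	}
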
@@ -198,6 +202,10 @@ static struct page *alloc_fresh_huge_page_node(int nid)
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|__GFP_NOWARN,
 		HUGETLB_PAGE_ORDER);
 	if (page) {
+		if (arch_prepare_hugepage(page)) {
+			__free_pages(page, HUGETLB_PAGE_ORDER);
+			return 0;
+		}
 		set_compound_page_dtor(page, free_huge_page);
 		spin_lock(&hugetlb_lock);
 		nr_huge_pages++;
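Note: the allocation side of the same hook pair lets the architecture veto a freshly allocated huge page; on failure the page goes straight back to the buddy allocator. (Since the function returns a struct page *, the bare `return 0;` relies on 0 converting to NULL.) A do-nothing default would look like:

	/* sketch of the no-op variant: returning 0 means "nothing to prepare, success" */
	static inline int arch_prepare_hugepage(struct page *page)
	{
		return 0;
	}
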
@@ -239,6 +247,11 @@ static int alloc_fresh_huge_page(void)
 		hugetlb_next_nid = next_nid;
 	} while (!page && hugetlb_next_nid != start_nid);
 
+	if (ret)
+		count_vm_event(HTLB_BUDDY_PGALLOC);
+	else
+		count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+
 	return ret;
 }
 
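Note: HTLB_BUDDY_PGALLOC and HTLB_BUDDY_PGALLOC_FAIL are new vm_event_item counters; they surface in /proc/vmstat as htlb_buddy_alloc_success and htlb_buddy_alloc_fail, making it visible how often the huge page pool was grown from the buddy allocator and how often such attempts failed.
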
@@ -299,9 +312,11 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 		 */
 		nr_huge_pages_node[nid]++;
 		surplus_huge_pages_node[nid]++;
+		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
 		nr_huge_pages--;
 		surplus_huge_pages--;
+		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
 	}
 	spin_unlock(&hugetlb_lock);
 
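Note: here the double-underscore __count_vm_event() is used because the counter is bumped while hugetlb_lock is held, so preemption is already disabled; the plain count_vm_event() in alloc_fresh_huge_page() above runs unlocked and manages preemption itself. The distinction, modeled on the include/linux/vmstat.h of this era:

	/* caller guarantees preemption is off, e.g. by holding a spinlock */
	static inline void __count_vm_event(enum vm_event_item item)
	{
		__get_cpu_var(vm_event_states).event[item]++;
	}

	static inline void count_vm_event(enum vm_event_item item)
	{
		get_cpu_var(vm_event_states).event[item]++;
		put_cpu();
	}
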
@@ -369,11 +384,19 @@ retry:
 	resv_huge_pages += delta;
 	ret = 0;
 free:
+	/* Free the needed pages to the hugetlb pool */
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+		if ((--needed) < 0)
+			break;
 		list_del(&page->lru);
-		if ((--needed) >= 0)
-			enqueue_huge_page(page);
-		else {
+		enqueue_huge_page(page);
+	}
+
+	/* Free unnecessary surplus pages to the buddy allocator */
+	if (!list_empty(&surplus_list)) {
+		spin_unlock(&hugetlb_lock);
+		list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+			list_del(&page->lru);
 			/*
 			 * The page has a reference count of zero already, so
 			 * call free_huge_page directly instead of using
@@ -381,10 +404,9 @@ free:
 			 * unlocked which is safe because free_huge_page takes
 			 * hugetlb_lock before deciding how to free the page.
 			 */
-			spin_unlock(&hugetlb_lock);
 			free_huge_page(page);
-			spin_lock(&hugetlb_lock);
 		}
+		spin_lock(&hugetlb_lock);
 	}
 
 	return ret;
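Note: taken together, these two hunks restructure the tail of the surplus-allocation path (gather_surplus_pages()): the needed pages are first enqueued on the hugetlb free lists while hugetlb_lock is held, and the lock is then dropped a single time to hand any leftover surplus pages back, instead of bouncing the lock once per freed page. The resulting logic, in outline:

	/* under hugetlb_lock: keep the first 'needed' pages in the pool */
	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
		if ((--needed) < 0)
			break;
		list_del(&page->lru);
		enqueue_huge_page(page);
	}

	/* one lock round trip to return the remainder to the buddy allocator */
	if (!list_empty(&surplus_list)) {
		spin_unlock(&hugetlb_lock);
		list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
			list_del(&page->lru);
			free_huge_page(page);	/* page refcount is already zero */
		}
		spin_lock(&hugetlb_lock);
	}
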
@@ -718,7 +740,7 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
 		entry =
 		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
 	} else {
-		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+		entry = huge_pte_wrprotect(mk_pte(page, vma->vm_page_prot));
 	}
 	entry = pte_mkyoung(entry);
 	entry = pte_mkhuge(entry);
@@ -731,8 +753,8 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 {
 	pte_t entry;
 
-	entry = pte_mkwrite(pte_mkdirty(*ptep));
-	if (ptep_set_access_flags(vma, address, ptep, entry, 1)) {
+	entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
+	if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
 		update_mmu_cache(vma, address, entry);
 	}
 }
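Note: the remaining hunks systematically replace direct dereferences (*ptep) and generic pte helpers with huge_* wrappers: huge_ptep_get(), huge_pte_none(), huge_pte_wrprotect(), huge_ptep_set_wrprotect(), huge_ptep_set_access_flags(). The indirection exists for architectures whose huge PTEs are not ordinary PTEs (s390 keeps them in segment-table entries); elsewhere the wrappers reduce to exactly what the old code did, roughly:

	/* sketch of the trivial definitions on architectures needing no special handling */
	static inline pte_t huge_ptep_get(pte_t *ptep)
	{
		return *ptep;
	}

	static inline int huge_pte_none(pte_t pte)
	{
		return pte_none(pte);
	}
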
@@ -762,10 +784,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 
 		spin_lock(&dst->page_table_lock);
 		spin_lock(&src->page_table_lock);
-		if (!pte_none(*src_pte)) {
+		if (!huge_pte_none(huge_ptep_get(src_pte))) {
 			if (cow)
-				ptep_set_wrprotect(src, addr, src_pte);
-			entry = *src_pte;
+				huge_ptep_set_wrprotect(src, addr, src_pte);
+			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
@@ -809,7 +831,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			continue;
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
-		if (pte_none(pte))
+		if (huge_pte_none(pte))
 			continue;
 
 		page = pte_page(pte);
@@ -873,8 +895,9 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_lock(&mm->page_table_lock);
 
 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
-	if (likely(pte_same(*ptep, pte))) {
+	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		/* Break COW */
+		huge_ptep_clear_flush(vma, address, ptep);
 		set_huge_pte_at(mm, address, ptep,
 				make_huge_pte(vma, new_page, 1));
 		/* Make the old page be freed below */
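Note: hugetlb_cow() now explicitly clears and flushes the old translation before installing the PTE for the copied page, rather than overwriting it in place, which keeps a stale huge-TLB entry from surviving the COW break and gives architectures another override point. Where nothing special is needed, the helper can simply wrap the generic flush (a sketch, matching the later asm-generic fallback):

	static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
						 unsigned long addr, pte_t *ptep)
	{
		ptep_clear_flush(vma, addr, ptep);
	}
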
@@ -942,7 +965,7 @@ retry:
 		goto backout;
 
 	ret = 0;
-	if (!pte_none(*ptep))
+	if (!huge_pte_none(huge_ptep_get(ptep)))
 		goto backout;
 
 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
@@ -984,8 +1007,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * the same page in the page cache.
 	 */
 	mutex_lock(&hugetlb_instantiation_mutex);
-	entry = *ptep;
-	if (pte_none(entry)) {
+	entry = huge_ptep_get(ptep);
+	if (huge_pte_none(entry)) {
 		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
 		mutex_unlock(&hugetlb_instantiation_mutex);
 		return ret;
@@ -995,7 +1018,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
-	if (likely(pte_same(entry, *ptep)))
+	if (likely(pte_same(entry, huge_ptep_get(ptep))))
 		if (write_access && !pte_write(entry))
 			ret = hugetlb_cow(mm, vma, address, ptep, entry);
 	spin_unlock(&mm->page_table_lock);
@@ -1025,7 +1048,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
 
-		if (!pte || pte_none(*pte) || (write && !pte_write(*pte))) {
+		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+		    (write && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
 			spin_unlock(&mm->page_table_lock);
@@ -1041,7 +1065,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 
 		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
-		page = pte_page(*pte);
+		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
 			get_page(page);
@@ -1090,7 +1114,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 			continue;
 		if (huge_pmd_unshare(mm, &address, ptep))
 			continue;
-		if (!pte_none(*ptep)) {
+		if (!huge_pte_none(huge_ptep_get(ptep))) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
 			set_huge_pte_at(mm, address, ptep, pte);