Diffstat (limited to 'mm/hugetlb.c')
-rw-r--r--	mm/hugetlb.c	78
1 file changed, 51 insertions(+), 27 deletions(-)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 51c9e2c01640..df28c1773fb2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -95,13 +95,16 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 	int nid;
 	struct page *page = NULL;
 	struct mempolicy *mpol;
+	nodemask_t *nodemask;
 	struct zonelist *zonelist = huge_zonelist(vma, address,
-					htlb_alloc_mask, &mpol);
-	struct zone **z;
+					htlb_alloc_mask, &mpol, &nodemask);
+	struct zone *zone;
+	struct zoneref *z;
 
-	for (z = zonelist->zones; *z; z++) {
-		nid = zone_to_nid(*z);
-		if (cpuset_zone_allowed_softwall(*z, htlb_alloc_mask) &&
+	for_each_zone_zonelist_nodemask(zone, z, zonelist,
+						MAX_NR_ZONES - 1, nodemask) {
+		nid = zone_to_nid(zone);
+		if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask) &&
 		    !list_empty(&hugepage_freelists[nid])) {
 			page = list_entry(hugepage_freelists[nid].next,
 					  struct page, lru);
@@ -113,7 +116,7 @@ static struct page *dequeue_huge_page_vma(struct vm_area_struct *vma,
 			break;
 		}
 	}
-	mpol_free(mpol);	/* unref if mpol !NULL */
+	mpol_cond_put(mpol);
 	return page;
 }
 
@@ -129,6 +132,7 @@ static void update_and_free_page(struct page *page)
 	}
 	set_compound_page_dtor(page, NULL);
 	set_page_refcounted(page);
+	arch_release_hugepage(page);
 	__free_pages(page, HUGETLB_PAGE_ORDER);
 }
 
@@ -198,6 +202,10 @@ static struct page *alloc_fresh_huge_page_node(int nid)
 		htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|__GFP_NOWARN,
 		HUGETLB_PAGE_ORDER);
 	if (page) {
+		if (arch_prepare_hugepage(page)) {
+			__free_pages(page, HUGETLB_PAGE_ORDER);
+			return 0;
+		}
 		set_compound_page_dtor(page, free_huge_page);
 		spin_lock(&hugetlb_lock);
 		nr_huge_pages++;
@@ -239,6 +247,11 @@ static int alloc_fresh_huge_page(void)
 		hugetlb_next_nid = next_nid;
 	} while (!page && hugetlb_next_nid != start_nid);
 
+	if (ret)
+		count_vm_event(HTLB_BUDDY_PGALLOC);
+	else
+		count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+
 	return ret;
 }
 
@@ -299,9 +312,11 @@ static struct page *alloc_buddy_huge_page(struct vm_area_struct *vma,
 		 */
 		nr_huge_pages_node[nid]++;
 		surplus_huge_pages_node[nid]++;
+		__count_vm_event(HTLB_BUDDY_PGALLOC);
 	} else {
 		nr_huge_pages--;
 		surplus_huge_pages--;
+		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
 	}
 	spin_unlock(&hugetlb_lock);
 
@@ -369,11 +384,19 @@ retry:
 	resv_huge_pages += delta;
 	ret = 0;
 free:
+	/* Free the needed pages to the hugetlb pool */
 	list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+		if ((--needed) < 0)
+			break;
 		list_del(&page->lru);
-		if ((--needed) >= 0)
-			enqueue_huge_page(page);
-		else {
+		enqueue_huge_page(page);
+	}
+
+	/* Free unnecessary surplus pages to the buddy allocator */
+	if (!list_empty(&surplus_list)) {
+		spin_unlock(&hugetlb_lock);
+		list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
+			list_del(&page->lru);
 			/*
 			 * The page has a reference count of zero already, so
 			 * call free_huge_page directly instead of using
@@ -381,10 +404,9 @@ free:
 			 * unlocked which is safe because free_huge_page takes
 			 * hugetlb_lock before deciding how to free the page.
 			 */
-			spin_unlock(&hugetlb_lock);
 			free_huge_page(page);
-			spin_lock(&hugetlb_lock);
 		}
+		spin_lock(&hugetlb_lock);
 	}
 
 	return ret;
@@ -718,7 +740,7 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
 		entry =
 		    pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
 	} else {
-		entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+		entry = huge_pte_wrprotect(mk_pte(page, vma->vm_page_prot));
 	}
 	entry = pte_mkyoung(entry);
 	entry = pte_mkhuge(entry);
@@ -731,8 +753,8 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 {
 	pte_t entry;
 
-	entry = pte_mkwrite(pte_mkdirty(*ptep));
-	if (ptep_set_access_flags(vma, address, ptep, entry, 1)) {
+	entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
+	if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1)) {
 		update_mmu_cache(vma, address, entry);
 	}
 }
@@ -762,10 +784,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
 
 		spin_lock(&dst->page_table_lock);
 		spin_lock(&src->page_table_lock);
-		if (!pte_none(*src_pte)) {
+		if (!huge_pte_none(huge_ptep_get(src_pte))) {
 			if (cow)
-				ptep_set_wrprotect(src, addr, src_pte);
-			entry = *src_pte;
+				huge_ptep_set_wrprotect(src, addr, src_pte);
+			entry = huge_ptep_get(src_pte);
 			ptepage = pte_page(entry);
 			get_page(ptepage);
 			set_huge_pte_at(dst, addr, dst_pte, entry);
@@ -809,7 +831,7 @@ void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			continue;
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
-		if (pte_none(pte))
+		if (huge_pte_none(pte))
 			continue;
 
 		page = pte_page(pte);
@@ -873,8 +895,9 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	spin_lock(&mm->page_table_lock);
 
 	ptep = huge_pte_offset(mm, address & HPAGE_MASK);
-	if (likely(pte_same(*ptep, pte))) {
+	if (likely(pte_same(huge_ptep_get(ptep), pte))) {
 		/* Break COW */
+		huge_ptep_clear_flush(vma, address, ptep);
 		set_huge_pte_at(mm, address, ptep,
 				make_huge_pte(vma, new_page, 1));
 		/* Make the old page be freed below */
@@ -942,7 +965,7 @@ retry:
 		goto backout;
 
 	ret = 0;
-	if (!pte_none(*ptep))
+	if (!huge_pte_none(huge_ptep_get(ptep)))
 		goto backout;
 
 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
@@ -984,8 +1007,8 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * the same page in the page cache.
 	 */
 	mutex_lock(&hugetlb_instantiation_mutex);
-	entry = *ptep;
-	if (pte_none(entry)) {
+	entry = huge_ptep_get(ptep);
+	if (huge_pte_none(entry)) {
 		ret = hugetlb_no_page(mm, vma, address, ptep, write_access);
 		mutex_unlock(&hugetlb_instantiation_mutex);
 		return ret;
@@ -995,7 +1018,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	spin_lock(&mm->page_table_lock);
 	/* Check for a racing update before calling hugetlb_cow */
-	if (likely(pte_same(entry, *ptep)))
+	if (likely(pte_same(entry, huge_ptep_get(ptep))))
 		if (write_access && !pte_write(entry))
 			ret = hugetlb_cow(mm, vma, address, ptep, entry);
 	spin_unlock(&mm->page_table_lock);
@@ -1025,7 +1048,8 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 */
 		pte = huge_pte_offset(mm, vaddr & HPAGE_MASK);
 
-		if (!pte || pte_none(*pte) || (write && !pte_write(*pte))) {
+		if (!pte || huge_pte_none(huge_ptep_get(pte)) ||
+		    (write && !pte_write(huge_ptep_get(pte)))) {
 			int ret;
 
 			spin_unlock(&mm->page_table_lock);
@@ -1041,7 +1065,7 @@ int follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		}
 
 		pfn_offset = (vaddr & ~HPAGE_MASK) >> PAGE_SHIFT;
-		page = pte_page(*pte);
+		page = pte_page(huge_ptep_get(pte));
 same_page:
 		if (pages) {
 			get_page(page);
@@ -1090,7 +1114,7 @@ void hugetlb_change_protection(struct vm_area_struct *vma,
 			continue;
 		if (huge_pmd_unshare(mm, &address, ptep))
 			continue;
-		if (!pte_none(*ptep)) {
+		if (!huge_pte_none(huge_ptep_get(ptep))) {
 			pte = huge_ptep_get_and_clear(mm, address, ptep);
 			pte = pte_mkhuge(pte_modify(pte, newprot));
 			set_huge_pte_at(mm, address, ptep, pte);