 fs/proc/task_mmu.c      |  73
 include/linux/huge_mm.h |  17
 mm/huge_memory.c        | 125
 3 files changed, 101 insertions(+), 114 deletions(-)
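What follows is one transformation applied over and over: each pmd walker used to open-code "take page_table_lock, test pmd_trans_huge(), test pmd_trans_splitting(), possibly wait_split_huge_page()", every copy with its own unlock paths. The patch factors that idiom into pmd_trans_huge_lock()/__pmd_trans_huge_lock(). A condensed before/after sketch of the caller pattern (mine, not part of the patch; the elided bodies stand for the per-caller work):

/* Before: open-coded in every walker. */
spin_lock(&mm->page_table_lock);
if (pmd_trans_huge(*pmd)) {
        if (pmd_trans_splitting(*pmd)) {
                spin_unlock(&mm->page_table_lock);
                wait_split_huge_page(vma->anon_vma, pmd);
        } else {
                /* ... huge-pmd work ... */
                spin_unlock(&mm->page_table_lock);
                return 0;
        }
} else {
        spin_unlock(&mm->page_table_lock);
}
/* ... fall through to the regular pte walk ... */

/* After: one call; only on 1 is the lock held, and the caller unlocks. */
if (pmd_trans_huge_lock(pmd, vma) == 1) {
        /* ... huge-pmd work ... */
        spin_unlock(&vma->vm_mm->page_table_lock);
        return 0;
}
/* ... fall through to the regular pte walk ... */

The helper returns with page_table_lock still held only in the success case, which is why every converted caller keeps exactly one spin_unlock() inside the taken branch.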
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 95264c0ef308..328843de6e9f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -394,20 +394,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma, pmd);
-		} else {
-			smaps_pte_entry(*(pte_t *)pmd, addr,
-					HPAGE_PMD_SIZE, walk);
-			spin_unlock(&walk->mm->page_table_lock);
-			mss->anonymous_thp += HPAGE_PMD_SIZE;
-			return 0;
-		}
-	} else {
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
 		spin_unlock(&walk->mm->page_table_lock);
+		mss->anonymous_thp += HPAGE_PMD_SIZE;
+		return 0;
 	}
 
 	if (pmd_trans_unstable(pmd))
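smaps only cares about the stable case, hence the "== 1" test: a return of -1 means the helper already waited for the split to finish, and 0 means the pmd does not map a thp; either way execution continues into the pte-by-pte walk below. The contract, summarized as a comment block (my summary, restating the helper's own documentation further down in this patch):

/*
 * pmd_trans_huge_lock(pmd, vma) return values, as the converted
 * callers rely on them:
 *   1  -> *pmd maps a stable thp; page_table_lock is still held and
 *         the caller must spin_unlock() it after using the huge pmd.
 *  -1  -> *pmd mapped a thp under splitting; the helper released the
 *         lock and called wait_split_huge_page(); fall through to
 *         the pte-level walk.
 *   0  -> not (or no longer) a thp; the lock is not held on return.
 */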
@@ -705,26 +696,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma, pmd);
-		} else {
-			for (; addr != end; addr += PAGE_SIZE) {
-				unsigned long offset;
-
-				offset = (addr & ~PAGEMAP_WALK_MASK) >>
-						PAGE_SHIFT;
-				pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
-				err = add_to_pagemap(addr, pfn, pm);
-				if (err)
-					break;
-			}
-			spin_unlock(&walk->mm->page_table_lock);
-			return err;
-		}
-	} else {
+
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		for (; addr != end; addr += PAGE_SIZE) {
+			unsigned long offset;
+
+			offset = (addr & ~PAGEMAP_WALK_MASK) >>
+					PAGE_SHIFT;
+			pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
+			err = add_to_pagemap(addr, pfn, pm);
+			if (err)
+				break;
+		}
 		spin_unlock(&walk->mm->page_table_lock);
+		return err;
 	}
 
 	for (; addr != end; addr += PAGE_SIZE) {
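Inside the new branch each 4 KiB sub-page of the thp still gets its own pagemap entry; the offset arithmetic extracts the sub-page index from the virtual address. A standalone userspace illustration (mine; the constants mirror x86_64 defaults, and PAGEMAP_WALK_MASK is modeled on the PMD_MASK-based definition in this file):

#include <stdio.h>

#define PAGE_SHIFT        12
#define PAGE_SIZE         (1UL << PAGE_SHIFT)
#define HPAGE_PMD_SIZE    (1UL << 21)             /* 2 MiB thp */
#define PAGEMAP_WALK_MASK (~(HPAGE_PMD_SIZE - 1)) /* pmd-sized chunks */

int main(void)
{
        unsigned long base = 0x7f0000200000UL; /* a pmd-aligned address */
        unsigned long addr;

        /* masking off the walk mask keeps only the offset inside the
         * huge page; shifting by PAGE_SHIFT turns it into a page index */
        for (addr = base; addr != base + 4 * PAGE_SIZE; addr += PAGE_SIZE) {
                unsigned long offset = (addr & ~PAGEMAP_WALK_MASK) >> PAGE_SHIFT;
                printf("addr %#lx -> sub-page %lu of the thp\n", addr, offset);
        }
        return 0;
}

For a full 2 MiB thp the kernel loop emits 512 consecutive entries this way, without ever splitting the huge page.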
@@ -992,24 +976,17 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *pte;
 
 	md = walk->private;
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(md->vma->anon_vma, pmd);
-		} else {
-			pte_t huge_pte = *(pte_t *)pmd;
-			struct page *page;
-
-			page = can_gather_numa_stats(huge_pte, md->vma, addr);
-			if (page)
-				gather_stats(page, md, pte_dirty(huge_pte),
-					     HPAGE_PMD_SIZE/PAGE_SIZE);
-			spin_unlock(&walk->mm->page_table_lock);
-			return 0;
-		}
-	} else {
+
+	if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+		pte_t huge_pte = *(pte_t *)pmd;
+		struct page *page;
+
+		page = can_gather_numa_stats(huge_pte, md->vma, addr);
+		if (page)
+			gather_stats(page, md, pte_dirty(huge_pte),
+				     HPAGE_PMD_SIZE/PAGE_SIZE);
 		spin_unlock(&walk->mm->page_table_lock);
+		return 0;
 	}
 
 	if (pmd_trans_unstable(pmd))
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1b921299abc4..f56cacb4fec3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -113,6 +113,18 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
 				    unsigned long start,
 				    unsigned long end,
 				    long adjust_next);
+extern int __pmd_trans_huge_lock(pmd_t *pmd,
+				 struct vm_area_struct *vma);
+/* mmap_sem must be held on entry */
+static inline int pmd_trans_huge_lock(pmd_t *pmd,
+				      struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+	if (pmd_trans_huge(*pmd))
+		return __pmd_trans_huge_lock(pmd, vma);
+	else
+		return 0;
+}
 static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 unsigned long start,
 					 unsigned long end,
@@ -176,6 +188,11 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 long adjust_next)
 {
 }
+static inline int pmd_trans_huge_lock(pmd_t *pmd,
+				      struct vm_area_struct *vma)
+{
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
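The header now carries the whole contract: the inline wrapper does an unlocked pmd_trans_huge() pre-check (legitimized by the mmap_sem assertion) and only enters __pmd_trans_huge_lock() when the pmd might map a thp, while the !CONFIG_TRANSPARENT_HUGEPAGE stub always reports "not huge". A compilable userspace analogy of the same lock-or-report idiom (entirely mine; the struct and names are invented for illustration):

#include <pthread.h>
#include <stdio.h>

struct region {
        pthread_mutex_t lock;
        int is_huge;            /* analogous to pmd_trans_huge() */
        int is_splitting;       /* analogous to pmd_trans_splitting() */
};

/* Returns 1 with r->lock held; returns 0 or -1 with it released. */
static int huge_lock(struct region *r)
{
        if (!r->is_huge)                /* unlocked fast check, like the */
                return 0;               /* static inline wrapper */
        pthread_mutex_lock(&r->lock);
        if (!r->is_huge) {              /* re-check under the lock */
                pthread_mutex_unlock(&r->lock);
                return 0;
        }
        if (r->is_splitting) {
                pthread_mutex_unlock(&r->lock);
                /* the kernel would wait_split_huge_page() here */
                return -1;
        }
        return 1;                       /* caller now owns r->lock */
}

int main(void)
{
        struct region r = { PTHREAD_MUTEX_INITIALIZER, 1, 0 };

        if (huge_lock(&r) == 1) {
                printf("huge and stable: operate, then unlock\n");
                pthread_mutex_unlock(&r.lock);
        }
        return 0;
}

The unlocked pre-check is safe for the same reason as in the kernel wrapper: a stale answer only costs a trip into the locked slow path, which re-checks the state before committing to anything.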
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8f7fc394f636..f0e5306eeb55 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1031,32 +1031,23 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 {
 	int ret = 0;
 
-	spin_lock(&tlb->mm->page_table_lock);
-	if (likely(pmd_trans_huge(*pmd))) {
-		if (unlikely(pmd_trans_splitting(*pmd))) {
-			spin_unlock(&tlb->mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma,
-					     pmd);
-		} else {
-			struct page *page;
-			pgtable_t pgtable;
-			pgtable = get_pmd_huge_pte(tlb->mm);
-			page = pmd_page(*pmd);
-			pmd_clear(pmd);
-			tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-			page_remove_rmap(page);
-			VM_BUG_ON(page_mapcount(page) < 0);
-			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
-			VM_BUG_ON(!PageHead(page));
-			tlb->mm->nr_ptes--;
-			spin_unlock(&tlb->mm->page_table_lock);
-			tlb_remove_page(tlb, page);
-			pte_free(tlb->mm, pgtable);
-			ret = 1;
-		}
-	} else
+	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+		struct page *page;
+		pgtable_t pgtable;
+		pgtable = get_pmd_huge_pte(tlb->mm);
+		page = pmd_page(*pmd);
+		pmd_clear(pmd);
+		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+		page_remove_rmap(page);
+		VM_BUG_ON(page_mapcount(page) < 0);
+		add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+		VM_BUG_ON(!PageHead(page));
+		tlb->mm->nr_ptes--;
 		spin_unlock(&tlb->mm->page_table_lock);
-
+		tlb_remove_page(tlb, page);
+		pte_free(tlb->mm, pgtable);
+		ret = 1;
+	}
 	return ret;
 }
 
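As before the conversion, zap_huge_pmd() keeps only cheap bookkeeping (pmd_clear(), rmap and counter updates) under page_table_lock and defers tlb_remove_page() and pte_free() until after the unlock; the refactoring preserves that ordering exactly. Reduced to a sketch (mine, using the names from the hunk above):

if (__pmd_trans_huge_lock(pmd, vma) == 1) {
        /* lock held: pointer and counter updates only */
        spin_unlock(&tlb->mm->page_table_lock);
        /* lock released: the potentially expensive frees */
        tlb_remove_page(tlb, page);
        pte_free(tlb->mm, pgtable);
        ret = 1;
}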
@@ -1066,21 +1057,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	int ret = 0;
 
-	spin_lock(&vma->vm_mm->page_table_lock);
-	if (likely(pmd_trans_huge(*pmd))) {
-		ret = !pmd_trans_splitting(*pmd);
-		spin_unlock(&vma->vm_mm->page_table_lock);
-		if (unlikely(!ret))
-			wait_split_huge_page(vma->anon_vma, pmd);
-		else {
-			/*
-			 * All logical pages in the range are present
-			 * if backed by a huge page.
-			 */
-			memset(vec, 1, (end - addr) >> PAGE_SHIFT);
-		}
-	} else
+	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+		/*
+		 * All logical pages in the range are present
+		 * if backed by a huge page.
+		 */
 		spin_unlock(&vma->vm_mm->page_table_lock);
+		memset(vec, 1, (end - addr) >> PAGE_SHIFT);
+		ret = 1;
+	}
 
 	return ret;
 }
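mincore_huge_pmd() now returns 1 exactly when the helper reports a stable thp, and in that case every 4 KiB page of the range is marked resident in a single memset. A tiny standalone check of the vector arithmetic (mine; 4 KiB pages and a full 2 MiB pmd range assumed):

#include <stdio.h>
#include <string.h>

#define PAGE_SHIFT 12

int main(void)
{
        unsigned long addr = 0, end = 1UL << 21; /* one full 2 MiB pmd */
        unsigned char vec[512];

        /* mirrors the patched line: one byte per 4 KiB page in the range */
        memset(vec, 1, (end - addr) >> PAGE_SHIFT);
        printf("%lu pages reported resident\n", (end - addr) >> PAGE_SHIFT);
        return 0;
}

This prints 512, matching the number of base pages covered by a single huge pmd.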
@@ -1110,20 +1095,11 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		goto out;
 	}
 
-	spin_lock(&mm->page_table_lock);
-	if (likely(pmd_trans_huge(*old_pmd))) {
-		if (pmd_trans_splitting(*old_pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma, old_pmd);
-			ret = -1;
-		} else {
-			pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
-			VM_BUG_ON(!pmd_none(*new_pmd));
-			set_pmd_at(mm, new_addr, new_pmd, pmd);
-			spin_unlock(&mm->page_table_lock);
-			ret = 1;
-		}
-	} else {
+	ret = __pmd_trans_huge_lock(old_pmd, vma);
+	if (ret == 1) {
+		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+		VM_BUG_ON(!pmd_none(*new_pmd));
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
 		spin_unlock(&mm->page_table_lock);
 	}
 out:
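move_huge_pmd() is the one converted caller that forwards the helper's raw tri-state result: 1 after relocating the pmd (and unlocking), -1 when a split had to be waited out, 0 when the pmd was not huge. An illustrative caller-side sketch (mine; the real consumer lives in mm/mremap.c and may differ in detail):

int err = move_huge_pmd(vma, new_vma, old_addr, new_addr,
                        old_end, old_pmd, new_pmd);
if (err == 1) {
        /* the whole huge pmd was relocated under the lock */
} else {
        /* err == -1: a split was in progress and has been waited out;
         * err == 0: the pmd was not huge. Either way the range is now
         * ordinary ptes, so fall back to moving them one by one. */
}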
@@ -1136,24 +1112,41 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	struct mm_struct *mm = vma->vm_mm;
 	int ret = 0;
 
-	spin_lock(&mm->page_table_lock);
+	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+		pmd_t entry;
+		entry = pmdp_get_and_clear(mm, addr, pmd);
+		entry = pmd_modify(entry, newprot);
+		set_pmd_at(mm, addr, pmd, entry);
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		ret = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Returns 1 if a given pmd maps a stable (not under splitting) thp.
+ * Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
+ *
+ * Note that if it returns 1, this routine returns without unlocking page
+ * table locks. So callers must unlock them.
+ */
+int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
+{
+	spin_lock(&vma->vm_mm->page_table_lock);
 	if (likely(pmd_trans_huge(*pmd))) {
 		if (unlikely(pmd_trans_splitting(*pmd))) {
-			spin_unlock(&mm->page_table_lock);
+			spin_unlock(&vma->vm_mm->page_table_lock);
 			wait_split_huge_page(vma->anon_vma, pmd);
+			return -1;
 		} else {
-			pmd_t entry;
-
-			entry = pmdp_get_and_clear(mm, addr, pmd);
-			entry = pmd_modify(entry, newprot);
-			set_pmd_at(mm, addr, pmd, entry);
-			spin_unlock(&vma->vm_mm->page_table_lock);
-			ret = 1;
+			/* Thp mapped by 'pmd' is stable, so we can
+			 * handle it as it is. */
+			return 1;
 		}
-	} else
+	}
 	spin_unlock(&vma->vm_mm->page_table_lock);
-
-	return ret;
+	return 0;
 }
 
 pmd_t *page_check_address_pmd(struct page *page,