-rw-r--r-- fs/proc/task_mmu.c      |  73
-rw-r--r-- include/linux/huge_mm.h |  17
-rw-r--r-- mm/huge_memory.c        | 125
3 files changed, 101 insertions(+), 114 deletions(-)
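Every hunk below applies the same transformation: the open-coded sequence of spin_lock(&mm->page_table_lock), pmd_trans_huge() and pmd_trans_splitting() checks, and wait_split_huge_page() is collapsed into the new pmd_trans_huge_lock() helper (callers inside mm/huge_memory.c use its out-of-line core, __pmd_trans_huge_lock(), directly). The inline wrapper tests pmd_trans_huge() locklessly first, so for its callers the common non-huge case no longer takes page_table_lock at all.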
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 95264c0ef308..328843de6e9f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -394,20 +394,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	pte_t *pte;
 	spinlock_t *ptl;
 
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma, pmd);
-		} else {
-			smaps_pte_entry(*(pte_t *)pmd, addr,
-					HPAGE_PMD_SIZE, walk);
-			spin_unlock(&walk->mm->page_table_lock);
-			mss->anonymous_thp += HPAGE_PMD_SIZE;
-			return 0;
-		}
-	} else {
-		spin_unlock(&walk->mm->page_table_lock);
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		smaps_pte_entry(*(pte_t *)pmd, addr, HPAGE_PMD_SIZE, walk);
+		spin_unlock(&walk->mm->page_table_lock);
+		mss->anonymous_thp += HPAGE_PMD_SIZE;
+		return 0;
 	}
 
 	if (pmd_trans_unstable(pmd))
@@ -705,26 +696,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma, pmd);
-		} else {
-			for (; addr != end; addr += PAGE_SIZE) {
-				unsigned long offset;
-
-				offset = (addr & ~PAGEMAP_WALK_MASK) >>
-						PAGE_SHIFT;
-				pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
-				err = add_to_pagemap(addr, pfn, pm);
-				if (err)
-					break;
-			}
-			spin_unlock(&walk->mm->page_table_lock);
-			return err;
-		}
-	} else {
-		spin_unlock(&walk->mm->page_table_lock);
+
+	if (pmd_trans_huge_lock(pmd, vma) == 1) {
+		for (; addr != end; addr += PAGE_SIZE) {
+			unsigned long offset;
+
+			offset = (addr & ~PAGEMAP_WALK_MASK) >>
+					PAGE_SHIFT;
+			pfn = thp_pmd_to_pagemap_entry(*pmd, offset);
+			err = add_to_pagemap(addr, pfn, pm);
+			if (err)
+				break;
+		}
+		spin_unlock(&walk->mm->page_table_lock);
+		return err;
 	}
 
 	for (; addr != end; addr += PAGE_SIZE) {
@@ -992,24 +976,17 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	pte_t *pte;
 
 	md = walk->private;
-	spin_lock(&walk->mm->page_table_lock);
-	if (pmd_trans_huge(*pmd)) {
-		if (pmd_trans_splitting(*pmd)) {
-			spin_unlock(&walk->mm->page_table_lock);
-			wait_split_huge_page(md->vma->anon_vma, pmd);
-		} else {
-			pte_t huge_pte = *(pte_t *)pmd;
-			struct page *page;
-
-			page = can_gather_numa_stats(huge_pte, md->vma, addr);
-			if (page)
-				gather_stats(page, md, pte_dirty(huge_pte),
-					     HPAGE_PMD_SIZE/PAGE_SIZE);
-			spin_unlock(&walk->mm->page_table_lock);
-			return 0;
-		}
-	} else {
-		spin_unlock(&walk->mm->page_table_lock);
+
+	if (pmd_trans_huge_lock(pmd, md->vma) == 1) {
+		pte_t huge_pte = *(pte_t *)pmd;
+		struct page *page;
+
+		page = can_gather_numa_stats(huge_pte, md->vma, addr);
+		if (page)
+			gather_stats(page, md, pte_dirty(huge_pte),
+				     HPAGE_PMD_SIZE/PAGE_SIZE);
+		spin_unlock(&walk->mm->page_table_lock);
+		return 0;
 	}
 
 	if (pmd_trans_unstable(pmd))
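All three walkers above now share one calling convention: a return value of 1 from pmd_trans_huge_lock() means the pmd maps a stable transparent huge page and page_table_lock is held, so the walker handles the whole PMD range in one step and must drop the lock itself; any other return value means the lock is already released and the walker falls through to the per-pte path. A minimal sketch of that pattern follows; example_pmd_walker() and both handle_*() helpers are illustrative names, not functions from this patch.

/* Sketch of the caller pattern above; the handle_*() helpers are hypothetical. */
static int example_pmd_walker(pmd_t *pmd, unsigned long addr,
			      unsigned long end, struct mm_walk *walk)
{
	struct vm_area_struct *vma = find_vma(walk->mm, addr);

	if (pmd_trans_huge_lock(pmd, vma) == 1) {
		/* Stable thp: cover the whole range under page_table_lock. */
		handle_huge_pmd(pmd, addr, HPAGE_PMD_SIZE, walk);
		spin_unlock(&walk->mm->page_table_lock);  /* caller unlocks */
		return 0;
	}
	/* Returned 0 or -1: the lock is already dropped; scan the ptes. */
	return handle_pte_range(pmd, addr, end, walk);
}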
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 1b921299abc4..f56cacb4fec3 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -113,6 +113,18 @@ extern void __vma_adjust_trans_huge(struct vm_area_struct *vma,
 				    unsigned long start,
 				    unsigned long end,
 				    long adjust_next);
+extern int __pmd_trans_huge_lock(pmd_t *pmd,
+				 struct vm_area_struct *vma);
+/* mmap_sem must be held on entry */
+static inline int pmd_trans_huge_lock(pmd_t *pmd,
+				      struct vm_area_struct *vma)
+{
+	VM_BUG_ON(!rwsem_is_locked(&vma->vm_mm->mmap_sem));
+	if (pmd_trans_huge(*pmd))
+		return __pmd_trans_huge_lock(pmd, vma);
+	else
+		return 0;
+}
 static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 unsigned long start,
 					 unsigned long end,
@@ -176,6 +188,11 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma,
 					 long adjust_next)
 {
 }
+static inline int pmd_trans_huge_lock(pmd_t *pmd,
+				      struct vm_area_struct *vma)
+{
+	return 0;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_HUGE_MM_H */
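The VM_BUG_ON in the wrapper encodes the locking contract: pmd_trans_huge_lock() may only be called with mmap_sem held, which is what keeps the unlocked pmd_trans_huge() test from racing with a huge pmd being installed underneath (collapse into a huge pmd happens under mmap_sem held for write; splitting can still race, hence the recheck under page_table_lock in the out-of-line core). A sketch of a conforming call site, with the surrounding function and the pmd lookup helper invented for illustration:

/* Sketch (not from the patch): a call site honouring the contract. */
static void example_inspect(struct mm_struct *mm, unsigned long addr)
{
	struct vm_area_struct *vma;
	pmd_t *pmd;

	down_read(&mm->mmap_sem);	/* required, or the VM_BUG_ON fires */
	vma = find_vma(mm, addr);
	pmd = example_pmd_lookup(mm, addr);	/* hypothetical helper */
	if (vma && pmd && pmd_trans_huge_lock(pmd, vma) == 1) {
		/* pmd maps a stable thp; page_table_lock is held here. */
		spin_unlock(&mm->page_table_lock);
	}
	up_read(&mm->mmap_sem);
}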
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8f7fc394f636..f0e5306eeb55 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1031,32 +1031,23 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 {
 	int ret = 0;
 
-	spin_lock(&tlb->mm->page_table_lock);
-	if (likely(pmd_trans_huge(*pmd))) {
-		if (unlikely(pmd_trans_splitting(*pmd))) {
-			spin_unlock(&tlb->mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma,
-					     pmd);
-		} else {
-			struct page *page;
-			pgtable_t pgtable;
-			pgtable = get_pmd_huge_pte(tlb->mm);
-			page = pmd_page(*pmd);
-			pmd_clear(pmd);
-			tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
-			page_remove_rmap(page);
-			VM_BUG_ON(page_mapcount(page) < 0);
-			add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
-			VM_BUG_ON(!PageHead(page));
-			tlb->mm->nr_ptes--;
-			spin_unlock(&tlb->mm->page_table_lock);
-			tlb_remove_page(tlb, page);
-			pte_free(tlb->mm, pgtable);
-			ret = 1;
-		}
-	} else
-		spin_unlock(&tlb->mm->page_table_lock);
-
+	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+		struct page *page;
+		pgtable_t pgtable;
+		pgtable = get_pmd_huge_pte(tlb->mm);
+		page = pmd_page(*pmd);
+		pmd_clear(pmd);
+		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+		page_remove_rmap(page);
+		VM_BUG_ON(page_mapcount(page) < 0);
+		add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR);
+		VM_BUG_ON(!PageHead(page));
+		tlb->mm->nr_ptes--;
+		spin_unlock(&tlb->mm->page_table_lock);
+		tlb_remove_page(tlb, page);
+		pte_free(tlb->mm, pgtable);
+		ret = 1;
+	}
 	return ret;
 }
 
@@ -1066,21 +1057,15 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 {
 	int ret = 0;
 
-	spin_lock(&vma->vm_mm->page_table_lock);
-	if (likely(pmd_trans_huge(*pmd))) {
-		ret = !pmd_trans_splitting(*pmd);
-		spin_unlock(&vma->vm_mm->page_table_lock);
-		if (unlikely(!ret))
-			wait_split_huge_page(vma->anon_vma, pmd);
-		else {
-			/*
-			 * All logical pages in the range are present
-			 * if backed by a huge page.
-			 */
-			memset(vec, 1, (end - addr) >> PAGE_SHIFT);
-		}
-	} else
-		spin_unlock(&vma->vm_mm->page_table_lock);
+	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+		/*
+		 * All logical pages in the range are present
+		 * if backed by a huge page.
+		 */
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		memset(vec, 1, (end - addr) >> PAGE_SHIFT);
+		ret = 1;
+	}
 
 	return ret;
 }
@@ -1110,20 +1095,11 @@ int move_huge_pmd(struct vm_area_struct *vma, struct vm_area_struct *new_vma,
 		goto out;
 	}
 
-	spin_lock(&mm->page_table_lock);
-	if (likely(pmd_trans_huge(*old_pmd))) {
-		if (pmd_trans_splitting(*old_pmd)) {
-			spin_unlock(&mm->page_table_lock);
-			wait_split_huge_page(vma->anon_vma, old_pmd);
-			ret = -1;
-		} else {
-			pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
-			VM_BUG_ON(!pmd_none(*new_pmd));
-			set_pmd_at(mm, new_addr, new_pmd, pmd);
-			spin_unlock(&mm->page_table_lock);
-			ret = 1;
-		}
-	} else {
+	ret = __pmd_trans_huge_lock(old_pmd, vma);
+	if (ret == 1) {
+		pmd = pmdp_get_and_clear(mm, old_addr, old_pmd);
+		VM_BUG_ON(!pmd_none(*new_pmd));
+		set_pmd_at(mm, new_addr, new_pmd, pmd);
 		spin_unlock(&mm->page_table_lock);
 	}
 out:
@@ -1136,24 +1112,41 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
 	struct mm_struct *mm = vma->vm_mm;
 	int ret = 0;
 
-	spin_lock(&mm->page_table_lock);
+	if (__pmd_trans_huge_lock(pmd, vma) == 1) {
+		pmd_t entry;
+		entry = pmdp_get_and_clear(mm, addr, pmd);
+		entry = pmd_modify(entry, newprot);
+		set_pmd_at(mm, addr, pmd, entry);
+		spin_unlock(&vma->vm_mm->page_table_lock);
+		ret = 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Returns 1 if a given pmd maps a stable (not under splitting) thp.
+ * Returns -1 if it maps a thp under splitting. Returns 0 otherwise.
+ *
+ * Note that if it returns 1, this routine returns without unlocking page
+ * table locks. So callers must unlock them.
+ */
+int __pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma)
+{
+	spin_lock(&vma->vm_mm->page_table_lock);
 	if (likely(pmd_trans_huge(*pmd))) {
 		if (unlikely(pmd_trans_splitting(*pmd))) {
-			spin_unlock(&mm->page_table_lock);
+			spin_unlock(&vma->vm_mm->page_table_lock);
 			wait_split_huge_page(vma->anon_vma, pmd);
+			return -1;
 		} else {
-			pmd_t entry;
-
-			entry = pmdp_get_and_clear(mm, addr, pmd);
-			entry = pmd_modify(entry, newprot);
-			set_pmd_at(mm, addr, pmd, entry);
-			spin_unlock(&vma->vm_mm->page_table_lock);
-			ret = 1;
+			/* Thp mapped by 'pmd' is stable, so we can
+			 * handle it as it is. */
+			return 1;
 		}
-	} else
-		spin_unlock(&vma->vm_mm->page_table_lock);
-
-	return ret;
+	}
+	spin_unlock(&vma->vm_mm->page_table_lock);
+	return 0;
 }
 
 pmd_t *page_check_address_pmd(struct page *page,
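The comment above gives __pmd_trans_huge_lock() a three-way contract: 1 means a stable thp with page_table_lock still held, -1 means the helper found a thp under splitting, dropped the lock, and waited for the split to finish, and 0 means no thp at all. move_huge_pmd() is the only caller in this patch that keeps the raw value; the others just compare against 1. A sketch of a caller distinguishing all three outcomes, with the function itself invented for illustration:

/* Sketch: acting on each return value; example_probe() is hypothetical. */
static int example_probe(struct vm_area_struct *vma, pmd_t *pmd)
{
	int ret = __pmd_trans_huge_lock(pmd, vma);

	if (ret == 1) {
		/* Stable thp: we own page_table_lock and must release it. */
		spin_unlock(&vma->vm_mm->page_table_lock);
	} else if (ret == -1) {
		/* Was splitting: the helper waited and dropped the lock;
		 * the pmd now points to regular ptes and can be rescanned. */
	}
	/* ret == 0: not huge; the helper dropped the lock itself. */
	return ret;
}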