Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r-- | mm/huge_memory.c | 441
1 file changed, 211 insertions(+), 230 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 141dbb695097..40f17c34b415 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/khugepaged.h> | 17 | #include <linux/khugepaged.h> |
18 | #include <linux/freezer.h> | 18 | #include <linux/freezer.h> |
19 | #include <linux/mman.h> | 19 | #include <linux/mman.h> |
20 | #include <linux/pagemap.h> | ||
20 | #include <asm/tlb.h> | 21 | #include <asm/tlb.h> |
21 | #include <asm/pgalloc.h> | 22 | #include <asm/pgalloc.h> |
22 | #include "internal.h" | 23 | #include "internal.h" |
@@ -102,10 +103,7 @@ static int set_recommended_min_free_kbytes(void) | |||
102 | unsigned long recommended_min; | 103 | unsigned long recommended_min; |
103 | extern int min_free_kbytes; | 104 | extern int min_free_kbytes; |
104 | 105 | ||
105 | if (!test_bit(TRANSPARENT_HUGEPAGE_FLAG, | 106 | if (!khugepaged_enabled()) |
106 | &transparent_hugepage_flags) && | ||
107 | !test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, | ||
108 | &transparent_hugepage_flags)) | ||
109 | return 0; | 107 | return 0; |
110 | 108 | ||
111 | for_each_populated_zone(zone) | 109 | for_each_populated_zone(zone) |
@@ -139,12 +137,6 @@ static int start_khugepaged(void) | |||
139 | { | 137 | { |
140 | int err = 0; | 138 | int err = 0; |
141 | if (khugepaged_enabled()) { | 139 | if (khugepaged_enabled()) { |
142 | int wakeup; | ||
143 | if (unlikely(!mm_slot_cache || !mm_slots_hash)) { | ||
144 | err = -ENOMEM; | ||
145 | goto out; | ||
146 | } | ||
147 | mutex_lock(&khugepaged_mutex); | ||
148 | if (!khugepaged_thread) | 140 | if (!khugepaged_thread) |
149 | khugepaged_thread = kthread_run(khugepaged, NULL, | 141 | khugepaged_thread = kthread_run(khugepaged, NULL, |
150 | "khugepaged"); | 142 | "khugepaged"); |
@@ -154,16 +146,16 @@ static int start_khugepaged(void) | |||
154 | err = PTR_ERR(khugepaged_thread); | 146 | err = PTR_ERR(khugepaged_thread); |
155 | khugepaged_thread = NULL; | 147 | khugepaged_thread = NULL; |
156 | } | 148 | } |
157 | wakeup = !list_empty(&khugepaged_scan.mm_head); | 149 | |
158 | mutex_unlock(&khugepaged_mutex); | 150 | if (!list_empty(&khugepaged_scan.mm_head)) |
159 | if (wakeup) | ||
160 | wake_up_interruptible(&khugepaged_wait); | 151 | wake_up_interruptible(&khugepaged_wait); |
161 | 152 | ||
162 | set_recommended_min_free_kbytes(); | 153 | set_recommended_min_free_kbytes(); |
163 | } else | 154 | } else if (khugepaged_thread) { |
164 | /* wakeup to exit */ | 155 | kthread_stop(khugepaged_thread); |
165 | wake_up_interruptible(&khugepaged_wait); | 156 | khugepaged_thread = NULL; |
166 | out: | 157 | } |
158 | |||
167 | return err; | 159 | return err; |
168 | } | 160 | } |
169 | 161 | ||
@@ -224,18 +216,16 @@ static ssize_t enabled_store(struct kobject *kobj, | |||
224 | TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG); | 216 | TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG); |
225 | 217 | ||
226 | if (ret > 0) { | 218 | if (ret > 0) { |
227 | int err = start_khugepaged(); | 219 | int err; |
220 | |||
221 | mutex_lock(&khugepaged_mutex); | ||
222 | err = start_khugepaged(); | ||
223 | mutex_unlock(&khugepaged_mutex); | ||
224 | |||
228 | if (err) | 225 | if (err) |
229 | ret = err; | 226 | ret = err; |
230 | } | 227 | } |
231 | 228 | ||
232 | if (ret > 0 && | ||
233 | (test_bit(TRANSPARENT_HUGEPAGE_FLAG, | ||
234 | &transparent_hugepage_flags) || | ||
235 | test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, | ||
236 | &transparent_hugepage_flags))) | ||
237 | set_recommended_min_free_kbytes(); | ||
238 | |||
239 | return ret; | 229 | return ret; |
240 | } | 230 | } |
241 | static struct kobj_attribute enabled_attr = | 231 | static struct kobj_attribute enabled_attr = |
@@ -570,8 +560,6 @@ static int __init hugepage_init(void) | |||
570 | 560 | ||
571 | start_khugepaged(); | 561 | start_khugepaged(); |
572 | 562 | ||
573 | set_recommended_min_free_kbytes(); | ||
574 | |||
575 | return 0; | 563 | return 0; |
576 | out: | 564 | out: |
577 | hugepage_exit_sysfs(hugepage_kobj); | 565 | hugepage_exit_sysfs(hugepage_kobj); |
@@ -611,19 +599,6 @@ out: | |||
611 | } | 599 | } |
612 | __setup("transparent_hugepage=", setup_transparent_hugepage); | 600 | __setup("transparent_hugepage=", setup_transparent_hugepage); |
613 | 601 | ||
614 | static void prepare_pmd_huge_pte(pgtable_t pgtable, | ||
615 | struct mm_struct *mm) | ||
616 | { | ||
617 | assert_spin_locked(&mm->page_table_lock); | ||
618 | |||
619 | /* FIFO */ | ||
620 | if (!mm->pmd_huge_pte) | ||
621 | INIT_LIST_HEAD(&pgtable->lru); | ||
622 | else | ||
623 | list_add(&pgtable->lru, &mm->pmd_huge_pte->lru); | ||
624 | mm->pmd_huge_pte = pgtable; | ||
625 | } | ||
626 | |||
627 | static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) | 602 | static inline pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma) |
628 | { | 603 | { |
629 | if (likely(vma->vm_flags & VM_WRITE)) | 604 | if (likely(vma->vm_flags & VM_WRITE)) |
@@ -665,7 +640,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, | |||
665 | */ | 640 | */ |
666 | page_add_new_anon_rmap(page, vma, haddr); | 641 | page_add_new_anon_rmap(page, vma, haddr); |
667 | set_pmd_at(mm, haddr, pmd, entry); | 642 | set_pmd_at(mm, haddr, pmd, entry); |
668 | prepare_pmd_huge_pte(pgtable, mm); | 643 | pgtable_trans_huge_deposit(mm, pgtable); |
669 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); | 644 | add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); |
670 | mm->nr_ptes++; | 645 | mm->nr_ptes++; |
671 | spin_unlock(&mm->page_table_lock); | 646 | spin_unlock(&mm->page_table_lock); |
@@ -791,7 +766,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
791 | pmdp_set_wrprotect(src_mm, addr, src_pmd); | 766 | pmdp_set_wrprotect(src_mm, addr, src_pmd); |
792 | pmd = pmd_mkold(pmd_wrprotect(pmd)); | 767 | pmd = pmd_mkold(pmd_wrprotect(pmd)); |
793 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); | 768 | set_pmd_at(dst_mm, addr, dst_pmd, pmd); |
794 | prepare_pmd_huge_pte(pgtable, dst_mm); | 769 | pgtable_trans_huge_deposit(dst_mm, pgtable); |
795 | dst_mm->nr_ptes++; | 770 | dst_mm->nr_ptes++; |
796 | 771 | ||
797 | ret = 0; | 772 | ret = 0; |
@@ -802,25 +777,6 @@ out: | |||
802 | return ret; | 777 | return ret; |
803 | } | 778 | } |
804 | 779 | ||
805 | /* no "address" argument so destroys page coloring of some arch */ | ||
806 | pgtable_t get_pmd_huge_pte(struct mm_struct *mm) | ||
807 | { | ||
808 | pgtable_t pgtable; | ||
809 | |||
810 | assert_spin_locked(&mm->page_table_lock); | ||
811 | |||
812 | /* FIFO */ | ||
813 | pgtable = mm->pmd_huge_pte; | ||
814 | if (list_empty(&pgtable->lru)) | ||
815 | mm->pmd_huge_pte = NULL; | ||
816 | else { | ||
817 | mm->pmd_huge_pte = list_entry(pgtable->lru.next, | ||
818 | struct page, lru); | ||
819 | list_del(&pgtable->lru); | ||
820 | } | ||
821 | return pgtable; | ||
822 | } | ||
823 | |||
824 | static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | 780 | static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, |
825 | struct vm_area_struct *vma, | 781 | struct vm_area_struct *vma, |
826 | unsigned long address, | 782 | unsigned long address, |
@@ -832,6 +788,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
832 | pmd_t _pmd; | 788 | pmd_t _pmd; |
833 | int ret = 0, i; | 789 | int ret = 0, i; |
834 | struct page **pages; | 790 | struct page **pages; |
791 | unsigned long mmun_start; /* For mmu_notifiers */ | ||
792 | unsigned long mmun_end; /* For mmu_notifiers */ | ||
835 | 793 | ||
836 | pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR, | 794 | pages = kmalloc(sizeof(struct page *) * HPAGE_PMD_NR, |
837 | GFP_KERNEL); | 795 | GFP_KERNEL); |
@@ -868,15 +826,19 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
868 | cond_resched(); | 826 | cond_resched(); |
869 | } | 827 | } |
870 | 828 | ||
829 | mmun_start = haddr; | ||
830 | mmun_end = haddr + HPAGE_PMD_SIZE; | ||
831 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
832 | |||
871 | spin_lock(&mm->page_table_lock); | 833 | spin_lock(&mm->page_table_lock); |
872 | if (unlikely(!pmd_same(*pmd, orig_pmd))) | 834 | if (unlikely(!pmd_same(*pmd, orig_pmd))) |
873 | goto out_free_pages; | 835 | goto out_free_pages; |
874 | VM_BUG_ON(!PageHead(page)); | 836 | VM_BUG_ON(!PageHead(page)); |
875 | 837 | ||
876 | pmdp_clear_flush_notify(vma, haddr, pmd); | 838 | pmdp_clear_flush(vma, haddr, pmd); |
877 | /* leave pmd empty until pte is filled */ | 839 | /* leave pmd empty until pte is filled */ |
878 | 840 | ||
879 | pgtable = get_pmd_huge_pte(mm); | 841 | pgtable = pgtable_trans_huge_withdraw(mm); |
880 | pmd_populate(mm, &_pmd, pgtable); | 842 | pmd_populate(mm, &_pmd, pgtable); |
881 | 843 | ||
882 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { | 844 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { |
@@ -896,6 +858,8 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, | |||
896 | page_remove_rmap(page); | 858 | page_remove_rmap(page); |
897 | spin_unlock(&mm->page_table_lock); | 859 | spin_unlock(&mm->page_table_lock); |
898 | 860 | ||
861 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
862 | |||
899 | ret |= VM_FAULT_WRITE; | 863 | ret |= VM_FAULT_WRITE; |
900 | put_page(page); | 864 | put_page(page); |
901 | 865 | ||
@@ -904,6 +868,7 @@ out: | |||
904 | 868 | ||
905 | out_free_pages: | 869 | out_free_pages: |
906 | spin_unlock(&mm->page_table_lock); | 870 | spin_unlock(&mm->page_table_lock); |
871 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
907 | mem_cgroup_uncharge_start(); | 872 | mem_cgroup_uncharge_start(); |
908 | for (i = 0; i < HPAGE_PMD_NR; i++) { | 873 | for (i = 0; i < HPAGE_PMD_NR; i++) { |
909 | mem_cgroup_uncharge_page(pages[i]); | 874 | mem_cgroup_uncharge_page(pages[i]); |
@@ -920,6 +885,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
920 | int ret = 0; | 885 | int ret = 0; |
921 | struct page *page, *new_page; | 886 | struct page *page, *new_page; |
922 | unsigned long haddr; | 887 | unsigned long haddr; |
888 | unsigned long mmun_start; /* For mmu_notifiers */ | ||
889 | unsigned long mmun_end; /* For mmu_notifiers */ | ||
923 | 890 | ||
924 | VM_BUG_ON(!vma->anon_vma); | 891 | VM_BUG_ON(!vma->anon_vma); |
925 | spin_lock(&mm->page_table_lock); | 892 | spin_lock(&mm->page_table_lock); |
@@ -934,7 +901,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
934 | entry = pmd_mkyoung(orig_pmd); | 901 | entry = pmd_mkyoung(orig_pmd); |
935 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); | 902 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); |
936 | if (pmdp_set_access_flags(vma, haddr, pmd, entry, 1)) | 903 | if (pmdp_set_access_flags(vma, haddr, pmd, entry, 1)) |
937 | update_mmu_cache(vma, address, entry); | 904 | update_mmu_cache_pmd(vma, address, pmd); |
938 | ret |= VM_FAULT_WRITE; | 905 | ret |= VM_FAULT_WRITE; |
939 | goto out_unlock; | 906 | goto out_unlock; |
940 | } | 907 | } |
@@ -970,38 +937,47 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma, | |||
970 | copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); | 937 | copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR); |
971 | __SetPageUptodate(new_page); | 938 | __SetPageUptodate(new_page); |
972 | 939 | ||
940 | mmun_start = haddr; | ||
941 | mmun_end = haddr + HPAGE_PMD_SIZE; | ||
942 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
943 | |||
973 | spin_lock(&mm->page_table_lock); | 944 | spin_lock(&mm->page_table_lock); |
974 | put_page(page); | 945 | put_page(page); |
975 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { | 946 | if (unlikely(!pmd_same(*pmd, orig_pmd))) { |
976 | spin_unlock(&mm->page_table_lock); | 947 | spin_unlock(&mm->page_table_lock); |
977 | mem_cgroup_uncharge_page(new_page); | 948 | mem_cgroup_uncharge_page(new_page); |
978 | put_page(new_page); | 949 | put_page(new_page); |
979 | goto out; | 950 | goto out_mn; |
980 | } else { | 951 | } else { |
981 | pmd_t entry; | 952 | pmd_t entry; |
982 | VM_BUG_ON(!PageHead(page)); | 953 | VM_BUG_ON(!PageHead(page)); |
983 | entry = mk_pmd(new_page, vma->vm_page_prot); | 954 | entry = mk_pmd(new_page, vma->vm_page_prot); |
984 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); | 955 | entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); |
985 | entry = pmd_mkhuge(entry); | 956 | entry = pmd_mkhuge(entry); |
986 | pmdp_clear_flush_notify(vma, haddr, pmd); | 957 | pmdp_clear_flush(vma, haddr, pmd); |
987 | page_add_new_anon_rmap(new_page, vma, haddr); | 958 | page_add_new_anon_rmap(new_page, vma, haddr); |
988 | set_pmd_at(mm, haddr, pmd, entry); | 959 | set_pmd_at(mm, haddr, pmd, entry); |
989 | update_mmu_cache(vma, address, entry); | 960 | update_mmu_cache_pmd(vma, address, pmd); |
990 | page_remove_rmap(page); | 961 | page_remove_rmap(page); |
991 | put_page(page); | 962 | put_page(page); |
992 | ret |= VM_FAULT_WRITE; | 963 | ret |= VM_FAULT_WRITE; |
993 | } | 964 | } |
994 | out_unlock: | ||
995 | spin_unlock(&mm->page_table_lock); | 965 | spin_unlock(&mm->page_table_lock); |
966 | out_mn: | ||
967 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
996 | out: | 968 | out: |
997 | return ret; | 969 | return ret; |
970 | out_unlock: | ||
971 | spin_unlock(&mm->page_table_lock); | ||
972 | return ret; | ||
998 | } | 973 | } |
999 | 974 | ||
1000 | struct page *follow_trans_huge_pmd(struct mm_struct *mm, | 975 | struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, |
1001 | unsigned long addr, | 976 | unsigned long addr, |
1002 | pmd_t *pmd, | 977 | pmd_t *pmd, |
1003 | unsigned int flags) | 978 | unsigned int flags) |
1004 | { | 979 | { |
980 | struct mm_struct *mm = vma->vm_mm; | ||
1005 | struct page *page = NULL; | 981 | struct page *page = NULL; |
1006 | 982 | ||
1007 | assert_spin_locked(&mm->page_table_lock); | 983 | assert_spin_locked(&mm->page_table_lock); |
@@ -1024,6 +1000,14 @@ struct page *follow_trans_huge_pmd(struct mm_struct *mm, | |||
1024 | _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); | 1000 | _pmd = pmd_mkyoung(pmd_mkdirty(*pmd)); |
1025 | set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmd, _pmd); | 1001 | set_pmd_at(mm, addr & HPAGE_PMD_MASK, pmd, _pmd); |
1026 | } | 1002 | } |
1003 | if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) { | ||
1004 | if (page->mapping && trylock_page(page)) { | ||
1005 | lru_add_drain(); | ||
1006 | if (page->mapping) | ||
1007 | mlock_vma_page(page); | ||
1008 | unlock_page(page); | ||
1009 | } | ||
1010 | } | ||
1027 | page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; | 1011 | page += (addr & ~HPAGE_PMD_MASK) >> PAGE_SHIFT; |
1028 | VM_BUG_ON(!PageCompound(page)); | 1012 | VM_BUG_ON(!PageCompound(page)); |
1029 | if (flags & FOLL_GET) | 1013 | if (flags & FOLL_GET) |
@@ -1041,9 +1025,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, | |||
1041 | if (__pmd_trans_huge_lock(pmd, vma) == 1) { | 1025 | if (__pmd_trans_huge_lock(pmd, vma) == 1) { |
1042 | struct page *page; | 1026 | struct page *page; |
1043 | pgtable_t pgtable; | 1027 | pgtable_t pgtable; |
1044 | pgtable = get_pmd_huge_pte(tlb->mm); | 1028 | pmd_t orig_pmd; |
1045 | page = pmd_page(*pmd); | 1029 | pgtable = pgtable_trans_huge_withdraw(tlb->mm); |
1046 | pmd_clear(pmd); | 1030 | orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd); |
1031 | page = pmd_page(orig_pmd); | ||
1047 | tlb_remove_pmd_tlb_entry(tlb, pmd, addr); | 1032 | tlb_remove_pmd_tlb_entry(tlb, pmd, addr); |
1048 | page_remove_rmap(page); | 1033 | page_remove_rmap(page); |
1049 | VM_BUG_ON(page_mapcount(page) < 0); | 1034 | VM_BUG_ON(page_mapcount(page) < 0); |
@@ -1207,7 +1192,11 @@ static int __split_huge_page_splitting(struct page *page, | |||
1207 | struct mm_struct *mm = vma->vm_mm; | 1192 | struct mm_struct *mm = vma->vm_mm; |
1208 | pmd_t *pmd; | 1193 | pmd_t *pmd; |
1209 | int ret = 0; | 1194 | int ret = 0; |
1195 | /* For mmu_notifiers */ | ||
1196 | const unsigned long mmun_start = address; | ||
1197 | const unsigned long mmun_end = address + HPAGE_PMD_SIZE; | ||
1210 | 1198 | ||
1199 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
1211 | spin_lock(&mm->page_table_lock); | 1200 | spin_lock(&mm->page_table_lock); |
1212 | pmd = page_check_address_pmd(page, mm, address, | 1201 | pmd = page_check_address_pmd(page, mm, address, |
1213 | PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG); | 1202 | PAGE_CHECK_ADDRESS_PMD_NOTSPLITTING_FLAG); |
@@ -1219,10 +1208,11 @@ static int __split_huge_page_splitting(struct page *page, | |||
1219 | * and it won't wait on the anon_vma->root->mutex to | 1208 | * and it won't wait on the anon_vma->root->mutex to |
1220 | * serialize against split_huge_page*. | 1209 | * serialize against split_huge_page*. |
1221 | */ | 1210 | */ |
1222 | pmdp_splitting_flush_notify(vma, address, pmd); | 1211 | pmdp_splitting_flush(vma, address, pmd); |
1223 | ret = 1; | 1212 | ret = 1; |
1224 | } | 1213 | } |
1225 | spin_unlock(&mm->page_table_lock); | 1214 | spin_unlock(&mm->page_table_lock); |
1215 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
1226 | 1216 | ||
1227 | return ret; | 1217 | return ret; |
1228 | } | 1218 | } |
@@ -1358,11 +1348,11 @@ static int __split_huge_page_map(struct page *page, | |||
1358 | pmd = page_check_address_pmd(page, mm, address, | 1348 | pmd = page_check_address_pmd(page, mm, address, |
1359 | PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG); | 1349 | PAGE_CHECK_ADDRESS_PMD_SPLITTING_FLAG); |
1360 | if (pmd) { | 1350 | if (pmd) { |
1361 | pgtable = get_pmd_huge_pte(mm); | 1351 | pgtable = pgtable_trans_huge_withdraw(mm); |
1362 | pmd_populate(mm, &_pmd, pgtable); | 1352 | pmd_populate(mm, &_pmd, pgtable); |
1363 | 1353 | ||
1364 | for (i = 0, haddr = address; i < HPAGE_PMD_NR; | 1354 | haddr = address; |
1365 | i++, haddr += PAGE_SIZE) { | 1355 | for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) { |
1366 | pte_t *pte, entry; | 1356 | pte_t *pte, entry; |
1367 | BUG_ON(PageCompound(page+i)); | 1357 | BUG_ON(PageCompound(page+i)); |
1368 | entry = mk_pte(page + i, vma->vm_page_prot); | 1358 | entry = mk_pte(page + i, vma->vm_page_prot); |
@@ -1406,8 +1396,7 @@ static int __split_huge_page_map(struct page *page, | |||
1406 | * SMP TLB and finally we write the non-huge version | 1396 | * SMP TLB and finally we write the non-huge version |
1407 | * of the pmd entry with pmd_populate. | 1397 | * of the pmd entry with pmd_populate. |
1408 | */ | 1398 | */ |
1409 | set_pmd_at(mm, address, pmd, pmd_mknotpresent(*pmd)); | 1399 | pmdp_invalidate(vma, address, pmd); |
1410 | flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); | ||
1411 | pmd_populate(mm, pmd, pgtable); | 1400 | pmd_populate(mm, pmd, pgtable); |
1412 | ret = 1; | 1401 | ret = 1; |
1413 | } | 1402 | } |
@@ -1421,18 +1410,17 @@ static void __split_huge_page(struct page *page, | |||
1421 | struct anon_vma *anon_vma) | 1410 | struct anon_vma *anon_vma) |
1422 | { | 1411 | { |
1423 | int mapcount, mapcount2; | 1412 | int mapcount, mapcount2; |
1413 | pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); | ||
1424 | struct anon_vma_chain *avc; | 1414 | struct anon_vma_chain *avc; |
1425 | 1415 | ||
1426 | BUG_ON(!PageHead(page)); | 1416 | BUG_ON(!PageHead(page)); |
1427 | BUG_ON(PageTail(page)); | 1417 | BUG_ON(PageTail(page)); |
1428 | 1418 | ||
1429 | mapcount = 0; | 1419 | mapcount = 0; |
1430 | list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { | 1420 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { |
1431 | struct vm_area_struct *vma = avc->vma; | 1421 | struct vm_area_struct *vma = avc->vma; |
1432 | unsigned long addr = vma_address(page, vma); | 1422 | unsigned long addr = vma_address(page, vma); |
1433 | BUG_ON(is_vma_temporary_stack(vma)); | 1423 | BUG_ON(is_vma_temporary_stack(vma)); |
1434 | if (addr == -EFAULT) | ||
1435 | continue; | ||
1436 | mapcount += __split_huge_page_splitting(page, vma, addr); | 1424 | mapcount += __split_huge_page_splitting(page, vma, addr); |
1437 | } | 1425 | } |
1438 | /* | 1426 | /* |
@@ -1453,12 +1441,10 @@ static void __split_huge_page(struct page *page, | |||
1453 | __split_huge_page_refcount(page); | 1441 | __split_huge_page_refcount(page); |
1454 | 1442 | ||
1455 | mapcount2 = 0; | 1443 | mapcount2 = 0; |
1456 | list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { | 1444 | anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff, pgoff) { |
1457 | struct vm_area_struct *vma = avc->vma; | 1445 | struct vm_area_struct *vma = avc->vma; |
1458 | unsigned long addr = vma_address(page, vma); | 1446 | unsigned long addr = vma_address(page, vma); |
1459 | BUG_ON(is_vma_temporary_stack(vma)); | 1447 | BUG_ON(is_vma_temporary_stack(vma)); |
1460 | if (addr == -EFAULT) | ||
1461 | continue; | ||
1462 | mapcount2 += __split_huge_page_map(page, vma, addr); | 1448 | mapcount2 += __split_huge_page_map(page, vma, addr); |
1463 | } | 1449 | } |
1464 | if (mapcount != mapcount2) | 1450 | if (mapcount != mapcount2) |
@@ -1491,12 +1477,13 @@ out: | |||
1491 | return ret; | 1477 | return ret; |
1492 | } | 1478 | } |
1493 | 1479 | ||
1494 | #define VM_NO_THP (VM_SPECIAL|VM_INSERTPAGE|VM_MIXEDMAP|VM_SAO| \ | 1480 | #define VM_NO_THP (VM_SPECIAL|VM_MIXEDMAP|VM_HUGETLB|VM_SHARED|VM_MAYSHARE) |
1495 | VM_HUGETLB|VM_SHARED|VM_MAYSHARE) | ||
1496 | 1481 | ||
1497 | int hugepage_madvise(struct vm_area_struct *vma, | 1482 | int hugepage_madvise(struct vm_area_struct *vma, |
1498 | unsigned long *vm_flags, int advice) | 1483 | unsigned long *vm_flags, int advice) |
1499 | { | 1484 | { |
1485 | struct mm_struct *mm = vma->vm_mm; | ||
1486 | |||
1500 | switch (advice) { | 1487 | switch (advice) { |
1501 | case MADV_HUGEPAGE: | 1488 | case MADV_HUGEPAGE: |
1502 | /* | 1489 | /* |
@@ -1504,6 +1491,8 @@ int hugepage_madvise(struct vm_area_struct *vma, | |||
1504 | */ | 1491 | */ |
1505 | if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) | 1492 | if (*vm_flags & (VM_HUGEPAGE | VM_NO_THP)) |
1506 | return -EINVAL; | 1493 | return -EINVAL; |
1494 | if (mm->def_flags & VM_NOHUGEPAGE) | ||
1495 | return -EINVAL; | ||
1507 | *vm_flags &= ~VM_NOHUGEPAGE; | 1496 | *vm_flags &= ~VM_NOHUGEPAGE; |
1508 | *vm_flags |= VM_HUGEPAGE; | 1497 | *vm_flags |= VM_HUGEPAGE; |
1509 | /* | 1498 | /* |
@@ -1655,11 +1644,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma) | |||
1655 | if (vma->vm_ops) | 1644 | if (vma->vm_ops) |
1656 | /* khugepaged not yet working on file or special mappings */ | 1645 | /* khugepaged not yet working on file or special mappings */ |
1657 | return 0; | 1646 | return 0; |
1658 | /* | 1647 | VM_BUG_ON(vma->vm_flags & VM_NO_THP); |
1659 | * If is_pfn_mapping() is true is_learn_pfn_mapping() must be | ||
1660 | * true too, verify it here. | ||
1661 | */ | ||
1662 | VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | ||
1663 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 1648 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
1664 | hend = vma->vm_end & HPAGE_PMD_MASK; | 1649 | hend = vma->vm_end & HPAGE_PMD_MASK; |
1665 | if (hstart < hend) | 1650 | if (hstart < hend) |
@@ -1833,28 +1818,35 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, | |||
1833 | } | 1818 | } |
1834 | } | 1819 | } |
1835 | 1820 | ||
1836 | static void collapse_huge_page(struct mm_struct *mm, | 1821 | static void khugepaged_alloc_sleep(void) |
1837 | unsigned long address, | ||
1838 | struct page **hpage, | ||
1839 | struct vm_area_struct *vma, | ||
1840 | int node) | ||
1841 | { | 1822 | { |
1842 | pgd_t *pgd; | 1823 | wait_event_freezable_timeout(khugepaged_wait, false, |
1843 | pud_t *pud; | 1824 | msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); |
1844 | pmd_t *pmd, _pmd; | 1825 | } |
1845 | pte_t *pte; | ||
1846 | pgtable_t pgtable; | ||
1847 | struct page *new_page; | ||
1848 | spinlock_t *ptl; | ||
1849 | int isolated; | ||
1850 | unsigned long hstart, hend; | ||
1851 | 1826 | ||
1852 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | 1827 | #ifdef CONFIG_NUMA |
1853 | #ifndef CONFIG_NUMA | 1828 | static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) |
1854 | up_read(&mm->mmap_sem); | 1829 | { |
1855 | VM_BUG_ON(!*hpage); | 1830 | if (IS_ERR(*hpage)) { |
1856 | new_page = *hpage; | 1831 | if (!*wait) |
1857 | #else | 1832 | return false; |
1833 | |||
1834 | *wait = false; | ||
1835 | *hpage = NULL; | ||
1836 | khugepaged_alloc_sleep(); | ||
1837 | } else if (*hpage) { | ||
1838 | put_page(*hpage); | ||
1839 | *hpage = NULL; | ||
1840 | } | ||
1841 | |||
1842 | return true; | ||
1843 | } | ||
1844 | |||
1845 | static struct page | ||
1846 | *khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, | ||
1847 | struct vm_area_struct *vma, unsigned long address, | ||
1848 | int node) | ||
1849 | { | ||
1858 | VM_BUG_ON(*hpage); | 1850 | VM_BUG_ON(*hpage); |
1859 | /* | 1851 | /* |
1860 | * Allocate the page while the vma is still valid and under | 1852 | * Allocate the page while the vma is still valid and under |
@@ -1866,7 +1858,7 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1866 | * mmap_sem in read mode is good idea also to allow greater | 1858 | * mmap_sem in read mode is good idea also to allow greater |
1867 | * scalability. | 1859 | * scalability. |
1868 | */ | 1860 | */ |
1869 | new_page = alloc_hugepage_vma(khugepaged_defrag(), vma, address, | 1861 | *hpage = alloc_hugepage_vma(khugepaged_defrag(), vma, address, |
1870 | node, __GFP_OTHER_NODE); | 1862 | node, __GFP_OTHER_NODE); |
1871 | 1863 | ||
1872 | /* | 1864 | /* |
@@ -1874,20 +1866,85 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1874 | * preparation for taking it in write mode. | 1866 | * preparation for taking it in write mode. |
1875 | */ | 1867 | */ |
1876 | up_read(&mm->mmap_sem); | 1868 | up_read(&mm->mmap_sem); |
1877 | if (unlikely(!new_page)) { | 1869 | if (unlikely(!*hpage)) { |
1878 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | 1870 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); |
1879 | *hpage = ERR_PTR(-ENOMEM); | 1871 | *hpage = ERR_PTR(-ENOMEM); |
1880 | return; | 1872 | return NULL; |
1881 | } | 1873 | } |
1882 | #endif | ||
1883 | 1874 | ||
1884 | count_vm_event(THP_COLLAPSE_ALLOC); | 1875 | count_vm_event(THP_COLLAPSE_ALLOC); |
1885 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) { | 1876 | return *hpage; |
1886 | #ifdef CONFIG_NUMA | 1877 | } |
1887 | put_page(new_page); | 1878 | #else |
1879 | static struct page *khugepaged_alloc_hugepage(bool *wait) | ||
1880 | { | ||
1881 | struct page *hpage; | ||
1882 | |||
1883 | do { | ||
1884 | hpage = alloc_hugepage(khugepaged_defrag()); | ||
1885 | if (!hpage) { | ||
1886 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | ||
1887 | if (!*wait) | ||
1888 | return NULL; | ||
1889 | |||
1890 | *wait = false; | ||
1891 | khugepaged_alloc_sleep(); | ||
1892 | } else | ||
1893 | count_vm_event(THP_COLLAPSE_ALLOC); | ||
1894 | } while (unlikely(!hpage) && likely(khugepaged_enabled())); | ||
1895 | |||
1896 | return hpage; | ||
1897 | } | ||
1898 | |||
1899 | static bool khugepaged_prealloc_page(struct page **hpage, bool *wait) | ||
1900 | { | ||
1901 | if (!*hpage) | ||
1902 | *hpage = khugepaged_alloc_hugepage(wait); | ||
1903 | |||
1904 | if (unlikely(!*hpage)) | ||
1905 | return false; | ||
1906 | |||
1907 | return true; | ||
1908 | } | ||
1909 | |||
1910 | static struct page | ||
1911 | *khugepaged_alloc_page(struct page **hpage, struct mm_struct *mm, | ||
1912 | struct vm_area_struct *vma, unsigned long address, | ||
1913 | int node) | ||
1914 | { | ||
1915 | up_read(&mm->mmap_sem); | ||
1916 | VM_BUG_ON(!*hpage); | ||
1917 | return *hpage; | ||
1918 | } | ||
1888 | #endif | 1919 | #endif |
1920 | |||
1921 | static void collapse_huge_page(struct mm_struct *mm, | ||
1922 | unsigned long address, | ||
1923 | struct page **hpage, | ||
1924 | struct vm_area_struct *vma, | ||
1925 | int node) | ||
1926 | { | ||
1927 | pgd_t *pgd; | ||
1928 | pud_t *pud; | ||
1929 | pmd_t *pmd, _pmd; | ||
1930 | pte_t *pte; | ||
1931 | pgtable_t pgtable; | ||
1932 | struct page *new_page; | ||
1933 | spinlock_t *ptl; | ||
1934 | int isolated; | ||
1935 | unsigned long hstart, hend; | ||
1936 | unsigned long mmun_start; /* For mmu_notifiers */ | ||
1937 | unsigned long mmun_end; /* For mmu_notifiers */ | ||
1938 | |||
1939 | VM_BUG_ON(address & ~HPAGE_PMD_MASK); | ||
1940 | |||
1941 | /* release the mmap_sem read lock. */ | ||
1942 | new_page = khugepaged_alloc_page(hpage, mm, vma, address, node); | ||
1943 | if (!new_page) | ||
1944 | return; | ||
1945 | |||
1946 | if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) | ||
1889 | return; | 1947 | return; |
1890 | } | ||
1891 | 1948 | ||
1892 | /* | 1949 | /* |
1893 | * Prevent all access to pagetables with the exception of | 1950 | * Prevent all access to pagetables with the exception of |
@@ -1912,11 +1969,7 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1912 | goto out; | 1969 | goto out; |
1913 | if (is_vma_temporary_stack(vma)) | 1970 | if (is_vma_temporary_stack(vma)) |
1914 | goto out; | 1971 | goto out; |
1915 | /* | 1972 | VM_BUG_ON(vma->vm_flags & VM_NO_THP); |
1916 | * If is_pfn_mapping() is true is_learn_pfn_mapping() must be | ||
1917 | * true too, verify it here. | ||
1918 | */ | ||
1919 | VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP); | ||
1920 | 1973 | ||
1921 | pgd = pgd_offset(mm, address); | 1974 | pgd = pgd_offset(mm, address); |
1922 | if (!pgd_present(*pgd)) | 1975 | if (!pgd_present(*pgd)) |
@@ -1936,6 +1989,9 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1936 | pte = pte_offset_map(pmd, address); | 1989 | pte = pte_offset_map(pmd, address); |
1937 | ptl = pte_lockptr(mm, pmd); | 1990 | ptl = pte_lockptr(mm, pmd); |
1938 | 1991 | ||
1992 | mmun_start = address; | ||
1993 | mmun_end = address + HPAGE_PMD_SIZE; | ||
1994 | mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); | ||
1939 | spin_lock(&mm->page_table_lock); /* probably unnecessary */ | 1995 | spin_lock(&mm->page_table_lock); /* probably unnecessary */ |
1940 | /* | 1996 | /* |
1941 | * After this gup_fast can't run anymore. This also removes | 1997 | * After this gup_fast can't run anymore. This also removes |
@@ -1943,8 +1999,9 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1943 | * huge and small TLB entries for the same virtual address | 1999 | * huge and small TLB entries for the same virtual address |
1944 | * to avoid the risk of CPU bugs in that area. | 2000 | * to avoid the risk of CPU bugs in that area. |
1945 | */ | 2001 | */ |
1946 | _pmd = pmdp_clear_flush_notify(vma, address, pmd); | 2002 | _pmd = pmdp_clear_flush(vma, address, pmd); |
1947 | spin_unlock(&mm->page_table_lock); | 2003 | spin_unlock(&mm->page_table_lock); |
2004 | mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); | ||
1948 | 2005 | ||
1949 | spin_lock(ptl); | 2006 | spin_lock(ptl); |
1950 | isolated = __collapse_huge_page_isolate(vma, address, pte); | 2007 | isolated = __collapse_huge_page_isolate(vma, address, pte); |
@@ -1970,8 +2027,6 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1970 | pte_unmap(pte); | 2027 | pte_unmap(pte); |
1971 | __SetPageUptodate(new_page); | 2028 | __SetPageUptodate(new_page); |
1972 | pgtable = pmd_pgtable(_pmd); | 2029 | pgtable = pmd_pgtable(_pmd); |
1973 | VM_BUG_ON(page_count(pgtable) != 1); | ||
1974 | VM_BUG_ON(page_mapcount(pgtable) != 0); | ||
1975 | 2030 | ||
1976 | _pmd = mk_pmd(new_page, vma->vm_page_prot); | 2031 | _pmd = mk_pmd(new_page, vma->vm_page_prot); |
1977 | _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); | 2032 | _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); |
@@ -1988,13 +2043,12 @@ static void collapse_huge_page(struct mm_struct *mm, | |||
1988 | BUG_ON(!pmd_none(*pmd)); | 2043 | BUG_ON(!pmd_none(*pmd)); |
1989 | page_add_new_anon_rmap(new_page, vma, address); | 2044 | page_add_new_anon_rmap(new_page, vma, address); |
1990 | set_pmd_at(mm, address, pmd, _pmd); | 2045 | set_pmd_at(mm, address, pmd, _pmd); |
1991 | update_mmu_cache(vma, address, _pmd); | 2046 | update_mmu_cache_pmd(vma, address, pmd); |
1992 | prepare_pmd_huge_pte(pgtable, mm); | 2047 | pgtable_trans_huge_deposit(mm, pgtable); |
1993 | spin_unlock(&mm->page_table_lock); | 2048 | spin_unlock(&mm->page_table_lock); |
1994 | 2049 | ||
1995 | #ifndef CONFIG_NUMA | ||
1996 | *hpage = NULL; | 2050 | *hpage = NULL; |
1997 | #endif | 2051 | |
1998 | khugepaged_pages_collapsed++; | 2052 | khugepaged_pages_collapsed++; |
1999 | out_up_write: | 2053 | out_up_write: |
2000 | up_write(&mm->mmap_sem); | 2054 | up_write(&mm->mmap_sem); |
@@ -2002,9 +2056,6 @@ out_up_write: | |||
2002 | 2056 | ||
2003 | out: | 2057 | out: |
2004 | mem_cgroup_uncharge_page(new_page); | 2058 | mem_cgroup_uncharge_page(new_page); |
2005 | #ifdef CONFIG_NUMA | ||
2006 | put_page(new_page); | ||
2007 | #endif | ||
2008 | goto out_up_write; | 2059 | goto out_up_write; |
2009 | } | 2060 | } |
2010 | 2061 | ||
@@ -2154,12 +2205,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, | |||
2154 | goto skip; | 2205 | goto skip; |
2155 | if (is_vma_temporary_stack(vma)) | 2206 | if (is_vma_temporary_stack(vma)) |
2156 | goto skip; | 2207 | goto skip; |
2157 | /* | 2208 | VM_BUG_ON(vma->vm_flags & VM_NO_THP); |
2158 | * If is_pfn_mapping() is true is_learn_pfn_mapping() | ||
2159 | * must be true too, verify it here. | ||
2160 | */ | ||
2161 | VM_BUG_ON(is_linear_pfn_mapping(vma) || | ||
2162 | vma->vm_flags & VM_NO_THP); | ||
2163 | 2209 | ||
2164 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; | 2210 | hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK; |
2165 | hend = vma->vm_end & HPAGE_PMD_MASK; | 2211 | hend = vma->vm_end & HPAGE_PMD_MASK; |
@@ -2234,32 +2280,23 @@ static int khugepaged_has_work(void) | |||
2234 | static int khugepaged_wait_event(void) | 2280 | static int khugepaged_wait_event(void) |
2235 | { | 2281 | { |
2236 | return !list_empty(&khugepaged_scan.mm_head) || | 2282 | return !list_empty(&khugepaged_scan.mm_head) || |
2237 | !khugepaged_enabled(); | 2283 | kthread_should_stop(); |
2238 | } | 2284 | } |
2239 | 2285 | ||
2240 | static void khugepaged_do_scan(struct page **hpage) | 2286 | static void khugepaged_do_scan(void) |
2241 | { | 2287 | { |
2288 | struct page *hpage = NULL; | ||
2242 | unsigned int progress = 0, pass_through_head = 0; | 2289 | unsigned int progress = 0, pass_through_head = 0; |
2243 | unsigned int pages = khugepaged_pages_to_scan; | 2290 | unsigned int pages = khugepaged_pages_to_scan; |
2291 | bool wait = true; | ||
2244 | 2292 | ||
2245 | barrier(); /* write khugepaged_pages_to_scan to local stack */ | 2293 | barrier(); /* write khugepaged_pages_to_scan to local stack */ |
2246 | 2294 | ||
2247 | while (progress < pages) { | 2295 | while (progress < pages) { |
2248 | cond_resched(); | 2296 | if (!khugepaged_prealloc_page(&hpage, &wait)) |
2249 | |||
2250 | #ifndef CONFIG_NUMA | ||
2251 | if (!*hpage) { | ||
2252 | *hpage = alloc_hugepage(khugepaged_defrag()); | ||
2253 | if (unlikely(!*hpage)) { | ||
2254 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | ||
2255 | break; | ||
2256 | } | ||
2257 | count_vm_event(THP_COLLAPSE_ALLOC); | ||
2258 | } | ||
2259 | #else | ||
2260 | if (IS_ERR(*hpage)) | ||
2261 | break; | 2297 | break; |
2262 | #endif | 2298 | |
2299 | cond_resched(); | ||
2263 | 2300 | ||
2264 | if (unlikely(kthread_should_stop() || freezing(current))) | 2301 | if (unlikely(kthread_should_stop() || freezing(current))) |
2265 | break; | 2302 | break; |
@@ -2270,73 +2307,32 @@ static void khugepaged_do_scan(struct page **hpage) | |||
2270 | if (khugepaged_has_work() && | 2307 | if (khugepaged_has_work() && |
2271 | pass_through_head < 2) | 2308 | pass_through_head < 2) |
2272 | progress += khugepaged_scan_mm_slot(pages - progress, | 2309 | progress += khugepaged_scan_mm_slot(pages - progress, |
2273 | hpage); | 2310 | &hpage); |
2274 | else | 2311 | else |
2275 | progress = pages; | 2312 | progress = pages; |
2276 | spin_unlock(&khugepaged_mm_lock); | 2313 | spin_unlock(&khugepaged_mm_lock); |
2277 | } | 2314 | } |
2278 | } | ||
2279 | 2315 | ||
2280 | static void khugepaged_alloc_sleep(void) | 2316 | if (!IS_ERR_OR_NULL(hpage)) |
2281 | { | 2317 | put_page(hpage); |
2282 | wait_event_freezable_timeout(khugepaged_wait, false, | ||
2283 | msecs_to_jiffies(khugepaged_alloc_sleep_millisecs)); | ||
2284 | } | 2318 | } |
2285 | 2319 | ||
2286 | #ifndef CONFIG_NUMA | 2320 | static void khugepaged_wait_work(void) |
2287 | static struct page *khugepaged_alloc_hugepage(void) | ||
2288 | { | 2321 | { |
2289 | struct page *hpage; | 2322 | try_to_freeze(); |
2290 | |||
2291 | do { | ||
2292 | hpage = alloc_hugepage(khugepaged_defrag()); | ||
2293 | if (!hpage) { | ||
2294 | count_vm_event(THP_COLLAPSE_ALLOC_FAILED); | ||
2295 | khugepaged_alloc_sleep(); | ||
2296 | } else | ||
2297 | count_vm_event(THP_COLLAPSE_ALLOC); | ||
2298 | } while (unlikely(!hpage) && | ||
2299 | likely(khugepaged_enabled())); | ||
2300 | return hpage; | ||
2301 | } | ||
2302 | #endif | ||
2303 | 2323 | ||
2304 | static void khugepaged_loop(void) | 2324 | if (khugepaged_has_work()) { |
2305 | { | 2325 | if (!khugepaged_scan_sleep_millisecs) |
2306 | struct page *hpage; | 2326 | return; |
2307 | 2327 | ||
2308 | #ifdef CONFIG_NUMA | 2328 | wait_event_freezable_timeout(khugepaged_wait, |
2309 | hpage = NULL; | 2329 | kthread_should_stop(), |
2310 | #endif | 2330 | msecs_to_jiffies(khugepaged_scan_sleep_millisecs)); |
2311 | while (likely(khugepaged_enabled())) { | 2331 | return; |
2312 | #ifndef CONFIG_NUMA | ||
2313 | hpage = khugepaged_alloc_hugepage(); | ||
2314 | if (unlikely(!hpage)) | ||
2315 | break; | ||
2316 | #else | ||
2317 | if (IS_ERR(hpage)) { | ||
2318 | khugepaged_alloc_sleep(); | ||
2319 | hpage = NULL; | ||
2320 | } | ||
2321 | #endif | ||
2322 | |||
2323 | khugepaged_do_scan(&hpage); | ||
2324 | #ifndef CONFIG_NUMA | ||
2325 | if (hpage) | ||
2326 | put_page(hpage); | ||
2327 | #endif | ||
2328 | try_to_freeze(); | ||
2329 | if (unlikely(kthread_should_stop())) | ||
2330 | break; | ||
2331 | if (khugepaged_has_work()) { | ||
2332 | if (!khugepaged_scan_sleep_millisecs) | ||
2333 | continue; | ||
2334 | wait_event_freezable_timeout(khugepaged_wait, false, | ||
2335 | msecs_to_jiffies(khugepaged_scan_sleep_millisecs)); | ||
2336 | } else if (khugepaged_enabled()) | ||
2337 | wait_event_freezable(khugepaged_wait, | ||
2338 | khugepaged_wait_event()); | ||
2339 | } | 2332 | } |
2333 | |||
2334 | if (khugepaged_enabled()) | ||
2335 | wait_event_freezable(khugepaged_wait, khugepaged_wait_event()); | ||
2340 | } | 2336 | } |
2341 | 2337 | ||
2342 | static int khugepaged(void *none) | 2338 | static int khugepaged(void *none) |
@@ -2346,20 +2342,9 @@ static int khugepaged(void *none) | |||
2346 | set_freezable(); | 2342 | set_freezable(); |
2347 | set_user_nice(current, 19); | 2343 | set_user_nice(current, 19); |
2348 | 2344 | ||
2349 | /* serialize with start_khugepaged() */ | 2345 | while (!kthread_should_stop()) { |
2350 | mutex_lock(&khugepaged_mutex); | 2346 | khugepaged_do_scan(); |
2351 | 2347 | khugepaged_wait_work(); | |
2352 | for (;;) { | ||
2353 | mutex_unlock(&khugepaged_mutex); | ||
2354 | VM_BUG_ON(khugepaged_thread != current); | ||
2355 | khugepaged_loop(); | ||
2356 | VM_BUG_ON(khugepaged_thread != current); | ||
2357 | |||
2358 | mutex_lock(&khugepaged_mutex); | ||
2359 | if (!khugepaged_enabled()) | ||
2360 | break; | ||
2361 | if (unlikely(kthread_should_stop())) | ||
2362 | break; | ||
2363 | } | 2348 | } |
2364 | 2349 | ||
2365 | spin_lock(&khugepaged_mm_lock); | 2350 | spin_lock(&khugepaged_mm_lock); |
@@ -2368,10 +2353,6 @@ static int khugepaged(void *none) | |||
2368 | if (mm_slot) | 2353 | if (mm_slot) |
2369 | collect_mm_slot(mm_slot); | 2354 | collect_mm_slot(mm_slot); |
2370 | spin_unlock(&khugepaged_mm_lock); | 2355 | spin_unlock(&khugepaged_mm_lock); |
2371 | |||
2372 | khugepaged_thread = NULL; | ||
2373 | mutex_unlock(&khugepaged_mutex); | ||
2374 | |||
2375 | return 0; | 2356 | return 0; |
2376 | } | 2357 | } |
2377 | 2358 | ||
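
A recurring change in this diff is the conversion from the implicit `*_notify` primitives (`pmdp_clear_flush_notify`, `pmdp_splitting_flush_notify`) to plain flushes bracketed by an explicit mmu_notifier invalidation range. The sketch below condenses that pattern as it appears in `do_huge_pmd_wp_page_fallback()`, `do_huge_pmd_wp_page()`, `__split_huge_page_splitting()` and `collapse_huge_page()`; it is an illustrative fragment assembled from the added lines above, not a standalone compilable unit.

```c
/*
 * Sketch of the mmu_notifier pattern this patch introduces: the range
 * covering the whole huge page is invalidated around the pmd update,
 * instead of relying on a single *_notify flush of the pmd.
 */
unsigned long mmun_start = haddr;                   /* For mmu_notifiers */
unsigned long mmun_end   = haddr + HPAGE_PMD_SIZE;  /* For mmu_notifiers */

mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);

spin_lock(&mm->page_table_lock);
/* ... re-check *pmd under the lock, then replace the huge pmd, e.g.: */
pmdp_clear_flush(vma, haddr, pmd);      /* was pmdp_clear_flush_notify() */
/* ... populate the new mapping, rmap accounting, etc. ... */
spin_unlock(&mm->page_table_lock);

mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
```

Note that the error paths that bail out after `mmu_notifier_invalidate_range_start()` has run (see the `out_free_pages` and `out_mn` labels above) still issue the matching `mmu_notifier_invalidate_range_end()`, which is why those labels were added or reordered in this patch.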