author	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2012-12-12 16:50:54 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-12 20:38:31 -0500
commit	93b4796dede916de74b21fbd637588da6a99a7ec (patch)
tree	3833de6051bb1b8d361a614743aafa56b43902e1 /mm/huge_memory.c
parent	fc9fe822f7112db23e51e2be3b886f5d8f0afdb6 (diff)
thp: do_huge_pmd_wp_page(): handle huge zero page
On write access to the huge zero page we allocate a new huge page and clear it.
If the allocation fails (ENOMEM), fall back gracefully: install a regular page
table under the pmd and map the pte at the fault address to a newly allocated
normal (4k) page; all other ptes in the pmd range map the normal zero page.
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
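
For background on how this path is reached: a read fault on an untouched anonymous, PMD-aligned range can install the huge zero page, and a later write to that range takes the write-protect fault handled by do_huge_pmd_wp_page(). Below is a minimal userspace sketch of that sequence; it is illustrative only, not part of the patch, and assumes an x86_64 kernel with transparent hugepages and the huge zero page enabled (the 2M PMD_SIZE constant and the alignment trick are assumptions of the sketch).

/*
 * Illustrative sketch only (not part of the patch): touch an anonymous
 * PMD-sized region read-only first so the huge zero page can be mapped,
 * then write to it to take the write-protect fault handled by
 * do_huge_pmd_wp_page().  Assumes x86_64 (2M PMD) with THP enabled.
 */
#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

#define PMD_SIZE (2UL << 20)

int main(void)
{
        /* Over-allocate so a PMD-aligned start address can be picked. */
        char *map = mmap(NULL, 3 * PMD_SIZE, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (map == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        char *p = (char *)(((uintptr_t)map + PMD_SIZE - 1) & ~(PMD_SIZE - 1));

        madvise(p, PMD_SIZE, MADV_HUGEPAGE);    /* best effort */

        volatile char c = p[0]; /* read fault: may map the huge zero page */
        (void)c;
        p[0] = 1;               /* write fault: the COW path changed here */

        printf("wrote at %p\n", p);
        return 0;
}

Whether the read fault really installs the huge zero page depends on the running kernel's configuration; if it does not, the write simply goes through the ordinary anonymous THP or 4k fault path instead.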
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	111
1 file changed, 96 insertions(+), 15 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 650625390f61..a959b3a4ddd5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -858,6 +858,70 @@ unlock:
         spin_unlock(&mm->page_table_lock);
 }
 
+static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
+                struct vm_area_struct *vma, unsigned long address,
+                pmd_t *pmd, unsigned long haddr)
+{
+        pgtable_t pgtable;
+        pmd_t _pmd;
+        struct page *page;
+        int i, ret = 0;
+        unsigned long mmun_start;       /* For mmu_notifiers */
+        unsigned long mmun_end;         /* For mmu_notifiers */
+
+        page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+        if (!page) {
+                ret |= VM_FAULT_OOM;
+                goto out;
+        }
+
+        if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
+                put_page(page);
+                ret |= VM_FAULT_OOM;
+                goto out;
+        }
+
+        clear_user_highpage(page, address);
+        __SetPageUptodate(page);
+
+        mmun_start = haddr;
+        mmun_end   = haddr + HPAGE_PMD_SIZE;
+        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+        spin_lock(&mm->page_table_lock);
+        pmdp_clear_flush(vma, haddr, pmd);
+        /* leave pmd empty until pte is filled */
+
+        pgtable = pgtable_trans_huge_withdraw(mm);
+        pmd_populate(mm, &_pmd, pgtable);
+
+        for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+                pte_t *pte, entry;
+                if (haddr == (address & PAGE_MASK)) {
+                        entry = mk_pte(page, vma->vm_page_prot);
+                        entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+                        page_add_new_anon_rmap(page, vma, haddr);
+                } else {
+                        entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+                        entry = pte_mkspecial(entry);
+                }
+                pte = pte_offset_map(&_pmd, haddr);
+                VM_BUG_ON(!pte_none(*pte));
+                set_pte_at(mm, haddr, pte, entry);
+                pte_unmap(pte);
+        }
+        smp_wmb(); /* make pte visible before pmd */
+        pmd_populate(mm, pmd, pgtable);
+        spin_unlock(&mm->page_table_lock);
+        inc_mm_counter(mm, MM_ANONPAGES);
+
+        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
+        ret |= VM_FAULT_WRITE;
+out:
+        return ret;
+}
+
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
                                         struct vm_area_struct *vma,
                                         unsigned long address,
@@ -964,19 +1028,21 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
 {
         int ret = 0;
-        struct page *page, *new_page;
+        struct page *page = NULL, *new_page;
         unsigned long haddr;
         unsigned long mmun_start;       /* For mmu_notifiers */
         unsigned long mmun_end;         /* For mmu_notifiers */
 
         VM_BUG_ON(!vma->anon_vma);
+        haddr = address & HPAGE_PMD_MASK;
+        if (is_huge_zero_pmd(orig_pmd))
+                goto alloc;
         spin_lock(&mm->page_table_lock);
         if (unlikely(!pmd_same(*pmd, orig_pmd)))
                 goto out_unlock;
 
         page = pmd_page(orig_pmd);
         VM_BUG_ON(!PageCompound(page) || !PageHead(page));
-        haddr = address & HPAGE_PMD_MASK;
         if (page_mapcount(page) == 1) {
                 pmd_t entry;
                 entry = pmd_mkyoung(orig_pmd);
@@ -988,7 +1054,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         }
         get_page(page);
         spin_unlock(&mm->page_table_lock);
-
+alloc:
         if (transparent_hugepage_enabled(vma) &&
             !transparent_hugepage_debug_cow())
                 new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
@@ -998,24 +1064,34 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
         if (unlikely(!new_page)) {
                 count_vm_event(THP_FAULT_FALLBACK);
-                ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
-                                pmd, orig_pmd, page, haddr);
-                if (ret & VM_FAULT_OOM)
-                        split_huge_page(page);
-                put_page(page);
+                if (is_huge_zero_pmd(orig_pmd)) {
+                        ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
+                                        address, pmd, haddr);
+                } else {
+                        ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
+                                        pmd, orig_pmd, page, haddr);
+                        if (ret & VM_FAULT_OOM)
+                                split_huge_page(page);
+                        put_page(page);
+                }
                 goto out;
         }
         count_vm_event(THP_FAULT_ALLOC);
 
         if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
                 put_page(new_page);
-                split_huge_page(page);
-                put_page(page);
+                if (page) {
+                        split_huge_page(page);
+                        put_page(page);
+                }
                 ret |= VM_FAULT_OOM;
                 goto out;
         }
 
-        copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
+        if (is_huge_zero_pmd(orig_pmd))
+                clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
+        else
+                copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
         __SetPageUptodate(new_page);
 
         mmun_start = haddr;
@@ -1023,7 +1099,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
         mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
         spin_lock(&mm->page_table_lock);
-        put_page(page);
+        if (page)
+                put_page(page);
         if (unlikely(!pmd_same(*pmd, orig_pmd))) {
                 spin_unlock(&mm->page_table_lock);
                 mem_cgroup_uncharge_page(new_page);
@@ -1031,14 +1108,18 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 goto out_mn;
         } else {
                 pmd_t entry;
-                VM_BUG_ON(!PageHead(page));
                 entry = mk_huge_pmd(new_page, vma);
                 pmdp_clear_flush(vma, haddr, pmd);
                 page_add_new_anon_rmap(new_page, vma, haddr);
                 set_pmd_at(mm, haddr, pmd, entry);
                 update_mmu_cache_pmd(vma, address, pmd);
-                page_remove_rmap(page);
-                put_page(page);
+                if (is_huge_zero_pmd(orig_pmd))
+                        add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+                else {
+                        VM_BUG_ON(!PageHead(page));
+                        page_remove_rmap(page);
+                        put_page(page);
+                }
                 ret |= VM_FAULT_WRITE;
         }
         spin_unlock(&mm->page_table_lock);