path: root/mm
author	Kirill A. Shutemov <kirill.shutemov@linux.intel.com>	2012-12-12 16:50:54 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-12 20:38:31 -0500
commit	93b4796dede916de74b21fbd637588da6a99a7ec (patch)
tree	3833de6051bb1b8d361a614743aafa56b43902e1 /mm
parent	fc9fe822f7112db23e51e2be3b886f5d8f0afdb6 (diff)
thp: do_huge_pmd_wp_page(): handle huge zero page
On write access to huge zero page we alloc a new huge page and clear it.

If ENOMEM, graceful fallback: we create a new pmd table and set pte around
fault address to newly allocated normal (4k) page. All other ptes in the
pmd set to normal zero page.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@linux.intel.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
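For reference, a minimal userspace sketch (not part of the patch) of the access pattern that reaches this path: a read fault on an anonymous, PMD-aligned range can be served by the huge zero page, and the subsequent write fault enters do_huge_pmd_wp_page(). The 2M PMD size, the MADV_HUGEPAGE hint, and the offsets used below are illustrative assumptions; they depend on the architecture and THP configuration.

#include <stdint.h>
#include <sys/mman.h>

#define PMD_SZ (2UL * 1024 * 1024)	/* assumed 2M PMD size (x86-64) */

int main(void)
{
	size_t len = 2 * PMD_SZ;
	char *raw, *p;
	char c;

	raw = mmap(NULL, len, PROT_READ | PROT_WRITE,
		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (raw == MAP_FAILED)
		return 1;
	/* Round up to a PMD boundary so the range can be mapped huge. */
	p = (char *)(((uintptr_t)raw + PMD_SZ - 1) & ~(PMD_SZ - 1));
	madvise(p, PMD_SZ, MADV_HUGEPAGE);

	/* Read fault: the whole PMD can be backed by the huge zero page. */
	c = p[0];

	/*
	 * Write fault: takes do_huge_pmd_wp_page() on the zero pmd.  The
	 * kernel tries to allocate and clear a real huge page; on ENOMEM
	 * it falls back to a pte table with one normal 4k page at the
	 * fault address and zero-page ptes for the rest of the range.
	 */
	p[123] = c + 1;

	munmap(raw, len);
	return 0;
}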
Diffstat (limited to 'mm')
-rw-r--r--	mm/huge_memory.c	111
-rw-r--r--	mm/memory.c	7
2 files changed, 96 insertions(+), 22 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 650625390f61..a959b3a4ddd5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -858,6 +858,70 @@ unlock:
 	spin_unlock(&mm->page_table_lock);
 }
 
+static int do_huge_pmd_wp_zero_page_fallback(struct mm_struct *mm,
+		struct vm_area_struct *vma, unsigned long address,
+		pmd_t *pmd, unsigned long haddr)
+{
+	pgtable_t pgtable;
+	pmd_t _pmd;
+	struct page *page;
+	int i, ret = 0;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+
+	page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
+	if (!page) {
+		ret |= VM_FAULT_OOM;
+		goto out;
+	}
+
+	if (mem_cgroup_newpage_charge(page, mm, GFP_KERNEL)) {
+		put_page(page);
+		ret |= VM_FAULT_OOM;
+		goto out;
+	}
+
+	clear_user_highpage(page, address);
+	__SetPageUptodate(page);
+
+	mmun_start = haddr;
+	mmun_end   = haddr + HPAGE_PMD_SIZE;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
+	spin_lock(&mm->page_table_lock);
+	pmdp_clear_flush(vma, haddr, pmd);
+	/* leave pmd empty until pte is filled */
+
+	pgtable = pgtable_trans_huge_withdraw(mm);
+	pmd_populate(mm, &_pmd, pgtable);
+
+	for (i = 0; i < HPAGE_PMD_NR; i++, haddr += PAGE_SIZE) {
+		pte_t *pte, entry;
+		if (haddr == (address & PAGE_MASK)) {
+			entry = mk_pte(page, vma->vm_page_prot);
+			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+			page_add_new_anon_rmap(page, vma, haddr);
+		} else {
+			entry = pfn_pte(my_zero_pfn(haddr), vma->vm_page_prot);
+			entry = pte_mkspecial(entry);
+		}
+		pte = pte_offset_map(&_pmd, haddr);
+		VM_BUG_ON(!pte_none(*pte));
+		set_pte_at(mm, haddr, pte, entry);
+		pte_unmap(pte);
+	}
+	smp_wmb(); /* make pte visible before pmd */
+	pmd_populate(mm, pmd, pgtable);
+	spin_unlock(&mm->page_table_lock);
+	inc_mm_counter(mm, MM_ANONPAGES);
+
+	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
+
+	ret |= VM_FAULT_WRITE;
+out:
+	return ret;
+}
+
 static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm,
 		struct vm_area_struct *vma,
 		unsigned long address,
@@ -964,19 +1028,21 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmd, pmd_t orig_pmd)
 {
 	int ret = 0;
-	struct page *page, *new_page;
+	struct page *page = NULL, *new_page;
 	unsigned long haddr;
 	unsigned long mmun_start;	/* For mmu_notifiers */
 	unsigned long mmun_end;		/* For mmu_notifiers */
 
 	VM_BUG_ON(!vma->anon_vma);
+	haddr = address & HPAGE_PMD_MASK;
+	if (is_huge_zero_pmd(orig_pmd))
+		goto alloc;
 	spin_lock(&mm->page_table_lock);
 	if (unlikely(!pmd_same(*pmd, orig_pmd)))
 		goto out_unlock;
 
 	page = pmd_page(orig_pmd);
 	VM_BUG_ON(!PageCompound(page) || !PageHead(page));
-	haddr = address & HPAGE_PMD_MASK;
 	if (page_mapcount(page) == 1) {
 		pmd_t entry;
 		entry = pmd_mkyoung(orig_pmd);
@@ -988,7 +1054,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 	get_page(page);
 	spin_unlock(&mm->page_table_lock);
-
+alloc:
 	if (transparent_hugepage_enabled(vma) &&
 	    !transparent_hugepage_debug_cow())
 		new_page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
@@ -998,24 +1064,34 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	if (unlikely(!new_page)) {
 		count_vm_event(THP_FAULT_FALLBACK);
-		ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
-				pmd, orig_pmd, page, haddr);
-		if (ret & VM_FAULT_OOM)
-			split_huge_page(page);
-		put_page(page);
+		if (is_huge_zero_pmd(orig_pmd)) {
+			ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
+					address, pmd, haddr);
+		} else {
+			ret = do_huge_pmd_wp_page_fallback(mm, vma, address,
+					pmd, orig_pmd, page, haddr);
+			if (ret & VM_FAULT_OOM)
+				split_huge_page(page);
+			put_page(page);
+		}
 		goto out;
 	}
 	count_vm_event(THP_FAULT_ALLOC);
 
 	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
 		put_page(new_page);
-		split_huge_page(page);
-		put_page(page);
+		if (page) {
+			split_huge_page(page);
+			put_page(page);
+		}
 		ret |= VM_FAULT_OOM;
 		goto out;
 	}
 
-	copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
+	if (is_huge_zero_pmd(orig_pmd))
+		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
+	else
+		copy_user_huge_page(new_page, page, haddr, vma, HPAGE_PMD_NR);
 	__SetPageUptodate(new_page);
 
 	mmun_start = haddr;
@@ -1023,7 +1099,8 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
 	spin_lock(&mm->page_table_lock);
-	put_page(page);
+	if (page)
+		put_page(page);
 	if (unlikely(!pmd_same(*pmd, orig_pmd))) {
 		spin_unlock(&mm->page_table_lock);
 		mem_cgroup_uncharge_page(new_page);
@@ -1031,14 +1108,18 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		goto out_mn;
 	} else {
 		pmd_t entry;
-		VM_BUG_ON(!PageHead(page));
 		entry = mk_huge_pmd(new_page, vma);
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
 		update_mmu_cache_pmd(vma, address, pmd);
-		page_remove_rmap(page);
-		put_page(page);
+		if (is_huge_zero_pmd(orig_pmd))
+			add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR);
+		else {
+			VM_BUG_ON(!PageHead(page));
+			page_remove_rmap(page);
+			put_page(page);
+		}
 		ret |= VM_FAULT_WRITE;
 	}
 	spin_unlock(&mm->page_table_lock);
diff --git a/mm/memory.c b/mm/memory.c
index 765377385632..259b34fe1347 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -724,13 +724,6 @@ static inline int is_zero_pfn(unsigned long pfn)
 }
 #endif
 
-#ifndef my_zero_pfn
-static inline unsigned long my_zero_pfn(unsigned long addr)
-{
-	return zero_pfn;
-}
-#endif
-
 /*
  * vm_normal_page -- This function gets the "struct page" associated with a pte.
  *