author     Linus Torvalds <torvalds@linux-foundation.org>  2013-09-12 18:44:27 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-09-12 18:44:27 -0400
commit     ac4de9543aca59f2b763746647577302fbedd57e
tree       40407750569ee030de56233c41c9a97f7e89cf67  /mm/huge_memory.c
parent     26935fb06ee88f1188789807687c03041f3c70d9
parent     de32a8177f64bc62e1b19c685dd391af664ab13f
Merge branch 'akpm' (patches from Andrew Morton)
Merge more patches from Andrew Morton:
 "The rest of MM. Plus one misc cleanup"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
  mm/Kconfig: add MMU dependency for MIGRATION.
  kernel: replace strict_strto*() with kstrto*()
  mm, thp: count thp_fault_fallback anytime thp fault fails
  thp: consolidate code between handle_mm_fault() and do_huge_pmd_anonymous_page()
  thp: do_huge_pmd_anonymous_page() cleanup
  thp: move maybe_pmd_mkwrite() out of mk_huge_pmd()
  mm: cleanup add_to_page_cache_locked()
  thp: account anon transparent huge pages into NR_ANON_PAGES
  truncate: drop 'oldsize' truncate_pagecache() parameter
  mm: make lru_add_drain_all() selective
  memcg: document cgroup dirty/writeback memory statistics
  memcg: add per cgroup writeback pages accounting
  memcg: check for proper lock held in mem_cgroup_update_page_stat
  memcg: remove MEMCG_NR_FILE_MAPPED
  memcg: reduce function dereference
  memcg: avoid overflow caused by PAGE_ALIGN
  memcg: rename RESOURCE_MAX to RES_COUNTER_MAX
  memcg: correct RESOURCE_MAX to ULLONG_MAX
  mm: memcg: do not trap chargers with full callstack on OOM
  mm: memcg: rework and document OOM waiting and wakeup
  ...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--  mm/huge_memory.c  129
1 file changed, 56 insertions(+), 73 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d66010e0049d..7489884682d8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -695,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 	return pmd;
 }
 
-static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma)
+static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 {
 	pmd_t entry;
-	entry = mk_pmd(page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = mk_pmd(page, prot);
 	entry = pmd_mkhuge(entry);
 	return entry;
 }
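With maybe_pmd_mkwrite() moved out of mk_huge_pmd(), the helper now only builds a huge entry from the page and the supplied protection bits; each caller applies the dirty bit and, where the VMA allows it, the write bit itself, as the hunks below show. A minimal sketch of the resulting call-site pattern, using only the helpers visible in this diff:

	pmd_t entry;

	/* huge PMD carrying just the VMA's protection bits */
	entry = mk_huge_pmd(page, vma->vm_page_prot);
	/* the caller, not the helper, decides about dirty/write */
	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
	set_pmd_at(mm, haddr, pmd, entry);
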
@@ -732,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		pte_free(mm, pgtable);
 	} else {
 		pmd_t entry;
-		entry = mk_huge_pmd(page, vma);
+		entry = mk_huge_pmd(page, vma->vm_page_prot);
+		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		page_add_new_anon_rmap(page, vma, haddr);
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 		set_pmd_at(mm, haddr, pmd, entry);
@@ -788,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page;
 	unsigned long haddr = address & HPAGE_PMD_MASK;
-	pte_t *pte;
 
-	if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
-		if (unlikely(anon_vma_prepare(vma)))
-			return VM_FAULT_OOM;
-		if (unlikely(khugepaged_enter(vma)))
-			return VM_FAULT_OOM;
-		if (!(flags & FAULT_FLAG_WRITE) &&
-				transparent_hugepage_use_zero_page()) {
-			pgtable_t pgtable;
-			struct page *zero_page;
-			bool set;
-			pgtable = pte_alloc_one(mm, haddr);
-			if (unlikely(!pgtable))
-				return VM_FAULT_OOM;
-			zero_page = get_huge_zero_page();
-			if (unlikely(!zero_page)) {
-				pte_free(mm, pgtable);
-				count_vm_event(THP_FAULT_FALLBACK);
-				goto out;
-			}
-			spin_lock(&mm->page_table_lock);
-			set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
-					zero_page);
-			spin_unlock(&mm->page_table_lock);
-			if (!set) {
-				pte_free(mm, pgtable);
-				put_huge_zero_page();
-			}
-			return 0;
-		}
-		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-				vma, haddr, numa_node_id(), 0);
-		if (unlikely(!page)) {
-			count_vm_event(THP_FAULT_FALLBACK);
-			goto out;
-		}
-		count_vm_event(THP_FAULT_ALLOC);
-		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
-			put_page(page);
-			goto out;
-		}
-		if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
-							  page))) {
-			mem_cgroup_uncharge_page(page);
-			put_page(page);
-			goto out;
-		}
-
-		return 0;
-	}
-out:
-	/*
-	 * Use __pte_alloc instead of pte_alloc_map, because we can't
-	 * run pte_offset_map on the pmd, if an huge pmd could
-	 * materialize from under us from a different thread.
-	 */
-	if (unlikely(pmd_none(*pmd)) &&
-	    unlikely(__pte_alloc(mm, vma, pmd, address)))
-		return VM_FAULT_OOM;
-	/* if an huge pmd materialized from under us just retry later */
-	if (unlikely(pmd_trans_huge(*pmd)))
-		return 0;
-	/*
-	 * A regular pmd is established and it can't morph into a huge pmd
-	 * from under us anymore at this point because we hold the mmap_sem
-	 * read mode and khugepaged takes it in write mode. So now it's
-	 * safe to run pte_offset_map().
-	 */
-	pte = pte_offset_map(pmd, address);
-	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+		return VM_FAULT_FALLBACK;
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+	if (unlikely(khugepaged_enter(vma)))
+		return VM_FAULT_OOM;
+	if (!(flags & FAULT_FLAG_WRITE) &&
+			transparent_hugepage_use_zero_page()) {
+		pgtable_t pgtable;
+		struct page *zero_page;
+		bool set;
+		pgtable = pte_alloc_one(mm, haddr);
+		if (unlikely(!pgtable))
+			return VM_FAULT_OOM;
+		zero_page = get_huge_zero_page();
+		if (unlikely(!zero_page)) {
+			pte_free(mm, pgtable);
+			count_vm_event(THP_FAULT_FALLBACK);
+			return VM_FAULT_FALLBACK;
+		}
+		spin_lock(&mm->page_table_lock);
+		set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+				zero_page);
+		spin_unlock(&mm->page_table_lock);
+		if (!set) {
+			pte_free(mm, pgtable);
+			put_huge_zero_page();
+		}
+		return 0;
+	}
+	page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+			vma, haddr, numa_node_id(), 0);
+	if (unlikely(!page)) {
+		count_vm_event(THP_FAULT_FALLBACK);
+		return VM_FAULT_FALLBACK;
+	}
+	if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
+		put_page(page);
+		count_vm_event(THP_FAULT_FALLBACK);
+		return VM_FAULT_FALLBACK;
+	}
+	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+		mem_cgroup_uncharge_page(page);
+		put_page(page);
+		count_vm_event(THP_FAULT_FALLBACK);
+		return VM_FAULT_FALLBACK;
+	}
+
+	count_vm_event(THP_FAULT_ALLOC);
+	return 0;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
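The deleted out: tail (allocating a regular page table and retrying via handle_pte_fault()) does not simply disappear: per the "thp: consolidate code between handle_mm_fault() and do_huge_pmd_anonymous_page()" patch in this merge, the function now reports VM_FAULT_FALLBACK and the generic fault path performs the fallback. That side of the change lives in mm/memory.c and is not part of this file's diff; the snippet below is only an illustrative sketch of the expected caller-side shape, not the verbatim mm/memory.c code:

	/* sketch: huge-PMD branch of the generic anonymous fault path */
	if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
		int ret = do_huge_pmd_anonymous_page(mm, vma, address,
						     pmd, flags);
		if (!(ret & VM_FAULT_FALLBACK))
			return ret;
		/* otherwise fall through and service the fault with
		 * regular ptes via handle_pte_fault() */
	}
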
@@ -1170,7 +1150,6 @@ alloc:
 	new_page = NULL;
 
 	if (unlikely(!new_page)) {
-		count_vm_event(THP_FAULT_FALLBACK);
 		if (is_huge_zero_pmd(orig_pmd)) {
 			ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
 					address, pmd, orig_pmd, haddr);
@@ -1181,9 +1160,9 @@ alloc:
 				split_huge_page(page);
 			put_page(page);
 		}
+		count_vm_event(THP_FAULT_FALLBACK);
 		goto out;
 	}
-	count_vm_event(THP_FAULT_ALLOC);
 
 	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
 		put_page(new_page);
@@ -1191,10 +1170,13 @@ alloc:
 			split_huge_page(page);
 			put_page(page);
 		}
+		count_vm_event(THP_FAULT_FALLBACK);
 		ret |= VM_FAULT_OOM;
 		goto out;
 	}
 
+	count_vm_event(THP_FAULT_ALLOC);
+
 	if (is_huge_zero_pmd(orig_pmd))
 		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
 	else
@@ -1215,7 +1197,8 @@ alloc:
 		goto out_mn;
 	} else {
 		pmd_t entry;
-		entry = mk_huge_pmd(new_page, vma);
+		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
+		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
@@ -1666,7 +1649,6 @@ static void __split_huge_page_refcount(struct page *page,
 	BUG_ON(atomic_read(&page->_count) <= 0);
 
 	__mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
-	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
 	ClearPageCompound(page);
 	compound_unlock(page);
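This adjustment at split time becomes unnecessary because, with the "thp: account anon transparent huge pages into NR_ANON_PAGES" patch in this merge, a THP is already accounted as HPAGE_PMD_NR pages in NR_ANON_PAGES when it is first mapped. That accounting lives on the rmap side and is outside this file's diff; a hedged sketch of the idea, assuming the rmap code scales the counter with hpage_nr_pages():

	/* when an anonymous page is first mapped, credit NR_ANON_PAGES with
	 * 1 for a small page and HPAGE_PMD_NR for a transparent huge page */
	__mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
			      hpage_nr_pages(page));
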
@@ -2364,7 +2346,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
 
-	_pmd = mk_huge_pmd(new_page, vma);
+	_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
+	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
 
 	/*
 	 * spin_lock() below is not the equivalent of smp_wmb(), so