author		Linus Torvalds <torvalds@linux-foundation.org>	2013-09-12 18:44:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-09-12 18:44:27 -0400
commit		ac4de9543aca59f2b763746647577302fbedd57e (patch)
tree		40407750569ee030de56233c41c9a97f7e89cf67 /mm/huge_memory.c
parent		26935fb06ee88f1188789807687c03041f3c70d9 (diff)
parent		de32a8177f64bc62e1b19c685dd391af664ab13f (diff)
Merge branch 'akpm' (patches from Andrew Morton)
Merge more patches from Andrew Morton:
"The rest of MM. Plus one misc cleanup"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (35 commits)
mm/Kconfig: add MMU dependency for MIGRATION.
kernel: replace strict_strto*() with kstrto*()
mm, thp: count thp_fault_fallback anytime thp fault fails
thp: consolidate code between handle_mm_fault() and do_huge_pmd_anonymous_page()
thp: do_huge_pmd_anonymous_page() cleanup
thp: move maybe_pmd_mkwrite() out of mk_huge_pmd()
mm: cleanup add_to_page_cache_locked()
thp: account anon transparent huge pages into NR_ANON_PAGES
truncate: drop 'oldsize' truncate_pagecache() parameter
mm: make lru_add_drain_all() selective
memcg: document cgroup dirty/writeback memory statistics
memcg: add per cgroup writeback pages accounting
memcg: check for proper lock held in mem_cgroup_update_page_stat
memcg: remove MEMCG_NR_FILE_MAPPED
memcg: reduce function dereference
memcg: avoid overflow caused by PAGE_ALIGN
memcg: rename RESOURCE_MAX to RES_COUNTER_MAX
memcg: correct RESOURCE_MAX to ULLONG_MAX
mm: memcg: do not trap chargers with full callstack on OOM
mm: memcg: rework and document OOM waiting and wakeup
...
Diffstat (limited to 'mm/huge_memory.c')
-rw-r--r--	mm/huge_memory.c	129
1 file changed, 56 insertions(+), 73 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index d66010e0049d..7489884682d8 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -695,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
 	return pmd;
 }
 
-static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma)
+static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 {
 	pmd_t entry;
-	entry = mk_pmd(page, vma->vm_page_prot);
-	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+	entry = mk_pmd(page, prot);
 	entry = pmd_mkhuge(entry);
 	return entry;
 }
@@ -732,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
 		pte_free(mm, pgtable);
 	} else {
 		pmd_t entry;
-		entry = mk_huge_pmd(page, vma);
+		entry = mk_huge_pmd(page, vma->vm_page_prot);
+		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		page_add_new_anon_rmap(page, vma, haddr);
 		pgtable_trans_huge_deposit(mm, pmd, pgtable);
 		set_pmd_at(mm, haddr, pmd, entry);
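
Taken together, the two hunks above split the helper: mk_huge_pmd() now builds the entry from page protections only, matching the semantics of mk_pte()/mk_pmd(), and each caller applies pmd_mkdirty() and maybe_pmd_mkwrite() itself, so a call site decides explicitly whether its mapping becomes dirty and writable. A minimal userspace sketch of the resulting shape; pmd_t, the bit values, and the vma struct here are stand-ins, not the kernel's types:

/*
 * Userspace model of the mk_huge_pmd() split -- all types and bits are
 * illustrative stand-ins for the kernel's.
 */
#include <stdbool.h>
#include <stdio.h>

typedef unsigned long pmd_t;

#define PMD_HUGE	(1UL << 0)
#define PMD_DIRTY	(1UL << 1)
#define PMD_RW		(1UL << 2)

struct vma_model {
	bool may_write;		/* stands in for vma->vm_flags & VM_WRITE */
	unsigned long prot;	/* stands in for vma->vm_page_prot */
};

/* After the patch: protection bits in, huge bit set, nothing else. */
static pmd_t mk_huge_pmd(unsigned long prot)
{
	return prot | PMD_HUGE;
}

static pmd_t pmd_mkdirty(pmd_t pmd)
{
	return pmd | PMD_DIRTY;
}

/* The write-enable step the patch moves out into the callers. */
static pmd_t maybe_pmd_mkwrite(pmd_t pmd, const struct vma_model *vma)
{
	return vma->may_write ? pmd | PMD_RW : pmd;
}

int main(void)
{
	struct vma_model vma = { .may_write = true, .prot = 1UL << 8 };

	/* What __do_huge_pmd_anonymous_page() now does in two steps: */
	pmd_t entry = mk_huge_pmd(vma.prot);
	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), &vma);

	printf("entry = %#lx\n", entry);
	return 0;
}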
@@ -788,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	struct page *page;
 	unsigned long haddr = address & HPAGE_PMD_MASK;
-	pte_t *pte;
 
-	if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
-		if (unlikely(anon_vma_prepare(vma)))
-			return VM_FAULT_OOM;
-		if (unlikely(khugepaged_enter(vma)))
+	if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+		return VM_FAULT_FALLBACK;
+	if (unlikely(anon_vma_prepare(vma)))
+		return VM_FAULT_OOM;
+	if (unlikely(khugepaged_enter(vma)))
+		return VM_FAULT_OOM;
+	if (!(flags & FAULT_FLAG_WRITE) &&
+			transparent_hugepage_use_zero_page()) {
+		pgtable_t pgtable;
+		struct page *zero_page;
+		bool set;
+		pgtable = pte_alloc_one(mm, haddr);
+		if (unlikely(!pgtable))
 			return VM_FAULT_OOM;
-		if (!(flags & FAULT_FLAG_WRITE) &&
-				transparent_hugepage_use_zero_page()) {
-			pgtable_t pgtable;
-			struct page *zero_page;
-			bool set;
-			pgtable = pte_alloc_one(mm, haddr);
-			if (unlikely(!pgtable))
-				return VM_FAULT_OOM;
-			zero_page = get_huge_zero_page();
-			if (unlikely(!zero_page)) {
-				pte_free(mm, pgtable);
-				count_vm_event(THP_FAULT_FALLBACK);
-				goto out;
-			}
-			spin_lock(&mm->page_table_lock);
-			set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
-					zero_page);
-			spin_unlock(&mm->page_table_lock);
-			if (!set) {
-				pte_free(mm, pgtable);
-				put_huge_zero_page();
-			}
-			return 0;
-		}
-		page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-					  vma, haddr, numa_node_id(), 0);
-		if (unlikely(!page)) {
+		zero_page = get_huge_zero_page();
+		if (unlikely(!zero_page)) {
+			pte_free(mm, pgtable);
 			count_vm_event(THP_FAULT_FALLBACK);
-			goto out;
-		}
-		count_vm_event(THP_FAULT_ALLOC);
-		if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
-			put_page(page);
-			goto out;
+			return VM_FAULT_FALLBACK;
 		}
-		if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
-							  page))) {
-			mem_cgroup_uncharge_page(page);
-			put_page(page);
-			goto out;
+		spin_lock(&mm->page_table_lock);
+		set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+				zero_page);
+		spin_unlock(&mm->page_table_lock);
+		if (!set) {
+			pte_free(mm, pgtable);
+			put_huge_zero_page();
 		}
-
 		return 0;
 	}
-out:
-	/*
-	 * Use __pte_alloc instead of pte_alloc_map, because we can't
-	 * run pte_offset_map on the pmd, if an huge pmd could
-	 * materialize from under us from a different thread.
-	 */
-	if (unlikely(pmd_none(*pmd)) &&
-	    unlikely(__pte_alloc(mm, vma, pmd, address)))
-		return VM_FAULT_OOM;
-	/* if an huge pmd materialized from under us just retry later */
-	if (unlikely(pmd_trans_huge(*pmd)))
-		return 0;
-	/*
-	 * A regular pmd is established and it can't morph into a huge pmd
-	 * from under us anymore at this point because we hold the mmap_sem
-	 * read mode and khugepaged takes it in write mode. So now it's
-	 * safe to run pte_offset_map().
-	 */
-	pte = pte_offset_map(pmd, address);
-	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+	page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+			vma, haddr, numa_node_id(), 0);
+	if (unlikely(!page)) {
+		count_vm_event(THP_FAULT_FALLBACK);
+		return VM_FAULT_FALLBACK;
+	}
+	if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
+		put_page(page);
+		count_vm_event(THP_FAULT_FALLBACK);
+		return VM_FAULT_FALLBACK;
+	}
+	if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+		mem_cgroup_uncharge_page(page);
+		put_page(page);
+		count_vm_event(THP_FAULT_FALLBACK);
+		return VM_FAULT_FALLBACK;
+	}
+
+	count_vm_event(THP_FAULT_ALLOC);
+	return 0;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
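
The rewrite above also changes the function's contract: the old out: label fell back to the regular page-table path from inside the THP code, allocating a PTE page and calling handle_pte_fault() directly, whereas now every failure returns VM_FAULT_FALLBACK and the fallback to small pages belongs to the generic fault path in mm/memory.c. A compilable toy model of that division of labor; the flag value and function bodies are illustrative, not the kernel's:

/*
 * Model of the new contract: the THP fault handler reports
 * VM_FAULT_FALLBACK instead of retrying with small pages itself, and
 * the generic fault path owns the fallback.
 */
#include <stdio.h>

#define VM_FAULT_OOM		0x0001
#define VM_FAULT_FALLBACK	0x0800	/* value chosen for illustration */

/* Stands in for do_huge_pmd_anonymous_page(): succeed or ask to fall back. */
static int huge_fault(int huge_page_available)
{
	if (!huge_page_available)
		return VM_FAULT_FALLBACK;	/* VMA too small, alloc failed, ... */
	return 0;				/* huge page installed */
}

/* Stands in for the handle_mm_fault() side of the contract. */
static int fault(int huge_page_available)
{
	int ret = huge_fault(huge_page_available);

	if (!(ret & VM_FAULT_FALLBACK))
		return ret;			/* handled, or a hard error */
	printf("falling back to the 4K PTE path\n");
	return 0;
}

int main(void)
{
	fault(1);
	fault(0);
	return 0;
}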
@@ -1170,7 +1150,6 @@ alloc:
 	new_page = NULL;
 
 	if (unlikely(!new_page)) {
-		count_vm_event(THP_FAULT_FALLBACK);
 		if (is_huge_zero_pmd(orig_pmd)) {
 			ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
 					address, pmd, orig_pmd, haddr);
@@ -1181,9 +1160,9 @@ alloc:
 			split_huge_page(page);
 			put_page(page);
 		}
+		count_vm_event(THP_FAULT_FALLBACK);
 		goto out;
 	}
-	count_vm_event(THP_FAULT_ALLOC);
 
 	if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
 		put_page(new_page);
@@ -1191,10 +1170,13 @@ alloc:
 		split_huge_page(page);
 		put_page(page);
 	}
+	count_vm_event(THP_FAULT_FALLBACK);
 	ret |= VM_FAULT_OOM;
 	goto out;
 }
 
+	count_vm_event(THP_FAULT_ALLOC);
+
 	if (is_huge_zero_pmd(orig_pmd))
 		clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
 	else
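
These three hunks fix the fallback accounting in the write-fault path: THP_FAULT_FALLBACK is now bumped on every path that gives up on a huge page, including the memcg charge failure, and THP_FAULT_ALLOC moves past the last failure point, so each attempt lands in exactly one of the two counters. The effect is visible from userspace; a small reader for the relevant /proc/vmstat fields:

/*
 * Prints the two counters this series makes consistent. On a kernel
 * with THP enabled, /proc/vmstat exposes both fields.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/vmstat", "r");
	char line[256];

	if (!f) {
		perror("/proc/vmstat");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strncmp(line, "thp_fault_", 10) == 0)
			fputs(line, stdout);	/* thp_fault_alloc / thp_fault_fallback */
	fclose(f);
	return 0;
}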
@@ -1215,7 +1197,8 @@ alloc:
 		goto out_mn;
 	} else {
 		pmd_t entry;
-		entry = mk_huge_pmd(new_page, vma);
+		entry = mk_huge_pmd(new_page, vma->vm_page_prot);
+		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
@@ -1666,7 +1649,6 @@ static void __split_huge_page_refcount(struct page *page,
 	BUG_ON(atomic_read(&page->_count) <= 0);
 
 	__mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
-	__mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
 	ClearPageCompound(page);
 	compound_unlock(page);
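
With anonymous THP now accounted in NR_ANON_PAGES at fault time, in units of HPAGE_PMD_NR small pages, __split_huge_page_refcount() no longer needs to transfer the count back when a huge page is split, which is what the removed line did. One observable consequence, assuming this reading of the series is right, is that AnonHugePages in /proc/meminfo becomes a subset of AnonPages rather than a separate pool. A small check:

/*
 * Prints AnonPages and AnonHugePages so the subset relation can be
 * eyeballed; after this series AnonHugePages should not exceed AnonPages.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/meminfo", "r");
	char line[256];

	if (!f) {
		perror("/proc/meminfo");
		return 1;
	}
	while (fgets(line, sizeof(line), f))
		if (strncmp(line, "AnonPages:", 10) == 0 ||
		    strncmp(line, "AnonHugePages:", 14) == 0)
			fputs(line, stdout);
	fclose(f);
	return 0;
}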
@@ -2364,7 +2346,8 @@ static void collapse_huge_page(struct mm_struct *mm,
 	__SetPageUptodate(new_page);
 	pgtable = pmd_pgtable(_pmd);
 
-	_pmd = mk_huge_pmd(new_page, vma);
+	_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
+	_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
 
 	/*
 	 * spin_lock() below is not the equivalent of smp_wmb(), so