Diffstat (limited to 'mm/memory.c')
-rw-r--r--   mm/memory.c   216
1 files changed, 170 insertions, 46 deletions
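Most of this diff replaces the old file_rss/anon_rss pair with an NR_MM_COUNTERS vector and, under SPLIT_RSS_COUNTING, caches per-task counter deltas that are folded into the mm's atomic counters roughly once per 64 page faults. The standalone userspace sketch below illustrates only that caching pattern; every name in it (rss_cache, rss_check_sync, and so on) is invented for the example and is not the kernel API.

/*
 * Illustrative sketch of the split-counter idea: cheap non-atomic updates
 * to a per-thread cache, folded into shared atomic counters only after a
 * threshold of events. Names and types here are made up for the example.
 */
#include <stdatomic.h>
#include <stdio.h>

enum { MM_FILEPAGES, MM_ANONPAGES, MM_SWAPENTS, NR_MM_COUNTERS };

/* Shared counters (one set per address space), always updated atomically. */
struct mm_counters {
	atomic_long count[NR_MM_COUNTERS];
};

/* Per-thread cache of pending deltas, updated without atomics. */
struct rss_cache {
	long count[NR_MM_COUNTERS];
	int events;
};

#define RSS_EVENTS_THRESH 64	/* sync roughly once per 64 events */

/* Fold cached deltas into the shared counters and reset the cache. */
static void rss_sync(struct rss_cache *cache, struct mm_counters *mm)
{
	for (int i = 0; i < NR_MM_COUNTERS; i++) {
		if (cache->count[i]) {
			atomic_fetch_add(&mm->count[i], cache->count[i]);
			cache->count[i] = 0;
		}
	}
	cache->events = 0;
}

/* Fast path: bump the private cache only. */
static void rss_add_fast(struct rss_cache *cache, int member, long val)
{
	cache->count[member] += val;
}

/* Called once per "fault": sync only when enough events have piled up. */
static void rss_check_sync(struct rss_cache *cache, struct mm_counters *mm)
{
	if (cache->events++ > RSS_EVENTS_THRESH)
		rss_sync(cache, mm);
}

/* Readers may see a value that lags behind; clamp negatives to zero. */
static long rss_read(struct mm_counters *mm, int member)
{
	long val = atomic_load(&mm->count[member]);
	return val < 0 ? 0 : val;
}

int main(void)
{
	struct mm_counters mm = {0};
	struct rss_cache cache = {0};

	for (int fault = 0; fault < 200; fault++) {
		rss_check_sync(&cache, &mm);
		rss_add_fast(&cache, MM_ANONPAGES, 1);
	}
	rss_sync(&cache, &mm);	/* final flush, e.g. at exit */

	printf("anon pages: %ld\n", rss_read(&mm, MM_ANONPAGES));
	return 0;
}

The trade-off mirrored here is that readers of the shared counters may briefly see a stale (even negative) value, which is why the read side clamps at zero, exactly as get_mm_counter() does in the patch below.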
diff --git a/mm/memory.c b/mm/memory.c
index 6ab19dd4a199..833952d8b74d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -56,6 +56,7 @@
 #include <linux/kallsyms.h>
 #include <linux/swapops.h>
 #include <linux/elf.h>
+#include <linux/gfp.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -121,6 +122,77 @@ static int __init init_zero_pfn(void)
 }
 core_initcall(init_zero_pfn);
 
+
+#if defined(SPLIT_RSS_COUNTING)
+
+static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+{
+	int i;
+
+	for (i = 0; i < NR_MM_COUNTERS; i++) {
+		if (task->rss_stat.count[i]) {
+			add_mm_counter(mm, i, task->rss_stat.count[i]);
+			task->rss_stat.count[i] = 0;
+		}
+	}
+	task->rss_stat.events = 0;
+}
+
+static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
+{
+	struct task_struct *task = current;
+
+	if (likely(task->mm == mm))
+		task->rss_stat.count[member] += val;
+	else
+		add_mm_counter(mm, member, val);
+}
+#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
+#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
+
+/* sync counter once per 64 page faults */
+#define TASK_RSS_EVENTS_THRESH	(64)
+static void check_sync_rss_stat(struct task_struct *task)
+{
+	if (unlikely(task != current))
+		return;
+	if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
+		__sync_task_rss_stat(task, task->mm);
+}
+
+unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	long val = 0;
+
+	/*
+	 * Don't use task->mm here...for avoiding to use task_get_mm()..
+	 * The caller must guarantee task->mm is not invalid.
+	 */
+	val = atomic_long_read(&mm->rss_stat.count[member]);
+	/*
+	 * counter is updated in asynchronous manner and may go to minus.
+	 * But it's never be expected number for users.
+	 */
+	if (val < 0)
+		return 0;
+	return (unsigned long)val;
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+	__sync_task_rss_stat(task, mm);
+}
+#else
+
+#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
+#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
+
+static void check_sync_rss_stat(struct task_struct *task)
+{
+}
+
+#endif
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none. Usually (but
@@ -300,7 +372,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 * Hide vma from rmap and truncate_pagecache before freeing
 	 * pgtables
 	 */
-	anon_vma_unlink(vma);
+	unlink_anon_vmas(vma);
 	unlink_file_vma(vma);
 
 	if (is_vm_hugetlb_page(vma)) {
@@ -314,7 +386,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		       && !is_vm_hugetlb_page(next)) {
 			vma = next;
 			next = vma->vm_next;
-			anon_vma_unlink(vma);
+			unlink_anon_vmas(vma);
 			unlink_file_vma(vma);
 		}
 		free_pgd_range(tlb, addr, vma->vm_end,
@@ -376,12 +448,20 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+static inline void init_rss_vec(int *rss)
 {
-	if (file_rss)
-		add_mm_counter(mm, file_rss, file_rss);
-	if (anon_rss)
-		add_mm_counter(mm, anon_rss, anon_rss);
+	memset(rss, 0, sizeof(int) * NR_MM_COUNTERS);
+}
+
+static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
+{
+	int i;
+
+	if (current->mm == mm)
+		sync_mm_rss(current, mm);
+	for (i = 0; i < NR_MM_COUNTERS; i++)
+		if (rss[i])
+			add_mm_counter(mm, i, rss[i]);
 }
 
 /*
@@ -430,12 +510,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 		"BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
 		current->comm,
 		(long long)pte_val(pte), (long long)pmd_val(*pmd));
-	if (page) {
-		printk(KERN_ALERT
-		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
-		page, (void *)page->flags, page_count(page),
-		page_mapcount(page), page->mapping, page->index);
-	}
+	if (page)
+		dump_page(page);
 	printk(KERN_ALERT
 		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
 		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
@@ -572,7 +648,7 @@ out:
  * covered by this vma.
  */
 
-static inline void
+static inline unsigned long
 copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
 		unsigned long addr, int *rss)
@@ -586,7 +662,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		if (!pte_file(pte)) {
 			swp_entry_t entry = pte_to_swp_entry(pte);
 
-			swap_duplicate(entry);
+			if (swap_duplicate(entry) < 0)
+				return entry.val;
+
 			/* make sure dst_mm is on swapoff's mmlist. */
 			if (unlikely(list_empty(&dst_mm->mmlist))) {
 				spin_lock(&mmlist_lock);
@@ -595,7 +673,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 						 &src_mm->mmlist);
 				spin_unlock(&mmlist_lock);
 			}
-			if (is_write_migration_entry(entry) &&
+			if (likely(!non_swap_entry(entry)))
+				rss[MM_SWAPENTS]++;
+			else if (is_write_migration_entry(entry) &&
 					is_cow_mapping(vm_flags)) {
 				/*
 				 * COW mappings require pages in both parent
@@ -630,11 +710,15 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (page) {
 		get_page(page);
 		page_dup_rmap(page);
-		rss[PageAnon(page)]++;
+		if (PageAnon(page))
+			rss[MM_ANONPAGES]++;
+		else
+			rss[MM_FILEPAGES]++;
 	}
 
 out_set_pte:
 	set_pte_at(dst_mm, addr, dst_pte, pte);
+	return 0;
 }
 
 static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
640 | static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | 724 | static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, |
@@ -645,10 +729,12 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, | |||
645 | pte_t *src_pte, *dst_pte; | 729 | pte_t *src_pte, *dst_pte; |
646 | spinlock_t *src_ptl, *dst_ptl; | 730 | spinlock_t *src_ptl, *dst_ptl; |
647 | int progress = 0; | 731 | int progress = 0; |
648 | int rss[2]; | 732 | int rss[NR_MM_COUNTERS]; |
733 | swp_entry_t entry = (swp_entry_t){0}; | ||
649 | 734 | ||
650 | again: | 735 | again: |
651 | rss[1] = rss[0] = 0; | 736 | init_rss_vec(rss); |
737 | |||
652 | dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); | 738 | dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl); |
653 | if (!dst_pte) | 739 | if (!dst_pte) |
654 | return -ENOMEM; | 740 | return -ENOMEM; |
@@ -674,16 +760,25 @@ again:
 			progress++;
 			continue;
 		}
-		copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+		entry.val = copy_one_pte(dst_mm, src_mm, dst_pte, src_pte,
+							vma, addr, rss);
+		if (entry.val)
+			break;
 		progress += 8;
 	} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
 
 	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
 	pte_unmap_nested(orig_src_pte);
-	add_mm_rss(dst_mm, rss[0], rss[1]);
+	add_mm_rss_vec(dst_mm, rss);
 	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
+
+	if (entry.val) {
+		if (add_swap_count_continuation(entry, GFP_KERNEL) < 0)
+			return -ENOMEM;
+		progress = 0;
+	}
 	if (addr != end)
 		goto again;
 	return 0;
@@ -803,8 +898,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
 	spinlock_t *ptl;
-	int file_rss = 0;
-	int anon_rss = 0;
+	int rss[NR_MM_COUNTERS];
+
+	init_rss_vec(rss);
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -850,14 +946,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				set_pte_at(mm, addr, pte,
 					   pgoff_to_pte(page->index));
 			if (PageAnon(page))
-				anon_rss--;
+				rss[MM_ANONPAGES]--;
 			else {
 				if (pte_dirty(ptent))
 					set_page_dirty(page);
 				if (pte_young(ptent) &&
 				    likely(!VM_SequentialReadHint(vma)))
 					mark_page_accessed(page);
-				file_rss--;
+				rss[MM_FILEPAGES]--;
 			}
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
@@ -874,13 +970,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		if (pte_file(ptent)) {
 			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
 				print_bad_pte(vma, addr, ptent, NULL);
-		} else if
-		  (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
-			print_bad_pte(vma, addr, ptent, NULL);
+		} else {
+			swp_entry_t entry = pte_to_swp_entry(ptent);
+
+			if (!non_swap_entry(entry))
+				rss[MM_SWAPENTS]--;
+			if (unlikely(!free_swap_and_cache(entry)))
+				print_bad_pte(vma, addr, ptent, NULL);
+		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
-	add_mm_rss(mm, file_rss, anon_rss);
+	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -943,6 +1044,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 		details = NULL;
 
 	BUG_ON(addr >= end);
+	mem_cgroup_uncharge_start();
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
@@ -955,6 +1057,7 @@ static unsigned long unmap_page_range(struct mmu_gather *tlb,
 						zap_work, details);
 	} while (pgd++, addr = next, (addr != end && *zap_work > 0));
 	tlb_end_vma(tlb, vma);
+	mem_cgroup_uncharge_end();
 
 	return addr;
 }
@@ -1512,7 +1615,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter(mm, file_rss);
+	inc_mm_counter_fast(mm, MM_FILEPAGES);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -1578,7 +1681,7 @@ static int insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 	/* Ok, finally just insert the thing.. */
 	entry = pte_mkspecial(pfn_pte(pfn, prot));
 	set_pte_at(mm, addr, pte, entry);
-	update_mmu_cache(vma, addr, entry); /* XXX: why not for insert_page? */
+	update_mmu_cache(vma, addr, pte); /* XXX: why not for insert_page? */
 
 	retval = 0;
 out_unlock:
@@ -2029,6 +2132,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			page_cache_release(old_page);
 		}
 		reuse = reuse_swap_page(old_page);
+		if (reuse)
+			/*
+			 * The page is all ours. Move it to our anon_vma so
+			 * the rmap code will not search our parent or siblings.
+			 * Protected against the rmap code by the page lock.
+			 */
+			page_move_anon_rmap(old_page, vma, address);
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
@@ -2101,7 +2211,7 @@ reuse:
 		entry = pte_mkyoung(orig_pte);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (ptep_set_access_flags(vma, address, page_table, entry,1))
-			update_mmu_cache(vma, address, entry);
+			update_mmu_cache(vma, address, page_table);
 		ret |= VM_FAULT_WRITE;
 		goto unlock;
 	}
@@ -2148,11 +2258,11 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter(mm, file_rss);
-				inc_mm_counter(mm, anon_rss);
+				dec_mm_counter_fast(mm, MM_FILEPAGES);
+				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
 		} else
-			inc_mm_counter(mm, anon_rss);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2170,7 +2280,7 @@ gotten:
 	 * new page to be mapped directly into the secondary page table.
 	 */
 	set_pte_at_notify(mm, address, page_table, entry);
-	update_mmu_cache(vma, address, entry);
+	update_mmu_cache(vma, address, page_table);
 	if (old_page) {
 		/*
 		 * Only after switching the pte to the new page may
@@ -2514,7 +2624,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			ret = VM_FAULT_HWPOISON;
 		} else {
 			print_bad_pte(vma, address, orig_pte, NULL);
-			ret = VM_FAULT_OOM;
+			ret = VM_FAULT_SIGBUS;
 		}
 		goto out;
 	}
@@ -2540,6 +2650,10 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		ret = VM_FAULT_MAJOR;
 		count_vm_event(PGMAJFAULT);
 	} else if (PageHWPoison(page)) {
+		/*
+		 * hwpoisoned dirty swapcache pages are kept for killing
+		 * owner processes (which may be unknown at hwpoison time)
+		 */
 		ret = VM_FAULT_HWPOISON;
 		delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 		goto out_release;
@@ -2548,6 +2662,12 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	lock_page(page);
 	delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
 
+	page = ksm_might_need_to_copy(page, vma, address);
+	if (!page) {
+		ret = VM_FAULT_OOM;
+		goto out;
+	}
+
 	if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
 		ret = VM_FAULT_OOM;
 		goto out_page;
@@ -2579,7 +2699,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * discarded at swap_free().
 	 */
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
+	dec_mm_counter_fast(mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2604,7 +2725,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, address, pte);
+	update_mmu_cache(vma, address, page_table);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 out:
@@ -2663,13 +2784,13 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, address);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
 
 	/* No need to invalidate - it was non-present before */
-	update_mmu_cache(vma, address, entry);
+	update_mmu_cache(vma, address, page_table);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
 	return 0;
@@ -2817,10 +2938,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (anon) {
-			inc_mm_counter(mm, anon_rss);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 			page_add_new_anon_rmap(page, vma, address);
 		} else {
-			inc_mm_counter(mm, file_rss);
+			inc_mm_counter_fast(mm, MM_FILEPAGES);
 			page_add_file_rmap(page);
 			if (flags & FAULT_FLAG_WRITE) {
 				dirty_page = page;
@@ -2830,7 +2951,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
-		update_mmu_cache(vma, address, entry);
+		update_mmu_cache(vma, address, page_table);
 	} else {
 		if (charged)
 			mem_cgroup_uncharge_page(page);
@@ -2910,7 +3031,7 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * Page table corrupted: show pte and kill process.
 		 */
 		print_bad_pte(vma, address, orig_pte, NULL);
-		return VM_FAULT_OOM;
+		return VM_FAULT_SIGBUS;
 	}
 
 	pgoff = pte_to_pgoff(orig_pte);
@@ -2967,7 +3088,7 @@ static inline int handle_pte_fault(struct mm_struct *mm,
 	}
 	entry = pte_mkyoung(entry);
 	if (ptep_set_access_flags(vma, address, pte, entry, flags & FAULT_FLAG_WRITE)) {
-		update_mmu_cache(vma, address, entry);
+		update_mmu_cache(vma, address, pte);
 	} else {
 		/*
 		 * This is needed only for protection faults but the arch code
@@ -2998,6 +3119,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	count_vm_event(PGFAULT);
 
+	/* do counter updates before entering really critical section. */
+	check_sync_rss_stat(current);
+
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 