author     David S. Miller <davem@davemloft.net>   2010-04-03 18:49:14 -0400
committer  David S. Miller <davem@davemloft.net>   2010-04-03 18:49:14 -0400
commit     87e8b821ed8db3dab03d96cd542e29666bf210aa (patch)
tree       0027060473aafbbb125655ba027319c8a1a665fc /mm/memory.c
parent     33cd9dfa3a13e3d8e41aef225a9f98169816723b (diff)
parent     5e11611a5d22252f3f9c169a3c9377eac0c32033 (diff)
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  167
1 file changed, 133 insertions(+), 34 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 72fb5f39bccc..bc9ba5a1f5b9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -121,6 +121,78 @@ static int __init init_zero_pfn(void)
 }
 core_initcall(init_zero_pfn);
 
+
+#if defined(SPLIT_RSS_COUNTING)
+
+void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+{
+	int i;
+
+	for (i = 0; i < NR_MM_COUNTERS; i++) {
+		if (task->rss_stat.count[i]) {
+			BUG_ON(!mm);
+			add_mm_counter(mm, i, task->rss_stat.count[i]);
+			task->rss_stat.count[i] = 0;
+		}
+	}
+	task->rss_stat.events = 0;
+}
+
+static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
+{
+	struct task_struct *task = current;
+
+	if (likely(task->mm == mm))
+		task->rss_stat.count[member] += val;
+	else
+		add_mm_counter(mm, member, val);
+}
+#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
+#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
+
+/* sync counter once per 64 page faults */
+#define TASK_RSS_EVENTS_THRESH	(64)
+static void check_sync_rss_stat(struct task_struct *task)
+{
+	if (unlikely(task != current))
+		return;
+	if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
+		__sync_task_rss_stat(task, task->mm);
+}
+
+unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	long val = 0;
+
+	/*
+	 * Don't use task->mm here...for avoiding to use task_get_mm()..
+	 * The caller must guarantee task->mm is not invalid.
+	 */
+	val = atomic_long_read(&mm->rss_stat.count[member]);
+	/*
+	 * counter is updated in asynchronous manner and may go to minus.
+	 * But it's never be expected number for users.
+	 */
+	if (val < 0)
+		return 0;
+	return (unsigned long)val;
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+	__sync_task_rss_stat(task, mm);
+}
+#else
+
+#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
+#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
+
+static void check_sync_rss_stat(struct task_struct *task)
+{
+}
+
+#endif
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none. Usually (but
@@ -300,7 +372,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 * Hide vma from rmap and truncate_pagecache before freeing
 		 * pgtables
 		 */
-		anon_vma_unlink(vma);
+		unlink_anon_vmas(vma);
 		unlink_file_vma(vma);
 
 		if (is_vm_hugetlb_page(vma)) {
@@ -314,7 +386,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			       && !is_vm_hugetlb_page(next)) {
 				vma = next;
 				next = vma->vm_next;
-				anon_vma_unlink(vma);
+				unlink_anon_vmas(vma);
 				unlink_file_vma(vma);
 			}
 			free_pgd_range(tlb, addr, vma->vm_end,
@@ -376,12 +448,20 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+static inline void init_rss_vec(int *rss)
 {
-	if (file_rss)
-		add_mm_counter(mm, file_rss, file_rss);
-	if (anon_rss)
-		add_mm_counter(mm, anon_rss, anon_rss);
+	memset(rss, 0, sizeof(int) * NR_MM_COUNTERS);
+}
+
+static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
+{
+	int i;
+
+	if (current->mm == mm)
+		sync_mm_rss(current, mm);
+	for (i = 0; i < NR_MM_COUNTERS; i++)
+		if (rss[i])
+			add_mm_counter(mm, i, rss[i]);
 }
 
 /*
@@ -430,12 +510,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 		"BUG: Bad page map in process %s pte:%08llx pmd:%08llx\n",
 		current->comm,
 		(long long)pte_val(pte), (long long)pmd_val(*pmd));
-	if (page) {
-		printk(KERN_ALERT
-		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
-		page, (void *)page->flags, page_count(page),
-		page_mapcount(page), page->mapping, page->index);
-	}
+	if (page)
+		dump_page(page);
 	printk(KERN_ALERT
 		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
 		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
@@ -597,7 +673,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 					 &src_mm->mmlist);
 			spin_unlock(&mmlist_lock);
 		}
-		if (is_write_migration_entry(entry) &&
+		if (likely(!non_swap_entry(entry)))
+			rss[MM_SWAPENTS]++;
+		else if (is_write_migration_entry(entry) &&
 				is_cow_mapping(vm_flags)) {
 			/*
 			 * COW mappings require pages in both parent
@@ -632,7 +710,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (page) {
 		get_page(page);
 		page_dup_rmap(page);
-		rss[PageAnon(page)]++;
+		if (PageAnon(page))
+			rss[MM_ANONPAGES]++;
+		else
+			rss[MM_FILEPAGES]++;
 	}
 
 out_set_pte:
@@ -648,11 +729,12 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
-	int rss[2];
+	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
 
 again:
-	rss[1] = rss[0] = 0;
+	init_rss_vec(rss);
+
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
 	if (!dst_pte)
 		return -ENOMEM;
@@ -688,7 +770,7 @@ again:
 	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
 	pte_unmap_nested(orig_src_pte);
-	add_mm_rss(dst_mm, rss[0], rss[1]);
+	add_mm_rss_vec(dst_mm, rss);
 	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
 
@@ -816,8 +898,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
 	spinlock_t *ptl;
-	int file_rss = 0;
-	int anon_rss = 0;
+	int rss[NR_MM_COUNTERS];
+
+	init_rss_vec(rss);
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -863,14 +946,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				set_pte_at(mm, addr, pte,
 					   pgoff_to_pte(page->index));
 			if (PageAnon(page))
-				anon_rss--;
+				rss[MM_ANONPAGES]--;
 			else {
 				if (pte_dirty(ptent))
 					set_page_dirty(page);
 				if (pte_young(ptent) &&
 				    likely(!VM_SequentialReadHint(vma)))
 					mark_page_accessed(page);
-				file_rss--;
+				rss[MM_FILEPAGES]--;
 			}
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
@@ -887,13 +970,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		if (pte_file(ptent)) {
 			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
 				print_bad_pte(vma, addr, ptent, NULL);
-		} else if
-		  (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
-			print_bad_pte(vma, addr, ptent, NULL);
+		} else {
+			swp_entry_t entry = pte_to_swp_entry(ptent);
+
+			if (!non_swap_entry(entry))
+				rss[MM_SWAPENTS]--;
+			if (unlikely(!free_swap_and_cache(entry)))
+				print_bad_pte(vma, addr, ptent, NULL);
+		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
-	add_mm_rss(mm, file_rss, anon_rss);
+	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -1527,7 +1615,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter(mm, file_rss);
+	inc_mm_counter_fast(mm, MM_FILEPAGES);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2044,6 +2132,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				page_cache_release(old_page);
 			}
 			reuse = reuse_swap_page(old_page);
+			if (reuse)
+				/*
+				 * The page is all ours. Move it to our anon_vma so
+				 * the rmap code will not search our parent or siblings.
+				 * Protected against the rmap code by the page lock.
+				 */
+				page_move_anon_rmap(old_page, vma, address);
 			unlock_page(old_page);
 		} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
@@ -2163,11 +2258,11 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter(mm, file_rss);
-				inc_mm_counter(mm, anon_rss);
+				dec_mm_counter_fast(mm, MM_FILEPAGES);
+				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
 		} else
-			inc_mm_counter(mm, anon_rss);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2604,7 +2699,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * discarded at swap_free().
 	 */
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
+	dec_mm_counter_fast(mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2688,7 +2784,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, address);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
@@ -2842,10 +2938,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (flags & FAULT_FLAG_WRITE)
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 	if (anon) {
-		inc_mm_counter(mm, anon_rss);
+		inc_mm_counter_fast(mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, address);
 	} else {
-		inc_mm_counter(mm, file_rss);
+		inc_mm_counter_fast(mm, MM_FILEPAGES);
 		page_add_file_rmap(page);
 		if (flags & FAULT_FLAG_WRITE) {
 			dirty_page = page;
@@ -3023,6 +3119,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	count_vm_event(PGFAULT);
 
+	/* do counter updates before entering really critical section. */
+	check_sync_rss_stat(current);
+
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 
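
For readers skimming the diff above: the core change it carries is split RSS counting. Instead of doing an atomic update on the shared per-mm counters at every page fault, each task caches its RSS deltas (add_mm_counter_fast and the rss[] vectors) and folds them into mm->rss_stat at most once every TASK_RSS_EVENTS_THRESH (64) events, or when add_mm_rss_vec()/sync_mm_rss() flush them explicitly. The fragment below is a minimal userspace sketch of that pattern in plain C11, for illustration only; all names here (shared_rss, local_rss, add_rss_fast, and so on) are hypothetical and are not part of the kernel patch.

#include <stdatomic.h>
#include <stdio.h>

#define NR_COUNTERS		3	/* file pages, anon pages, swap entries */
#define RSS_EVENTS_THRESH	64	/* flush the local cache once per 64 events */

/* Shared state: one atomic counter per RSS type (stand-in for mm->rss_stat). */
static atomic_long shared_rss[NR_COUNTERS];

/* Per-thread cache of pending deltas (stand-in for task->rss_stat). */
static _Thread_local long local_rss[NR_COUNTERS];
static _Thread_local int  local_events;

/* Fold every cached delta into the shared counters and reset the cache. */
static void sync_local_rss(void)
{
	for (int i = 0; i < NR_COUNTERS; i++) {
		if (local_rss[i]) {
			atomic_fetch_add(&shared_rss[i], local_rss[i]);
			local_rss[i] = 0;
		}
	}
	local_events = 0;
}

/* Cheap fast path: bump the thread-local cache, no atomics, no cache-line sharing. */
static void add_rss_fast(int member, long val)
{
	local_rss[member] += val;
}

/* Called once per "fault": flush the cache only after many events. */
static void check_sync_rss(void)
{
	if (++local_events > RSS_EVENTS_THRESH)
		sync_local_rss();
}

int main(void)
{
	for (int fault = 0; fault < 1000; fault++) {
		add_rss_fast(1, 1);	/* e.g. one new anonymous page */
		check_sync_rss();
	}
	sync_local_rss();		/* final flush, as exit/exec would do */
	printf("anon pages: %ld\n", atomic_load(&shared_rss[1]));
	return 0;
}

Because flushes are deferred, a reader of the shared counters can momentarily see a stale or even negative value; that is why the patch's get_mm_counter() clamps negative results to zero.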