author     David S. Miller <davem@davemloft.net>    2010-04-03 18:49:14 -0400
committer  David S. Miller <davem@davemloft.net>    2010-04-03 18:49:14 -0400
commit     87e8b821ed8db3dab03d96cd542e29666bf210aa
tree       0027060473aafbbb125655ba027319c8a1a665fc    /mm/memory.c
parent     33cd9dfa3a13e3d8e41aef225a9f98169816723b
parent     5e11611a5d22252f3f9c169a3c9377eac0c32033
Merge branch 'master' of /home/davem/src/GIT/linux-2.6/
Diffstat (limited to 'mm/memory.c')
-rw-r--r--    mm/memory.c    167
1 file changed, 133 insertions, 34 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 72fb5f39bccc..bc9ba5a1f5b9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -121,6 +121,78 @@ static int __init init_zero_pfn(void)
 }
 core_initcall(init_zero_pfn);
 
+
+#if defined(SPLIT_RSS_COUNTING)
+
+void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+{
+	int i;
+
+	for (i = 0; i < NR_MM_COUNTERS; i++) {
+		if (task->rss_stat.count[i]) {
+			BUG_ON(!mm);
+			add_mm_counter(mm, i, task->rss_stat.count[i]);
+			task->rss_stat.count[i] = 0;
+		}
+	}
+	task->rss_stat.events = 0;
+}
+
+static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
+{
+	struct task_struct *task = current;
+
+	if (likely(task->mm == mm))
+		task->rss_stat.count[member] += val;
+	else
+		add_mm_counter(mm, member, val);
+}
+#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
+#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
+
+/* sync counter once per 64 page faults */
+#define TASK_RSS_EVENTS_THRESH	(64)
+static void check_sync_rss_stat(struct task_struct *task)
+{
+	if (unlikely(task != current))
+		return;
+	if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
+		__sync_task_rss_stat(task, task->mm);
+}
+
+unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+	long val = 0;
+
+	/*
+	 * Don't use task->mm here...for avoiding to use task_get_mm()..
+	 * The caller must guarantee task->mm is not invalid.
+	 */
+	val = atomic_long_read(&mm->rss_stat.count[member]);
+	/*
+	 * counter is updated in asynchronous manner and may go to minus.
+	 * But it's never be expected number for users.
+	 */
+	if (val < 0)
+		return 0;
+	return (unsigned long)val;
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm)
+{
+	__sync_task_rss_stat(task, mm);
+}
+#else
+
+#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
+#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
+
+static void check_sync_rss_stat(struct task_struct *task)
+{
+}
+
+#endif
+
 /*
  * If a p?d_bad entry is found while walking page tables, report
  * the error, before resetting entry to p?d_none.  Usually (but
@@ -300,7 +372,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	 * Hide vma from rmap and truncate_pagecache before freeing
 	 * pgtables
 	 */
-	anon_vma_unlink(vma);
+	unlink_anon_vmas(vma);
 	unlink_file_vma(vma);
 
 	if (is_vm_hugetlb_page(vma)) {
@@ -314,7 +386,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		       && !is_vm_hugetlb_page(next)) {
 			vma = next;
 			next = vma->vm_next;
-			anon_vma_unlink(vma);
+			unlink_anon_vmas(vma);
 			unlink_file_vma(vma);
 		}
 		free_pgd_range(tlb, addr, vma->vm_end,
@@ -376,12 +448,20 @@ int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
 	return 0;
 }
 
-static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+static inline void init_rss_vec(int *rss)
 {
-	if (file_rss)
-		add_mm_counter(mm, file_rss, file_rss);
-	if (anon_rss)
-		add_mm_counter(mm, anon_rss, anon_rss);
+	memset(rss, 0, sizeof(int) * NR_MM_COUNTERS);
+}
+
+static inline void add_mm_rss_vec(struct mm_struct *mm, int *rss)
+{
+	int i;
+
+	if (current->mm == mm)
+		sync_mm_rss(current, mm);
+	for (i = 0; i < NR_MM_COUNTERS; i++)
+		if (rss[i])
+			add_mm_counter(mm, i, rss[i]);
 }
 
 /*
@@ -430,12 +510,8 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 		"BUG: Bad page map in process %s  pte:%08llx pmd:%08llx\n",
 		current->comm,
 		(long long)pte_val(pte), (long long)pmd_val(*pmd));
-	if (page) {
-		printk(KERN_ALERT
-		"page:%p flags:%p count:%d mapcount:%d mapping:%p index:%lx\n",
-		page, (void *)page->flags, page_count(page),
-		page_mapcount(page), page->mapping, page->index);
-	}
+	if (page)
+		dump_page(page);
 	printk(KERN_ALERT
 		"addr:%p vm_flags:%08lx anon_vma:%p mapping:%p index:%lx\n",
 		(void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
@@ -597,7 +673,9 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 							&src_mm->mmlist);
 				spin_unlock(&mmlist_lock);
 			}
-			if (is_write_migration_entry(entry) &&
+			if (likely(!non_swap_entry(entry)))
+				rss[MM_SWAPENTS]++;
+			else if (is_write_migration_entry(entry) &&
 					is_cow_mapping(vm_flags)) {
 				/*
 				 * COW mappings require pages in both parent
@@ -632,7 +710,10 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (page) {
 		get_page(page);
 		page_dup_rmap(page);
-		rss[PageAnon(page)]++;
+		if (PageAnon(page))
+			rss[MM_ANONPAGES]++;
+		else
+			rss[MM_FILEPAGES]++;
 	}
 
 out_set_pte:
@@ -648,11 +729,12 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
-	int rss[2];
+	int rss[NR_MM_COUNTERS];
 	swp_entry_t entry = (swp_entry_t){0};
 
 again:
-	rss[1] = rss[0] = 0;
+	init_rss_vec(rss);
+
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
 	if (!dst_pte)
 		return -ENOMEM;
@@ -688,7 +770,7 @@ again:
 	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
 	pte_unmap_nested(orig_src_pte);
-	add_mm_rss(dst_mm, rss[0], rss[1]);
+	add_mm_rss_vec(dst_mm, rss);
 	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
 
@@ -816,8 +898,9 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	struct mm_struct *mm = tlb->mm;
 	pte_t *pte;
 	spinlock_t *ptl;
-	int file_rss = 0;
-	int anon_rss = 0;
+	int rss[NR_MM_COUNTERS];
+
+	init_rss_vec(rss);
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -863,14 +946,14 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 				set_pte_at(mm, addr, pte,
 					   pgoff_to_pte(page->index));
 			if (PageAnon(page))
-				anon_rss--;
+				rss[MM_ANONPAGES]--;
 			else {
 				if (pte_dirty(ptent))
 					set_page_dirty(page);
 				if (pte_young(ptent) &&
 				    likely(!VM_SequentialReadHint(vma)))
 					mark_page_accessed(page);
-				file_rss--;
+				rss[MM_FILEPAGES]--;
 			}
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
@@ -887,13 +970,18 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 		if (pte_file(ptent)) {
 			if (unlikely(!(vma->vm_flags & VM_NONLINEAR)))
 				print_bad_pte(vma, addr, ptent, NULL);
-		} else if
-		  (unlikely(!free_swap_and_cache(pte_to_swp_entry(ptent))))
-			print_bad_pte(vma, addr, ptent, NULL);
+		} else {
+			swp_entry_t entry = pte_to_swp_entry(ptent);
+
+			if (!non_swap_entry(entry))
+				rss[MM_SWAPENTS]--;
+			if (unlikely(!free_swap_and_cache(entry)))
+				print_bad_pte(vma, addr, ptent, NULL);
+		}
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
-	add_mm_rss(mm, file_rss, anon_rss);
+	add_mm_rss_vec(mm, rss);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -1527,7 +1615,7 @@ static int insert_page(struct vm_area_struct *vma, unsigned long addr,
 
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter(mm, file_rss);
+	inc_mm_counter_fast(mm, MM_FILEPAGES);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2044,6 +2132,13 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			page_cache_release(old_page);
 		}
 		reuse = reuse_swap_page(old_page);
+		if (reuse)
+			/*
+			 * The page is all ours. Move it to our anon_vma so
+			 * the rmap code will not search our parent or siblings.
+			 * Protected against the rmap code by the page lock.
+			 */
+			page_move_anon_rmap(old_page, vma, address);
 		unlock_page(old_page);
 	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
 					(VM_WRITE|VM_SHARED))) {
@@ -2163,11 +2258,11 @@ gotten:
 	if (likely(pte_same(*page_table, orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter(mm, file_rss);
-				inc_mm_counter(mm, anon_rss);
+				dec_mm_counter_fast(mm, MM_FILEPAGES);
+				inc_mm_counter_fast(mm, MM_ANONPAGES);
 			}
 		} else
-			inc_mm_counter(mm, anon_rss);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2604,7 +2699,8 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * discarded at swap_free().
 	 */
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
+	dec_mm_counter_fast(mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2688,7 +2784,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (!pte_none(*page_table))
 		goto release;
 
-	inc_mm_counter(mm, anon_rss);
+	inc_mm_counter_fast(mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, address);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
@@ -2842,10 +2938,10 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 		if (flags & FAULT_FLAG_WRITE)
 			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 		if (anon) {
-			inc_mm_counter(mm, anon_rss);
+			inc_mm_counter_fast(mm, MM_ANONPAGES);
 			page_add_new_anon_rmap(page, vma, address);
 		} else {
-			inc_mm_counter(mm, file_rss);
+			inc_mm_counter_fast(mm, MM_FILEPAGES);
 			page_add_file_rmap(page);
 			if (flags & FAULT_FLAG_WRITE) {
 				dirty_page = page;
@@ -3023,6 +3119,9 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	count_vm_event(PGFAULT);
 
+	/* do counter updates before entering really critical section. */
+	check_sync_rss_stat(current);
+
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 
