Diffstat (limited to 'fs/proc/task_mmu.c')
-rw-r--r--	fs/proc/task_mmu.c	250
1 file changed, 126 insertions, 124 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 246eae84b13b..956b75d61809 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -21,7 +21,7 @@
 
 void task_mem(struct seq_file *m, struct mm_struct *mm)
 {
-	unsigned long data, text, lib, swap;
+	unsigned long data, text, lib, swap, ptes, pmds;
 	unsigned long hiwater_vm, total_vm, hiwater_rss, total_rss;
 
 	/*
@@ -42,6 +42,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 	text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK)) >> 10;
 	lib = (mm->exec_vm << (PAGE_SHIFT-10)) - text;
 	swap = get_mm_counter(mm, MM_SWAPENTS);
+	ptes = PTRS_PER_PTE * sizeof(pte_t) * atomic_long_read(&mm->nr_ptes);
+	pmds = PTRS_PER_PMD * sizeof(pmd_t) * mm_nr_pmds(mm);
 	seq_printf(m,
 		"VmPeak:\t%8lu kB\n"
 		"VmSize:\t%8lu kB\n"
@@ -54,6 +56,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		"VmExe:\t%8lu kB\n"
 		"VmLib:\t%8lu kB\n"
 		"VmPTE:\t%8lu kB\n"
+		"VmPMD:\t%8lu kB\n"
 		"VmSwap:\t%8lu kB\n",
 		hiwater_vm << (PAGE_SHIFT-10),
 		total_vm << (PAGE_SHIFT-10),
@@ -63,8 +66,8 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		total_rss << (PAGE_SHIFT-10),
 		data << (PAGE_SHIFT-10),
 		mm->stack_vm << (PAGE_SHIFT-10), text, lib,
-		(PTRS_PER_PTE * sizeof(pte_t) *
-				atomic_long_read(&mm->nr_ptes)) >> 10,
+		ptes >> 10,
+		pmds >> 10,
 		swap << (PAGE_SHIFT-10));
 }
 
@@ -433,7 +436,6 @@ const struct file_operations proc_tid_maps_operations = {
 
 #ifdef CONFIG_PROC_PAGE_MONITOR
 struct mem_size_stats {
-	struct vm_area_struct *vma;
 	unsigned long resident;
 	unsigned long shared_clean;
 	unsigned long shared_dirty;
@@ -443,7 +445,6 @@ struct mem_size_stats {
 	unsigned long anonymous;
 	unsigned long anonymous_thp;
 	unsigned long swap;
-	unsigned long nonlinear;
 	u64 pss;
 };
 
@@ -483,8 +484,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 		struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
-	struct vm_area_struct *vma = mss->vma;
-	pgoff_t pgoff = linear_page_index(vma, addr);
+	struct vm_area_struct *vma = walk->vma;
 	struct page *page = NULL;
 
 	if (pte_present(*pte)) {
@@ -496,17 +496,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 			mss->swap += PAGE_SIZE;
 		else if (is_migration_entry(swpent))
 			page = migration_entry_to_page(swpent);
-	} else if (pte_file(*pte)) {
-		if (pte_to_pgoff(*pte) != pgoff)
-			mss->nonlinear += PAGE_SIZE;
 	}
 
 	if (!page)
 		return;
-
-	if (page->index != pgoff)
-		mss->nonlinear += PAGE_SIZE;
-
 	smaps_account(mss, page, PAGE_SIZE, pte_young(*pte), pte_dirty(*pte));
 }
 
@@ -515,7 +508,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 		struct mm_walk *walk)
 {
 	struct mem_size_stats *mss = walk->private;
-	struct vm_area_struct *vma = mss->vma;
+	struct vm_area_struct *vma = walk->vma;
 	struct page *page;
 
 	/* FOLL_DUMP will return -EFAULT on huge zero page */
@@ -536,8 +529,7 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr,
 static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			   struct mm_walk *walk)
 {
-	struct mem_size_stats *mss = walk->private;
-	struct vm_area_struct *vma = mss->vma;
+	struct vm_area_struct *vma = walk->vma;
 	pte_t *pte;
 	spinlock_t *ptl;
 
@@ -596,7 +588,6 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_ACCOUNT)] = "ac",
 		[ilog2(VM_NORESERVE)] = "nr",
 		[ilog2(VM_HUGETLB)] = "ht",
-		[ilog2(VM_NONLINEAR)] = "nl",
 		[ilog2(VM_ARCH_1)] = "ar",
 		[ilog2(VM_DONTDUMP)] = "dd",
 #ifdef CONFIG_MEM_SOFT_DIRTY
@@ -630,10 +621,8 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 	};
 
 	memset(&mss, 0, sizeof mss);
-	mss.vma = vma;
 	/* mmap_sem is held in m_start */
-	if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-		walk_page_range(vma->vm_start, vma->vm_end, &smaps_walk);
+	walk_page_vma(vma, &smaps_walk);
 
 	show_map_vma(m, vma, is_pid);
 
@@ -668,10 +657,6 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
 		   (vma->vm_flags & VM_LOCKED) ?
 			(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
 
-	if (vma->vm_flags & VM_NONLINEAR)
-		seq_printf(m, "Nonlinear: %8lu kB\n",
-				mss.nonlinear >> 10);
-
 	show_smap_vma_flags(m, vma);
 	m_cache_vma(m, vma);
 	return 0;
@@ -747,18 +732,18 @@ enum clear_refs_types {
 	CLEAR_REFS_ANON,
 	CLEAR_REFS_MAPPED,
 	CLEAR_REFS_SOFT_DIRTY,
+	CLEAR_REFS_MM_HIWATER_RSS,
 	CLEAR_REFS_LAST,
 };
 
 struct clear_refs_private {
-	struct vm_area_struct *vma;
 	enum clear_refs_types type;
 };
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
 static inline void clear_soft_dirty(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *pte)
 {
-#ifdef CONFIG_MEM_SOFT_DIRTY
 	/*
 	 * The soft-dirty tracker uses #PF-s to catch writes
 	 * to pages, so write-protect the pte as well. See the
@@ -772,24 +757,63 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
 	} else if (is_swap_pte(ptent)) {
 		ptent = pte_swp_clear_soft_dirty(ptent);
-	} else if (pte_file(ptent)) {
-		ptent = pte_file_clear_soft_dirty(ptent);
 	}
 
 	set_pte_at(vma->vm_mm, addr, pte, ptent);
-#endif
 }
 
+static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
+{
+	pmd_t pmd = *pmdp;
+
+	pmd = pmd_wrprotect(pmd);
+	pmd = pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY);
+
+	if (vma->vm_flags & VM_SOFTDIRTY)
+		vma->vm_flags &= ~VM_SOFTDIRTY;
+
+	set_pmd_at(vma->vm_mm, addr, pmdp, pmd);
+}
+
+#else
+
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *pte)
+{
+}
+
+static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
+		unsigned long addr, pmd_t *pmdp)
+{
+}
+#endif
+
 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 {
 	struct clear_refs_private *cp = walk->private;
-	struct vm_area_struct *vma = cp->vma;
+	struct vm_area_struct *vma = walk->vma;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
 
-	split_huge_page_pmd(vma, addr, pmd);
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+			clear_soft_dirty_pmd(vma, addr, pmd);
+			goto out;
+		}
+
+		page = pmd_page(*pmd);
+
+		/* Clear accessed and referenced bits. */
+		pmdp_test_and_clear_young(vma, addr, pmd);
+		ClearPageReferenced(page);
+out:
+		spin_unlock(ptl);
+		return 0;
+	}
+
 	if (pmd_trans_unstable(pmd))
 		return 0;
 
@@ -818,6 +842,28 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
+static int clear_refs_test_walk(unsigned long start, unsigned long end,
+				struct mm_walk *walk)
+{
+	struct clear_refs_private *cp = walk->private;
+	struct vm_area_struct *vma = walk->vma;
+
+	if (vma->vm_flags & VM_PFNMAP)
+		return 1;
+
+	/*
+	 * Writing 1 to /proc/pid/clear_refs affects all pages.
+	 * Writing 2 to /proc/pid/clear_refs only affects anonymous pages.
+	 * Writing 3 to /proc/pid/clear_refs only affects file mapped pages.
+	 * Writing 4 to /proc/pid/clear_refs affects all pages.
+	 */
+	if (cp->type == CLEAR_REFS_ANON && vma->vm_file)
+		return 1;
+	if (cp->type == CLEAR_REFS_MAPPED && !vma->vm_file)
+		return 1;
+	return 0;
+}
+
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -858,9 +904,22 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		};
 		struct mm_walk clear_refs_walk = {
 			.pmd_entry = clear_refs_pte_range,
+			.test_walk = clear_refs_test_walk,
 			.mm = mm,
 			.private = &cp,
 		};
+
+		if (type == CLEAR_REFS_MM_HIWATER_RSS) {
+			/*
+			 * Writing 5 to /proc/pid/clear_refs resets the peak
+			 * resident set size to this mm's current rss value.
+			 */
+			down_write(&mm->mmap_sem);
+			reset_mm_hiwater_rss(mm);
+			up_write(&mm->mmap_sem);
+			goto out_mm;
+		}
+
 		down_read(&mm->mmap_sem);
 		if (type == CLEAR_REFS_SOFT_DIRTY) {
 			for (vma = mm->mmap; vma; vma = vma->vm_next) {
@@ -877,32 +936,12 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			}
 			mmu_notifier_invalidate_range_start(mm, 0, -1);
 		}
-		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			cp.vma = vma;
-			if (is_vm_hugetlb_page(vma))
-				continue;
-			/*
-			 * Writing 1 to /proc/pid/clear_refs affects all pages.
-			 *
-			 * Writing 2 to /proc/pid/clear_refs only affects
-			 * Anonymous pages.
-			 *
-			 * Writing 3 to /proc/pid/clear_refs only affects file
-			 * mapped pages.
-			 *
-			 * Writing 4 to /proc/pid/clear_refs affects all pages.
-			 */
-			if (type == CLEAR_REFS_ANON && vma->vm_file)
-				continue;
-			if (type == CLEAR_REFS_MAPPED && !vma->vm_file)
-				continue;
-			walk_page_range(vma->vm_start, vma->vm_end,
-					&clear_refs_walk);
-		}
+		walk_page_range(0, ~0UL, &clear_refs_walk);
 		if (type == CLEAR_REFS_SOFT_DIRTY)
 			mmu_notifier_invalidate_range_end(mm, 0, -1);
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
+out_mm:
 		mmput(mm);
 	}
 	put_task_struct(task);
@@ -1066,15 +1105,13 @@ static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemap
 static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 			     struct mm_walk *walk)
 {
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma = walk->vma;
 	struct pagemapread *pm = walk->private;
 	spinlock_t *ptl;
-	pte_t *pte;
+	pte_t *pte, *orig_pte;
 	int err = 0;
 
-	/* find the first VMA at or above 'addr' */
-	vma = find_vma(walk->mm, addr);
-	if (vma && pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		int pmd_flags2;
 
 		if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
@@ -1100,51 +1137,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	if (pmd_trans_unstable(pmd))
 		return 0;
 
-	while (1) {
-		/* End of address space hole, which we mark as non-present. */
-		unsigned long hole_end;
-
-		if (vma)
-			hole_end = min(end, vma->vm_start);
-		else
-			hole_end = end;
-
-		for (; addr < hole_end; addr += PAGE_SIZE) {
-			pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
-
-			err = add_to_pagemap(addr, &pme, pm);
-			if (err)
-				return err;
-		}
-
-		if (!vma || vma->vm_start >= end)
-			break;
-		/*
-		 * We can't possibly be in a hugetlb VMA. In general,
-		 * for a mm_walk with a pmd_entry and a hugetlb_entry,
-		 * the pmd_entry can only be called on addresses in a
-		 * hugetlb if the walk starts in a non-hugetlb VMA and
-		 * spans a hugepage VMA. Since pagemap_read walks are
-		 * PMD-sized and PMD-aligned, this will never be true.
-		 */
-		BUG_ON(is_vm_hugetlb_page(vma));
-
-		/* Addresses in the VMA. */
-		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
-			pagemap_entry_t pme;
-			pte = pte_offset_map(pmd, addr);
-			pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-			pte_unmap(pte);
-			err = add_to_pagemap(addr, &pme, pm);
-			if (err)
-				return err;
-		}
+	/*
+	 * We can assume that @vma always points to a valid one and @end never
+	 * goes beyond vma->vm_end.
+	 */
+	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	for (; addr < end; pte++, addr += PAGE_SIZE) {
+		pagemap_entry_t pme;
 
-		if (addr == end)
+		pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
+		err = add_to_pagemap(addr, &pme, pm);
+		if (err)
 			break;
-
-		vma = find_vma(walk->mm, addr);
 	}
+	pte_unmap_unlock(orig_pte, ptl);
 
 	cond_resched();
 
@@ -1170,15 +1176,12 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
 		struct mm_walk *walk)
 {
 	struct pagemapread *pm = walk->private;
-	struct vm_area_struct *vma;
+	struct vm_area_struct *vma = walk->vma;
 	int err = 0;
 	int flags2;
 	pagemap_entry_t pme;
 
-	vma = find_vma(walk->mm, addr);
-	WARN_ON_ONCE(!vma);
-
-	if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+	if (vma->vm_flags & VM_SOFTDIRTY)
 		flags2 = __PM_SOFT_DIRTY;
 	else
 		flags2 = 0;
@@ -1338,7 +1341,6 @@ const struct file_operations proc_pagemap_operations = {
 #ifdef CONFIG_NUMA
 
 struct numa_maps {
-	struct vm_area_struct *vma;
 	unsigned long pages;
 	unsigned long anon;
 	unsigned long active;
@@ -1407,18 +1409,17 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
 static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 		unsigned long end, struct mm_walk *walk)
 {
-	struct numa_maps *md;
+	struct numa_maps *md = walk->private;
+	struct vm_area_struct *vma = walk->vma;
 	spinlock_t *ptl;
 	pte_t *orig_pte;
 	pte_t *pte;
 
-	md = walk->private;
-
-	if (pmd_trans_huge_lock(pmd, md->vma, &ptl) == 1) {
+	if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
 		pte_t huge_pte = *(pte_t *)pmd;
 		struct page *page;
 
-		page = can_gather_numa_stats(huge_pte, md->vma, addr);
+		page = can_gather_numa_stats(huge_pte, vma, addr);
 		if (page)
 			gather_stats(page, md, pte_dirty(huge_pte),
 					HPAGE_PMD_SIZE/PAGE_SIZE);
@@ -1430,7 +1431,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 		return 0;
 	orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
 	do {
-		struct page *page = can_gather_numa_stats(*pte, md->vma, addr);
+		struct page *page = can_gather_numa_stats(*pte, vma, addr);
 		if (!page)
 			continue;
 		gather_stats(page, md, pte_dirty(*pte), 1);
@@ -1440,7 +1441,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 #ifdef CONFIG_HUGETLB_PAGE
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 		unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
 	struct numa_maps *md;
@@ -1459,7 +1460,7 @@ static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
 }
 
 #else
-static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask,
+static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 		unsigned long addr, unsigned long end, struct mm_walk *walk)
 {
 	return 0;
@@ -1477,7 +1478,12 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	struct numa_maps *md = &numa_priv->md;
 	struct file *file = vma->vm_file;
 	struct mm_struct *mm = vma->vm_mm;
-	struct mm_walk walk = {};
+	struct mm_walk walk = {
+		.hugetlb_entry = gather_hugetlb_stats,
+		.pmd_entry = gather_pte_stats,
+		.private = md,
+		.mm = mm,
+	};
 	struct mempolicy *pol;
 	char buffer[64];
 	int nid;
@@ -1488,13 +1494,6 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	/* Ensure we start with an empty set of numa_maps statistics. */
 	memset(md, 0, sizeof(*md));
 
-	md->vma = vma;
-
-	walk.hugetlb_entry = gather_hugetbl_stats;
-	walk.pmd_entry = gather_pte_stats;
-	walk.private = md;
-	walk.mm = mm;
-
 	pol = __get_vma_policy(vma, vma->vm_start);
 	if (pol) {
 		mpol_to_str(buffer, sizeof(buffer), pol);
@@ -1528,7 +1527,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	if (is_vm_hugetlb_page(vma))
 		seq_puts(m, " huge");
 
-	walk_page_range(vma->vm_start, vma->vm_end, &walk);
+	/* mmap_sem is held by m_start */
+	walk_page_vma(vma, &walk);
 
 	if (!md->pages)
 		goto out;
@@ -1557,6 +1557,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
 	for_each_node_state(nid, N_MEMORY)
 		if (md->node[nid])
 			seq_printf(m, " N%d=%lu", nid, md->node[nid]);
+
+	seq_printf(m, " kernelpagesize_kB=%lu", vma_kernel_pagesize(vma) >> 10);
 out:
 	seq_putc(m, '\n');
 	m_cache_vma(m, vma);
