Diffstat (limited to 'fs/proc/task_mmu.c')

 fs/proc/task_mmu.c | 168 ++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 133 insertions(+), 35 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3e636d864d56..107d026f5d6e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -11,6 +11,7 @@
 #include <linux/rmap.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
 
 #include <asm/elf.h>
 #include <asm/uaccess.h>
@@ -688,10 +689,66 @@ const struct file_operations proc_tid_smaps_operations = {
 	.release	= seq_release_private,
 };
 
+/*
+ * We do not want to have constant page-shift bits sitting in
+ * pagemap entries and are about to reuse them some time soon.
+ *
+ * Here's the "migration strategy":
+ * 1. when the system boots these bits remain what they are,
+ *    but a warning about the future change is printed in the log;
+ * 2. once anyone clears soft-dirty bits via the clear_refs file,
+ *    this flag is set to denote that the user is aware of the
+ *    new API and those page-shift bits change their meaning.
+ *    The respective warning is printed in dmesg;
+ * 3. in a couple of releases we will remove all the mentions
+ *    of page-shift in pagemap entries.
+ */
+
+static bool soft_dirty_cleared __read_mostly;
+
+enum clear_refs_types {
+	CLEAR_REFS_ALL = 1,
+	CLEAR_REFS_ANON,
+	CLEAR_REFS_MAPPED,
+	CLEAR_REFS_SOFT_DIRTY,
+	CLEAR_REFS_LAST,
+};
+
+struct clear_refs_private {
+	struct vm_area_struct *vma;
+	enum clear_refs_types type;
+};
+
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+		unsigned long addr, pte_t *pte)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	/*
+	 * The soft-dirty tracker uses #PF-s to catch writes
+	 * to pages, so write-protect the pte as well. See
+	 * Documentation/vm/soft-dirty.txt for a full description
+	 * of how soft-dirty works.
+	 */
+	pte_t ptent = *pte;
+
+	if (pte_present(ptent)) {
+		ptent = pte_wrprotect(ptent);
+		ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+	} else if (is_swap_pte(ptent)) {
+		ptent = pte_swp_clear_soft_dirty(ptent);
+	} else if (pte_file(ptent)) {
+		ptent = pte_file_clear_soft_dirty(ptent);
+	}
+
+	set_pte_at(vma->vm_mm, addr, pte, ptent);
+#endif
+}
+
 static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 				unsigned long end, struct mm_walk *walk)
 {
-	struct vm_area_struct *vma = walk->private;
+	struct clear_refs_private *cp = walk->private;
+	struct vm_area_struct *vma = cp->vma;
 	pte_t *pte, ptent;
 	spinlock_t *ptl;
 	struct page *page;
@@ -703,6 +760,12 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
 	for (; addr != end; pte++, addr += PAGE_SIZE) {
 		ptent = *pte;
+
+		if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+			clear_soft_dirty(vma, addr, pte);
+			continue;
+		}
+
 		if (!pte_present(ptent))
 			continue;
 
@@ -719,10 +782,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
 	return 0;
 }
 
-#define CLEAR_REFS_ALL 1
-#define CLEAR_REFS_ANON 2
-#define CLEAR_REFS_MAPPED 3
-
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
@@ -730,7 +789,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 	char buffer[PROC_NUMBUF];
 	struct mm_struct *mm;
 	struct vm_area_struct *vma;
-	int type;
+	enum clear_refs_types type;
+	int itype;
 	int rv;
 
 	memset(buffer, 0, sizeof(buffer));
@@ -738,23 +798,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 		count = sizeof(buffer) - 1;
 	if (copy_from_user(buffer, buf, count))
 		return -EFAULT;
-	rv = kstrtoint(strstrip(buffer), 10, &type);
+	rv = kstrtoint(strstrip(buffer), 10, &itype);
 	if (rv < 0)
 		return rv;
-	if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
+	type = (enum clear_refs_types)itype;
+	if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
 		return -EINVAL;
+
+	if (type == CLEAR_REFS_SOFT_DIRTY) {
+		soft_dirty_cleared = true;
+		pr_warn_once("The pagemap bits 55-60 have changed their meaning! "
+			     "See the linux/Documentation/vm/pagemap.txt for details.\n");
+	}
+
 	task = get_proc_task(file_inode(file));
 	if (!task)
 		return -ESRCH;
 	mm = get_task_mm(task);
 	if (mm) {
+		struct clear_refs_private cp = {
+			.type = type,
+		};
 		struct mm_walk clear_refs_walk = {
 			.pmd_entry = clear_refs_pte_range,
 			.mm = mm,
+			.private = &cp,
 		};
 		down_read(&mm->mmap_sem);
+		if (type == CLEAR_REFS_SOFT_DIRTY)
+			mmu_notifier_invalidate_range_start(mm, 0, -1);
 		for (vma = mm->mmap; vma; vma = vma->vm_next) {
-			clear_refs_walk.private = vma;
+			cp.vma = vma;
 			if (is_vm_hugetlb_page(vma))
 				continue;
 			/*
@@ -773,6 +847,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
 			walk_page_range(vma->vm_start, vma->vm_end,
 					&clear_refs_walk);
 		}
+		if (type == CLEAR_REFS_SOFT_DIRTY)
+			mmu_notifier_invalidate_range_end(mm, 0, -1);
 		flush_tlb_mm(mm);
 		up_read(&mm->mmap_sem);
 		mmput(mm);
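
With the pieces above in place, userspace drives soft-dirty tracking by writing a clear_refs_types value, in ASCII, into /proc/PID/clear_refs: "1".."3" keep their old reference-clearing semantics, while the new "4" (CLEAR_REFS_SOFT_DIRTY) write-protects the PTEs and clears their soft-dirty bits. A minimal userspace sketch, not part of the patch (the helper name is ours):

#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Reset soft-dirty bits for a target process by writing "4"
 * (CLEAR_REFS_SOFT_DIRTY) into its clear_refs file. */
static int clear_soft_dirty_bits(pid_t pid)
{
	char path[64];
	int fd, ret = -1;

	snprintf(path, sizeof(path), "/proc/%d/clear_refs", pid);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	if (write(fd, "4", 1) == 1)
		ret = 0;
	close(fd);
	return ret;
}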
@@ -792,14 +868,15 @@ typedef struct {
 } pagemap_entry_t;
 
 struct pagemapread {
-	int pos, len;
+	int pos, len;		/* units: PM_ENTRY_BYTES, not bytes */
 	pagemap_entry_t *buffer;
+	bool v2;
 };
 
 #define PAGEMAP_WALK_SIZE	(PMD_SIZE)
 #define PAGEMAP_WALK_MASK	(PMD_MASK)
 
-#define PM_ENTRY_BYTES      sizeof(u64)
+#define PM_ENTRY_BYTES      sizeof(pagemap_entry_t)
 #define PM_STATUS_BITS      3
 #define PM_STATUS_OFFSET    (64 - PM_STATUS_BITS)
 #define PM_STATUS_MASK      (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -807,14 +884,17 @@ struct pagemapread {
 #define PM_PSHIFT_BITS      6
 #define PM_PSHIFT_OFFSET    (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
 #define PM_PSHIFT_MASK      (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define PM_PSHIFT(x)        (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define __PM_PSHIFT(x)      (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
 #define PM_PFRAME_MASK      ((1LL << PM_PSHIFT_OFFSET) - 1)
 #define PM_PFRAME(x)        ((x) & PM_PFRAME_MASK)
+/* in "new" pagemap pshift bits are occupied with more status bits */
+#define PM_STATUS2(v2, x)   (__PM_PSHIFT(v2 ? x : PAGE_SHIFT))
 
+#define __PM_SOFT_DIRTY      (1LL)
 #define PM_PRESENT          PM_STATUS(4LL)
 #define PM_SWAP             PM_STATUS(2LL)
 #define PM_FILE             PM_STATUS(1LL)
-#define PM_NOT_PRESENT      PM_PSHIFT(PAGE_SHIFT)
+#define PM_NOT_PRESENT(v2)  PM_STATUS2(v2, 0)
 #define PM_END_OF_BUFFER    1
 
 static inline pagemap_entry_t make_pme(u64 val)
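
Plugging the constants in: PM_STATUS_OFFSET is 61 and PM_PSHIFT_OFFSET is 55, so present/swap/file land in bits 63/62/61, the PFN keeps bits 0-54, and under the v2 semantics __PM_SOFT_DIRTY occupies bit 55, where the constant page-shift value used to sit. A userspace-side sketch of the resulting entry layout (our names, derived from the macros above):

#include <stdbool.h>
#include <stdint.h>

/* 64-bit pagemap entry, v2 layout:
 *   bit 63       present
 *   bit 62       swapped
 *   bit 61       file-page
 *   bits 55-60   status bits; bit 55 = soft-dirty (formerly page-shift)
 *   bits 0-54    page frame number, or swap type/offset
 */
#define PME_PRESENT	(1ULL << 63)
#define PME_SWAP	(1ULL << 62)
#define PME_FILE	(1ULL << 61)
#define PME_SOFT_DIRTY	(1ULL << 55)		/* v2 layout only */
#define PME_PFRAME(e)	((e) & ((1ULL << 55) - 1))

static inline bool pme_soft_dirty(uint64_t pme)
{
	return (pme & PME_SOFT_DIRTY) != 0;
}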
@@ -837,7 +917,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
 	struct pagemapread *pm = walk->private;
 	unsigned long addr;
 	int err = 0;
-	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
 	for (addr = start; addr < end; addr += PAGE_SIZE) {
 		err = add_to_pagemap(addr, &pme, pm);
@@ -847,38 +927,43 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
 	return err;
 }
 
-static void pte_to_pagemap_entry(pagemap_entry_t *pme,
+static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
 	u64 frame, flags;
 	struct page *page = NULL;
+	int flags2 = 0;
 
 	if (pte_present(pte)) {
 		frame = pte_pfn(pte);
 		flags = PM_PRESENT;
 		page = vm_normal_page(vma, addr, pte);
 	} else if (is_swap_pte(pte)) {
-		swp_entry_t entry = pte_to_swp_entry(pte);
-
+		swp_entry_t entry;
+		if (pte_swp_soft_dirty(pte))
+			flags2 |= __PM_SOFT_DIRTY;
+		entry = pte_to_swp_entry(pte);
 		frame = swp_type(entry) |
 			(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
 		flags = PM_SWAP;
 		if (is_migration_entry(entry))
 			page = migration_entry_to_page(entry);
 	} else {
-		*pme = make_pme(PM_NOT_PRESENT);
+		*pme = make_pme(PM_NOT_PRESENT(pm->v2));
 		return;
 	}
 
 	if (page && !PageAnon(page))
 		flags |= PM_FILE;
+	if (pte_soft_dirty(pte))
+		flags2 |= __PM_SOFT_DIRTY;
 
-	*pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags);
+	*pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
-		pmd_t pmd, int offset)
+static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+		pmd_t pmd, int offset, int pmd_flags2)
 {
 	/*
 	 * Currently pmd for thp is always present because thp can not be
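
Note the encoding in the is_swap_pte() branch above: for a swapped-out page the frame field carries no PFN; instead the swap type sits in the low MAX_SWAPFILES_SHIFT (5) bits and the swap offset above them. A hypothetical userspace decoder for PME_PFRAME(entry) when the swap bit is set:

#include <stdint.h>

#define MAX_SWAPFILES_SHIFT	5

/* Swap type: which swap area the page went to. */
static inline unsigned int pme_swp_type(uint64_t frame)
{
	return frame & ((1U << MAX_SWAPFILES_SHIFT) - 1);
}

/* Swap offset: page slot within that area. */
static inline uint64_t pme_swp_offset(uint64_t frame)
{
	return frame >> MAX_SWAPFILES_SHIFT;
}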
@@ -887,13 +972,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
 	 */
 	if (pmd_present(pmd))
 		*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
-				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+				| PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
 	else
-		*pme = make_pme(PM_NOT_PRESENT);
+		*pme = make_pme(PM_NOT_PRESENT(pm->v2));
 }
 #else
-static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
-		pmd_t pmd, int offset)
+static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+		pmd_t pmd, int offset, int pmd_flags2)
 {
 }
 #endif
@@ -905,17 +990,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	struct pagemapread *pm = walk->private;
 	pte_t *pte;
 	int err = 0;
-	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
 	if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+		int pmd_flags2;
+
+		pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
 		for (; addr != end; addr += PAGE_SIZE) {
 			unsigned long offset;
 
 			offset = (addr & ~PAGEMAP_WALK_MASK) >>
 					PAGE_SHIFT;
-			thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
+			thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
 			err = add_to_pagemap(addr, &pme, pm);
 			if (err)
 				break;
@@ -932,7 +1020,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		 * and need a new, higher one */
 		if (vma && (addr >= vma->vm_end)) {
 			vma = find_vma(walk->mm, addr);
-			pme = make_pme(PM_NOT_PRESENT);
+			pme = make_pme(PM_NOT_PRESENT(pm->v2));
 		}
 
 		/* check that 'vma' actually covers this address,
@@ -940,7 +1028,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		if (vma && (vma->vm_start <= addr) &&
 		    !is_vm_hugetlb_page(vma)) {
 			pte = pte_offset_map(pmd, addr);
-			pte_to_pagemap_entry(&pme, vma, addr, *pte);
+			pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
 			/* unmap before userspace copy */
 			pte_unmap(pte);
 		}
@@ -955,14 +1043,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 }
 
 #ifdef CONFIG_HUGETLB_PAGE
-static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
+static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
 		pte_t pte, int offset)
 {
 	if (pte_present(pte))
 		*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
-				| PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+				| PM_STATUS2(pm->v2, 0) | PM_PRESENT);
 	else
-		*pme = make_pme(PM_NOT_PRESENT);
+		*pme = make_pme(PM_NOT_PRESENT(pm->v2));
 }
 
 /* This function walks within one hugetlb entry in the single call */
@@ -976,7 +1064,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
 
 	for (; addr != end; addr += PAGE_SIZE) {
 		int offset = (addr & ~hmask) >> PAGE_SHIFT;
-		huge_pte_to_pagemap_entry(&pme, *pte, offset);
+		huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
 		err = add_to_pagemap(addr, &pme, pm);
 		if (err)
 			return err;
@@ -1038,8 +1126,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 	if (!count)
 		goto out_task;
 
-	pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
-	pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+	pm.v2 = soft_dirty_cleared;
+	pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+	pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
 	ret = -ENOMEM;
 	if (!pm.buffer)
 		goto out_task;
@@ -1110,9 +1199,18 @@ out:
 	return ret;
 }
 
+static int pagemap_open(struct inode *inode, struct file *file)
+{
+	pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
+			"to stop being page-shift some time soon. See the "
+			"linux/Documentation/vm/pagemap.txt for details.\n");
+	return 0;
+}
+
 const struct file_operations proc_pagemap_operations = {
 	.llseek		= mem_lseek, /* borrow this */
 	.read		= pagemap_read,
+	.open		= pagemap_open,
 };
 #endif /* CONFIG_PROC_PAGE_MONITOR */
 
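
Taken together, the two files implement the tracking cycle described in Documentation/vm/soft-dirty.txt: reset the bits, let the task run, then read its pagemap entries and test bit 55. A self-contained userspace sketch of one such cycle, assuming a 4 KiB page size and that the v2 layout is in effect once the first reset has been done:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* One 64-bit entry per virtual page; soft-dirty is status bit 55. */
static int page_soft_dirty(int pagemap_fd, unsigned long vaddr)
{
	uint64_t entry;
	off_t off = (off_t)(vaddr / 4096) * sizeof(entry);

	if (pread(pagemap_fd, &entry, sizeof(entry), off) != sizeof(entry))
		return -1;
	return (int)((entry >> 55) & 1);
}

int main(void)
{
	static char page[4096] __attribute__((aligned(4096)));
	int clear_fd = open("/proc/self/clear_refs", O_WRONLY);
	int map_fd = open("/proc/self/pagemap", O_RDONLY);

	if (clear_fd < 0 || map_fd < 0)
		return 1;
	write(clear_fd, "4", 1);	/* reset soft-dirty for all VMAs */
	page[0] = 1;			/* the write fault re-sets the bit */
	printf("soft-dirty after write: %d\n",
	       page_soft_dirty(map_fd, (unsigned long)page));
	return 0;
}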