author     Peter Feiner <pfeiner@google.com>            2014-10-09 18:28:32 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2014-10-09 22:25:58 -0400
commit     81d0fa623c5b8dbd5279d9713094b0f9b0a00fb4 (patch)
tree       840455ffeafaef31a53b94dc74df80d2561c713e /fs/proc
parent     3193913ce62c63056bc67a6ae378beaf494afa66 (diff)
mm: softdirty: unmapped addresses between VMAs are clean
If a /proc/pid/pagemap read spans a [VMA, an unmapped region, then a
VM_SOFTDIRTY VMA], the virtual pages in the unmapped region are reported
as softdirty.

Here's a program to demonstrate the bug:

#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        const uint64_t PAGEMAP_SOFTDIRTY = 1ul << 55;
        uint64_t pme[3];
        int fd = open("/proc/self/pagemap", O_RDONLY);
        char *m = mmap(NULL, 3 * getpagesize(), PROT_READ,
                       MAP_ANONYMOUS | MAP_SHARED, -1, 0);

        munmap(m + getpagesize(), getpagesize());

        pread(fd, pme, 24, (unsigned long) m / getpagesize() * 8);
        assert(pme[0] & PAGEMAP_SOFTDIRTY);    /* passes */
        assert(!(pme[1] & PAGEMAP_SOFTDIRTY)); /* fails */
        assert(pme[2] & PAGEMAP_SOFTDIRTY);    /* passes */

        return 0;
}

(Note that all pages in new VMAs are softdirty until cleared.)

Tested: used the program given above. I'm going to include this code in a
selftest in the future.

[n-horiguchi@ah.jp.nec.com: prevent pagemap_pte_range() from overrunning]
Signed-off-by: Peter Feiner <pfeiner@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Jamie Liu <jamieliu@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
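As background for the note about new VMAs above: soft-dirty bits are normally
cleared by writing "4" to /proc/pid/clear_refs and then re-read from bit 55 of
each 8-byte pagemap entry. The following is a minimal user-space sketch of that
lifecycle, not part of this patch; the helper names (clear_soft_dirty,
pagemap_entry) and addresses are illustrative, and it assumes a kernel built
with CONFIG_MEM_SOFT_DIRTY:

#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

/* Illustrative helper: clear soft-dirty bits for the whole process. */
static void clear_soft_dirty(void)
{
        int fd = open("/proc/self/clear_refs", O_WRONLY);

        assert(fd >= 0);
        assert(write(fd, "4", 1) == 1); /* "4" selects the soft-dirty clear */
        close(fd);
}

/* Illustrative helper: read the pagemap entry covering 'addr'. */
static uint64_t pagemap_entry(int fd, void *addr)
{
        uint64_t pme;
        off_t off = (unsigned long) addr / getpagesize() * 8;

        assert(pread(fd, &pme, sizeof(pme), off) == sizeof(pme));
        return pme;
}

int main(void)
{
        const uint64_t PAGEMAP_SOFTDIRTY = 1ul << 55;
        int fd = open("/proc/self/pagemap", O_RDONLY);
        char *m = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE,
                       MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

        m[0] = 1;                                               /* fault the page in */
        assert(pagemap_entry(fd, m) & PAGEMAP_SOFTDIRTY);       /* new mapping: soft-dirty */

        clear_soft_dirty();
        assert(!(pagemap_entry(fd, m) & PAGEMAP_SOFTDIRTY));    /* cleared */

        m[0] = 2;                                               /* write again */
        assert(pagemap_entry(fd, m) & PAGEMAP_SOFTDIRTY);       /* soft-dirty again */
        return 0;
}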
Diffstat (limited to 'fs/proc')
-rw-r--r--  fs/proc/task_mmu.c  61
1 file changed, 40 insertions(+), 21 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1acec26a3758..b7a7dc963a35 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1027,7 +1027,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
         spinlock_t *ptl;
         pte_t *pte;
         int err = 0;
-        pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
         /* find the first VMA at or above 'addr' */
         vma = find_vma(walk->mm, addr);
@@ -1041,6 +1040,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
         for (; addr != end; addr += PAGE_SIZE) {
                 unsigned long offset;
+                pagemap_entry_t pme;
 
                 offset = (addr & ~PAGEMAP_WALK_MASK) >>
                                 PAGE_SHIFT;
@@ -1055,32 +1055,51 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
         if (pmd_trans_unstable(pmd))
                 return 0;
-        for (; addr != end; addr += PAGE_SIZE) {
-                int flags2;
-
-                /* check to see if we've left 'vma' behind
-                 * and need a new, higher one */
-                if (vma && (addr >= vma->vm_end)) {
-                        vma = find_vma(walk->mm, addr);
-                        if (vma && (vma->vm_flags & VM_SOFTDIRTY))
-                                flags2 = __PM_SOFT_DIRTY;
-                        else
-                                flags2 = 0;
-                        pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
+
+        while (1) {
+                /* End of address space hole, which we mark as non-present. */
+                unsigned long hole_end;
+
+                if (vma)
+                        hole_end = min(end, vma->vm_start);
+                else
+                        hole_end = end;
+
+                for (; addr < hole_end; addr += PAGE_SIZE) {
+                        pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+
+                        err = add_to_pagemap(addr, &pme, pm);
+                        if (err)
+                                return err;
                 }
 
-                /* check that 'vma' actually covers this address,
-                 * and that it isn't a huge page vma */
-                if (vma && (vma->vm_start <= addr) &&
-                    !is_vm_hugetlb_page(vma)) {
+                if (!vma || vma->vm_start >= end)
+                        break;
+                /*
+                 * We can't possibly be in a hugetlb VMA. In general,
+                 * for a mm_walk with a pmd_entry and a hugetlb_entry,
+                 * the pmd_entry can only be called on addresses in a
+                 * hugetlb if the walk starts in a non-hugetlb VMA and
+                 * spans a hugepage VMA. Since pagemap_read walks are
+                 * PMD-sized and PMD-aligned, this will never be true.
+                 */
+                BUG_ON(is_vm_hugetlb_page(vma));
+
+                /* Addresses in the VMA. */
+                for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
+                        pagemap_entry_t pme;
                         pte = pte_offset_map(pmd, addr);
                         pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-                        /* unmap before userspace copy */
                         pte_unmap(pte);
+                        err = add_to_pagemap(addr, &pme, pm);
+                        if (err)
+                                return err;
                 }
-                err = add_to_pagemap(addr, &pme, pm);
-                if (err)
-                        return err;
+
+                if (addr == end)
+                        break;
+
+                vma = find_vma(walk->mm, addr);
         }
 
         cond_resched();
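To make the behavioural change easier to see outside the kernel, here is a
rough user-space model of the fixed walk; walk_range(), struct vma, and the
addresses are made-up stand-ins, not kernel code. Pages in holes between VMAs
are always reported as not present and clean, while pages inside a VMA inherit
its soft-dirty state:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096ul

/* Hypothetical stand-in for the kernel VMA fields the patch consults. */
struct vma {
        unsigned long start, end;   /* page-aligned, half-open range */
        bool soft_dirty;            /* models VM_SOFTDIRTY */
};

/*
 * Model of the fixed pagemap_pte_range() flow: walk [addr, end) by
 * alternating between the hole before the next VMA (reported not
 * present and clean) and the pages inside that VMA.
 */
static void walk_range(unsigned long addr, unsigned long end,
                       const struct vma *vmas, int nvmas)
{
        int i = 0;

        while (addr < end) {
                /* Skip VMAs that end at or before addr (find_vma analogue). */
                while (i < nvmas && vmas[i].end <= addr)
                        i++;

                unsigned long hole_end =
                        (i < nvmas && vmas[i].start < end) ? vmas[i].start : end;

                /* Pages in the hole: not present, never soft-dirty. */
                for (; addr < hole_end; addr += PAGE_SIZE)
                        printf("%#lx: not present, clean\n", addr);

                if (i == nvmas || addr >= end)
                        break;

                /* Pages inside the VMA carry the VMA's soft-dirty state. */
                unsigned long vma_end = vmas[i].end < end ? vmas[i].end : end;

                for (; addr < vma_end; addr += PAGE_SIZE)
                        printf("%#lx: mapped%s\n", addr,
                               vmas[i].soft_dirty ? ", soft-dirty" : "");
        }
}

int main(void)
{
        /* [VMA, hole, VM_SOFTDIRTY VMA], as in the commit message's example. */
        struct vma vmas[] = {
                { 0x10000, 0x11000, true },
                { 0x12000, 0x13000, true },
        };

        walk_range(0x10000, 0x13000, vmas, 2);
        return 0;
}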