author     Peter Feiner <pfeiner@google.com>              2014-10-09 18:28:32 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org> 2014-10-09 22:25:58 -0400
commit     81d0fa623c5b8dbd5279d9713094b0f9b0a00fb4
tree       840455ffeafaef31a53b94dc74df80d2561c713e /fs/proc
parent     3193913ce62c63056bc67a6ae378beaf494afa66
mm: softdirty: unmapped addresses between VMAs are clean
If a /proc/pid/pagemap read spans a [VMA, an unmapped region, then a
VM_SOFTDIRTY VMA], the virtual pages in the unmapped region are reported
as softdirty. Here's a program to demonstrate the bug:
#include <assert.h>
#include <fcntl.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

int main() {
	const uint64_t PAGEMAP_SOFTDIRTY = 1ul << 55;
	uint64_t pme[3];
	int fd = open("/proc/self/pagemap", O_RDONLY);
	char *m = mmap(NULL, 3 * getpagesize(), PROT_READ,
		       MAP_ANONYMOUS | MAP_SHARED, -1, 0);
	munmap(m + getpagesize(), getpagesize());
	pread(fd, pme, 24, (unsigned long) m / getpagesize() * 8);
	assert(pme[0] & PAGEMAP_SOFTDIRTY);    /* passes */
	assert(!(pme[1] & PAGEMAP_SOFTDIRTY)); /* fails */
	assert(pme[2] & PAGEMAP_SOFTDIRTY);    /* passes */
	return 0;
}
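(For reference, the program builds with a plain cc invocation and, on kernels of that era, needs no special privileges to read its own pagemap; without this patch the second assert fires, and with it all three pass.)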
(Note that all pages in new VMAs are softdirty until cleared).
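For reference, soft-dirty bits are cleared for a whole process by writing "4" to /proc/pid/clear_refs, as described in Documentation/vm/soft-dirty.txt. A minimal sketch of that step (the helper name is ours; error handling omitted):

#include <fcntl.h>
#include <unistd.h>

static void clear_soft_dirty(void)
{
	/* "4" selects the soft-dirty clear operation; afterwards,
	 * mapped pages read as clean in pagemap until written to again. */
	int fd = open("/proc/self/clear_refs", O_WRONLY);
	write(fd, "4", 1);
	close(fd);
}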
Tested:
Used the program given above. I'm going to include this code in
a selftest in the future.
[n-horiguchi@ah.jp.nec.com: prevent pagemap_pte_range() from overrunning]
Signed-off-by: Peter Feiner <pfeiner@google.com>
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Jamie Liu <jamieliu@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/proc')
fs/proc/task_mmu.c | 61
1 file changed, 40 insertions(+), 21 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 1acec26a3758..b7a7dc963a35 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1027,7 +1027,6 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 	spinlock_t *ptl;
 	pte_t *pte;
 	int err = 0;
-	pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
 
 	/* find the first VMA at or above 'addr' */
 	vma = find_vma(walk->mm, addr);
@@ -1041,6 +1040,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
 		for (; addr != end; addr += PAGE_SIZE) {
 			unsigned long offset;
+			pagemap_entry_t pme;
 
 			offset = (addr & ~PAGEMAP_WALK_MASK) >>
 					PAGE_SHIFT;
@@ -1055,32 +1055,51 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
 	if (pmd_trans_unstable(pmd))
 		return 0;
-	for (; addr != end; addr += PAGE_SIZE) {
-		int flags2;
-
-		/* check to see if we've left 'vma' behind
-		 * and need a new, higher one */
-		if (vma && (addr >= vma->vm_end)) {
-			vma = find_vma(walk->mm, addr);
-			if (vma && (vma->vm_flags & VM_SOFTDIRTY))
-				flags2 = __PM_SOFT_DIRTY;
-			else
-				flags2 = 0;
-			pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
+
+	while (1) {
+		/* End of address space hole, which we mark as non-present. */
+		unsigned long hole_end;
+
+		if (vma)
+			hole_end = min(end, vma->vm_start);
+		else
+			hole_end = end;
+
+		for (; addr < hole_end; addr += PAGE_SIZE) {
+			pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
+
+			err = add_to_pagemap(addr, &pme, pm);
+			if (err)
+				return err;
 		}
 
-		/* check that 'vma' actually covers this address,
-		 * and that it isn't a huge page vma */
-		if (vma && (vma->vm_start <= addr) &&
-		    !is_vm_hugetlb_page(vma)) {
+		if (!vma || vma->vm_start >= end)
+			break;
+		/*
+		 * We can't possibly be in a hugetlb VMA. In general,
+		 * for a mm_walk with a pmd_entry and a hugetlb_entry,
+		 * the pmd_entry can only be called on addresses in a
+		 * hugetlb if the walk starts in a non-hugetlb VMA and
+		 * spans a hugepage VMA. Since pagemap_read walks are
+		 * PMD-sized and PMD-aligned, this will never be true.
+		 */
+		BUG_ON(is_vm_hugetlb_page(vma));
+
+		/* Addresses in the VMA. */
+		for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) {
+			pagemap_entry_t pme;
 			pte = pte_offset_map(pmd, addr);
 			pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
-			/* unmap before userspace copy */
 			pte_unmap(pte);
+			err = add_to_pagemap(addr, &pme, pm);
+			if (err)
+				return err;
 		}
-		err = add_to_pagemap(addr, &pme, pm);
-		if (err)
-			return err;
+
+		if (addr == end)
+			break;
+
+		vma = find_vma(walk->mm, addr);
 	}
 
 	cond_resched();
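To make the new control flow easier to follow, here is a hypothetical userspace model of the loop above: it alternates between the hole before the current VMA (always reported clean and not present) and the VMA itself (reported from its ptes). The toy_vma list, toy_find_vma() and the page-size constant are illustrative stand-ins, not kernel code.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE 0x1000UL

struct toy_vma { unsigned long vm_start, vm_end; };

/* Toy stand-in for find_vma(): first vma ending above addr. */
static struct toy_vma *toy_find_vma(struct toy_vma *vmas, size_t n,
				    unsigned long addr)
{
	for (size_t i = 0; i < n; i++)
		if (addr < vmas[i].vm_end)
			return &vmas[i];
	return NULL;
}

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* Two VMAs with a one-page hole between them, like the reproducer. */
	struct toy_vma vmas[] = {
		{ 0x1000, 0x2000 },
		{ 0x3000, 0x4000 },
	};
	unsigned long addr = 0x1000, end = 0x4000;
	struct toy_vma *vma = toy_find_vma(vmas, 2, addr);

	while (1) {
		/* Pages in the hole before the next VMA are always clean. */
		unsigned long hole_end = vma ? min_ul(end, vma->vm_start) : end;

		for (; addr < hole_end; addr += PAGE_SIZE)
			printf("%#lx: hole -> not present, clean\n", addr);

		if (!vma || vma->vm_start >= end)
			break;

		/* Pages inside the VMA get their real pte-derived state. */
		for (; addr < min_ul(end, vma->vm_end); addr += PAGE_SIZE)
			printf("%#lx: vma -> per-pte flags\n", addr);

		if (addr == end)
			break;
		vma = toy_find_vma(vmas, 2, addr);
	}
	return 0;
}

Run against the reproducer's layout (a mapped page, a one-page hole, a mapped page), this reports the middle page as a clean hole, which is exactly what the patched pagemap_pte_range() now does.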