diff options
author | Konstantin Khlebnikov <khlebnikov@yandex-team.ru> | 2015-02-11 18:27:31 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-11 20:06:05 -0500 |
commit | 05fbf357d94152171bc50f8a369390f1f16efd89 (patch) | |
tree | 656156ba6e3b055ef9353f0d17e92b59de49175b /fs | |
parent | 0664e57ff0c68cbca012a45a38288fa277eb6795 (diff) |
proc/pagemap: walk page tables under pte lock
Lockless access to pte in pagemap_pte_range() might race with page
migration and trigger BUG_ON(!PageLocked()) in migration_entry_to_page():
    CPU A (pagemap)                           CPU B (migration)

                                              lock_page()
                                              try_to_unmap(page, TTU_MIGRATION...)
                                                  make_migration_entry()
                                                  set_pte_at()
    <read *pte>
    pte_to_pagemap_entry()
                                              remove_migration_ptes()
                                              unlock_page()
    if (is_migration_entry())
        migration_entry_to_page()
            BUG_ON(!PageLocked(page))
Also lockless read might be non-atomic if pte is larger than wordsize.
Other pte walkers (smaps, numa_maps, clear_refs) already lock ptes.
Fixes: 052fb0d635df ("proc: report file/anon bit in /proc/pid/pagemap")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reported-by: Andrey Ryabinin <a.ryabinin@samsung.com>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: <stable@vger.kernel.org> [3.5+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/proc/task_mmu.c | 14 |
1 files changed, 9 insertions, 5 deletions
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index e6e0abeb5d12..eeab30fcffcc 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -1056,7 +1056,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1056 | struct vm_area_struct *vma; | 1056 | struct vm_area_struct *vma; |
1057 | struct pagemapread *pm = walk->private; | 1057 | struct pagemapread *pm = walk->private; |
1058 | spinlock_t *ptl; | 1058 | spinlock_t *ptl; |
1059 | pte_t *pte; | 1059 | pte_t *pte, *orig_pte; |
1060 | int err = 0; | 1060 | int err = 0; |
1061 | 1061 | ||
1062 | /* find the first VMA at or above 'addr' */ | 1062 | /* find the first VMA at or above 'addr' */ |
@@ -1117,15 +1117,19 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
1117 | BUG_ON(is_vm_hugetlb_page(vma)); | 1117 | BUG_ON(is_vm_hugetlb_page(vma)); |
1118 | 1118 | ||
1119 | /* Addresses in the VMA. */ | 1119 | /* Addresses in the VMA. */ |
1120 | for (; addr < min(end, vma->vm_end); addr += PAGE_SIZE) { | 1120 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); |
1121 | for (; addr < min(end, vma->vm_end); pte++, addr += PAGE_SIZE) { | ||
1121 | pagemap_entry_t pme; | 1122 | pagemap_entry_t pme; |
1122 | pte = pte_offset_map(pmd, addr); | 1123 | |
1123 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); | 1124 | pte_to_pagemap_entry(&pme, pm, vma, addr, *pte); |
1124 | pte_unmap(pte); | ||
1125 | err = add_to_pagemap(addr, &pme, pm); | 1125 | err = add_to_pagemap(addr, &pme, pm); |
1126 | if (err) | 1126 | if (err) |
1127 | return err; | 1127 | break; |
1128 | } | 1128 | } |
1129 | pte_unmap_unlock(orig_pte, ptl); | ||
1130 | |||
1131 | if (err) | ||
1132 | return err; | ||
1129 | 1133 | ||
1130 | if (addr == end) | 1134 | if (addr == end) |
1131 | break; | 1135 | break; |