about | summary | refs | log | tree | commit | diff | stats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
author: Johannes Weiner <hannes@cmpxchg.org> 2014-05-06 15:50:05 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-05-06 16:04:59 -0400
commit: 139b6a6fb1539e04b01663d61baff3088c63dbb5 (patch)
tree: 299fc6452057660ce3626e9e8c00d675d6647423 /mm/memcontrol.c
parent: 49e068f0b73dd042c186ffa9b420a9943e90389a (diff)
mm: filemap: update find_get_pages_tag() to deal with shadow entries

Dave Jones reports the following crash when find_get_pages_tag() runs
into an exceptional entry:

  kernel BUG at mm/filemap.c:1347!
  RIP: find_get_pages_tag+0x1cb/0x220
  Call Trace:
    find_get_pages_tag+0x36/0x220
    pagevec_lookup_tag+0x21/0x30
    filemap_fdatawait_range+0xbe/0x1e0
    filemap_fdatawait+0x27/0x30
    sync_inodes_sb+0x204/0x2a0
    sync_inodes_one_sb+0x19/0x20
    iterate_supers+0xb2/0x110
    sys_sync+0x44/0xb0
    ia32_do_call+0x13/0x13

  1343                         /*
  1344                          * This function is never used on a shmem/tmpfs
  1345                          * mapping, so a swap entry won't be found here.
  1346                          */
  1347                         BUG();

After commit 0cd6144aadd2 ("mm + fs: prepare for non-page entries in
page cache radix trees") this comment and BUG() are out of date because
exceptional entries can now appear in all mappings - as shadows of
recently evicted pages.

However, as Hugh Dickins notes,

  "it is truly surprising for a PAGECACHE_TAG_WRITEBACK (and probably
   any other PAGECACHE_TAG_*) to appear on an exceptional entry.

   I expect it comes down to an occasional race in RCU lookup of the
   radix_tree: lacking absolute synchronization, we might sometimes
   catch an exceptional entry, with the tag which really belongs with
   the unexceptional entry which was there an instant before."

And indeed, not only is the tree walk lockless, the tags are also read
in chunks, one radix tree node at a time. There is plenty of time for
page reclaim to swoop in and replace a page that was already looked up
as tagged with a shadow entry.

Remove the BUG() and update the comment. While reviewing all other
lookup sites for whether they properly deal with shadow entries of
evicted pages, update all the comments and fix memcg file charge moving
to not miss shmem/tmpfs swapcache pages.

Fixes: 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page cache radix trees")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Dave Jones <davej@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 20
1 files changed, 12 insertions, 8 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 29501f040568..c47dffdcb246 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6686,16 +6686,20 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
6686 pgoff = pte_to_pgoff(ptent); 6686 pgoff = pte_to_pgoff(ptent);
6687 6687
6688 /* page is moved even if it's not RSS of this task(page-faulted). */ 6688 /* page is moved even if it's not RSS of this task(page-faulted). */
6689 page = find_get_page(mapping, pgoff);
6690
6691#ifdef CONFIG_SWAP 6689#ifdef CONFIG_SWAP
6692 /* shmem/tmpfs may report page out on swap: account for that too. */ 6690 /* shmem/tmpfs may report page out on swap: account for that too. */
6693 if (radix_tree_exceptional_entry(page)) { 6691 if (shmem_mapping(mapping)) {
6694 swp_entry_t swap = radix_to_swp_entry(page); 6692 page = find_get_entry(mapping, pgoff);
6695 if (do_swap_account) 6693 if (radix_tree_exceptional_entry(page)) {
6696 *entry = swap; 6694 swp_entry_t swp = radix_to_swp_entry(page);
6697 page = find_get_page(swap_address_space(swap), swap.val); 6695 if (do_swap_account)
6698 } 6696 *entry = swp;
6697 page = find_get_page(swap_address_space(swp), swp.val);
6698 }
6699 } else
6700 page = find_get_page(mapping, pgoff);
6701#else
6702 page = find_get_page(mapping, pgoff);
6699#endif 6703#endif
6700 return page; 6704 return page;
6701} 6705}