aboutsummaryrefslogtreecommitdiffstats
path: root/mm/filemap.c
diff options
context:
space:
mode:
author: Johannes Weiner <hannes@cmpxchg.org> 2014-05-06 15:50:05 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-05-06 16:04:59 -0400
commit: 139b6a6fb1539e04b01663d61baff3088c63dbb5 (patch)
tree: 299fc6452057660ce3626e9e8c00d675d6647423 /mm/filemap.c
parent: 49e068f0b73dd042c186ffa9b420a9943e90389a (diff)
mm: filemap: update find_get_pages_tag() to deal with shadow entries
Dave Jones reports the following crash when find_get_pages_tag() runs
into an exceptional entry:

  kernel BUG at mm/filemap.c:1347!
  RIP: find_get_pages_tag+0x1cb/0x220
  Call Trace:
    find_get_pages_tag+0x36/0x220
    pagevec_lookup_tag+0x21/0x30
    filemap_fdatawait_range+0xbe/0x1e0
    filemap_fdatawait+0x27/0x30
    sync_inodes_sb+0x204/0x2a0
    sync_inodes_one_sb+0x19/0x20
    iterate_supers+0xb2/0x110
    sys_sync+0x44/0xb0
    ia32_do_call+0x13/0x13

  1343                          /*
  1344                           * This function is never used on a shmem/tmpfs
  1345                           * mapping, so a swap entry won't be found here.
  1346                           */
  1347                          BUG();

After commit 0cd6144aadd2 ("mm + fs: prepare for non-page entries in
page cache radix trees") this comment and BUG() are out of date because
exceptional entries can now appear in all mappings - as shadows of
recently evicted pages.

However, as Hugh Dickins notes,

  "it is truly surprising for a PAGECACHE_TAG_WRITEBACK (and probably
   any other PAGECACHE_TAG_*) to appear on an exceptional entry.

   I expect it comes down to an occasional race in RCU lookup of the
   radix_tree: lacking absolute synchronization, we might sometimes
   catch an exceptional entry, with the tag which really belongs with
   the unexceptional entry which was there an instant before."

And indeed, not only is the tree walk lockless, the tags are also read
in chunks, one radix tree node at a time. There is plenty of time for
page reclaim to swoop in and replace a page that was already looked up
as tagged with a shadow entry.

Remove the BUG() and update the comment. While reviewing all other
lookup sites for whether they properly deal with shadow entries of
evicted pages, update all the comments and fix memcg file charge moving
to not miss shmem/tmpfs swapcache pages.
Fixes: 0cd6144aadd2 ("mm + fs: prepare for non-page entries in page cache radix trees")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Dave Jones <davej@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- mm/filemap.c 49
1 files changed, 28 insertions, 21 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 5020b280a771..000a220e2a41 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -906,8 +906,8 @@ EXPORT_SYMBOL(page_cache_prev_hole);
906 * Looks up the page cache slot at @mapping & @offset. If there is a 906 * Looks up the page cache slot at @mapping & @offset. If there is a
907 * page cache page, it is returned with an increased refcount. 907 * page cache page, it is returned with an increased refcount.
908 * 908 *
909 * If the slot holds a shadow entry of a previously evicted page, it 909 * If the slot holds a shadow entry of a previously evicted page, or a
910 * is returned. 910 * swap entry from shmem/tmpfs, it is returned.
911 * 911 *
912 * Otherwise, %NULL is returned. 912 * Otherwise, %NULL is returned.
913 */ 913 */
@@ -928,9 +928,9 @@ repeat:
928 if (radix_tree_deref_retry(page)) 928 if (radix_tree_deref_retry(page))
929 goto repeat; 929 goto repeat;
930 /* 930 /*
931 * Otherwise, shmem/tmpfs must be storing a swap entry 931 * A shadow entry of a recently evicted page,
932 * here as an exceptional entry: so return it without 932 * or a swap entry from shmem/tmpfs. Return
933 * attempting to raise page count. 933 * it without attempting to raise page count.
934 */ 934 */
935 goto out; 935 goto out;
936 } 936 }
@@ -983,8 +983,8 @@ EXPORT_SYMBOL(find_get_page);
983 * page cache page, it is returned locked and with an increased 983 * page cache page, it is returned locked and with an increased
984 * refcount. 984 * refcount.
985 * 985 *
986 * If the slot holds a shadow entry of a previously evicted page, it 986 * If the slot holds a shadow entry of a previously evicted page, or a
987 * is returned. 987 * swap entry from shmem/tmpfs, it is returned.
988 * 988 *
989 * Otherwise, %NULL is returned. 989 * Otherwise, %NULL is returned.
990 * 990 *
@@ -1099,8 +1099,8 @@ EXPORT_SYMBOL(find_or_create_page);
1099 * with ascending indexes. There may be holes in the indices due to 1099 * with ascending indexes. There may be holes in the indices due to
1100 * not-present pages. 1100 * not-present pages.
1101 * 1101 *
1102 * Any shadow entries of evicted pages are included in the returned 1102 * Any shadow entries of evicted pages, or swap entries from
1103 * array. 1103 * shmem/tmpfs, are included in the returned array.
1104 * 1104 *
1105 * find_get_entries() returns the number of pages and shadow entries 1105 * find_get_entries() returns the number of pages and shadow entries
1106 * which were found. 1106 * which were found.
@@ -1128,9 +1128,9 @@ repeat:
1128 if (radix_tree_deref_retry(page)) 1128 if (radix_tree_deref_retry(page))
1129 goto restart; 1129 goto restart;
1130 /* 1130 /*
1131 * Otherwise, we must be storing a swap entry 1131 * A shadow entry of a recently evicted page,
1132 * here as an exceptional entry: so return it 1132 * or a swap entry from shmem/tmpfs. Return
1133 * without attempting to raise page count. 1133 * it without attempting to raise page count.
1134 */ 1134 */
1135 goto export; 1135 goto export;
1136 } 1136 }
@@ -1198,9 +1198,9 @@ repeat:
1198 goto restart; 1198 goto restart;
1199 } 1199 }
1200 /* 1200 /*
1201 * Otherwise, shmem/tmpfs must be storing a swap entry 1201 * A shadow entry of a recently evicted page,
1202 * here as an exceptional entry: so skip over it - 1202 * or a swap entry from shmem/tmpfs. Skip
1203 * we only reach this from invalidate_mapping_pages(). 1203 * over it.
1204 */ 1204 */
1205 continue; 1205 continue;
1206 } 1206 }
@@ -1265,9 +1265,9 @@ repeat:
1265 goto restart; 1265 goto restart;
1266 } 1266 }
1267 /* 1267 /*
1268 * Otherwise, shmem/tmpfs must be storing a swap entry 1268 * A shadow entry of a recently evicted page,
1269 * here as an exceptional entry: so stop looking for 1269 * or a swap entry from shmem/tmpfs. Stop
1270 * contiguous pages. 1270 * looking for contiguous pages.
1271 */ 1271 */
1272 break; 1272 break;
1273 } 1273 }
@@ -1341,10 +1341,17 @@ repeat:
1341 goto restart; 1341 goto restart;
1342 } 1342 }
1343 /* 1343 /*
1344 * This function is never used on a shmem/tmpfs 1344 * A shadow entry of a recently evicted page.
1345 * mapping, so a swap entry won't be found here. 1345 *
1346 * Those entries should never be tagged, but
1347 * this tree walk is lockless and the tags are
1348 * looked up in bulk, one radix tree node at a
1349 * time, so there is a sizable window for page
1350 * reclaim to evict a page we saw tagged.
1351 *
1352 * Skip over it.
1346 */ 1353 */
1347 BUG(); 1354 continue;
1348 } 1355 }
1349 1356
1350 if (!page_cache_get_speculative(page)) 1357 if (!page_cache_get_speculative(page))