diff options
author | Dan Williams <dan.j.williams@intel.com> | 2017-12-23 01:02:48 -0500 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2018-04-03 08:41:19 -0400 |
commit | d2c997c0f14535eff68d8ed9c2f1c5e100625751 (patch) | |
tree | 5cb06d1486db2db5cb9ad022d070ba425368ae45 /fs/dax.c | |
parent | fb094c90748fbeba1063927eeb751add147b35b9 (diff) |
fs, dax: use page->mapping to warn if truncate collides with a busy page
Catch cases where extent unmap operations encounter pages that are
pinned / busy. Typically this is pinned pages that are under active dma.
This warning is a canary for potential data corruption as truncated
blocks could be allocated to a new file while the device is still
performing i/o.
Here is an example of a collision that this implementation catches:
WARNING: CPU: 2 PID: 1286 at fs/dax.c:343 dax_disassociate_entry+0x55/0x80
[..]
Call Trace:
__dax_invalidate_mapping_entry+0x6c/0xf0
dax_delete_mapping_entry+0xf/0x20
truncate_exceptional_pvec_entries.part.12+0x1af/0x200
truncate_inode_pages_range+0x268/0x970
? tlb_gather_mmu+0x10/0x20
? up_write+0x1c/0x40
? unmap_mapping_range+0x73/0x140
xfs_free_file_space+0x1b6/0x5b0 [xfs]
? xfs_file_fallocate+0x7f/0x320 [xfs]
? down_write_nested+0x40/0x70
? xfs_ilock+0x21d/0x2f0 [xfs]
xfs_file_fallocate+0x162/0x320 [xfs]
? rcu_read_lock_sched_held+0x3f/0x70
? rcu_sync_lockdep_assert+0x2a/0x50
? __sb_start_write+0xd0/0x1b0
? vfs_fallocate+0x20c/0x270
vfs_fallocate+0x154/0x270
SyS_fallocate+0x43/0x80
entry_SYSCALL_64_fastpath+0x1f/0x96
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'fs/dax.c')
-rw-r--r-- | fs/dax.c | 63 |
1 file changed, 63 insertions, 0 deletions
@@ -298,6 +298,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping, | |||
298 | dax_wake_mapping_entry_waiter(mapping, index, entry, false); | 298 | dax_wake_mapping_entry_waiter(mapping, index, entry, false); |
299 | } | 299 | } |
300 | 300 | ||
301 | static unsigned long dax_entry_size(void *entry) | ||
302 | { | ||
303 | if (dax_is_zero_entry(entry)) | ||
304 | return 0; | ||
305 | else if (dax_is_empty_entry(entry)) | ||
306 | return 0; | ||
307 | else if (dax_is_pmd_entry(entry)) | ||
308 | return PMD_SIZE; | ||
309 | else | ||
310 | return PAGE_SIZE; | ||
311 | } | ||
312 | |||
313 | static unsigned long dax_radix_end_pfn(void *entry) | ||
314 | { | ||
315 | return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE; | ||
316 | } | ||
317 | |||
318 | /* | ||
319 | * Iterate through all mapped pfns represented by an entry, i.e. skip | ||
320 | * 'empty' and 'zero' entries. | ||
321 | */ | ||
322 | #define for_each_mapped_pfn(entry, pfn) \ | ||
323 | for (pfn = dax_radix_pfn(entry); \ | ||
324 | pfn < dax_radix_end_pfn(entry); pfn++) | ||
325 | |||
326 | static void dax_associate_entry(void *entry, struct address_space *mapping) | ||
327 | { | ||
328 | unsigned long pfn; | ||
329 | |||
330 | if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) | ||
331 | return; | ||
332 | |||
333 | for_each_mapped_pfn(entry, pfn) { | ||
334 | struct page *page = pfn_to_page(pfn); | ||
335 | |||
336 | WARN_ON_ONCE(page->mapping); | ||
337 | page->mapping = mapping; | ||
338 | } | ||
339 | } | ||
340 | |||
341 | static void dax_disassociate_entry(void *entry, struct address_space *mapping, | ||
342 | bool trunc) | ||
343 | { | ||
344 | unsigned long pfn; | ||
345 | |||
346 | if (IS_ENABLED(CONFIG_FS_DAX_LIMITED)) | ||
347 | return; | ||
348 | |||
349 | for_each_mapped_pfn(entry, pfn) { | ||
350 | struct page *page = pfn_to_page(pfn); | ||
351 | |||
352 | WARN_ON_ONCE(trunc && page_ref_count(page) > 1); | ||
353 | WARN_ON_ONCE(page->mapping && page->mapping != mapping); | ||
354 | page->mapping = NULL; | ||
355 | } | ||
356 | } | ||
357 | |||
301 | /* | 358 | /* |
302 | * Find radix tree entry at given index. If it points to an exceptional entry, | 359 | * Find radix tree entry at given index. If it points to an exceptional entry, |
303 | * return it with the radix tree entry locked. If the radix tree doesn't | 360 | * return it with the radix tree entry locked. If the radix tree doesn't |
@@ -404,6 +461,7 @@ restart: | |||
404 | } | 461 | } |
405 | 462 | ||
406 | if (pmd_downgrade) { | 463 | if (pmd_downgrade) { |
464 | dax_disassociate_entry(entry, mapping, false); | ||
407 | radix_tree_delete(&mapping->page_tree, index); | 465 | radix_tree_delete(&mapping->page_tree, index); |
408 | mapping->nrexceptional--; | 466 | mapping->nrexceptional--; |
409 | dax_wake_mapping_entry_waiter(mapping, index, entry, | 467 | dax_wake_mapping_entry_waiter(mapping, index, entry, |
@@ -453,6 +511,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping, | |||
453 | (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || | 511 | (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) || |
454 | radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) | 512 | radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))) |
455 | goto out; | 513 | goto out; |
514 | dax_disassociate_entry(entry, mapping, trunc); | ||
456 | radix_tree_delete(page_tree, index); | 515 | radix_tree_delete(page_tree, index); |
457 | mapping->nrexceptional--; | 516 | mapping->nrexceptional--; |
458 | ret = 1; | 517 | ret = 1; |
@@ -547,6 +606,10 @@ static void *dax_insert_mapping_entry(struct address_space *mapping, | |||
547 | 606 | ||
548 | spin_lock_irq(&mapping->tree_lock); | 607 | spin_lock_irq(&mapping->tree_lock); |
549 | new_entry = dax_radix_locked_entry(pfn, flags); | 608 | new_entry = dax_radix_locked_entry(pfn, flags); |
609 | if (dax_entry_size(entry) != dax_entry_size(new_entry)) { | ||
610 | dax_disassociate_entry(entry, mapping, false); | ||
611 | dax_associate_entry(new_entry, mapping); | ||
612 | } | ||
550 | 613 | ||
551 | if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { | 614 | if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) { |
552 | /* | 615 | /* |