aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz> 2016-08-10 11:22:44 -0400
committer: Dan Williams <dan.j.williams@intel.com> 2016-12-26 23:29:24 -0500
commit: c6dcf52c23d2d3fb5235cec42d7dd3f786b87d55 (patch)
tree: 7e63a6c0225a769e679b194f54b5723e4cfba385
parent: e568df6b84ff05a22467503afc11bee7a6ba0700 (diff)
mm: Invalidate DAX radix tree entries only if appropriate
Currently invalidate_inode_pages2_range() and invalidate_mapping_pages() just delete all exceptional radix tree entries they find. For DAX this is not desirable as we track cache dirtiness in these entries and when they are evicted, we may not flush caches although it is necessary. This can for example manifest when we write to the same block both via mmap and via write(2) (to different offsets) and fsync(2) then does not properly flush CPU caches when modification via write(2) was the last one.

Create appropriate DAX functions to handle invalidation of DAX entries for invalidate_inode_pages2_range() and invalidate_mapping_pages() and wire them up into the corresponding mm functions.

Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- fs/dax.c 71
-rw-r--r-- include/linux/dax.h 3
-rw-r--r-- mm/truncate.c 75
3 files changed, 125 insertions, 24 deletions
diff --git a/fs/dax.c b/fs/dax.c
index a8732fbed381..bcfedd184860 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -451,16 +451,37 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping,
451 __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); 451 __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
452} 452}
453 453
454static int __dax_invalidate_mapping_entry(struct address_space *mapping,
455 pgoff_t index, bool trunc)
456{
457 int ret = 0;
458 void *entry;
459 struct radix_tree_root *page_tree = &mapping->page_tree;
460
461 spin_lock_irq(&mapping->tree_lock);
462 entry = get_unlocked_mapping_entry(mapping, index, NULL);
463 if (!entry || !radix_tree_exceptional_entry(entry))
464 goto out;
465 if (!trunc &&
466 (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
467 radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
468 goto out;
469 radix_tree_delete(page_tree, index);
470 mapping->nrexceptional--;
471 ret = 1;
472out:
473 put_unlocked_mapping_entry(mapping, index, entry);
474 spin_unlock_irq(&mapping->tree_lock);
475 return ret;
476}
454/* 477/*
455 * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree 478 * Delete exceptional DAX entry at @index from @mapping. Wait for radix tree
456 * entry to get unlocked before deleting it. 479 * entry to get unlocked before deleting it.
457 */ 480 */
458int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index) 481int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
459{ 482{
460 void *entry; 483 int ret = __dax_invalidate_mapping_entry(mapping, index, true);
461 484
462 spin_lock_irq(&mapping->tree_lock);
463 entry = get_unlocked_mapping_entry(mapping, index, NULL);
464 /* 485 /*
465 * This gets called from truncate / punch_hole path. As such, the caller 486 * This gets called from truncate / punch_hole path. As such, the caller
466 * must hold locks protecting against concurrent modifications of the 487 * must hold locks protecting against concurrent modifications of the
@@ -468,16 +489,46 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
468 * caller has seen exceptional entry for this index, we better find it 489 * caller has seen exceptional entry for this index, we better find it
469 * at that index as well... 490 * at that index as well...
470 */ 491 */
471 if (WARN_ON_ONCE(!entry || !radix_tree_exceptional_entry(entry))) { 492 WARN_ON_ONCE(!ret);
472 spin_unlock_irq(&mapping->tree_lock); 493 return ret;
473 return 0; 494}
474 } 495
475 radix_tree_delete(&mapping->page_tree, index); 496/*
497 * Invalidate exceptional DAX entry if easily possible. This handles DAX
498 * entries for invalidate_inode_pages() so we evict the entry only if we can
499 * do so without blocking.
500 */
501int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index)
502{
503 int ret = 0;
504 void *entry, **slot;
505 struct radix_tree_root *page_tree = &mapping->page_tree;
506
507 spin_lock_irq(&mapping->tree_lock);
508 entry = __radix_tree_lookup(page_tree, index, NULL, &slot);
509 if (!entry || !radix_tree_exceptional_entry(entry) ||
510 slot_locked(mapping, slot))
511 goto out;
512 if (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
513 radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE))
514 goto out;
515 radix_tree_delete(page_tree, index);
476 mapping->nrexceptional--; 516 mapping->nrexceptional--;
517 ret = 1;
518out:
477 spin_unlock_irq(&mapping->tree_lock); 519 spin_unlock_irq(&mapping->tree_lock);
478 dax_wake_mapping_entry_waiter(mapping, index, entry, true); 520 if (ret)
521 dax_wake_mapping_entry_waiter(mapping, index, entry, true);
522 return ret;
523}
479 524
480 return 1; 525/*
526 * Invalidate exceptional DAX entry if it is clean.
527 */
528int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
529 pgoff_t index)
530{
531 return __dax_invalidate_mapping_entry(mapping, index, false);
481} 532}
482 533
483/* 534/*
diff --git a/include/linux/dax.h b/include/linux/dax.h
index f97bcfe79472..24ad71173995 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -41,6 +41,9 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
41int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf, 41int dax_iomap_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
42 struct iomap_ops *ops); 42 struct iomap_ops *ops);
43int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); 43int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
44int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index);
45int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
46 pgoff_t index);
44void dax_wake_mapping_entry_waiter(struct address_space *mapping, 47void dax_wake_mapping_entry_waiter(struct address_space *mapping,
45 pgoff_t index, void *entry, bool wake_all); 48 pgoff_t index, void *entry, bool wake_all);
46 49
diff --git a/mm/truncate.c b/mm/truncate.c
index fd97f1dbce29..dd7b24e083c5 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -24,20 +24,12 @@
24#include <linux/rmap.h> 24#include <linux/rmap.h>
25#include "internal.h" 25#include "internal.h"
26 26
27static void clear_exceptional_entry(struct address_space *mapping, 27static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
28 pgoff_t index, void *entry) 28 void *entry)
29{ 29{
30 struct radix_tree_node *node; 30 struct radix_tree_node *node;
31 void **slot; 31 void **slot;
32 32
33 /* Handled by shmem itself */
34 if (shmem_mapping(mapping))
35 return;
36
37 if (dax_mapping(mapping)) {
38 dax_delete_mapping_entry(mapping, index);
39 return;
40 }
41 spin_lock_irq(&mapping->tree_lock); 33 spin_lock_irq(&mapping->tree_lock);
42 /* 34 /*
43 * Regular page slots are stabilized by the page lock even 35 * Regular page slots are stabilized by the page lock even
@@ -55,6 +47,56 @@ unlock:
55 spin_unlock_irq(&mapping->tree_lock); 47 spin_unlock_irq(&mapping->tree_lock);
56} 48}
57 49
50/*
51 * Unconditionally remove exceptional entry. Usually called from truncate path.
52 */
53static void truncate_exceptional_entry(struct address_space *mapping,
54 pgoff_t index, void *entry)
55{
56 /* Handled by shmem itself */
57 if (shmem_mapping(mapping))
58 return;
59
60 if (dax_mapping(mapping)) {
61 dax_delete_mapping_entry(mapping, index);
62 return;
63 }
64 clear_shadow_entry(mapping, index, entry);
65}
66
67/*
68 * Invalidate exceptional entry if easily possible. This handles exceptional
69 * entries for invalidate_inode_pages() so for DAX it evicts only unlocked and
70 * clean entries.
71 */
72static int invalidate_exceptional_entry(struct address_space *mapping,
73 pgoff_t index, void *entry)
74{
75 /* Handled by shmem itself */
76 if (shmem_mapping(mapping))
77 return 1;
78 if (dax_mapping(mapping))
79 return dax_invalidate_mapping_entry(mapping, index);
80 clear_shadow_entry(mapping, index, entry);
81 return 1;
82}
83
84/*
85 * Invalidate exceptional entry if clean. This handles exceptional entries for
86 * invalidate_inode_pages2() so for DAX it evicts only clean entries.
87 */
88static int invalidate_exceptional_entry2(struct address_space *mapping,
89 pgoff_t index, void *entry)
90{
91 /* Handled by shmem itself */
92 if (shmem_mapping(mapping))
93 return 1;
94 if (dax_mapping(mapping))
95 return dax_invalidate_mapping_entry_sync(mapping, index);
96 clear_shadow_entry(mapping, index, entry);
97 return 1;
98}
99
58/** 100/**
59 * do_invalidatepage - invalidate part or all of a page 101 * do_invalidatepage - invalidate part or all of a page
60 * @page: the page which is affected 102 * @page: the page which is affected
@@ -262,7 +304,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
262 break; 304 break;
263 305
264 if (radix_tree_exceptional_entry(page)) { 306 if (radix_tree_exceptional_entry(page)) {
265 clear_exceptional_entry(mapping, index, page); 307 truncate_exceptional_entry(mapping, index,
308 page);
266 continue; 309 continue;
267 } 310 }
268 311
@@ -351,7 +394,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
351 } 394 }
352 395
353 if (radix_tree_exceptional_entry(page)) { 396 if (radix_tree_exceptional_entry(page)) {
354 clear_exceptional_entry(mapping, index, page); 397 truncate_exceptional_entry(mapping, index,
398 page);
355 continue; 399 continue;
356 } 400 }
357 401
@@ -470,7 +514,8 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
470 break; 514 break;
471 515
472 if (radix_tree_exceptional_entry(page)) { 516 if (radix_tree_exceptional_entry(page)) {
473 clear_exceptional_entry(mapping, index, page); 517 invalidate_exceptional_entry(mapping, index,
518 page);
474 continue; 519 continue;
475 } 520 }
476 521
@@ -592,7 +637,9 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
592 break; 637 break;
593 638
594 if (radix_tree_exceptional_entry(page)) { 639 if (radix_tree_exceptional_entry(page)) {
595 clear_exceptional_entry(mapping, index, page); 640 if (!invalidate_exceptional_entry2(mapping,
641 index, page))
642 ret = -EBUSY;
596 continue; 643 continue;
597 } 644 }
598 645