aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Andrey Ryabinin <aryabinin@virtuozzo.com> 2017-05-03 17:55:59 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2017-05-03 18:52:12 -0400
commit 55635ba76ef91f26b418702ace5e6287eb727f6a (patch)
tree 6d802c7031cf0aea4ebec1995b4133e7b26d15de
parent f0fe9984656604ea8effd5ff82709ff8ce1f954b (diff)
fs: fix data invalidation in the cleancache during direct IO
Patch series "Properly invalidate data in the cleancache", v2. We've noticed that after a direct IO write, a buffered read sometimes gets stale data which is coming from the cleancache. The reason for this is that some direct write hooks call invalidate_inode_pages2[_range]() conditionally iff mapping->nrpages is not zero, so we may not invalidate data in the cleancache. Another odd thing is that we check only for ->nrpages and don't check for ->nrexceptional, but invalidate_inode_pages2[_range] invalidates exceptional entries as well. So we invalidate exceptional entries only if ->nrpages != 0? This doesn't feel right. - Patch 1 fixes direct IO writes by removing the ->nrpages check. - Patch 2 fixes a similar case in invalidate_bdev(). Note: I only fixed the conditional cleancache_invalidate_inode() here. Do we also need to add a ->nrexceptional check to invalidate_bdev()? - Patches 3-4: some optimizations. This patch (of 4): Some direct IO write fs hooks call invalidate_inode_pages2[_range]() conditionally iff mapping->nrpages is not zero. This can't be right, because invalidate_inode_pages2[_range]() also invalidates data in the cleancache via the cleancache_invalidate_inode() call. So if the page cache is empty but there is some data in the cleancache, a buffered read after a direct IO write would get stale data from the cleancache. Also it doesn't feel right to check only for ->nrpages because invalidate_inode_pages2[_range] invalidates exceptional entries as well. Fix this by calling invalidate_inode_pages2[_range]() regardless of nrpages state. Note: nfs, cifs and 9p don't need a similar fix because they never call cleancache_get_page() (neither directly nor via mpage_readpage[s]()), so they are not affected by this bug. 
Fixes: c515e1fd361c ("mm/fs: add hooks to support cleancache") Link: http://lkml.kernel.org/r/20170424164135.22350-2-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin <aryabinin@virtuozzo.com> Reviewed-by: Jan Kara <jack@suse.cz> Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> Cc: Alexander Viro <viro@zeniv.linux.org.uk> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Jens Axboe <axboe@kernel.dk> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Alexey Kuznetsov <kuznet@virtuozzo.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Nikolay Borisov <n.borisov.lkml@gmail.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/iomap.c18
-rw-r--r--mm/filemap.c26
2 files changed, 19 insertions, 25 deletions
diff --git a/fs/iomap.c b/fs/iomap.c
index 141c3cd55a8b..1c25ae30500e 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -887,16 +887,14 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
887 flags |= IOMAP_WRITE; 887 flags |= IOMAP_WRITE;
888 } 888 }
889 889
890 if (mapping->nrpages) { 890 ret = filemap_write_and_wait_range(mapping, start, end);
891 ret = filemap_write_and_wait_range(mapping, start, end); 891 if (ret)
892 if (ret) 892 goto out_free_dio;
893 goto out_free_dio;
894 893
895 ret = invalidate_inode_pages2_range(mapping, 894 ret = invalidate_inode_pages2_range(mapping,
896 start >> PAGE_SHIFT, end >> PAGE_SHIFT); 895 start >> PAGE_SHIFT, end >> PAGE_SHIFT);
897 WARN_ON_ONCE(ret); 896 WARN_ON_ONCE(ret);
898 ret = 0; 897 ret = 0;
899 }
900 898
901 inode_dio_begin(inode); 899 inode_dio_begin(inode);
902 900
@@ -951,7 +949,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
951 * one is a pretty crazy thing to do, so we don't support it 100%. If 949 * one is a pretty crazy thing to do, so we don't support it 100%. If
952 * this invalidation fails, tough, the write still worked... 950 * this invalidation fails, tough, the write still worked...
953 */ 951 */
954 if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { 952 if (iov_iter_rw(iter) == WRITE) {
955 int err = invalidate_inode_pages2_range(mapping, 953 int err = invalidate_inode_pages2_range(mapping,
956 start >> PAGE_SHIFT, end >> PAGE_SHIFT); 954 start >> PAGE_SHIFT, end >> PAGE_SHIFT);
957 WARN_ON_ONCE(err); 955 WARN_ON_ONCE(err);
diff --git a/mm/filemap.c b/mm/filemap.c
index 7f425f18c158..681da61080bc 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2720,18 +2720,16 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
2720 * about to write. We do this *before* the write so that we can return 2720 * about to write. We do this *before* the write so that we can return
2721 * without clobbering -EIOCBQUEUED from ->direct_IO(). 2721 * without clobbering -EIOCBQUEUED from ->direct_IO().
2722 */ 2722 */
2723 if (mapping->nrpages) { 2723 written = invalidate_inode_pages2_range(mapping,
2724 written = invalidate_inode_pages2_range(mapping,
2725 pos >> PAGE_SHIFT, end); 2724 pos >> PAGE_SHIFT, end);
2726 /* 2725 /*
2727 * If a page can not be invalidated, return 0 to fall back 2726 * If a page can not be invalidated, return 0 to fall back
2728 * to buffered write. 2727 * to buffered write.
2729 */ 2728 */
2730 if (written) { 2729 if (written) {
2731 if (written == -EBUSY) 2730 if (written == -EBUSY)
2732 return 0; 2731 return 0;
2733 goto out; 2732 goto out;
2734 }
2735 } 2733 }
2736 2734
2737 written = mapping->a_ops->direct_IO(iocb, from); 2735 written = mapping->a_ops->direct_IO(iocb, from);
@@ -2744,10 +2742,8 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
2744 * so we don't support it 100%. If this invalidation 2742 * so we don't support it 100%. If this invalidation
2745 * fails, tough, the write still worked... 2743 * fails, tough, the write still worked...
2746 */ 2744 */
2747 if (mapping->nrpages) { 2745 invalidate_inode_pages2_range(mapping,
2748 invalidate_inode_pages2_range(mapping, 2746 pos >> PAGE_SHIFT, end);
2749 pos >> PAGE_SHIFT, end);
2750 }
2751 2747
2752 if (written > 0) { 2748 if (written > 0) {
2753 pos += written; 2749 pos += written;