aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2012-06-08 15:26:47 -0400
committerChris Mason <chris.mason@oracle.com>2012-06-14 21:30:54 -0400
commit7ddf5a42d311d74fd9f7373cb56def0843c219f8 (patch)
tree3b6a46eec858b867db9184d0e8beefe4ed01e9ec /fs/btrfs
parent8180ef8894fa402443205cff1e23417e8d3434df (diff)
Btrfs: call filemap_fdatawrite twice for compression
I removed this in an earlier commit and I was wrong. Because compression can return from filemap_fdatawrite() without having actually set any of it's pages as writeback() it can make filemap_fdatawait() do essentially nothing, and then we won't find any ordered extents because they may not have been created yet. So not only does this make fsync() completely useless, but it will also screw up if you truncate on a non-page aligned offset since we zero out the end and then wait on ordered extents and then call drop caches. We can drop the cache before the io completes and then we try to unpin the extent we just wrote we won't find it and everything goes sideways. So fix this by putting it back and put a giant comment there to keep me from trying to remove it in the future. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/btrfs_inode.h1
-rw-r--r--fs/btrfs/inode.c15
-rw-r--r--fs/btrfs/ordered-data.c22
3 files changed, 31 insertions, 7 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index e616f8872e69..12394a90d60f 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -37,6 +37,7 @@
37#define BTRFS_INODE_IN_DEFRAG 3 37#define BTRFS_INODE_IN_DEFRAG 3
38#define BTRFS_INODE_DELALLOC_META_RESERVED 4 38#define BTRFS_INODE_DELALLOC_META_RESERVED 4
39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5 39#define BTRFS_INODE_HAS_ORPHAN_ITEM 5
40#define BTRFS_INODE_HAS_ASYNC_EXTENT 6
40 41
41/* in memory btrfs inode */ 42/* in memory btrfs inode */
42struct btrfs_inode { 43struct btrfs_inode {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 06075043da5d..7a090fb4eb98 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1398,20 +1398,23 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1398 int ret; 1398 int ret;
1399 struct btrfs_root *root = BTRFS_I(inode)->root; 1399 struct btrfs_root *root = BTRFS_I(inode)->root;
1400 1400
1401 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) 1401 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) {
1402 ret = run_delalloc_nocow(inode, locked_page, start, end, 1402 ret = run_delalloc_nocow(inode, locked_page, start, end,
1403 page_started, 1, nr_written); 1403 page_started, 1, nr_written);
1404 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1404 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) {
1405 ret = run_delalloc_nocow(inode, locked_page, start, end, 1405 ret = run_delalloc_nocow(inode, locked_page, start, end,
1406 page_started, 0, nr_written); 1406 page_started, 0, nr_written);
1407 else if (!btrfs_test_opt(root, COMPRESS) && 1407 } else if (!btrfs_test_opt(root, COMPRESS) &&
1408 !(BTRFS_I(inode)->force_compress) && 1408 !(BTRFS_I(inode)->force_compress) &&
1409 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) 1409 !(BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS)) {
1410 ret = cow_file_range(inode, locked_page, start, end, 1410 ret = cow_file_range(inode, locked_page, start, end,
1411 page_started, nr_written, 1); 1411 page_started, nr_written, 1);
1412 else 1412 } else {
1413 set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1414 &BTRFS_I(inode)->runtime_flags);
1413 ret = cow_file_range_async(inode, locked_page, start, end, 1415 ret = cow_file_range_async(inode, locked_page, start, end,
1414 page_started, nr_written); 1416 page_started, nr_written);
1417 }
1415 return ret; 1418 return ret;
1416} 1419}
1417 1420
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 9e138cdc36c5..643335a4fe3c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -627,7 +627,27 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
627 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
628 * extents 628 * extents
629 */ 629 */
630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end); 630 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
631
632 /*
633 * So with compression we will find and lock a dirty page and clear the
634 * first one as dirty, setup an async extent, and immediately return
635 * with the entire range locked but with nobody actually marked with
636 * writeback. So we can't just filemap_write_and_wait_range() and
637 * expect it to work since it will just kick off a thread to do the
638 * actual work. So we need to call filemap_fdatawrite_range _again_
639 * since it will wait on the page lock, which won't be unlocked until
640 * after the pages have been marked as writeback and so we're good to go
641 * from there. We have to do this otherwise we'll miss the ordered
642 * extents and that results in badness. Please Josef, do not think you
643 * know better and pull this out at some point in the future, it is
644 * right and you are wrong.
645 */
646 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
647 &BTRFS_I(inode)->runtime_flags))
648 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
649
650 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
631 651
632 end = orig_end; 652 end = orig_end;
633 found = 0; 653 found = 0;