Btrfs: fix btrfs fallocate oops and deadlock

Btrfs fallocate was incorrectly starting a transaction with a lock held on the extent_io tree for the file, which could deadlock. Strictly speaking it was using join_transaction, which would be safe, but it is better to move the transaction outside of the lock. When preallocated extents are overwritten, btrfs_mark_buffer_dirty was being called on an unlocked buffer. This was triggering an assertion and oops because the lock is supposed to be held. The bug was calling btrfs_mark_buffer_dirty on a leaf after btrfs_del_item had been run. btrfs_del_item takes care of dirtying things, so the solution is to skip the btrfs_mark_buffer_dirty call in this case. Signed-off-by: Chris Mason <chris.mason@oracle.com>
author: Chris Mason <chris.mason@oracle.com> 2009-04-21 11:53:38 -0400
committer: Chris Mason <chris.mason@oracle.com> 2009-04-21 12:45:12 -0400
commit: 546888da82082555a56528730a83f0afd12f33bf (patch)
tree: 98ee868d1b8a4bd390a980fed707f91419b79fb5 /fs/btrfs
parent: 8c594ea81d7abbbffdda447b127f8ba8d76f319d (diff)
2 files changed, 31 insertions, 9 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e21c0060ee73..482f8db2cfd0 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -830,7 +830,7 @@ again:
                ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
                BUG_ON(ret);
-                goto done;
+                goto release;
        } else if (split == start) {
                if (locked_end < extent_end) {
                        ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
@@ -926,6 +926,8 @@ again:
        }
 done:
        btrfs_mark_buffer_dirty(leaf);
+release:
        btrfs_release_path(root, path);
        if (split_end && split == start) {
                split = end;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a0d1dd492a58..65219f6a16a1 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4970,10 +4970,10 @@ out_fail:
        return err;
 }
-static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
+static int prealloc_file_range(struct btrfs_trans_handle *trans,
+                               struct inode *inode, u64 start, u64 end,
                               u64 alloc_hint, int mode)
 {
-        struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_key ins;
        u64 alloc_size;
@@ -4981,10 +4981,6 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
        u64 num_bytes = end - start;
        int ret = 0;
-        trans = btrfs_join_transaction(root, 1);
-        BUG_ON(!trans);
-        btrfs_set_trans_block_group(trans, inode);
        while (num_bytes > 0) {
                alloc_size = min(num_bytes, root->fs_info->max_extent);
                ret = btrfs_reserve_extent(trans, root, alloc_size,
@@ -5015,7 +5011,6 @@ out:
                BUG_ON(ret);
        }
-        btrfs_end_transaction(trans, root);
        return ret;
 }
@@ -5029,11 +5024,18 @@ static long btrfs_fallocate(struct inode *inode, int mode,
        u64 alloc_hint = 0;
        u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
        struct extent_map *em;
+        struct btrfs_trans_handle *trans;
        int ret;
        alloc_start = offset & ~mask;
        alloc_end =  (offset + len + mask) & ~mask;
+        /*
+         * wait for ordered IO before we have any locks.  We'll loop again
+         * below with the locks held.
+         */
+        btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
        mutex_lock(&inode->i_mutex);
        if (alloc_start > inode->i_size) {
                ret = btrfs_cont_expand(inode, alloc_start);
@@ -5043,6 +5045,16 @@ static long btrfs_fallocate(struct inode *inode, int mode,
        while (1) {
                struct btrfs_ordered_extent *ordered;
+                trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
+                if (!trans) {
+                        ret = -EIO;
+                        goto out;
+                }
+                /* the extent lock is ordered inside the running
+                 * transaction
+                 */
                lock_extent(&BTRFS_I(inode)->io_tree, alloc_start,
                            alloc_end - 1, GFP_NOFS);
                ordered = btrfs_lookup_first_ordered_extent(inode,
@@ -5053,6 +5065,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
                        btrfs_put_ordered_extent(ordered);
                        unlock_extent(&BTRFS_I(inode)->io_tree,
                                      alloc_start, alloc_end - 1, GFP_NOFS);
+                        btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+                        /*
+                         * we can't wait on the range with the transaction
+                         * running or with the extent lock held
+                         */
                        btrfs_wait_ordered_range(inode, alloc_start,
                                                 alloc_end - alloc_start);
                } else {
@@ -5070,7 +5088,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
                last_byte = min(extent_map_end(em), alloc_end);
                last_byte = (last_byte + mask) & ~mask;
                if (em->block_start == EXTENT_MAP_HOLE) {
-                        ret = prealloc_file_range(inode, cur_offset,
+                        ret = prealloc_file_range(trans, inode, cur_offset,
                                        last_byte, alloc_hint, mode);
                        if (ret < 0) {
                                free_extent_map(em);
@@ -5089,6 +5107,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
        }
        unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, alloc_end - 1,
                      GFP_NOFS);
+        btrfs_end_transaction(trans, BTRFS_I(inode)->root);
 out:
        mutex_unlock(&inode->i_mutex);
        return ret;
author	Chris Mason <chris.mason@oracle.com>	2009-04-21 11:53:38 -0400
committer	Chris Mason <chris.mason@oracle.com>	2009-04-21 12:45:12 -0400
commit	546888da82082555a56528730a83f0afd12f33bf (patch)
tree	98ee868d1b8a4bd390a980fed707f91419b79fb5 /fs/btrfs
parent	8c594ea81d7abbbffdda447b127f8ba8d76f319d (diff)