From 920bbbfb05c9fce22e088d20eb9dcb8f96342de9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:08 +0000 Subject: Btrfs: Rewrite btrfs_drop_extents Rewrite btrfs_drop_extents by using btrfs_duplicate_item, so we can avoid calling lock_extent within transaction. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b3ad168a0bfc..ef250be49cdb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -230,8 +230,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, return 1; } - ret = btrfs_drop_extents(trans, root, inode, start, - aligned_end, aligned_end, start, + ret = btrfs_drop_extents(trans, inode, start, aligned_end, &hint_byte, 1); BUG_ON(ret); @@ -1596,7 +1595,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, struct inode *inode, u64 file_pos, u64 disk_bytenr, u64 disk_num_bytes, u64 num_bytes, u64 ram_bytes, - u64 locked_end, u8 compression, u8 encryption, u16 other_encoding, int extent_type) { @@ -1622,9 +1620,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, * the caller is expected to unpin it and allow it to be merged * with the others. */ - ret = btrfs_drop_extents(trans, root, inode, file_pos, - file_pos + num_bytes, locked_end, - file_pos, &hint, 0); + ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes, + &hint, 0); BUG_ON(ret); ins.objectid = inode->i_ino; @@ -1746,7 +1743,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) compressed = 1; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { BUG_ON(compressed); - ret = btrfs_mark_extent_written(trans, root, inode, + ret = btrfs_mark_extent_written(trans, inode, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len); @@ -1758,8 +1755,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) ordered_extent->disk_len, ordered_extent->len, ordered_extent->len, - ordered_extent->file_offset + - ordered_extent->len, compressed, 0, 0, BTRFS_FILE_EXTENT_REG); unpin_extent_cache(&BTRFS_I(inode)->extent_tree, @@ -3209,11 +3204,9 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_drop_extents(trans, root, inode, - cur_offset, + err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, - block_end, - cur_offset, &hint_byte, 1); + &hint_byte, 1); if (err) break; @@ -5643,7 +5636,7 @@ out_fail: static int prealloc_file_range(struct btrfs_trans_handle *trans, struct inode *inode, u64 start, u64 end, - u64 locked_end, u64 alloc_hint, int mode) + u64 alloc_hint, int mode) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; @@ -5669,8 +5662,7 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, - ins.offset, locked_end, - 0, 0, 0, + ins.offset, 0, 0, 0, BTRFS_FILE_EXTENT_PREALLOC); BUG_ON(ret); btrfs_drop_extent_cache(inode, cur_offset, @@ -5779,8 +5771,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, last_byte = (last_byte + mask) & ~mask; if (em->block_start == EXTENT_MAP_HOLE) { ret = prealloc_file_range(trans, inode, cur_offset, - last_byte, locked_end + 1, - alloc_hint, mode); + last_byte, alloc_hint, mode); if (ret < 0) { free_extent_map(em); break; -- cgit v1.2.2 From c216775458a2ee345d9412a2770c2916acfb5d30 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:21 +0000 Subject: Btrfs: Fix disk_i_size update corner case There are some cases file extents are inserted without involving ordered struct. In these cases, we update disk_i_size directly, without checking pending ordered extent and DELALLOC bit. This patch extends btrfs_ordered_update_i_size() to handle these cases. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 71 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ef250be49cdb..fa57247887e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -188,8 +188,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); btrfs_free_path(path); + /* + * we're an inline extent, so nobody can + * extend the file past i_size without locking + * a page we already have locked. + * + * We must do any isize and inode updates + * before we unlock the pages. Otherwise we + * could end up racing with unlink. + */ BTRFS_I(inode)->disk_i_size = inode->i_size; btrfs_update_inode(trans, root, inode); + return 0; fail: btrfs_free_path(path); @@ -415,7 +425,6 @@ again: start, end, total_compressed, pages); } - btrfs_end_transaction(trans, root); if (ret == 0) { /* * inline extent creation worked, we don't need @@ -429,9 +438,11 @@ again: EXTENT_CLEAR_DELALLOC | EXTENT_CLEAR_ACCOUNTING | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); - ret = 0; + + btrfs_end_transaction(trans, root); goto free_pages_out; } + btrfs_end_transaction(trans, root); } if (will_compress) { @@ -542,7 +553,6 @@ static noinline int submit_compressed_extents(struct inode *inode, if (list_empty(&async_cow->extents)) return 0; - trans = btrfs_join_transaction(root, 1); while (!list_empty(&async_cow->extents)) { async_extent = list_entry(async_cow->extents.next, @@ -589,19 +599,15 @@ retry: lock_extent(io_tree, async_extent->start, async_extent->start + async_extent->ram_size - 1, GFP_NOFS); - /* - * here we're doing allocation and writeback of the - * compressed pages - */ - btrfs_drop_extent_cache(inode, async_extent->start, - async_extent->start + - async_extent->ram_size - 1, 0); + trans = btrfs_join_transaction(root, 1); ret = btrfs_reserve_extent(trans, root, async_extent->compressed_size, async_extent->compressed_size, 0, alloc_hint, (u64)-1, &ins, 1); + btrfs_end_transaction(trans, root); + if (ret) { int i; for (i = 0; i < async_extent->nr_pages; i++) { @@ -617,6 +623,14 @@ retry: goto retry; } + /* + * here we're doing allocation and writeback of the + * compressed pages + */ + btrfs_drop_extent_cache(inode, async_extent->start, + async_extent->start + + async_extent->ram_size - 1, 0); + em = alloc_extent_map(GFP_NOFS); em->start = async_extent->start; em->len = async_extent->ram_size; @@ -648,8 +662,6 @@ retry: BTRFS_ORDERED_COMPRESSED); BUG_ON(ret); - btrfs_end_transaction(trans, root); - /* * clear dirty, set writeback and unlock the pages. */ @@ -671,13 +683,11 @@ retry: async_extent->nr_pages); BUG_ON(ret); - trans = btrfs_join_transaction(root, 1); alloc_hint = ins.objectid + ins.offset; kfree(async_extent); cond_resched(); } - btrfs_end_transaction(trans, root); return 0; } @@ -741,6 +751,7 @@ static noinline int cow_file_range(struct inode *inode, EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); + *nr_written = *nr_written + (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE; *page_started = 1; @@ -1727,18 +1738,27 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) } } - trans = btrfs_join_transaction(root, 1); - if (!ordered_extent) ordered_extent = btrfs_lookup_ordered_extent(inode, start); BUG_ON(!ordered_extent); - if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) - goto nocow; + if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { + BUG_ON(!list_empty(&ordered_extent->list)); + ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); + if (!ret) { + trans = btrfs_join_transaction(root, 1); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); + } + goto out; + } lock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); + trans = btrfs_join_transaction(root, 1); + if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) compressed = 1; if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { @@ -1765,22 +1785,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) unlock_extent(io_tree, ordered_extent->file_offset, ordered_extent->file_offset + ordered_extent->len - 1, GFP_NOFS); -nocow: add_pending_csums(trans, inode, ordered_extent->file_offset, &ordered_extent->list); - mutex_lock(&BTRFS_I(inode)->extent_mutex); - btrfs_ordered_update_i_size(inode, ordered_extent); - btrfs_update_inode(trans, root, inode); - btrfs_remove_ordered_extent(inode, ordered_extent); - mutex_unlock(&BTRFS_I(inode)->extent_mutex); - + /* this also removes the ordered extent from the tree */ + btrfs_ordered_update_i_size(inode, 0, ordered_extent); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + btrfs_end_transaction(trans, root); +out: /* once for us */ btrfs_put_ordered_extent(ordered_extent); /* once for the tree */ btrfs_put_ordered_extent(ordered_extent); - btrfs_end_transaction(trans, root); return 0; } @@ -3562,7 +3580,6 @@ static noinline void init_btrfs_i(struct inode *inode) INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); - mutex_init(&BTRFS_I(inode)->extent_mutex); mutex_init(&BTRFS_I(inode)->log_mutex); } -- cgit v1.2.2 From c71bf099abddf3e0fdc27f251ba76fca1461d49a Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:40 +0000 Subject: Btrfs: Avoid orphan inodes cleanup while replaying log We do log replay in a single transaction, so it's not good to do unbound operations. This patch cleans up orphan inodes cleanup after replaying the log. It also avoids doing other unbound operations such as truncating a file during replaying log. These unbound operations are postponed to the orphan inode cleanup stage. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fa57247887e3..eb2db3bde236 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2093,16 +2093,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) struct inode *inode; int ret = 0, nr_unlink = 0, nr_truncate = 0; - path = btrfs_alloc_path(); - if (!path) + if (!xchg(&root->clean_orphans, 0)) return; + + path = btrfs_alloc_path(); + BUG_ON(!path); path->reada = -1; key.objectid = BTRFS_ORPHAN_OBJECTID; btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); key.offset = (u64)-1; - while (1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) { @@ -3298,6 +3299,11 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_wait_ordered_range(inode, 0, (u64)-1); + if (root->fs_info->log_root_recovering) { + BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan)); + goto no_delete; + } + if (inode->i_nlink > 0) { BUG_ON(btrfs_root_refs(&root->root_item) != 0); goto no_delete; @@ -3705,6 +3711,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) } srcu_read_unlock(&root->fs_info->subvol_srcu, index); + if (root != sub_root) { + down_read(&root->fs_info->cleanup_work_sem); + if (!(inode->i_sb->s_flags & MS_RDONLY)) + btrfs_orphan_cleanup(sub_root); + up_read(&root->fs_info->cleanup_work_sem); + } + return inode; } -- cgit v1.2.2 From 5a303d5d4b8055d2e5a03e92d04745bfc5881a22 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:52 +0000 Subject: Btrfs: Make fallocate(2) more ENOSPC friendly fallocate(2) may allocate large number of file extents, so it's not good to do it in a single transaction. This patch make fallocate(2) start a new transaction for each file extents it allocates. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 65 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 33 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index eb2db3bde236..8d8baaa61504 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5664,10 +5664,10 @@ out_fail: return err; } -static int prealloc_file_range(struct btrfs_trans_handle *trans, - struct inode *inode, u64 start, u64 end, +static int prealloc_file_range(struct inode *inode, u64 start, u64 end, u64 alloc_hint, int mode) { + struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_key ins; u64 alloc_size; @@ -5678,17 +5678,23 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); - ret = btrfs_reserve_metadata_space(root, 1); - if (ret) - goto out; - ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); - goto out; + break; + } + + ret = btrfs_reserve_metadata_space(root, 3); + if (ret) { + btrfs_free_reserved_extent(root, ins.objectid, + ins.offset); + break; } + + trans = btrfs_start_transaction(root, 1); + ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, @@ -5697,22 +5703,25 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans, BUG_ON(ret); btrfs_drop_extent_cache(inode, cur_offset, cur_offset + ins.offset -1, 0); + num_bytes -= ins.offset; cur_offset += ins.offset; alloc_hint = ins.objectid + ins.offset; - btrfs_unreserve_metadata_space(root, 1); - } -out: - if (cur_offset > start) { + inode->i_ctime = CURRENT_TIME; BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; if (!(mode & FALLOC_FL_KEEP_SIZE) && - cur_offset > i_size_read(inode)) - btrfs_i_size_write(inode, cur_offset); + cur_offset > inode->i_size) { + i_size_write(inode, cur_offset); + btrfs_ordered_update_i_size(inode, cur_offset, NULL); + } + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); - } + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 3); + } return ret; } @@ -5727,8 +5736,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, u64 locked_end; u64 mask = BTRFS_I(inode)->root->sectorsize - 1; struct extent_map *em; - struct btrfs_trans_handle *trans; - struct btrfs_root *root; int ret; alloc_start = offset & ~mask; @@ -5747,9 +5754,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, goto out; } - root = BTRFS_I(inode)->root; - - ret = btrfs_check_data_free_space(root, inode, + ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, alloc_end - alloc_start); if (ret) goto out; @@ -5758,12 +5763,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, while (1) { struct btrfs_ordered_extent *ordered; - trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); - if (!trans) { - ret = -EIO; - goto out_free; - } - /* the extent lock is ordered inside the running * transaction */ @@ -5777,8 +5776,6 @@ static long btrfs_fallocate(struct inode *inode, int mode, btrfs_put_ordered_extent(ordered); unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); - /* * we can't wait on the range with the transaction * running or with the extent lock held @@ -5799,9 +5796,12 @@ static long btrfs_fallocate(struct inode *inode, int mode, BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), alloc_end); last_byte = (last_byte + mask) & ~mask; - if (em->block_start == EXTENT_MAP_HOLE) { - ret = prealloc_file_range(trans, inode, cur_offset, - last_byte, alloc_hint, mode); + if (em->block_start == EXTENT_MAP_HOLE || + (cur_offset >= inode->i_size && + !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { + ret = prealloc_file_range(inode, + cur_offset, last_byte, + alloc_hint, mode); if (ret < 0) { free_extent_map(em); break; @@ -5820,9 +5820,8 @@ static long btrfs_fallocate(struct inode *inode, int mode, unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, GFP_NOFS); - btrfs_end_transaction(trans, BTRFS_I(inode)->root); -out_free: - btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start); + btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, + alloc_end - alloc_start); out: mutex_unlock(&inode->i_mutex); return ret; -- cgit v1.2.2 From 8082510e7124cc50d728f1b875639cb4e22312cc Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:35:36 +0000 Subject: Btrfs: Make truncate(2) more ENOSPC friendly truncating and deleting regular files are unbound operations, so it's not good to do them in a single transaction. This patch makes btrfs_truncate and btrfs_delete_inode start a new transaction after all items in a tree leaf are deleted. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 316 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 192 insertions(+), 124 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8d8baaa61504..dcec42ee8cf2 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2848,37 +2848,40 @@ out: * min_type is the minimum key type to truncate down to. If set to 0, this * will kill all the items on this inode, including the INODE_ITEM_KEY. */ -noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct inode *inode, - u64 new_size, u32 min_type) +int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + u64 new_size, u32 min_type) { - int ret; struct btrfs_path *path; - struct btrfs_key key; - struct btrfs_key found_key; - u32 found_type = (u8)-1; struct extent_buffer *leaf; struct btrfs_file_extent_item *fi; + struct btrfs_key key; + struct btrfs_key found_key; u64 extent_start = 0; u64 extent_num_bytes = 0; u64 extent_offset = 0; u64 item_end = 0; + u64 mask = root->sectorsize - 1; + u32 found_type = (u8)-1; int found_extent; int del_item; int pending_del_nr = 0; int pending_del_slot = 0; int extent_type = -1; int encoding; - u64 mask = root->sectorsize - 1; + int ret; + int err = 0; + + BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); if (root->ref_cows) btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); + path = btrfs_alloc_path(); BUG_ON(!path); path->reada = -1; - /* FIXME, add redo link to tree so we don't leak on crash */ key.objectid = inode->i_ino; key.offset = (u64)-1; key.type = (u8)-1; @@ -2886,17 +2889,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, search_again: path->leave_spinning = 1; ret = btrfs_search_slot(trans, root, &key, path, -1, 1); - if (ret < 0) - goto error; + if (ret < 0) { + err = ret; + goto out; + } if (ret > 0) { /* there are no items in the tree for us to truncate, we're * done */ - if (path->slots[0] == 0) { - ret = 0; - goto error; - } + if (path->slots[0] == 0) + goto out; path->slots[0]--; } @@ -2931,28 +2934,17 @@ search_again: } item_end--; } - if (item_end < new_size) { - if (found_type == BTRFS_DIR_ITEM_KEY) - found_type = BTRFS_INODE_ITEM_KEY; - else if (found_type == BTRFS_EXTENT_ITEM_KEY) - found_type = BTRFS_EXTENT_DATA_KEY; - else if (found_type == BTRFS_EXTENT_DATA_KEY) - found_type = BTRFS_XATTR_ITEM_KEY; - else if (found_type == BTRFS_XATTR_ITEM_KEY) - found_type = BTRFS_INODE_REF_KEY; - else if (found_type) - found_type--; - else + if (found_type > min_type) { + del_item = 1; + } else { + if (item_end < new_size) break; - btrfs_set_key_type(&key, found_type); - goto next; + if (found_key.offset >= new_size) + del_item = 1; + else + del_item = 0; } - if (found_key.offset >= new_size) - del_item = 1; - else - del_item = 0; found_extent = 0; - /* FIXME, shrink the extent if the ref count is only 1 */ if (found_type != BTRFS_EXTENT_DATA_KEY) goto delete; @@ -3039,42 +3031,36 @@ delete: inode->i_ino, extent_offset); BUG_ON(ret); } -next: - if (path->slots[0] == 0) { - if (pending_del_nr) - goto del_pending; - btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; - goto search_again; - } - path->slots[0]--; - if (pending_del_nr && - path->slots[0] + 1 != pending_del_slot) { - struct btrfs_key debug; -del_pending: - btrfs_item_key_to_cpu(path->nodes[0], &debug, - pending_del_slot); - ret = btrfs_del_items(trans, root, path, - pending_del_slot, - pending_del_nr); - BUG_ON(ret); - pending_del_nr = 0; + if (found_type == BTRFS_INODE_ITEM_KEY) + break; + + if (path->slots[0] == 0 || + path->slots[0] != pending_del_slot) { + if (root->ref_cows) { + err = -EAGAIN; + goto out; + } + if (pending_del_nr) { + ret = btrfs_del_items(trans, root, path, + pending_del_slot, + pending_del_nr); + BUG_ON(ret); + pending_del_nr = 0; + } btrfs_release_path(root, path); - if (found_type == BTRFS_INODE_ITEM_KEY) - break; goto search_again; + } else { + path->slots[0]--; } } - ret = 0; -error: +out: if (pending_del_nr) { ret = btrfs_del_items(trans, root, path, pending_del_slot, pending_del_nr); } btrfs_free_path(path); - return ret; + return err; } /* @@ -3194,10 +3180,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) if (size <= hole_start) return 0; - err = btrfs_truncate_page(inode->i_mapping, inode->i_size); - if (err) - return err; - while (1) { struct btrfs_ordered_extent *ordered; btrfs_wait_ordered_range(inode, hole_start, @@ -3210,9 +3192,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) btrfs_put_ordered_extent(ordered); } - trans = btrfs_start_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - cur_offset = hole_start; while (1) { em = btrfs_get_extent(inode, NULL, 0, cur_offset, @@ -3220,38 +3199,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) BUG_ON(IS_ERR(em) || !em); last_byte = min(extent_map_end(em), block_end); last_byte = (last_byte + mask) & ~mask; - if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) { + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { u64 hint_byte = 0; hole_size = last_byte - cur_offset; - err = btrfs_drop_extents(trans, inode, cur_offset, - cur_offset + hole_size, - &hint_byte, 1); - if (err) - break; - err = btrfs_reserve_metadata_space(root, 1); + err = btrfs_reserve_metadata_space(root, 2); if (err) break; + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + err = btrfs_drop_extents(trans, inode, cur_offset, + cur_offset + hole_size, + &hint_byte, 1); + BUG_ON(err); + err = btrfs_insert_file_extent(trans, root, inode->i_ino, cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); + BUG_ON(err); + btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); - btrfs_unreserve_metadata_space(root, 1); + + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 2); } free_extent_map(em); cur_offset = last_byte; - if (err || cur_offset >= block_end) + if (cur_offset >= block_end) break; } - btrfs_end_transaction(trans, root); unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); return err; } +static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) +{ + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_trans_handle *trans; + unsigned long nr; + int ret; + + if (attr->ia_size == inode->i_size) + return 0; + + if (attr->ia_size > inode->i_size) { + unsigned long limit; + limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur; + if (attr->ia_size > inode->i_sb->s_maxbytes) + return -EFBIG; + if (limit != RLIM_INFINITY && attr->ia_size > limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + } + + ret = btrfs_reserve_metadata_space(root, 1); + if (ret) + return ret; + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_orphan_add(trans, inode); + BUG_ON(ret); + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_unreserve_metadata_space(root, 1); + btrfs_btree_balance_dirty(root, nr); + + if (attr->ia_size > inode->i_size) { + ret = btrfs_cont_expand(inode, attr->ia_size); + if (ret) { + btrfs_truncate(inode); + return ret; + } + + i_size_write(inode, attr->ia_size); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + if (inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + return 0; + } + + /* + * We're truncating a file that used to have good data down to + * zero. Make sure it gets into the ordered flush list so that + * any new writes get down to disk quickly. + */ + if (attr->ia_size == 0) + BTRFS_I(inode)->ordered_data_close = 1; + + /* we don't support swapfiles, so vmtruncate shouldn't fail */ + ret = vmtruncate(inode, attr->ia_size); + BUG_ON(ret); + + return 0; +} + static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; @@ -3262,23 +3323,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) return err; if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { - if (attr->ia_size > inode->i_size) { - err = btrfs_cont_expand(inode, attr->ia_size); - if (err) - return err; - } else if (inode->i_size > 0 && - attr->ia_size == 0) { - - /* we're truncating a file that used to have good - * data down to zero. Make sure it gets into - * the ordered flush list so that any new writes - * get down to disk quickly. - */ - BTRFS_I(inode)->ordered_data_close = 1; - } + err = btrfs_setattr_size(inode, attr); + if (err) + return err; } + attr->ia_valid &= ~ATTR_SIZE; - err = inode_setattr(inode, attr); + if (attr->ia_valid) + err = inode_setattr(inode, attr); if (!err && ((attr->ia_valid & ATTR_MODE))) err = btrfs_acl_chmod(inode); @@ -3310,30 +3362,32 @@ void btrfs_delete_inode(struct inode *inode) } btrfs_i_size_write(inode, 0); - trans = btrfs_join_transaction(root, 1); - btrfs_set_trans_block_group(trans, inode); - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0); - if (ret) { - btrfs_orphan_del(NULL, inode); - goto no_delete_lock; - } + while (1) { + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); - btrfs_orphan_del(trans, inode); + if (ret != -EAGAIN) + break; - nr = trans->blocks_used; - clear_inode(inode); + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + trans = NULL; + btrfs_btree_balance_dirty(root, nr); + } - btrfs_end_transaction(trans, root); - btrfs_btree_balance_dirty(root, nr); - return; + if (ret == 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } -no_delete_lock: nr = trans->blocks_used; btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root, nr); no_delete: clear_inode(inode); + return; } /* @@ -5097,17 +5151,20 @@ static void btrfs_truncate(struct inode *inode) unsigned long nr; u64 mask = root->sectorsize - 1; - if (!S_ISREG(inode->i_mode)) - return; - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + if (!S_ISREG(inode->i_mode)) { + WARN_ON(1); return; + } ret = btrfs_truncate_page(inode->i_mapping, inode->i_size); if (ret) return; + btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); + btrfs_ordered_update_i_size(inode, inode->i_size, NULL); trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); /* * setattr is responsible for setting the ordered_data_close flag, @@ -5129,21 +5186,32 @@ static void btrfs_truncate(struct inode *inode) if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) btrfs_add_ordered_operation(trans, root, inode); - btrfs_set_trans_block_group(trans, inode); - btrfs_i_size_write(inode, inode->i_size); + while (1) { + ret = btrfs_truncate_inode_items(trans, root, inode, + inode->i_size, + BTRFS_EXTENT_DATA_KEY); + if (ret != -EAGAIN) + break; - ret = btrfs_orphan_add(trans, inode); - if (ret) - goto out; - /* FIXME, add redo link to tree so we don't leak on crash */ - ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, - BTRFS_EXTENT_DATA_KEY); - btrfs_update_inode(trans, root, inode); + ret = btrfs_update_inode(trans, root, inode); + BUG_ON(ret); + + nr = trans->blocks_used; + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root, nr); + + trans = btrfs_start_transaction(root, 1); + btrfs_set_trans_block_group(trans, inode); + } - ret = btrfs_orphan_del(trans, inode); + if (ret == 0 && inode->i_nlink > 0) { + ret = btrfs_orphan_del(trans, inode); + BUG_ON(ret); + } + + ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); -out: nr = trans->blocks_used; ret = btrfs_end_transaction_throttle(trans, root); BUG_ON(ret); @@ -5240,9 +5308,9 @@ void btrfs_destroy_inode(struct inode *inode) spin_lock(&root->list_lock); if (!list_empty(&BTRFS_I(inode)->i_orphan)) { - printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" - " list\n", inode->i_ino); - dump_stack(); + printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", + inode->i_ino); + list_del_init(&BTRFS_I(inode)->i_orphan); } spin_unlock(&root->list_lock); -- cgit v1.2.2 From f34f57a3ab4e73304d78c125682f1a53cd3975f2 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:35:27 +0000 Subject: Btrfs: Pass transaction handle to security and ACL initialization functions Pass transaction handle down to security and ACL initialization functions, so we can avoid starting nested transactions Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index dcec42ee8cf2..82740a3c628a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode, u64 start, u64 end, int *page_started, unsigned long *nr_written, int unlock); -static int btrfs_init_inode_security(struct inode *inode, struct inode *dir) +static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, + struct inode *inode, struct inode *dir) { int err; - err = btrfs_init_acl(inode, dir); + err = btrfs_init_acl(trans, inode, dir); if (!err) - err = btrfs_xattr_security_init(inode, dir); + err = btrfs_xattr_security_init(trans, inode, dir); return err; } @@ -4296,7 +4297,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4367,7 +4368,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; @@ -4500,7 +4501,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) drop_on_err = 1; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) goto out_fail; @@ -5660,7 +5661,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, if (IS_ERR(inode)) goto out_unlock; - err = btrfs_init_inode_security(inode, dir); + err = btrfs_init_inode_security(trans, inode, dir); if (err) { drop_inode = 1; goto out_unlock; -- cgit v1.2.2 From 24bbcf0442ee04660a5a030efdbb6d03f1c275cb Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:36:34 +0000 Subject: Btrfs: Add delayed iput iput() can trigger new transactions if we are dropping the final reference, so calling it in btrfs_commit_transaction may end up deadlock. This patch adds delayed iput to avoid the issue. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 82740a3c628a..168e8c040aab 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2022,6 +2022,54 @@ zeroit: return -EIO; } +struct delayed_iput { + struct list_head list; + struct inode *inode; +}; + +void btrfs_add_delayed_iput(struct inode *inode) +{ + struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct delayed_iput *delayed; + + if (atomic_add_unless(&inode->i_count, -1, 1)) + return; + + delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL); + delayed->inode = inode; + + spin_lock(&fs_info->delayed_iput_lock); + list_add_tail(&delayed->list, &fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); +} + +void btrfs_run_delayed_iputs(struct btrfs_root *root) +{ + LIST_HEAD(list); + struct btrfs_fs_info *fs_info = root->fs_info; + struct delayed_iput *delayed; + int empty; + + spin_lock(&fs_info->delayed_iput_lock); + empty = list_empty(&fs_info->delayed_iputs); + spin_unlock(&fs_info->delayed_iput_lock); + if (empty) + return; + + down_read(&root->fs_info->cleanup_work_sem); + spin_lock(&fs_info->delayed_iput_lock); + list_splice_init(&fs_info->delayed_iputs, &list); + spin_unlock(&fs_info->delayed_iput_lock); + + while (!list_empty(&list)) { + delayed = list_entry(list.next, struct delayed_iput, list); + list_del(&delayed->list); + iput(delayed->inode); + kfree(delayed); + } + up_read(&root->fs_info->cleanup_work_sem); +} + /* * This creates an orphan entry for the given inode in case something goes * wrong in the middle of an unlink/truncate. @@ -5568,7 +5616,7 @@ out_fail: * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. */ -int btrfs_start_delalloc_inodes(struct btrfs_root *root) +int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) { struct list_head *head = &root->fs_info->delalloc_inodes; struct btrfs_inode *binode; @@ -5587,7 +5635,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root) spin_unlock(&root->fs_info->delalloc_lock); if (inode) { filemap_flush(inode->i_mapping); - iput(inode); + if (delay_iput) + btrfs_add_delayed_iput(inode); + else + iput(inode); } cond_resched(); spin_lock(&root->fs_info->delalloc_lock); -- cgit v1.2.2 From 4a8be425a8fb8fbb5d881eb55fa6634c3463b9c9 Mon Sep 17 00:00:00 2001 From: TARUISI Hiroaki Date: Thu, 12 Nov 2009 07:14:26 +0000 Subject: Btrfs: deny sys_link across subvolumes. I rebased Christian Parpart's patch to deny hard link across subvolumes. Original patch modifies also btrfs_rename, but I excluded it because we can move across subvolumes now and it make no problem. ----------------- Hard link across subvolumes should not allowed in Btrfs. btrfs_link checks root of 'to' directory is same as root of 'from' file. If not same, btrfs_link returns -EPERM. Signed-off-by: TARUISI Hiroaki Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 168e8c040aab..da76cad92ecf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4462,6 +4462,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink == 0) return -ENOENT; + /* do not allow sys_link's with other subvols of the same device */ + if (root->objectid != BTRFS_I(inode)->root->objectid) + return -EPERM; + /* * 1 item for inode ref * 2 items for dir items -- cgit v1.2.2 From 3a1abec9f6880cf406593c392636199ea1c6c917 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 17 Dec 2009 15:47:17 -0500 Subject: Btrfs: make sure fallocate properly starts a transaction The recent patch to make fallocate enospc friendly would send down a NULL trans handle to the allocator. This moves the transaction start to properly fix things. Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index da76cad92ecf..5440bab23635 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5802,23 +5802,23 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, while (num_bytes > 0) { alloc_size = min(num_bytes, root->fs_info->max_extent); + trans = btrfs_start_transaction(root, 1); + ret = btrfs_reserve_extent(trans, root, alloc_size, root->sectorsize, 0, alloc_hint, (u64)-1, &ins, 1); if (ret) { WARN_ON(1); - break; + goto stop_trans; } ret = btrfs_reserve_metadata_space(root, 3); if (ret) { btrfs_free_reserved_extent(root, ins.objectid, ins.offset); - break; + goto stop_trans; } - trans = btrfs_start_transaction(root, 1); - ret = insert_reserved_file_extent(trans, inode, cur_offset, ins.objectid, ins.offset, ins.offset, @@ -5847,6 +5847,11 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, btrfs_unreserve_metadata_space(root, 3); } return ret; + +stop_trans: + btrfs_end_transaction(trans, root); + return ret; + } static long btrfs_fallocate(struct inode *inode, int mode, -- cgit v1.2.2