From 33345d01522f8152f99dc84a3e7a1a45707f387f Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Wed, 20 Apr 2011 10:31:50 +0800 Subject: Btrfs: Always use 64bit inode number There's a potential problem in 32bit system when we exhaust 32bit inode numbers and start to allocate big inode numbers, because btrfs uses inode->i_ino in many places. So here we always use BTRFS_I(inode)->location.objectid, which is an u64 variable. There are 2 exceptions that BTRFS_I(inode)->location.objectid != inode->i_ino: the btree inode (0 vs 1) and empty subvol dirs (256 vs 2), and inode->i_ino will be used in those cases. Another reason to make this change is I'm going to use a special inode to save free ino cache, and the inode number must be > (u64)-256. Signed-off-by: Li Zefan --- fs/btrfs/tree-log.c | 54 ++++++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c50271ad3157..4323dc68d6cd 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -519,7 +519,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * file. This must be done before the btrfs_drop_extents run * so we don't try to drop this extent. */ - ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, + ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode), start, 0); if (ret == 0 && @@ -832,7 +832,7 @@ again: read_extent_buffer(eb, name, (unsigned long)(ref + 1), namelen); /* if we already have a perfect match, we're done */ - if (inode_in_dir(root, path, dir->i_ino, inode->i_ino, + if (inode_in_dir(root, path, btrfs_ino(dir), btrfs_ino(inode), btrfs_inode_ref_index(eb, ref), name, namelen)) { goto out; @@ -960,8 +960,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, unsigned long ptr; unsigned long ptr_end; int name_len; + u64 ino = btrfs_ino(inode); - key.objectid = inode->i_ino; + key.objectid = ino; key.type = BTRFS_INODE_REF_KEY; key.offset = (u64)-1; @@ -980,7 +981,7 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, } btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - if (key.objectid != inode->i_ino || + if (key.objectid != ino || key.type != BTRFS_INODE_REF_KEY) break; ptr = btrfs_item_ptr_offset(path->nodes[0], path->slots[0]); @@ -1011,10 +1012,10 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, if (inode->i_nlink == 0) { if (S_ISDIR(inode->i_mode)) { ret = replay_dir_deletes(trans, root, NULL, path, - inode->i_ino, 1); + ino, 1); BUG_ON(ret); } - ret = insert_orphan_item(trans, root, inode->i_ino); + ret = insert_orphan_item(trans, root, ino); BUG_ON(ret); } btrfs_free_path(path); @@ -2197,6 +2198,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, int ret; int err = 0; int bytes_del = 0; + u64 dir_ino = btrfs_ino(dir); if (BTRFS_I(dir)->logged_trans < trans->transid) return 0; @@ -2212,7 +2214,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, + di = btrfs_lookup_dir_item(trans, log, path, dir_ino, name, name_len, -1); if (IS_ERR(di)) { err = PTR_ERR(di); @@ -2224,7 +2226,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, BUG_ON(ret); } btrfs_release_path(log, path); - di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, + di = btrfs_lookup_dir_index_item(trans, log, path, dir_ino, index, name, name_len, -1); if (IS_ERR(di)) { err = PTR_ERR(di); @@ -2242,7 +2244,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, if (bytes_del) { struct btrfs_key key; - key.objectid = dir->i_ino; + key.objectid = dir_ino; key.offset = 0; key.type = BTRFS_INODE_ITEM_KEY; btrfs_release_path(log, path); @@ -2300,7 +2302,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, log = root->log_root; mutex_lock(&BTRFS_I(inode)->log_mutex); - ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, + ret = btrfs_del_inode_ref(trans, log, name, name_len, btrfs_ino(inode), dirid, &index); mutex_unlock(&BTRFS_I(inode)->log_mutex); if (ret == -ENOSPC) { @@ -2366,13 +2368,14 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, int nritems; u64 first_offset = min_offset; u64 last_offset = (u64)-1; + u64 ino = btrfs_ino(inode); log = root->log_root; - max_key.objectid = inode->i_ino; + max_key.objectid = ino; max_key.offset = (u64)-1; max_key.type = key_type; - min_key.objectid = inode->i_ino; + min_key.objectid = ino; min_key.type = key_type; min_key.offset = min_offset; @@ -2385,9 +2388,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, * we didn't find anything from this transaction, see if there * is anything at all */ - if (ret != 0 || min_key.objectid != inode->i_ino || - min_key.type != key_type) { - min_key.objectid = inode->i_ino; + if (ret != 0 || min_key.objectid != ino || min_key.type != key_type) { + min_key.objectid = ino; min_key.type = key_type; min_key.offset = (u64)-1; btrfs_release_path(root, path); @@ -2396,7 +2398,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); return ret; } - ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + ret = btrfs_previous_item(root, path, ino, key_type); /* if ret == 0 there are items for this type, * create a range to tell us the last key of this type. @@ -2414,7 +2416,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, } /* go backward to find any previous key */ - ret = btrfs_previous_item(root, path, inode->i_ino, key_type); + ret = btrfs_previous_item(root, path, ino, key_type); if (ret == 0) { struct btrfs_key tmp; btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); @@ -2449,8 +2451,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, for (i = path->slots[0]; i < nritems; i++) { btrfs_item_key_to_cpu(src, &min_key, i); - if (min_key.objectid != inode->i_ino || - min_key.type != key_type) + if (min_key.objectid != ino || min_key.type != key_type) goto done; ret = overwrite_item(trans, log, dst_path, src, i, &min_key); @@ -2471,7 +2472,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, goto done; } btrfs_item_key_to_cpu(path->nodes[0], &tmp, path->slots[0]); - if (tmp.objectid != inode->i_ino || tmp.type != key_type) { + if (tmp.objectid != ino || tmp.type != key_type) { last_offset = (u64)-1; goto done; } @@ -2497,8 +2498,7 @@ done: * is valid */ ret = insert_dir_log_key(trans, log, path, key_type, - inode->i_ino, first_offset, - last_offset); + ino, first_offset, last_offset); if (ret) err = ret; } @@ -2742,6 +2742,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, int nritems; int ins_start_slot = 0; int ins_nr; + u64 ino = btrfs_ino(inode); log = root->log_root; @@ -2754,11 +2755,11 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, return -ENOMEM; } - min_key.objectid = inode->i_ino; + min_key.objectid = ino; min_key.type = BTRFS_INODE_ITEM_KEY; min_key.offset = 0; - max_key.objectid = inode->i_ino; + max_key.objectid = ino; /* today the code can only do partial logging of directories */ if (!S_ISDIR(inode->i_mode)) @@ -2781,8 +2782,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, if (inode_only == LOG_INODE_EXISTS) max_key_type = BTRFS_XATTR_ITEM_KEY; - ret = drop_objectid_items(trans, log, path, - inode->i_ino, max_key_type); + ret = drop_objectid_items(trans, log, path, ino, max_key_type); } else { ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); } @@ -2800,7 +2800,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, break; again: /* note, ins_nr might be > 0 here, cleanup outside the loop */ - if (min_key.objectid != inode->i_ino) + if (min_key.objectid != ino) break; if (min_key.type > max_key.type) break; -- cgit v1.2.2 From c704005d886cf0bc9bc3974eb009b22fe0da32c7 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Tue, 19 Apr 2011 18:00:01 +0200 Subject: btrfs: unify checking of IS_ERR and null use IS_ERR_OR_NULL when possible, done by this coccinelle script: @ match @ identifier id; @@ ( - BUG_ON(IS_ERR(id) || !id); + BUG_ON(IS_ERR_OR_NULL(id)); | - IS_ERR(id) || !id + IS_ERR_OR_NULL(id) | - !id || IS_ERR(id) + IS_ERR_OR_NULL(id) ) Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f997ec0c1ba4..d5313d63f967 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1205,7 +1205,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, } else { BUG(); } - if (!dst_di || IS_ERR(dst_di)) { + if (IS_ERR_OR_NULL(dst_di)) { /* we need a sequence number to insert, so we only * do inserts for the BTRFS_DIR_INDEX_KEY types */ @@ -1426,7 +1426,7 @@ again: dir_key->offset, name, name_len, 0); } - if (!log_di || IS_ERR(log_di)) { + if (IS_ERR_OR_NULL(log_di)) { btrfs_dir_item_key_to_cpu(eb, di, &location); btrfs_release_path(root, path); btrfs_release_path(log, log_path); -- cgit v1.2.2 From b3b4aa74b58bded927f579fff787fb6fa1c0393c Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 21 Apr 2011 01:20:15 +0200 Subject: btrfs: drop unused parameter from btrfs_release_path parameter tree root it's not used since commit 5f39d397dfbe140a14edecd4e73c34ce23c4f9ee ("Btrfs: Create extent_buffer interface for large blocksizes") Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 98 ++++++++++++++++++++++++++--------------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d5313d63f967..c599e8c2a53c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -333,13 +333,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, goto insert; if (item_size == 0) { - btrfs_release_path(root, path); + btrfs_release_path(path); return 0; } dst_copy = kmalloc(item_size, GFP_NOFS); src_copy = kmalloc(item_size, GFP_NOFS); if (!dst_copy || !src_copy) { - btrfs_release_path(root, path); + btrfs_release_path(path); kfree(dst_copy); kfree(src_copy); return -ENOMEM; @@ -361,13 +361,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, * sync */ if (ret == 0) { - btrfs_release_path(root, path); + btrfs_release_path(path); return 0; } } insert: - btrfs_release_path(root, path); + btrfs_release_path(path); /* try to insert the key into the destination tree */ ret = btrfs_insert_empty_item(trans, root, path, key, item_size); @@ -438,7 +438,7 @@ insert: } no_copy: btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(root, path); + btrfs_release_path(path); return 0; } @@ -544,11 +544,11 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, * we don't have to do anything */ if (memcmp(&cmp1, &cmp2, sizeof(cmp1)) == 0) { - btrfs_release_path(root, path); + btrfs_release_path(path); goto out; } } - btrfs_release_path(root, path); + btrfs_release_path(path); saved_nbytes = inode_get_bytes(inode); /* drop any overlapping extents */ @@ -600,7 +600,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, key->objectid, offset, &ins); BUG_ON(ret); } - btrfs_release_path(root, path); + btrfs_release_path(path); if (btrfs_file_extent_compression(eb, item)) { csum_start = ins.objectid; @@ -629,7 +629,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, kfree(sums); } } else { - btrfs_release_path(root, path); + btrfs_release_path(path); } } else if (found_type == BTRFS_FILE_EXTENT_INLINE) { /* inline extents are easy, we just overwrite them */ @@ -675,7 +675,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, return -ENOMEM; read_extent_buffer(leaf, name, (unsigned long)(di + 1), name_len); - btrfs_release_path(root, path); + btrfs_release_path(path); inode = read_one_inode(root, location.objectid); BUG_ON(!inode); @@ -713,7 +713,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, goto out; } else goto out; - btrfs_release_path(root, path); + btrfs_release_path(path); di = btrfs_lookup_dir_item(NULL, root, path, dirid, name, name_len, 0); if (di && !IS_ERR(di)) { @@ -724,7 +724,7 @@ static noinline int inode_in_dir(struct btrfs_root *root, goto out; match = 1; out: - btrfs_release_path(root, path); + btrfs_release_path(path); return match; } @@ -884,7 +884,7 @@ again: if (!backref_in_log(log, key, victim_name, victim_name_len)) { btrfs_inc_nlink(inode); - btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_unlink_inode(trans, root, dir, inode, victim_name, @@ -901,7 +901,7 @@ again: */ search_done = 1; } - btrfs_release_path(root, path); + btrfs_release_path(path); insert: /* insert our name */ @@ -922,7 +922,7 @@ out: BUG_ON(ret); out_nowrite: - btrfs_release_path(root, path); + btrfs_release_path(path); iput(dir); iput(inode); return 0; @@ -999,9 +999,9 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, if (key.offset == 0) break; key.offset--; - btrfs_release_path(root, path); + btrfs_release_path(path); } - btrfs_release_path(root, path); + btrfs_release_path(path); if (nlink != inode->i_nlink) { inode->i_nlink = nlink; btrfs_update_inode(trans, root, inode); @@ -1052,7 +1052,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, ret = btrfs_del_item(trans, root, path); BUG_ON(ret); - btrfs_release_path(root, path); + btrfs_release_path(path); inode = read_one_inode(root, key.offset); BUG_ON(!inode); @@ -1068,7 +1068,7 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, */ key.offset = (u64)-1; } - btrfs_release_path(root, path); + btrfs_release_path(path); return 0; } @@ -1096,7 +1096,7 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, root, path, &key, 0); - btrfs_release_path(root, path); + btrfs_release_path(path); if (ret == 0) { btrfs_inc_nlink(inode); btrfs_update_inode(trans, root, inode); @@ -1192,7 +1192,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, exists = 1; else exists = 0; - btrfs_release_path(root, path); + btrfs_release_path(path); if (key->type == BTRFS_DIR_ITEM_KEY) { dst_di = btrfs_lookup_dir_item(trans, root, path, key->objectid, @@ -1236,13 +1236,13 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, if (key->type == BTRFS_DIR_INDEX_KEY) goto insert; out: - btrfs_release_path(root, path); + btrfs_release_path(path); kfree(name); iput(dir); return 0; insert: - btrfs_release_path(root, path); + btrfs_release_path(path); ret = insert_one_name(trans, root, path, key->objectid, key->offset, name, name_len, log_type, &log_key); @@ -1363,7 +1363,7 @@ next: *end_ret = found_end; ret = 0; out: - btrfs_release_path(root, path); + btrfs_release_path(path); return ret; } @@ -1428,8 +1428,8 @@ again: } if (IS_ERR_OR_NULL(log_di)) { btrfs_dir_item_key_to_cpu(eb, di, &location); - btrfs_release_path(root, path); - btrfs_release_path(log, log_path); + btrfs_release_path(path); + btrfs_release_path(log_path); inode = read_one_inode(root, location.objectid); BUG_ON(!inode); @@ -1453,7 +1453,7 @@ again: ret = 0; goto out; } - btrfs_release_path(log, log_path); + btrfs_release_path(log_path); kfree(name); ptr = (unsigned long)(di + 1); @@ -1461,8 +1461,8 @@ again: } ret = 0; out: - btrfs_release_path(root, path); - btrfs_release_path(log, log_path); + btrfs_release_path(path); + btrfs_release_path(log_path); return ret; } @@ -1550,7 +1550,7 @@ again: break; dir_key.offset = found_key.offset + 1; } - btrfs_release_path(root, path); + btrfs_release_path(path); if (range_end == (u64)-1) break; range_start = range_end + 1; @@ -1561,11 +1561,11 @@ next_type: if (key_type == BTRFS_DIR_LOG_ITEM_KEY) { key_type = BTRFS_DIR_LOG_INDEX_KEY; dir_key.type = BTRFS_DIR_INDEX_KEY; - btrfs_release_path(root, path); + btrfs_release_path(path); goto again; } out: - btrfs_release_path(root, path); + btrfs_release_path(path); btrfs_free_path(log_path); iput(dir); return ret; @@ -2225,7 +2225,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, bytes_del += name_len; BUG_ON(ret); } - btrfs_release_path(log, path); + btrfs_release_path(path); di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, index, name, name_len, -1); if (IS_ERR(di)) { @@ -2247,7 +2247,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, key.objectid = dir->i_ino; key.offset = 0; key.type = BTRFS_INODE_ITEM_KEY; - btrfs_release_path(log, path); + btrfs_release_path(path); ret = btrfs_search_slot(trans, log, &key, path, 0, 1); if (ret < 0) { @@ -2269,7 +2269,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(path->nodes[0]); } else ret = 0; - btrfs_release_path(log, path); + btrfs_release_path(path); } fail: btrfs_free_path(path); @@ -2344,7 +2344,7 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, struct btrfs_dir_log_item); btrfs_set_dir_log_end(path->nodes[0], item, last_offset); btrfs_mark_buffer_dirty(path->nodes[0]); - btrfs_release_path(log, path); + btrfs_release_path(path); return 0; } @@ -2393,10 +2393,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, min_key.objectid = inode->i_ino; min_key.type = key_type; min_key.offset = (u64)-1; - btrfs_release_path(root, path); + btrfs_release_path(path); ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); if (ret < 0) { - btrfs_release_path(root, path); + btrfs_release_path(path); return ret; } ret = btrfs_previous_item(root, path, inode->i_ino, key_type); @@ -2432,7 +2432,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, } } } - btrfs_release_path(root, path); + btrfs_release_path(path); /* find the first key from this transaction again */ ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); @@ -2490,8 +2490,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, } } done: - btrfs_release_path(root, path); - btrfs_release_path(log, dst_path); + btrfs_release_path(path); + btrfs_release_path(dst_path); if (err == 0) { *last_offset_ret = last_offset; @@ -2588,9 +2588,9 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, ret = btrfs_del_item(trans, log, path); BUG_ON(ret); - btrfs_release_path(log, path); + btrfs_release_path(path); } - btrfs_release_path(log, path); + btrfs_release_path(path); return ret; } @@ -2696,7 +2696,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, } btrfs_mark_buffer_dirty(dst_path->nodes[0]); - btrfs_release_path(log, dst_path); + btrfs_release_path(dst_path); kfree(ins_data); /* @@ -2845,7 +2845,7 @@ next_slot: } ins_nr = 0; } - btrfs_release_path(root, path); + btrfs_release_path(path); if (min_key.offset < (u64)-1) min_key.offset++; @@ -2868,8 +2868,8 @@ next_slot: } WARN_ON(ins_nr); if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { - btrfs_release_path(root, path); - btrfs_release_path(log, dst_path); + btrfs_release_path(path); + btrfs_release_path(dst_path); ret = log_directory_changes(trans, root, inode, path, dst_path); if (ret) { err = ret; @@ -3136,7 +3136,7 @@ again: } btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - btrfs_release_path(log_root_tree, path); + btrfs_release_path(path); if (found_key.objectid != BTRFS_TREE_LOG_OBJECTID) break; @@ -3171,7 +3171,7 @@ again: if (found_key.offset == 0) break; } - btrfs_release_path(log_root_tree, path); + btrfs_release_path(path); /* step one is to pin it all, step two is to replay just inodes */ if (wc.pin) { -- cgit v1.2.2 From a2de733c78fa7af51ba9670482fa7d392aa67c57 Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 8 Mar 2011 14:14:00 +0100 Subject: btrfs: scrub This adds an initial implementation for scrub. It works quite straightforward. The usermode issues an ioctl for each device in the fs. For each device, it enumerates the allocated device chunks. For each chunk, the contained extents are enumerated and the data checksums fetched. The extents are read sequentially and the checksums verified. If an error occurs (checksum or EIO), a good copy is searched for. If one is found, the bad copy will be rewritten. All enumerations happen from the commit roots. During a transaction commit, the scrubs get paused and afterwards continue from the new roots. This commit is based on the series originally posted to linux-btrfs with some improvements that resulted from comments from David Sterba, Ilya Dryomov and Jan Schmidt. Signed-off-by: Arne Jansen --- fs/btrfs/tree-log.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f997ec0c1ba4..f1a0726da5f5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -614,7 +614,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ret = btrfs_lookup_csums_range(root->log_root, csum_start, csum_end - 1, - &ordered_sums); + &ordered_sums, 0); BUG_ON(ret); while (!list_empty(&ordered_sums)) { struct btrfs_ordered_sum *sums; @@ -2093,7 +2093,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, * the running transaction open, so a full commit can't hop * in and cause problems either. */ + btrfs_scrub_pause_super(root); write_ctree_super(trans, root->fs_info->tree_root, 1); + btrfs_scrub_continue_super(root); ret = 0; mutex_lock(&root->log_mutex); @@ -2689,7 +2691,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, ret = btrfs_lookup_csums_range( log->fs_info->csum_root, ds + cs, ds + cs + cl - 1, - &ordered_sums); + &ordered_sums, 0); BUG_ON(ret); } } -- cgit v1.2.2 From 16cdcec736cd214350cdb591bf1091f8beedefa0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 22 Apr 2011 18:12:22 +0800 Subject: btrfs: implement delayed inode items operation Changelog V5 -> V6: - Fix oom when the memory load is high, by storing the delayed nodes into the root's radix tree, and letting btrfs inodes go. Changelog V4 -> V5: - Fix the race on adding the delayed node to the inode, which is spotted by Chris Mason. - Merge Chris Mason's incremental patch into this patch. - Fix deadlock between readdir() and memory fault, which is reported by Itaru Kitayama. Changelog V3 -> V4: - Fix nested lock, which is reported by Itaru Kitayama, by updating space cache inode in time. Changelog V2 -> V3: - Fix the race between the delayed worker and the task which does delayed items balance, which is reported by Tsutomu Itoh. - Modify the patch address David Sterba's comment. - Fix the bug of the cpu recursion spinlock, reported by Chris Mason Changelog V1 -> V2: - break up the global rb-tree, use a list to manage the delayed nodes, which is created for every directory and file, and used to manage the delayed directory name index items and the delayed inode item. - introduce a worker to deal with the delayed nodes. Compare with Ext3/4, the performance of file creation and deletion on btrfs is very poor. the reason is that btrfs must do a lot of b+ tree insertions, such as inode item, directory name item, directory name index and so on. If we can do some delayed b+ tree insertion or deletion, we can improve the performance, so we made this patch which implemented delayed directory name index insertion/deletion and delayed inode update. Implementation: - introduce a delayed root object into the filesystem, that use two lists to manage the delayed nodes which are created for every file/directory. One is used to manage all the delayed nodes that have delayed items. And the other is used to manage the delayed nodes which is waiting to be dealt with by the work thread. - Every delayed node has two rb-tree, one is used to manage the directory name index which is going to be inserted into b+ tree, and the other is used to manage the directory name index which is going to be deleted from b+ tree. - introduce a worker to deal with the delayed operation. This worker is used to deal with the works of the delayed directory name index items insertion and deletion and the delayed inode update. When the delayed items is beyond the lower limit, we create works for some delayed nodes and insert them into the work queue of the worker, and then go back. When the delayed items is beyond the upper bound, we create works for all the delayed nodes that haven't been dealt with, and insert them into the work queue of the worker, and then wait for that the untreated items is below some threshold value. - When we want to insert a directory name index into b+ tree, we just add the information into the delayed inserting rb-tree. And then we check the number of the delayed items and do delayed items balance. (The balance policy is above.) - When we want to delete a directory name index from the b+ tree, we search it in the inserting rb-tree at first. If we look it up, just drop it. If not, add the key of it into the delayed deleting rb-tree. Similar to the delayed inserting rb-tree, we also check the number of the delayed items and do delayed items balance. (The same to inserting manipulation) - When we want to update the metadata of some inode, we cached the data of the inode into the delayed node. the worker will flush it into the b+ tree after dealing with the delayed insertion and deletion. - We will move the delayed node to the tail of the list after we access the delayed node, By this way, we can cache more delayed items and merge more inode updates. - If we want to commit transaction, we will deal with all the delayed node. - the delayed node will be freed when we free the btrfs inode. - Before we log the inode items, we commit all the directory name index items and the delayed inode update. I did a quick test by the benchmark tool[1] and found we can improve the performance of file creation by ~15%, and file deletion by ~20%. Before applying this patch: Create files: Total files: 50000 Total time: 1.096108 Average time: 0.000022 Delete files: Total files: 50000 Total time: 1.510403 Average time: 0.000030 After applying this patch: Create files: Total files: 50000 Total time: 0.932899 Average time: 0.000019 Delete files: Total files: 50000 Total time: 1.215732 Average time: 0.000024 [1] http://marc.info/?l=linux-btrfs&m=128212635122920&q=p3 Many thanks for Kitayama-san's help! Signed-off-by: Miao Xie Reviewed-by: David Sterba Tested-by: Tsutomu Itoh Tested-by: Itaru Kitayama Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f997ec0c1ba4..ae0b72856bfb 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2773,6 +2773,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, max_key.type = (u8)-1; max_key.offset = (u64)-1; + ret = btrfs_commit_inode_delayed_items(trans, inode); + if (ret) { + btrfs_free_path(path); + btrfs_free_path(dst_path); + return ret; + } + mutex_lock(&BTRFS_I(inode)->log_mutex); /* -- cgit v1.2.2 From 8e531cdfeb75269c6c5aae33651cca39707848da Mon Sep 17 00:00:00 2001 From: liubo Date: Fri, 6 May 2011 10:36:09 +0800 Subject: Btrfs: do not flush csum items of unchanged file data during treelog The current code relogs the entire inode every time during fsync log, and it is much better suited to small files rather than large ones. During my performance test, the fsync performace of large files sucks, and we can ascribe this to the tremendous amount of csum infos of the large ones, cause we have to flush all of these csum infos into log trees even when there are only _one_ change in the whole file data. Apparently, to optimize fsync, we need to create a filter to skip the unnecessary csum ones, that is, the corresponding file data remains unchanged before this fsync. Here I have some test results to show, I use sysbench to do "random write + fsync". === sysbench --test=fileio --num-threads=1 --file-num=2 --file-block-size=4K --file-total-size=8G --file-test-mode=rndwr --file-io-mode=sync --file-extra-flags= [prepare, run] === Sysbench args: - Number of threads: 1 - Extra file open flags: 0 - 2 files, 4Gb each - Block size 4Kb - Number of random requests for random IO: 10000 - Read/Write ratio for combined random IO test: 1.50 - Periodic FSYNC enabled, calling fsync() each 100 requests. - Calling fsync() at the end of test, Enabled. - Using synchronous I/O mode - Doing random write test Sysbench results: === Operations performed: 0 Read, 10000 Write, 200 Other = 10200 Total Read 0b Written 39.062Mb Total transferred 39.062Mb === a) without patch: (*SPEED* : 451.01Kb/sec) 112.75 Requests/sec executed b) with patch: (*SPEED* : 4.7533Mb/sec) 1216.84 Requests/sec executed PS: I've made a _sub transid_ stuff patch, but it does not perform as effectively as this patch, and I'm wanderring where the problem is and trying to improve it more. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index b4c191d6c774..0f5537e60bb4 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -2667,6 +2667,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, extent = btrfs_item_ptr(src, start_slot + i, struct btrfs_file_extent_item); + if (btrfs_file_extent_generation(src, extent) < trans->transid) + continue; + found_type = btrfs_file_extent_type(src, extent); if (found_type == BTRFS_FILE_EXTENT_REG || found_type == BTRFS_FILE_EXTENT_PREALLOC) { -- cgit v1.2.2 From 65a246c5ffe3b487a001de025816326939e63362 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 19 May 2011 04:37:44 +0000 Subject: Btrfs: return error code to caller when btrfs_del_item fails The error code is returned instead of calling BUG_ON when btrfs_del_item returns the error. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f997ec0c1ba4..cf2baeb70462 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1050,7 +1050,8 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, break; ret = btrfs_del_item(trans, root, path); - BUG_ON(ret); + if (ret) + goto out; btrfs_release_path(root, path); inode = read_one_inode(root, key.offset); @@ -1068,8 +1069,10 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, */ key.offset = (u64)-1; } + ret = 0; +out: btrfs_release_path(root, path); - return 0; + return ret; } @@ -2587,7 +2590,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, break; ret = btrfs_del_item(trans, log, path); - BUG_ON(ret); + if (ret) + break; btrfs_release_path(log, path); } btrfs_release_path(log, path); -- cgit v1.2.2 From 1cd307990d6e2b4965620e339a92e0d7ae853e13 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 19 May 2011 05:19:08 +0000 Subject: Btrfs: BUG_ON is deleted from the caller of btrfs_truncate_item & btrfs_extend_item Currently, btrfs_truncate_item and btrfs_extend_item returns only 0. So, the check by BUG_ON in the caller is unnecessary. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index cf2baeb70462..0350147106d5 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -382,7 +382,6 @@ insert: } else if (found_size < item_size) { ret = btrfs_extend_item(trans, root, path, item_size - found_size); - BUG_ON(ret); } } else if (ret) { return ret; -- cgit v1.2.2 From c00e9493f1412621c8665a707d63e32b0768f572 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 28 Apr 2011 09:10:23 +0000 Subject: Btrfs: return error to caller if read_one_inode() fails When read_one_inode() fails, error code is returned to caller instead of BUG_ON(). Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 0350147106d5..46b7b57650ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -677,7 +677,10 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); inode = read_one_inode(root, location.objectid); - BUG_ON(!inode); + if (!inode) { + kfree(name); + return -EIO; + } ret = link_to_fixup_dir(trans, root, path, location.objectid); BUG_ON(ret); @@ -816,7 +819,10 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans, return -ENOENT; inode = read_one_inode(root, key->objectid); - BUG_ON(!inode); + if (!inode) { + iput(dir); + return -EIO; + } ref_ptr = btrfs_item_ptr_offset(eb, slot); ref_end = ref_ptr + btrfs_item_size_nr(eb, slot); @@ -1054,7 +1060,8 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, btrfs_release_path(root, path); inode = read_one_inode(root, key.offset); - BUG_ON(!inode); + if (!inode) + return -EIO; ret = fixup_inode_link_count(trans, root, inode); BUG_ON(ret); @@ -1090,7 +1097,8 @@ static noinline int link_to_fixup_dir(struct btrfs_trans_handle *trans, struct inode *inode; inode = read_one_inode(root, objectid); - BUG_ON(!inode); + if (!inode) + return -EIO; key.objectid = BTRFS_TREE_LOG_FIXUP_OBJECTID; btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY); @@ -1177,7 +1185,8 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans, int ret; dir = read_one_inode(root, key->objectid); - BUG_ON(!dir); + if (!dir) + return -EIO; name_len = btrfs_dir_name_len(eb, di); name = kmalloc(name_len, GFP_NOFS); @@ -1433,7 +1442,10 @@ again: btrfs_release_path(root, path); btrfs_release_path(log, log_path); inode = read_one_inode(root, location.objectid); - BUG_ON(!inode); + if (!inode) { + kfree(name); + return -EIO; + } ret = link_to_fixup_dir(trans, root, path, location.objectid); -- cgit v1.2.2 From 37daa4f968e9470ae9f30e246a5781717c598271 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 28 Apr 2011 09:18:21 +0000 Subject: Btrfs: check return value of btrfs_inc_extent_ref() If return value of btrfs_inc_extent_ref() is not 0, BUG() is called. Signed-off-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/tree-log.c') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 46b7b57650ab..c2d887566400 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -589,6 +589,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, ins.objectid, ins.offset, 0, root->root_key.objectid, key->objectid, offset); + BUG_ON(ret); } else { /* * insert the extent pointer in the extent -- cgit v1.2.2