From d790155457a830d064d57e742521f114d3c38108 Mon Sep 17 00:00:00 2001 From: Carey Underwood Date: Mon, 4 Mar 2013 16:37:06 -0600 Subject: Btrfs: Release uuid_mutex for shrink during device delete Device scanning waits on the uuid_mutex, which can result in a very long wait if dev delete is shrinking the device. Signed-off-by: Carey Underwood Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 78b871753cb6..b5c2b6acbf60 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1562,7 +1562,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) clear_super = true; } + mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); + mutex_lock(&uuid_mutex); if (ret) goto error_undo; -- cgit v1.2.2 From ccf39f92f37b0ee7d8de663fc438482f2019c4a6 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 11 Jul 2013 15:41:11 +0200 Subject: btrfs: make errors in btrfs_num_copies less noisy The log message level 'critical' is verbose enough, 'emergency' beeps on all terminals. Signed-off-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b5c2b6acbf60..4c1dd540631e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4196,13 +4196,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) * and exit, so return 1 so the callers don't try to use other copies. */ if (!em) { - btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical, + btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical, logical+len); return 1; } if (em->start > logical || em->start + em->len < logical) { - btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " + btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); return 1; -- cgit v1.2.2 From 3cae210fa529d69cb25c2a3c491f29dab687b245 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 16 Jul 2013 11:19:18 +0800 Subject: btrfs: Cleanup for using BTRFS_SETGET_STACK instead of raw convert Some codes still use the cpu_to_lexx instead of the BTRFS_SETGET_STACK_FUNCS declared in ctree.h. Also added some BTRFS_SETGET_STACK_FUNCS for btrfs_header btrfs_timespec and other structures. Signed-off-by: Qu Wenruo Reviewed-by: Miao Xie Reviewed-by: David Sterba Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4c1dd540631e..557a7438f929 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -865,7 +865,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, disk_super = p + (bytenr & ~PAGE_CACHE_MASK); if (btrfs_super_bytenr(disk_super) != bytenr || - disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) + btrfs_super_magic(disk_super) != BTRFS_MAGIC) goto error_unmap; devid = btrfs_stack_device_id(&disk_super->dev_item); -- cgit v1.2.2 From 395927a9d8e64af518c2ccdbfbbdf9184b558315 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Tue, 30 Jul 2013 12:03:04 +0100 Subject: Btrfs: optimize function btrfs_read_chunk_tree After reading all device items from the chunk tree, don't exit the loop and then navigate down the tree again to find the chunk items. Instead just read all device items and chunk items with a single tree search. This is possible because all device items are found before any chunk item in the chunks tree. Signed-off-by: Filipe David Borba Manana Reviewed-by: Miao Xie Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 557a7438f929..93f39b1e7173 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5646,14 +5646,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) mutex_lock(&uuid_mutex); lock_chunks(root); - /* first we search for all of the device items, and then we - * read in all of the chunk items. This way we can create chunk - * mappings that reference all of the devices that are afound + /* + * Read all device items, and then all the chunk items. All + * device items are found before any chunk item (their object id + * is smaller than the lowest possible object id for a chunk + * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). */ key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.offset = 0; key.type = 0; -again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto error; @@ -5669,17 +5670,13 @@ again: break; } btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) - break; - if (found_key.type == BTRFS_DEV_ITEM_KEY) { - struct btrfs_dev_item *dev_item; - dev_item = btrfs_item_ptr(leaf, slot, + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); - ret = read_one_dev(root, leaf, dev_item); - if (ret) - goto error; - } + ret = read_one_dev(root, leaf, dev_item); + if (ret) + goto error; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -5689,11 +5686,6 @@ again: } path->slots[0]++; } - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - key.objectid = 0; - btrfs_release_path(path); - goto again; - } ret = 0; error: unlock_chunks(root); -- cgit v1.2.2 From eb2067f713ff1c6d867020e85f95fba7d000ed1a Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 30 Jul 2013 13:42:17 -0400 Subject: Fix leak in __btrfs_map_block error path If we bail out when the stripe alloc fails, we need to undo the earlier allocation of raid_map. Signed-off-by: Dave Jones Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 93f39b1e7173..44abd151132a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4673,6 +4673,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); if (!bbio) { + kfree(raid_map); ret = -ENOMEM; goto out; } -- cgit v1.2.2 From 35a3621beb3e2face3e7954eaee20a8fa0043fac Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 14 Aug 2013 18:12:25 +0200 Subject: Btrfs: get rid of sparse warnings make C=2 fs/btrfs/ CF=-D__CHECK_ENDIAN__ I tried to filter out the warnings for which patches have already been sent to the mailing list, pending for inclusion in btrfs-next. All these changes should be obviously safe. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 44abd151132a..306547b51a13 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3076,9 +3076,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } -void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, - struct btrfs_ioctl_balance_args *bargs); - /* * Should be called with both balance and volume mutexes held */ -- cgit v1.2.2 From f7a81ea4cc6bdb51d8267d2f3ff485f0b4070074 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:19 +0200 Subject: Btrfs: create UUID tree if required This tree is not created by mkfs.btrfs. Therefore when a filesystem is mounted writable and the UUID tree does not exist, this tree is created if required. The tree is also added to the fs_info structure and initialized, but this commit does not yet read or write UUID tree elements. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 306547b51a13..e084218c09d2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3429,6 +3429,32 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *uuid_root; + + /* + * 1 - root node + * 1 - root item + */ + trans = btrfs_start_transaction(tree_root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + uuid_root = btrfs_create_tree(trans, fs_info, + BTRFS_UUID_TREE_OBJECTID); + if (IS_ERR(uuid_root)) { + btrfs_abort_transaction(trans, tree_root, + PTR_ERR(uuid_root)); + return PTR_ERR(uuid_root); + } + + fs_info->uuid_root = uuid_root; + + return btrfs_commit_transaction(trans, tree_root); +} /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. -- cgit v1.2.2 From 803b2f54fb4faf6d76fca43e59bcc555d9713cd4 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:21 +0200 Subject: Btrfs: fill UUID tree initially When the UUID tree is initially created, a task is spawned that walks through the root tree. For each found subvolume root_item, the uuid and received_uuid entries in the UUID tree are added. This is such a quick operation so that in case somebody wants to unmount the filesystem while the task is still running, the unmount is delayed until the UUID tree building task is finished. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 150 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e084218c09d2..4066803fe765 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "compat.h" #include "ctree.h" @@ -3429,11 +3430,146 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } +static int btrfs_uuid_scan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = data; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path = NULL; + int ret = 0; + struct extent_buffer *eb; + int slot; + struct btrfs_root_item root_item; + u32 item_size; + struct btrfs_trans_handle *trans; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + + max_key.objectid = (u64)-1; + max_key.type = BTRFS_ROOT_ITEM_KEY; + max_key.offset = (u64)-1; + + path->keep_locks = 1; + + while (1) { + ret = btrfs_search_forward(root, &key, &max_key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + break; + } + + if (key.type != BTRFS_ROOT_ITEM_KEY || + (key.objectid < BTRFS_FIRST_FREE_OBJECTID && + key.objectid != BTRFS_FS_TREE_OBJECTID) || + key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto skip; + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + if (item_size < sizeof(root_item)) + goto skip; + + trans = NULL; + read_extent_buffer(eb, &root_item, + btrfs_item_ptr_offset(eb, slot), + (int)sizeof(root_item)); + if (btrfs_root_refs(&root_item) == 0) + goto skip; + if (!btrfs_is_empty_uuid(root_item.uuid)) { + /* + * 1 - subvol uuid item + * 1 - received_subvol uuid item + */ + trans = btrfs_start_transaction(fs_info->uuid_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.uuid, + BTRFS_UUID_KEY_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + btrfs_end_transaction(trans, + fs_info->uuid_root); + break; + } + } + + if (!btrfs_is_empty_uuid(root_item.received_uuid)) { + if (!trans) { + /* 1 - received_subvol uuid item */ + trans = btrfs_start_transaction( + fs_info->uuid_root, 1); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + } + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + key.objectid); + if (ret < 0) { + pr_warn("btrfs: uuid_tree_add failed %d\n", + ret); + btrfs_end_transaction(trans, + fs_info->uuid_root); + break; + } + } + + if (trans) { + ret = btrfs_end_transaction(trans, fs_info->uuid_root); + if (ret) + break; + } + +skip: + btrfs_release_path(path); + if (key.offset < (u64)-1) { + key.offset++; + } else if (key.type < BTRFS_ROOT_ITEM_KEY) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + } else if (key.objectid < (u64)-1) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.objectid++; + } else { + break; + } + cond_resched(); + } + +out: + btrfs_free_path(path); + if (ret) + pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); + up(&fs_info->uuid_tree_rescan_sem); + return 0; +} + int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) { struct btrfs_trans_handle *trans; struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *uuid_root; + struct task_struct *task; + int ret; /* * 1 - root node @@ -3453,8 +3589,21 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) fs_info->uuid_root = uuid_root; - return btrfs_commit_transaction(trans, tree_root); + ret = btrfs_commit_transaction(trans, tree_root); + if (ret) + return ret; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + pr_warn("btrfs: failed to start uuid_scan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; } + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. -- cgit v1.2.2 From 70f801754728017ebc909d603c69255dc1e6f06f Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 15 Aug 2013 17:11:23 +0200 Subject: Btrfs: check UUID tree during mount if required If the filesystem was mounted with an old kernel that was not aware of the UUID tree, this is detected by looking at the uuid_tree_generation field of the superblock (similar to how the free space cache is doing it). If a mismatch is detected at mount time, a thread is started that does two things: 1. Iterate through the UUID tree, check each entry, delete those entries that are not valid anymore (i.e., the subvol does not exist anymore or the value changed). 2. Iterate through the root tree, for each found subvolume, add the UUID tree entries for the subvolume (if they are not already there). This mechanism is also used to handle and repair errors that happened during the initial creation and filling of the tree. The update of the uuid_tree_generation field (which indicates that the state of the UUID tree is up to date) is blocked until all create and repair operations are successfully completed. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4066803fe765..75bdea6bf188 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3559,10 +3559,76 @@ out: btrfs_free_path(path); if (ret) pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); + else + fs_info->update_uuid_tree_gen = 1; up(&fs_info->uuid_tree_rescan_sem); return 0; } +/* + * Callback for btrfs_uuid_tree_iterate(). + * returns: + * 0 check succeeded, the entry is not outdated. + * < 0 if an error occured. + * > 0 if the check failed, which means the caller shall remove the entry. + */ +static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, + u8 *uuid, u8 type, u64 subid) +{ + struct btrfs_key key; + int ret = 0; + struct btrfs_root *subvol_root; + + if (type != BTRFS_UUID_KEY_SUBVOL && + type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto out; + + key.objectid = subid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + subvol_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(subvol_root)) { + ret = PTR_ERR(subvol_root); + if (ret == -ENOENT) + ret = 1; + goto out; + } + + switch (type) { + case BTRFS_UUID_KEY_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE)) + ret = 1; + break; + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.received_uuid, + BTRFS_UUID_SIZE)) + ret = 1; + break; + } + +out: + return ret; +} + +static int btrfs_uuid_rescan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data; + int ret; + + /* + * 1st step is to iterate through the existing UUID tree and + * to delete all entries that contain outdated data. + * 2nd step is to add all missing entries to the UUID tree. + */ + ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); + if (ret < 0) { + pr_warn("btrfs: iterating uuid_tree failed %d\n", ret); + up(&fs_info->uuid_tree_rescan_sem); + return ret; + } + return btrfs_uuid_scan_kthread(data); +} + int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) { struct btrfs_trans_handle *trans; @@ -3596,6 +3662,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) down(&fs_info->uuid_tree_rescan_sem); task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ pr_warn("btrfs: failed to start uuid_scan task\n"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); @@ -3604,6 +3671,22 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) return 0; } +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct task_struct *task; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + pr_warn("btrfs: failed to start uuid_rescan task\n"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. -- cgit v1.2.2 From 53f10659f9994df8efe788f82d3da78d48e650c5 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 12 Aug 2013 14:33:01 +0300 Subject: Btrfs: find_next_devid: root -> fs_info find_next_devid() knows which root to search, so it should take an fs_info instead of an arbitrary root. Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 75bdea6bf188..9b3595e370f8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1308,15 +1308,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) return ret; } -static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) +static noinline int find_next_devid(struct btrfs_fs_info *fs_info, + u64 *devid_ret) { int ret; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; - root = root->fs_info->chunk_root; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1325,20 +1324,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) key.type = BTRFS_DEV_ITEM_KEY; key.offset = (u64)-1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); if (ret < 0) goto error; BUG_ON(ret == 0); /* Corruption */ - ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + ret = btrfs_previous_item(fs_info->chunk_root, path, + BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); if (ret) { - *objectid = 1; + *devid_ret = 1; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.offset + 1; + *devid_ret = found_key.offset + 1; } ret = 0; error: @@ -1974,7 +1974,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root, &device->devid); + ret = find_next_devid(root->fs_info, &device->devid); if (ret) { rcu_string_free(device->name); kfree(device); -- cgit v1.2.2 From 12bd2fc0d2f589f9605b8f497eee2e7724f3af24 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 23 Aug 2013 13:20:17 +0300 Subject: Btrfs: add btrfs_alloc_device and switch to it Currently btrfs_device is allocated ad-hoc in a few different places, and as a result not all fields are initialized properly. In particular, readahead state is only initialized in device_list_add (at scan time), and not in btrfs_init_new_device (when the new device is added with 'btrfs dev add'). Fix this by adding an allocation helper and switch everybody but __btrfs_close_devices to it. (__btrfs_close_devices is dealt with in a later commit.) Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 151 +++++++++++++++++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 57 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9b3595e370f8..4bc07d910e54 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -102,6 +102,27 @@ void btrfs_cleanup_fs_uuids(void) } } +static struct btrfs_device *__alloc_device(void) +{ + struct btrfs_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_NOFS); + if (!dev) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dev->dev_list); + INIT_LIST_HEAD(&dev->dev_alloc_list); + + spin_lock_init(&dev->io_lock); + + spin_lock_init(&dev->reada_lock); + atomic_set(&dev->reada_in_flight, 0); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + + return dev; +} + static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { @@ -415,17 +436,12 @@ static noinline int device_list_add(const char *path, if (fs_devices->opened) return -EBUSY; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(NULL, &devid, + disk_super->dev_item.uuid); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - return -ENOMEM; + return PTR_ERR(device); } - device->devid = devid; - device->dev_stats_valid = 0; - device->work.func = pending_bios_fn; - memcpy(device->uuid, disk_super->dev_item.uuid, - BTRFS_UUID_SIZE); - spin_lock_init(&device->io_lock); name = rcu_string_strdup(path, GFP_NOFS); if (!name) { @@ -433,15 +449,6 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } rcu_assign_pointer(device->name, name); - INIT_LIST_HEAD(&device->dev_alloc_list); - - /* init readahead state */ - spin_lock_init(&device->reada_lock); - device->reada_curr_zone = NULL; - atomic_set(&device->reada_in_flight, 0); - device->reada_next = 0; - INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); @@ -492,8 +499,9 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) list_for_each_entry(orig_dev, &orig->devices, dev_list) { struct rcu_string *name; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &orig_dev->devid, + orig_dev->uuid); + if (IS_ERR(device)) goto error; /* @@ -507,13 +515,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) } rcu_assign_pointer(device->name, name); - device->devid = orig_dev->devid; - device->work.func = pending_bios_fn; - memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_list); - INIT_LIST_HEAD(&device->dev_alloc_list); - list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; fs_devices->num_devices++; @@ -1959,10 +1960,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(root->fs_info, NULL, NULL); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - ret = -ENOMEM; + ret = PTR_ERR(device); goto error; } @@ -1974,13 +1975,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root->fs_info, &device->devid); - if (ret) { - rcu_string_free(device->name); - kfree(device); - goto error; - } - trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); @@ -1995,9 +1989,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (blk_queue_discard(q)) device->can_discard = 1; device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2124,6 +2115,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *devices; struct rcu_string *name; + u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; @@ -2145,9 +2137,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, } } - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { - ret = -ENOMEM; + device = btrfs_alloc_device(NULL, &devid, NULL); + if (IS_ERR(device)) { + ret = PTR_ERR(device); goto error; } @@ -2164,10 +2156,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->can_discard = 1; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - device->devid = BTRFS_DEV_REPLACE_DEVID; - spin_lock_init(&device->io_lock); device->generation = 0; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -5572,23 +5560,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &devid, dev_uuid); + if (IS_ERR(device)) return NULL; - list_add(&device->dev_list, - &fs_devices->devices); - device->devid = devid; - device->work.func = pending_bios_fn; + + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; - device->missing = 1; fs_devices->num_devices++; + + device->missing = 1; fs_devices->missing_devices++; - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_alloc_list); - memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; } +/** + * btrfs_alloc_device - allocate struct btrfs_device + * @fs_info: used only for generating a new devid, can be NULL if + * devid is provided (i.e. @devid != NULL). + * @devid: a pointer to devid for this device. If NULL a new devid + * is generated. + * @uuid: a pointer to UUID for this device. If NULL a new UUID + * is generated. + * + * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() + * on error. Returned struct is not linked onto any lists and can be + * destroyed with kfree() right away. + */ +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid) +{ + struct btrfs_device *dev; + u64 tmp; + + if (!devid && !fs_info) { + WARN_ON(1); + return ERR_PTR(-EINVAL); + } + + dev = __alloc_device(); + if (IS_ERR(dev)) + return dev; + + if (devid) + tmp = *devid; + else { + int ret; + + ret = find_next_devid(fs_info, &tmp); + if (ret) { + kfree(dev); + return ERR_PTR(ret); + } + } + dev->devid = tmp; + + if (uuid) + memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE); + else + generate_random_uuid(dev->uuid); + + dev->work.func = pending_bios_fn; + + return dev; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) -- cgit v1.2.2 From 2208a378f35fea7a1b778bf856edb971fb7ea9e8 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 12 Aug 2013 14:33:03 +0300 Subject: Btrfs: add alloc_fs_devices and switch to it In the spirit of btrfs_alloc_device, add a helper for allocating and doing some common initialization of btrfs_fs_devices struct. Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 71 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 4bc07d910e54..d38065eca7c9 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -63,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +static struct btrfs_fs_devices *__alloc_fs_devices(void) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); + if (!fs_devs) + return ERR_PTR(-ENOMEM); + + mutex_init(&fs_devs->device_list_mutex); + + INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->alloc_list); + INIT_LIST_HEAD(&fs_devs->list); + + return fs_devs; +} + +/** + * alloc_fs_devices - allocate struct btrfs_fs_devices + * @fsid: a pointer to UUID for this FS. If NULL a new UUID is + * generated. + * + * Return: a pointer to a new &struct btrfs_fs_devices on success; + * ERR_PTR() on error. Returned struct is not linked onto any lists and + * can be destroyed with kfree() right away. + */ +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = __alloc_fs_devices(); + if (IS_ERR(fs_devs)) + return fs_devs; + + if (fsid) + memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); + else + generate_random_uuid(fs_devs->fsid); + + return fs_devs; +} + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; @@ -417,16 +459,14 @@ static noinline int device_list_add(const char *path, fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return -ENOMEM; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); + fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return PTR_ERR(fs_devices); + list_add(&fs_devices->list, &fs_uuids); - memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - mutex_init(&fs_devices->device_list_mutex); + device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -482,18 +522,13 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *device; struct btrfs_device *orig_dev; - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return ERR_PTR(-ENOMEM); + fs_devices = alloc_fs_devices(orig->fsid); + if (IS_ERR(fs_devices)) + return fs_devices; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); - INIT_LIST_HEAD(&fs_devices->list); - mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; fs_devices->total_devices = orig->total_devices; - memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { @@ -1797,9 +1832,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) if (!fs_devices->seeding) return -EINVAL; - seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!seed_devices) - return -ENOMEM; + seed_devices = __alloc_fs_devices(); + if (IS_ERR(seed_devices)) + return PTR_ERR(seed_devices); old_devices = clone_fs_devices(fs_devices); if (IS_ERR(old_devices)) { -- cgit v1.2.2 From a1e8780a89ec13217fa1c8f86faf5546110e1402 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 12 Aug 2013 14:33:04 +0300 Subject: Btrfs: rollback btrfs_device fields on umount It turns out we don't properly rollback in-core btrfs_device state on umount. We zero out ->bdev, ->in_fs_metadata and that's about it. In particular, we don't zero out ->generation, and this can lead to us refusing a mount -- a non-NULL fs_devices->latest_bdev is essential, but btrfs_close_extra_devices will happily assign NULL to ->latest_bdev if the first device on the dev_list happens to be missing and consequently has no bdev attached. This happens because since commit a6b0d5c8 btrfs_close_extra_devices adjusts ->latest_bdev, and in doing that, relies on the ->generation. Fix this, and possibly other problems, by zeroing out everything except for what device_list_add sets, so that a mount right after insmod and 'btrfs dev scan' is no different from any later mount in this respect. Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d38065eca7c9..ed685991b2c1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -674,22 +674,19 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->can_discard) fs_devices->num_can_discard--; - new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); /* -ENOMEM */ - memcpy(new_device, device, sizeof(*new_device)); + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ /* Safe because we are under uuid_mutex */ if (device->name) { name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ + BUG_ON(!name); /* -ENOMEM */ rcu_assign_pointer(new_device->name, name); } - new_device->bdev = NULL; - new_device->writeable = 0; - new_device->in_fs_metadata = 0; - new_device->can_discard = 0; - spin_lock_init(&new_device->io_lock); + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; call_rcu(&device->rcu, free_device); } -- cgit v1.2.2 From c1c9ff7c94e83fae89a742df74db51156869bad5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:07 +0200 Subject: Btrfs: Remove superfluous casts from u64 to unsigned long long u64 is "unsigned long long" on all architectures now, so there's no need to cast it when formatting it using the "ll" length modifier. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index ed685991b2c1..1534a134750f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -914,8 +914,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, printk(KERN_INFO "device fsid %pU ", disk_super->fsid); } - printk(KERN_CONT "devid %llu transid %llu %s\n", - (unsigned long long)devid, (unsigned long long)transid, path); + printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); ret = device_list_add(path, disk_super, devid, fs_devices_ret); if (!ret && fs_devices_ret) @@ -3159,7 +3158,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->data.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "data profile %llu\n", - (unsigned long long)bctl->data.target); + bctl->data.target); ret = -EINVAL; goto out; } @@ -3168,7 +3167,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->meta.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "metadata profile %llu\n", - (unsigned long long)bctl->meta.target); + bctl->meta.target); ret = -EINVAL; goto out; } @@ -3177,7 +3176,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (bctl->sys.target & ~allowed))) { printk(KERN_ERR "btrfs: unable to start balance with target " "system profile %llu\n", - (unsigned long long)bctl->sys.target); + bctl->sys.target); ret = -EINVAL; goto out; } @@ -4652,8 +4651,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (!em) { btrfs_crit(fs_info, "unable to find logical %llu len %llu", - (unsigned long long)logical, - (unsigned long long)*length); + logical, *length); return -EINVAL; } @@ -5524,9 +5522,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, if (map_length < length) { btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", - (unsigned long long)logical, - (unsigned long long)length, - (unsigned long long)map_length); + logical, length, map_length); BUG(); } @@ -5846,8 +5842,7 @@ static int read_one_dev(struct btrfs_root *root, return -EIO; if (!device) { - btrfs_warn(root->fs_info, "devid %llu missing", - (unsigned long long)devid); + btrfs_warn(root->fs_info, "devid %llu missing", devid); device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; -- cgit v1.2.2 From 410ba3a291081d98f0e58a868e1305110d986903 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:11 +0200 Subject: Btrfs: Make btrfs_device_uuid() return unsigned long All callers of btrfs_device_uuid() cast its return type to unsigned long. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 1534a134750f..9ea500465256 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1424,7 +1424,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_bandwidth(leaf, dev_item, 0); btrfs_set_device_start_offset(leaf, dev_item, 0); - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); ptr = (unsigned long)btrfs_device_fsid(dev_item); write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); @@ -1924,8 +1924,7 @@ next_slot: dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), @@ -5760,7 +5759,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); device->is_tgtdev_for_dev_replace = 0; - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); } @@ -5823,8 +5822,7 @@ static int read_one_dev(struct btrfs_root *root, u8 dev_uuid[BTRFS_UUID_SIZE]; devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); read_extent_buffer(leaf, fs_uuid, (unsigned long)btrfs_device_fsid(dev_item), -- cgit v1.2.2 From 1473b24ee0cb43d4b48aa0c5f1b8417928910a4d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:12 +0200 Subject: Btrfs: Make btrfs_device_fsid() return unsigned long All callers of btrfs_device_fsid() cast its return type to unsigned long. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9ea500465256..8b8c4397165c 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1426,7 +1426,7 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); - ptr = (unsigned long)btrfs_device_fsid(dev_item); + ptr = btrfs_device_fsid(dev_item); write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -1926,8 +1926,7 @@ next_slot: devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); @@ -5824,8 +5823,7 @@ static int read_one_dev(struct btrfs_root *root, devid = btrfs_device_id(leaf, dev_item); read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { -- cgit v1.2.2 From 231e88f41027d90e9516d257d8085069b65686dd Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 20 Aug 2013 13:20:13 +0200 Subject: Btrfs: Make btrfs_dev_extent_chunk_tree_uuid() return unsigned long Internally, btrfs_dev_extent_chunk_tree_uuid() calculates an unsigned long, but casts it to a pointer, while all callers cast it to unsigned long again. Signed-off-by: Geert Uytterhoeven Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8b8c4397165c..34068b887f14 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1311,8 +1311,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), - BTRFS_UUID_SIZE); + btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE); btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); -- cgit v1.2.2 From d73068018419c5999f594a52998621947dc1f7d0 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Fri, 9 Aug 2013 15:41:36 +0100 Subject: Btrfs: fix race between removing a dev and writing sbs This change fixes an issue when removing a device and writing all super blocks run simultaneously. Here's the steps necessary for the issue to happen: 1) disk-io.c:write_all_supers() gets a number of N devices from the super_copy, so it will not panic if it fails to write super blocks for N - 1 devices; 2) Then it tries to acquire the device_list_mutex, but blocks because volumes.c:btrfs_rm_device() got it first; 3) btrfs_rm_device() removes the device from the list, then unlocks the mutex and after the unlock it updates the number of devices in super_copy to N - 1. 4) write_all_supers() finally acquires the mutex, iterates over all the devices in the list and gets N - 1 errors, that is, it failed to write super blocks to all the devices; 5) Because write_all_supers() thinks there are a total of N devices, it considers N - 1 errors to be ok, and therefore won't panic. So this change just makes sure that write_all_supers() reads the number of devices from super_copy after it acquires the device_list_mutex. Conversely, it changes btrfs_rm_device() to update the number of devices in super_copy before it releases the device list mutex. The code path to add a new device (volumes.c:btrfs_init_new_device), already has the right behaviour: it updates the number of devices in super_copy while holding the device_list_mutex. The only code path that doesn't lock the device list mutex before updating the number of devices in the super copy is disk-io.c:next_root_backup(), called by open_ctree() during mount time where concurrency issues can't happen. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 34068b887f14..3f1c2c200691 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1620,7 +1620,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) /* * the device list mutex makes sure that we don't change * the device list while someone else is writing out all - * the device supers. + * the device supers. Whoever is writing all supers, should + * lock the device list mutex before getting the number of + * devices in the super block (super_copy). Conversely, + * whoever updates the number of devices in the super block + * (super_copy) should hold the device list mutex. */ cur_devices = device->fs_devices; @@ -1644,10 +1648,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device->fs_devices->open_devices--; call_rcu(&device->rcu, free_device); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; -- cgit v1.2.2 From f71717502460c5cd6409b66835b17ae00af6d5f1 Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Mon, 12 Aug 2013 20:56:58 +0100 Subject: Btrfs: fix race conditions in BTRFS_IOC_FS_INFO ioctl The handler for the ioctl BTRFS_IOC_FS_INFO was reading the number of devices before acquiring the device list mutex. This could lead to inconsistent results because the update of the device list and the number of devices counter (amongst other counters related to the device list) are updated in volumes.c while holding the device list mutex - except for 2 places, one was volumes.c:btrfs_prepare_sprout() and the other was volumes.c:device_list_add(). For example, if we have 2 devices, with IDs 1 and 2 and then add a new device, with ID 3, and while adding the device is in progress an BTRFS_IOC_FS_INFO ioctl arrives, it could return a number of devices of 2 and a max dev id of 3. This would be incorrect. Also, this ioctl handler was reading the fsid while it can be updated concurrently. This can happen when while a new device is being added and the current filesystem is in seeding mode. Example: $ mkfs.btrfs -f /dev/sdb1 $ mkfs.btrfs -f /dev/sdb2 $ btrfstune -S 1 /dev/sdb1 $ mount /dev/sdb1 /mnt/test $ btrfs device add /dev/sdb2 /mnt/test If during the last step a BTRFS_IOC_FS_INFO ioctl was requested, it could read an fsid that was never valid (some bits part of the old fsid and others part of the new fsid). Also, it could read a number of devices that doesn't match the number of devices in the list and the max device id, as explained before. Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3f1c2c200691..74614e3b5ad3 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -492,10 +492,10 @@ static noinline int device_list_add(const char *path, mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); device->fs_devices = fs_devices; - fs_devices->num_devices++; } else if (!device->name || strcmp(device->name->str, path)) { name = rcu_string_strdup(path, GFP_NOFS); if (!name) @@ -1852,7 +1852,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); list_for_each_entry(device, &seed_devices->devices, dev_list) { @@ -1868,6 +1867,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) generate_random_uuid(fs_devices->fsid); memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; btrfs_set_super_flags(disk_super, super_flags); -- cgit v1.2.2 From 726551ebc79c0a41f66376463ebe8c84c89c1151 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 26 Aug 2013 13:45:53 -0400 Subject: Btrfs: adjust the fs_devices->missing count on unmount I noticed that if I tried to mount a file system with -o degraded after having done it once already we would fail to mount. This is because the fs_devices->missing count was getting bumped everytime we mounted, but not getting reset whenever we unmounted. To fix this we just drop the missing count as we're closing devices to make sure this doesn't happen. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 74614e3b5ad3..f42e41290124 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -673,6 +673,8 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->can_discard) fs_devices->num_can_discard--; + if (device->missing) + fs_devices->missing_devices--; new_device = btrfs_alloc_device(NULL, &device->devid, device->uuid); -- cgit v1.2.2 From 795a33213973cf0195198132162862fb05929425 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 27 Aug 2013 13:50:44 +0300 Subject: Btrfs: stop refusing the relocation of chunk 0 AFAICT chunk 0 is no longer special, and so it should be restriped just like every other chunk. One reason for this change is us refusing the relocation can lead to filesystems that can only be mounted ro, and never rw -- see the bugzilla [1] for details. The other reason is that device removal code is already doing this: it will happily relocate chunk 0 is part of shrinking the device. [1] https://bugzilla.kernel.org/show_bug.cgi?id=60594 Reported-by: Xavier Bassery Signed-off-by: Ilya Dryomov Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f42e41290124..603cce85764f 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2997,10 +2997,6 @@ again: if (found_key.objectid != key.objectid) break; - /* chunk zero is special */ - if (found_key.offset == 0) - break; - chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); if (!counting) { @@ -3036,6 +3032,8 @@ again: spin_unlock(&fs_info->balance_lock); } loop: + if (found_key.offset == 0) + break; key.offset = found_key.offset - 1; } -- cgit v1.2.2 From f45388f3874535062526ec071284d836f5a4b5de Mon Sep 17 00:00:00 2001 From: Filipe David Borba Manana Date: Wed, 28 Aug 2013 10:28:34 +0100 Subject: Btrfs: fix deadlock in uuid scan kthread If there's an ongoing transaction when the uuid scan kthread attempts to create one, the kthread will block, waiting for that transaction to finish while it's keeping locks on the tree root, and in turn the existing transaction is waiting for those locks to be free. The stack trace reported by the kernel follows. [36700.671601] INFO: task btrfs-uuid:15480 blocked for more than 120 seconds. [36700.671602] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [36700.671602] btrfs-uuid D 0000000000000000 0 15480 2 0x00000000 [36700.671604] ffff880710bd5b88 0000000000000046 ffff8803d36ba850 0000000000030000 [36700.671605] ffff8806d76dc530 ffff880710bd5fd8 ffff880710bd5fd8 ffff880710bd5fd8 [36700.671607] ffff8808098ac530 ffff8806d76dc530 ffff880710bd5b98 ffff8805e4508e40 [36700.671608] Call Trace: [36700.671610] [] schedule+0x29/0x70 [36700.671620] [] wait_current_trans.isra.33+0xbf/0x120 [btrfs] [36700.671623] [] ? add_wait_queue+0x60/0x60 [36700.671629] [] start_transaction+0x3d6/0x530 [btrfs] [36700.671636] [] ? btrfs_get_token_32+0x64/0xf0 [btrfs] [36700.671642] [] btrfs_start_transaction+0x1b/0x20 [btrfs] [36700.671649] [] btrfs_uuid_scan_kthread+0x211/0x3d0 [btrfs] [36700.671655] [] ? __btrfs_open_devices+0x2a0/0x2a0 [btrfs] [36700.671657] [] kthread+0xc0/0xd0 [36700.671659] [] ? flush_kthread_worker+0xb0/0xb0 [36700.671661] [] ret_from_fork+0x7c/0xb0 [36700.671662] [] ? flush_kthread_worker+0xb0/0xb0 [36700.671663] INFO: task btrfs:15481 blocked for more than 120 seconds. [36700.671664] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [36700.671665] btrfs D 0000000000000000 0 15481 15212 0x00000004 [36700.671666] ffff880248cbf4c8 0000000000000086 ffff8803d36ba700 ffff8801dbd5c280 [36700.671668] ffff880807815c40 ffff880248cbffd8 ffff880248cbffd8 ffff880248cbffd8 [36700.671669] ffff8805e86a0000 ffff880807815c40 ffff880248cbf4d8 ffff8801dbd5c280 [36700.671670] Call Trace: [36700.671672] [] schedule+0x29/0x70 [36700.671679] [] btrfs_tree_lock+0x6d/0x230 [btrfs] [36700.671680] [] ? add_wait_queue+0x60/0x60 [36700.671685] [] btrfs_search_slot+0x999/0xb00 [btrfs] [36700.671691] [] ? btrfs_lookup_first_ordered_extent+0x5e/0xb0 [btrfs] [36700.671698] [] __btrfs_write_out_cache+0x8c4/0xa80 [btrfs] [36700.671704] [] btrfs_write_out_cache+0xb2/0xf0 [btrfs] [36700.671710] [] ? free_extent_buffer+0x61/0xc0 [btrfs] [36700.671716] [] btrfs_write_dirty_block_groups+0x562/0x650 [btrfs] [36700.671723] [] commit_cowonly_roots+0x171/0x24b [btrfs] [36700.671729] [] btrfs_commit_transaction+0x4fe/0xa10 [btrfs] [36700.671735] [] create_subvol+0x5c0/0x636 [btrfs] [36700.671742] [] btrfs_mksubvol.isra.60+0x33f/0x3f0 [btrfs] [36700.671747] [] btrfs_ioctl_snap_create_transid+0x142/0x190 [btrfs] [36700.671752] [] ? btrfs_ioctl_snap_create+0x2c/0x80 [btrfs] [36700.671757] [] btrfs_ioctl_snap_create+0x5e/0x80 [btrfs] [36700.671759] [] ? handle_pte_fault+0x84/0x920 [36700.671764] [] btrfs_ioctl+0xf0b/0x1d00 [btrfs] [36700.671766] [] ? handle_mm_fault+0x210/0x310 [36700.671768] [] ? __do_page_fault+0x284/0x4e0 [36700.671770] [] do_vfs_ioctl+0x96/0x550 [36700.671772] [] ? __sb_end_write+0x33/0x70 [36700.671774] [] SyS_ioctl+0x91/0xb0 [36700.671775] [] system_call_fastpath+0x16/0x1b Signed-off-by: Filipe David Borba Manana Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'fs/btrfs/volumes.c') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 603cce85764f..0db165ee4340 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3463,7 +3463,7 @@ static int btrfs_uuid_scan_kthread(void *data) int slot; struct btrfs_root_item root_item; u32 item_size; - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; path = btrfs_alloc_path(); if (!path) { @@ -3501,13 +3501,18 @@ static int btrfs_uuid_scan_kthread(void *data) if (item_size < sizeof(root_item)) goto skip; - trans = NULL; read_extent_buffer(eb, &root_item, btrfs_item_ptr_offset(eb, slot), (int)sizeof(root_item)); if (btrfs_root_refs(&root_item) == 0) goto skip; - if (!btrfs_is_empty_uuid(root_item.uuid)) { + + if (!btrfs_is_empty_uuid(root_item.uuid) || + !btrfs_is_empty_uuid(root_item.received_uuid)) { + if (trans) + goto update_tree; + + btrfs_release_path(path); /* * 1 - subvol uuid item * 1 - received_subvol uuid item @@ -3517,6 +3522,12 @@ static int btrfs_uuid_scan_kthread(void *data) ret = PTR_ERR(trans); break; } + continue; + } else { + goto skip; + } +update_tree: + if (!btrfs_is_empty_uuid(root_item.uuid)) { ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, root_item.uuid, BTRFS_UUID_KEY_SUBVOL, @@ -3524,22 +3535,11 @@ static int btrfs_uuid_scan_kthread(void *data) if (ret < 0) { pr_warn("btrfs: uuid_tree_add failed %d\n", ret); - btrfs_end_transaction(trans, - fs_info->uuid_root); break; } } if (!btrfs_is_empty_uuid(root_item.received_uuid)) { - if (!trans) { - /* 1 - received_subvol uuid item */ - trans = btrfs_start_transaction( - fs_info->uuid_root, 1); - if (IS_ERR(trans)) { - ret = PTR_ERR(trans); - break; - } - } ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, root_item.received_uuid, BTRFS_UUID_KEY_RECEIVED_SUBVOL, @@ -3547,19 +3547,18 @@ static int btrfs_uuid_scan_kthread(void *data) if (ret < 0) { pr_warn("btrfs: uuid_tree_add failed %d\n", ret); - btrfs_end_transaction(trans, - fs_info->uuid_root); break; } } +skip: if (trans) { ret = btrfs_end_transaction(trans, fs_info->uuid_root); + trans = NULL; if (ret) break; } -skip: btrfs_release_path(path); if (key.offset < (u64)-1) { key.offset++; @@ -3578,6 +3577,8 @@ skip: out: btrfs_free_path(path); + if (trans && !IS_ERR(trans)) + btrfs_end_transaction(trans, fs_info->uuid_root); if (ret) pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); else -- cgit v1.2.2