From 920bbbfb05c9fce22e088d20eb9dcb8f96342de9 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:34:08 +0000 Subject: Btrfs: Rewrite btrfs_drop_extents Rewrite btrfs_drop_extents by using btrfs_duplicate_item, so we can avoid calling lock_extent within transaction. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index cdbb054102b9..a1a8db8c149d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1027,8 +1027,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, BUG_ON(!trans); /* punch hole in destination first */ - btrfs_drop_extents(trans, root, inode, off, off + len, - off + len, 0, &hint_byte, 1); + btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); /* clone data */ key.objectid = src->i_ino; -- cgit v1.2.2 From 2e4bfab97055aa6acdd0637913bd705c2d6506d6 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:37:02 +0000 Subject: Btrfs: Avoid orphan inodes cleanup during committing transaction btrfs_lookup_dentry may trigger orphan cleanup, so it's not good to call it while committing a transaction. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a1a8db8c149d..3d6b33871afe 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root, u64 objectid; u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; u64 index = 0; - unsigned long nr = 1; /* * 1 - inode item @@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root, d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); fail: - nr = trans->blocks_used; err = btrfs_commit_transaction(trans, root); if (err && !ret) ret = err; btrfs_unreserve_metadata_space(root, 6); - btrfs_btree_balance_dirty(root, nr); return ret; } static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, char *name, int namelen) { + struct inode *inode; struct btrfs_pending_snapshot *pending_snapshot; struct btrfs_trans_handle *trans; - int ret = 0; - int err; - unsigned long nr = 0; + int ret; if (!root->ref_cows) return -EINVAL; @@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, */ ret = btrfs_reserve_metadata_space(root, 6); if (ret) - goto fail_unlock; + goto fail; pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); if (!pending_snapshot) { ret = -ENOMEM; btrfs_unreserve_metadata_space(root, 6); - goto fail_unlock; + goto fail; } pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); if (!pending_snapshot->name) { ret = -ENOMEM; kfree(pending_snapshot); btrfs_unreserve_metadata_space(root, 6); - goto fail_unlock; + goto fail; } memcpy(pending_snapshot->name, name, namelen); pending_snapshot->name[namelen] = '\0'; @@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, pending_snapshot->root = root; list_add(&pending_snapshot->list, &trans->transaction->pending_snapshots); - err = btrfs_commit_transaction(trans, root); + ret = btrfs_commit_transaction(trans, root); + BUG_ON(ret); + btrfs_unreserve_metadata_space(root, 6); -fail_unlock: - btrfs_btree_balance_dirty(root, nr); + inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, 
dentry); + if (IS_ERR(inode)) { + ret = PTR_ERR(inode); + goto fail; + } + BUG_ON(!inode); + d_instantiate(dentry, inode); + ret = 0; +fail: return ret; } -- cgit v1.2.2 From 86b9f2eca5e0984145e3c7698a7cd6dd65c2a93f Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 12 Nov 2009 09:36:50 +0000 Subject: Btrfs: Fix per root used space accounting The bytes_used field in root item was originally planned to trace the amount of used data and tree blocks. But it never worked right since we can't trace freeing of data accurately. This patch changes it to only trace the amount of tree blocks. Signed-off-by: Yan Zheng Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3d6b33871afe..645a17927a8f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -289,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root, btrfs_set_root_generation(&root_item, trans->transid); btrfs_set_root_level(&root_item, 0); btrfs_set_root_refs(&root_item, 1); - btrfs_set_root_used(&root_item, 0); + btrfs_set_root_used(&root_item, leaf->len); btrfs_set_root_last_snapshot(&root_item, 0); memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress)); -- cgit v1.2.2 From 98d377a0894e6bcca44eafd4d2eee74e8af4db83 Mon Sep 17 00:00:00 2001 From: TARUISI Hiroaki Date: Wed, 18 Nov 2009 05:42:14 +0000 Subject: Btrfs: add a function to lookup a directory path by following backrefs This will be used by the inode lookup ioctl. Signed-off-by: TARUISI Hiroaki Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 645a17927a8f..ac2a28f4fa1a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -48,6 +48,7 @@ #include "print-tree.h" #include "volumes.h" #include "locking.h" +#include "ctree.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -743,6 +744,97 @@ out: return ret; } +/* + Search INODE_REFs to identify path name of 'dirid' directory + in a 'tree_id' tree. and sets path name to 'name'. 
+*/ +static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, + u64 tree_id, u64 dirid, char *name) +{ + struct btrfs_root *root; + struct btrfs_key key; + char *name_stack, *ptr; + int ret = -1; + int slot; + int len; + int total_len = 0; + struct btrfs_inode_ref *iref; + struct extent_buffer *l; + struct btrfs_path *path; + + if (dirid == BTRFS_FIRST_FREE_OBJECTID) { + name[0]='\0'; + return 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + name_stack = kzalloc(BTRFS_PATH_NAME_MAX+1, GFP_NOFS); + if (!name_stack) { + btrfs_free_path(path); + return -ENOMEM; + } + + ptr = &name_stack[BTRFS_PATH_NAME_MAX]; + + key.objectid = tree_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root_no_name(info, &key); + if (IS_ERR(root)) { + printk(KERN_ERR "could not find root %llu\n", tree_id); + return -ENOENT; + } + + key.objectid = dirid; + key.type = BTRFS_INODE_REF_KEY; + key.offset = 0; + + while(1) { + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + l = path->nodes[0]; + slot = path->slots[0]; + btrfs_item_key_to_cpu(l, &key, slot); + + if (ret > 0 && (key.objectid != dirid || + key.type != BTRFS_INODE_REF_KEY)) + goto out; + + iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); + len = btrfs_inode_ref_name_len(l, iref); + ptr -= len + 1; + total_len += len + 1; + if (ptr < name_stack) + goto out; + + *(ptr + len) = '/'; + read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); + + if (key.offset == BTRFS_FIRST_FREE_OBJECTID) + break; + + btrfs_release_path(root, path); + key.objectid = key.offset; + key.offset = 0; + dirid = key.objectid; + + } + if (ptr < name_stack) + goto out; + strncpy(name, ptr, total_len); + name[total_len]='\0'; + ret = 0; +out: + btrfs_free_path(path); + kfree(name_stack); + return ret; +} + static noinline int btrfs_ioctl_snap_destroy(struct file *file, void __user *arg) { -- cgit v1.2.2 From ac8e9819d71f907a0532b01b22c26b56bbbcbd21 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Sun, 28 Feb 2010 15:39:26 -0500 Subject: Btrfs: add search and inode lookup ioctls The search ioctl is a generic tool for doing btree searches from userland applications. The first user of the search ioctl is a subvolume listing feature, but we'll also use it to find new files in a subvolume. The search ioctl allows you to specify min and max keys to search for, along with min and max transid. It returns the items along with a header that includes the item key. 
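A minimal userland sketch of how such a search might be issued — for example walking the ROOT_ITEM keys in the tree of tree roots, the way a subvolume lister would. This is a hypothetical illustration, not part of the patch itself: it assumes the btrfs_ioctl_search_* structures and the key-type constants are visible to userland (e.g. via a copy of the btrfs ioctl and format headers), it needs CAP_SYS_ADMIN, and error handling is trimmed.

/*
 * Hypothetical example: list ROOT_ITEM keys via BTRFS_IOC_TREE_SEARCH.
 * The result buffer holds nr_items entries, each a search header
 * immediately followed by sh.len bytes of item data.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <btrfs/ioctl.h>	/* assumed userland copy of the ioctl ABI */

static void list_root_items(int fd)
{
	struct btrfs_ioctl_search_args args;
	struct btrfs_ioctl_search_header sh;
	unsigned long off = 0;
	unsigned int i;

	memset(&args, 0, sizeof(args));
	args.key.tree_id = 1;			/* tree of tree roots */
	args.key.min_type = BTRFS_ROOT_ITEM_KEY;	/* from the format headers */
	args.key.max_type = BTRFS_ROOT_ITEM_KEY;
	args.key.max_objectid = (__u64)-1;
	args.key.max_offset = (__u64)-1;
	args.key.max_transid = (__u64)-1;
	args.key.nr_items = 4096;		/* upper bound on results */

	if (ioctl(fd, BTRFS_IOC_TREE_SEARCH, &args) < 0)
		return;

	for (i = 0; i < args.key.nr_items; i++) {
		memcpy(&sh, args.buf + off, sizeof(sh));
		off += sizeof(sh) + sh.len;
		printf("root %llu gen %llu\n",
		       (unsigned long long)sh.objectid,
		       (unsigned long long)sh.transid);
	}
}

On return the kernel rewrites key.nr_items to the number of results actually copied, so the loop only walks what fit in the fixed-size buffer; a real caller would advance the min key past the last returned item and repeat to page through larger trees.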
Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 249 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 233 insertions(+), 16 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index ac2a28f4fa1a..c6044733198d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -744,16 +744,206 @@ out: return ret; } +static noinline int key_in_sk(struct btrfs_key *key, + struct btrfs_ioctl_search_key *sk) +{ + if (key->objectid < sk->min_objectid) + return 0; + if (key->offset < sk->min_offset) + return 0; + if (key->type < sk->min_type) + return 0; + if (key->objectid > sk->max_objectid) + return 0; + if (key->type > sk->max_type) + return 0; + if (key->offset > sk->max_offset) + return 0; + return 1; +} + +static noinline int copy_to_sk(struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *key, + struct btrfs_ioctl_search_key *sk, + char *buf, + unsigned long *sk_offset, + int *num_found) +{ + u64 found_transid; + struct extent_buffer *leaf; + struct btrfs_ioctl_search_header sh; + unsigned long item_off; + unsigned long item_len; + int nritems; + int i; + int slot; + int found = 0; + int ret = 0; + + leaf = path->nodes[0]; + slot = path->slots[0]; + nritems = btrfs_header_nritems(leaf); + + if (btrfs_header_generation(leaf) > sk->max_transid) { + i = nritems; + goto advance_key; + } + found_transid = btrfs_header_generation(leaf); + + for (i = slot; i < nritems; i++) { + item_off = btrfs_item_ptr_offset(leaf, i); + item_len = btrfs_item_size_nr(leaf, i); + + if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + item_len = 0; + + if (sizeof(sh) + item_len + *sk_offset > + BTRFS_SEARCH_ARGS_BUFSIZE) { + ret = 1; + goto overflow; + } + + btrfs_item_key_to_cpu(leaf, key, i); + if (!key_in_sk(key, sk)) + continue; + + sh.objectid = key->objectid; + sh.offset = key->offset; + sh.type = key->type; + sh.len = item_len; + sh.transid = found_transid; + + /* copy search result header */ + memcpy(buf + *sk_offset, &sh, sizeof(sh)); + *sk_offset += sizeof(sh); + + if (item_len) { + char *p = buf + *sk_offset; + /* copy the item */ + read_extent_buffer(leaf, p, + item_off, item_len); + *sk_offset += item_len; + found++; + } + + if (*num_found >= sk->nr_items) + break; + } +advance_key: + if (key->offset < (u64)-1) + key->offset++; + else if (key->type < (u64)-1) + key->type++; + else if (key->objectid < (u64)-1) + key->objectid++; + ret = 0; +overflow: + *num_found += found; + return ret; +} + +static noinline int search_ioctl(struct inode *inode, + struct btrfs_ioctl_search_args *args) +{ + struct btrfs_root *root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path; + struct btrfs_ioctl_search_key *sk = &args->key; + struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; + int ret; + int num_found = 0; + unsigned long sk_offset = 0; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + if (sk->tree_id == 0) { + /* search the root of the inode that was passed */ + root = BTRFS_I(inode)->root; + } else { + key.objectid = sk->tree_id; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + root = btrfs_read_fs_root_no_name(info, &key); + if (IS_ERR(root)) { + printk(KERN_ERR "could not find root %llu\n", + sk->tree_id); + btrfs_free_path(path); + return -ENOENT; + } + } + + key.objectid = sk->min_objectid; + key.type = sk->min_type; + key.offset = sk->min_offset; + + max_key.objectid = sk->max_objectid; + max_key.type = sk->max_type; + max_key.offset = sk->max_offset; + + path->keep_locks = 1; + 
+ while(1) { + ret = btrfs_search_forward(root, &key, &max_key, path, 0, + sk->min_transid); + if (ret != 0) { + if (ret > 0) + ret = 0; + goto err; + } + ret = copy_to_sk(root, path, &key, sk, args->buf, + &sk_offset, &num_found); + btrfs_release_path(root, path); + if (ret || num_found >= sk->nr_items) + break; + + } + ret = 0; +err: + sk->nr_items = num_found; + btrfs_free_path(path); + return ret; +} + +static noinline int btrfs_ioctl_tree_search(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_search_args *args; + struct inode *inode; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + args = kmalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + + if (copy_from_user(args, argp, sizeof(*args))) { + kfree(args); + return -EFAULT; + } + inode = fdentry(file)->d_inode; + ret = search_ioctl(inode, args); + if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + ret = -EFAULT; + kfree(args); + return ret; +} + /* - Search INODE_REFs to identify path name of 'dirid' directory - in a 'tree_id' tree. and sets path name to 'name'. -*/ + * Search INODE_REFs to identify path name of 'dirid' directory + * in a 'tree_id' tree. and sets path name to 'name'. + */ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, u64 tree_id, u64 dirid, char *name) { struct btrfs_root *root; struct btrfs_key key; - char *name_stack, *ptr; + char *ptr; int ret = -1; int slot; int len; @@ -771,13 +961,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, if (!path) return -ENOMEM; - name_stack = kzalloc(BTRFS_PATH_NAME_MAX+1, GFP_NOFS); - if (!name_stack) { - btrfs_free_path(path); - return -ENOMEM; - } - - ptr = &name_stack[BTRFS_PATH_NAME_MAX]; + ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; key.objectid = tree_id; key.type = BTRFS_ROOT_ITEM_KEY; @@ -802,14 +986,16 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, btrfs_item_key_to_cpu(l, &key, slot); if (ret > 0 && (key.objectid != dirid || - key.type != BTRFS_INODE_REF_KEY)) + key.type != BTRFS_INODE_REF_KEY)) { + ret = -ENOENT; goto out; + } iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); len = btrfs_inode_ref_name_len(l, iref); ptr -= len + 1; total_len += len + 1; - if (ptr < name_stack) + if (ptr < name) goto out; *(ptr + len) = '/'; @@ -824,14 +1010,41 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, dirid = key.objectid; } - if (ptr < name_stack) + if (ptr < name) goto out; - strncpy(name, ptr, total_len); + memcpy(name, ptr, total_len); name[total_len]='\0'; ret = 0; out: btrfs_free_path(path); - kfree(name_stack); + return ret; +} + +static noinline int btrfs_ioctl_ino_lookup(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_ino_lookup_args *args; + struct inode *inode; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + args = kmalloc(sizeof(*args), GFP_KERNEL); + if (copy_from_user(args, argp, sizeof(*args))) { + kfree(args); + return -EFAULT; + } + inode = fdentry(file)->d_inode; + + ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, + args->treeid, args->objectid, + args->name); + + if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + ret = -EFAULT; + + kfree(args); return ret; } @@ -1430,6 +1643,10 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_trans_start(file); case BTRFS_IOC_TRANS_END: return btrfs_ioctl_trans_end(file); + case BTRFS_IOC_TREE_SEARCH: + return btrfs_ioctl_tree_search(file, argp); + case BTRFS_IOC_INO_LOOKUP: + return 
btrfs_ioctl_ino_lookup(file, argp); case BTRFS_IOC_SYNC: btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; -- cgit v1.2.2 From 6ef5ed0d386be5c43ec66d6f2999919c0893558b Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 11 Dec 2009 21:11:29 +0000 Subject: Btrfs: add ioctl and incompat flag to set the default mount subvol This patch needs to go along with my previous patch. This lets us set the default dir item's location to whatever root we want to use as our default mounting subvol. With this we don't have to use mount -o subvol= anymore to mount a different subvol, we can just set the new one and it will just magically work. I've done some moderate testing with this, mostly just switching the default mount around, mounting subvols and the default mount at the same time and such, everything seems to work. Thanks, Older kernels would generally be able to still mount the filesystem with the default subvolume set, but it would result in a different volume being mounted, which could be an even more unpleasant suprise for users. So if you set your default subvolume, you can't go back to older kernels. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c6044733198d..7875a75315d0 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1579,6 +1579,79 @@ out: return ret; } +static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) +{ + struct inode *inode = fdentry(file)->d_inode; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_root *new_root; + struct btrfs_dir_item *di; + struct btrfs_trans_handle *trans; + struct btrfs_path *path; + struct btrfs_key location; + struct btrfs_disk_key disk_key; + struct btrfs_super_block *disk_super; + u64 features; + u64 objectid = 0; + u64 dir_id; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (copy_from_user(&objectid, argp, sizeof(objectid))) + return -EFAULT; + + if (!objectid) + objectid = root->root_key.objectid; + + location.objectid = objectid; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = (u64)-1; + + new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); + if (IS_ERR(new_root)) + return PTR_ERR(new_root); + + if (btrfs_root_refs(&new_root->root_item) == 0) + return -ENOENT; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->leave_spinning = 1; + + trans = btrfs_start_transaction(root, 1); + if (!trans) { + btrfs_free_path(path); + return -ENOMEM; + } + + dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); + di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, + dir_id, "default", 7, 1); + if (!di) { + btrfs_free_path(path); + btrfs_end_transaction(trans, root); + printk(KERN_ERR "Umm, you don't have the default dir item, " + "this isn't going to work\n"); + return -ENOENT; + } + + btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); + btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); + btrfs_mark_buffer_dirty(path->nodes[0]); + btrfs_free_path(path); + + disk_super = &root->fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) { + features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL; + btrfs_set_super_incompat_flags(disk_super, features); + } + btrfs_end_transaction(trans, root); + + return 0; +} + /* * there are many ways the trans_start and 
trans_end ioctls can lead * to deadlocks. They should only be used by applications that @@ -1625,6 +1698,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_snap_create(file, argp, 1); case BTRFS_IOC_SNAP_DESTROY: return btrfs_ioctl_snap_destroy(file, argp); + case BTRFS_IOC_DEFAULT_SUBVOL: + return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: return btrfs_ioctl_defrag(file); case BTRFS_IOC_RESIZE: -- cgit v1.2.2 From 940100a4a7b78b27e60a3e72340fb9b5397dcdb2 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 10 Mar 2010 10:52:59 -0500 Subject: Btrfs: be more selective in the defrag ioctl The btrfs defrag ioctl had some bugs around delalloc accounting, and it wasn't properly skipping pages that were not in the mapping. It wasn't properly clearing the page checked flag, which could make the writeback code ignore the page forever while pinning it as dirty. This commit fixes those problems and makes defrag a little smarter. It skips holes and it doesn't waste time defragging large extents. If a tiny extent comes before a very large extent, it will defrag both of them to make sure the tiny extent ends up next to something big. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 140 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 7875a75315d0..3a89cd77f307 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -475,6 +475,73 @@ out_unlock: return error; } +static int should_defrag_range(struct inode *inode, u64 start, u64 len, + u64 *last_len, u64 *skip, u64 *defrag_end) +{ + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; + struct extent_map *em = NULL; + struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; + int ret = 1; + + /* + * make sure that once we start defragging and extent, we keep on + * defragging it + */ + if (start < *defrag_end) + return 1; + + *skip = 0; + + /* + * hopefully we have this extent in the tree already, try without + * the full extent lock + */ + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, start, len); + read_unlock(&em_tree->lock); + + if (!em) { + /* get the big lock and read metadata off disk */ + lock_extent(io_tree, start, start + len - 1, GFP_NOFS); + em = btrfs_get_extent(inode, NULL, 0, start, len, 0); + unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); + + if (!em) + return 0; + } + + /* this will cover holes, and inline extents */ + if (em->block_start >= EXTENT_MAP_LAST_BYTE) + ret = 0; + + /* + * we hit a real extent, if it is big don't bother defragging it again + */ + if ((*last_len == 0 || *last_len >= 256 * 1024) && + em->len >= 256 * 1024) + ret = 0; + + /* + * last_len ends up being a counter of how many bytes we've defragged. + * every time we choose not to defrag an extent, we reset *last_len + * so that the next tiny extent will force a defrag. + * + * The end result of this is that tiny extents before a single big + * extent will force at least part of that big extent to be defragged. 
+ */ + if (ret) { + *last_len += len; + *defrag_end = extent_map_end(em); + } else { + *last_len = 0; + *skip = extent_map_end(em); + *defrag_end = 0; + } + + free_extent_map(em); + return ret; +} + static int btrfs_defrag_file(struct file *file) { struct inode *inode = fdentry(file)->d_inode; @@ -487,37 +554,86 @@ static int btrfs_defrag_file(struct file *file) unsigned long total_read = 0; u64 page_start; u64 page_end; + u64 last_len = 0; + u64 skip = 0; + u64 defrag_end = 0; unsigned long i; int ret; - ret = btrfs_check_data_free_space(root, inode, inode->i_size); - if (ret) - return -ENOSPC; + if (inode->i_size == 0) + return 0; + + last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + i = 0; + while (i <= last_index) { + if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, + PAGE_CACHE_SIZE, &last_len, &skip, + &defrag_end)) { + unsigned long next; + /* + * the should_defrag function tells us how much to skip + * bump our counter by the suggested amount + */ + next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + i = max(i + 1, next); + continue; + } - mutex_lock(&inode->i_mutex); - last_index = inode->i_size >> PAGE_CACHE_SHIFT; - for (i = 0; i <= last_index; i++) { if (total_read % ra_pages == 0) { btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, min(last_index, i + ra_pages - 1)); } total_read++; + mutex_lock(&inode->i_mutex); + + ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); + if (ret) { + ret = -ENOSPC; + break; + } + + ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); + if (ret) { + btrfs_free_reserved_data_space(root, inode, + PAGE_CACHE_SIZE); + ret = -ENOSPC; + break; + } again: + if (inode->i_size == 0 || + i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { + ret = 0; + goto err_reservations; + } + page = grab_cache_page(inode->i_mapping, i); if (!page) - goto out_unlock; + goto err_reservations; + if (!PageUptodate(page)) { btrfs_readpage(NULL, page); lock_page(page); if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); - goto out_unlock; + goto err_reservations; } } + if (page->mapping != inode->i_mapping) { + unlock_page(page); + page_cache_release(page); + goto again; + } + wait_on_page_writeback(page); + if (PageDirty(page)) { + btrfs_free_reserved_data_space(root, inode, + PAGE_CACHE_SIZE); + goto loop_unlock; + } + page_start = (u64)page->index << PAGE_CACHE_SHIFT; page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(io_tree, page_start, page_end, GFP_NOFS); @@ -538,18 +654,32 @@ again: * page if it is dirtied again later */ clear_page_dirty_for_io(page); + clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, + page_end, EXTENT_DIRTY | EXTENT_DELALLOC | + EXTENT_DO_ACCOUNTING, GFP_NOFS); btrfs_set_extent_delalloc(inode, page_start, page_end); + ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); + +loop_unlock: unlock_page(page); page_cache_release(page); + mutex_unlock(&inode->i_mutex); + + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); + i++; } -out_unlock: - mutex_unlock(&inode->i_mutex); return 0; + +err_reservations: + mutex_unlock(&inode->i_mutex); + btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); + btrfs_unreserve_metadata_for_delalloc(root, inode, 1); + return ret; } static noinline int btrfs_ioctl_resize(struct btrfs_root *root, -- cgit v1.2.2 From 1e701a3292e25a6c4939cad9f24951dc6b6ad853 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 11 Mar 2010 09:42:04 
-0500 Subject: Btrfs: add new defrag-range ioctl. The btrfs defrag ioctl was limited to doing the entire file. This commit adds a new interface that can defrag a specific range inside the file. It can also force compression on the file, allowing you to selectively compress individual files after they were created, even when mount -o compress isn't turned on. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 73 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 3a89cd77f307..d866b460c26e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -476,13 +476,18 @@ out_unlock: } static int should_defrag_range(struct inode *inode, u64 start, u64 len, - u64 *last_len, u64 *skip, u64 *defrag_end) + int thresh, u64 *last_len, u64 *skip, + u64 *defrag_end) { struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_map *em = NULL; struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 1; + + if (thresh == 0) + thresh = 256 * 1024; + /* * make sure that once we start defragging and extent, we keep on * defragging it @@ -517,8 +522,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, /* * we hit a real extent, if it is big don't bother defragging it again */ - if ((*last_len == 0 || *last_len >= 256 * 1024) && - em->len >= 256 * 1024) + if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh) ret = 0; /* @@ -542,7 +546,8 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, return ret; } -static int btrfs_defrag_file(struct file *file) +static int btrfs_defrag_file(struct file *file, + struct btrfs_ioctl_defrag_range_args *range) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; @@ -563,11 +568,19 @@ static int btrfs_defrag_file(struct file *file) if (inode->i_size == 0) return 0; - last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; - i = 0; + if (range->start + range->len > range->start) { + last_index = min_t(u64, inode->i_size - 1, + range->start + range->len - 1) >> PAGE_CACHE_SHIFT; + } else { + last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT; + } + + i = range->start >> PAGE_CACHE_SHIFT; while (i <= last_index) { if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, - PAGE_CACHE_SIZE, &last_len, &skip, + PAGE_CACHE_SIZE, + range->extent_thresh, + &last_len, &skip, &defrag_end)) { unsigned long next; /* @@ -585,6 +598,8 @@ static int btrfs_defrag_file(struct file *file) } total_read++; mutex_lock(&inode->i_mutex); + if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) + BTRFS_I(inode)->force_compress = 1; ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); if (ret) { @@ -673,6 +688,28 @@ loop_unlock: i++; } + if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) + filemap_flush(inode->i_mapping); + + if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + /* the filemap_flush will queue IO into the worker threads, but + * we have to make sure the IO is actually started and that + * ordered extents get created before we return + */ + atomic_inc(&root->fs_info->async_submit_draining); + while (atomic_read(&root->fs_info->nr_async_submits) || + atomic_read(&root->fs_info->async_delalloc_pages)) { + wait_event(root->fs_info->async_submit_wait, + (atomic_read(&root->fs_info->nr_async_submits) == 0 && + atomic_read(&root->fs_info->async_delalloc_pages) == 0)); + } + 
atomic_dec(&root->fs_info->async_submit_draining); + + mutex_lock(&inode->i_mutex); + BTRFS_I(inode)->force_compress = 0; + mutex_unlock(&inode->i_mutex); + } + return 0; err_reservations: @@ -1284,10 +1321,11 @@ out: return err; } -static int btrfs_ioctl_defrag(struct file *file) +static int btrfs_ioctl_defrag(struct file *file, void __user *argp) { struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_ioctl_defrag_range_args *range; int ret; ret = mnt_want_write(file->f_path.mnt); @@ -1308,7 +1346,30 @@ static int btrfs_ioctl_defrag(struct file *file) ret = -EINVAL; goto out; } - btrfs_defrag_file(file); + + range = kzalloc(sizeof(*range), GFP_KERNEL); + if (!range) { + ret = -ENOMEM; + goto out; + } + + if (argp) { + if (copy_from_user(range, argp, + sizeof(*range))) { + ret = -EFAULT; + kfree(range); + } + /* compression requires us to start the IO */ + if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { + range->flags |= BTRFS_DEFRAG_RANGE_START_IO; + range->extent_thresh = (u32)-1; + } + } else { + /* the rest are all set to zero by kzalloc */ + range->len = (u64)-1; + } + btrfs_defrag_file(file, range); + kfree(range); break; } out: @@ -1831,7 +1892,9 @@ long btrfs_ioctl(struct file *file, unsigned int case BTRFS_IOC_DEFAULT_SUBVOL: return btrfs_ioctl_default_subvol(file, argp); case BTRFS_IOC_DEFRAG: - return btrfs_ioctl_defrag(file); + return btrfs_ioctl_defrag(file, NULL); + case BTRFS_IOC_DEFRAG_RANGE: + return btrfs_ioctl_defrag(file, argp); case BTRFS_IOC_RESIZE: return btrfs_ioctl_resize(root, argp); case BTRFS_IOC_ADD_DEV: -- cgit v1.2.2 From 2ac55d41b5d6bf49e76bc85db5431240617e2f8f Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 3 Feb 2010 19:33:23 +0000 Subject: Btrfs: cache the extent state everywhere we possibly can V2 This patch just goes through and fixes everybody that does lock_extent() blah unlock_extent() to use lock_extent_bits() blah unlock_extent_cached() and pass around a extent_state so we only have to do the searches once per function. This gives me about a 3 mb/s boots on my random write test. I have not converted some things, like the relocation and ioctl's, since they aren't heavily used and the relocation stuff is in the middle of being re-written. I also changed the clear_extent_bit() to only unset the cached state if we are clearing EXTENT_LOCKED and related stuff, so we can do things like this lock_extent_bits() clear delalloc bits unlock_extent_cached() without losing our cached state. I tested this thoroughly and turned on LEAK_DEBUG to make sure we weren't leaking extent states, everything worked out fine. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index d866b460c26e..9aaba6e472d3 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -673,7 +673,7 @@ again: page_end, EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, GFP_NOFS); - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); ClearPageChecked(page); set_page_dirty(page); unlock_extent(io_tree, page_start, page_end, GFP_NOFS); -- cgit v1.2.2 From 1406e4327be3a533a2b18582f715ce2cfbcf6804 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 13 Jan 2010 18:19:06 +0000 Subject: Btrfs: add a "df" ioctl for btrfs df is a very loaded question in btrfs. 
This gives us a way to get the per-space usage information so we can tell exactly what is in use where. This will help us figure out ENOSPC problems, and help users better understand where their disk space is going. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9aaba6e472d3..9213d39d36cc 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1843,6 +1843,49 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) return 0; } +long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) +{ + struct btrfs_ioctl_space_args space_args; + struct btrfs_ioctl_space_info space; + struct btrfs_ioctl_space_info *dest; + struct btrfs_space_info *info; + int ret = 0; + + if (copy_from_user(&space_args, + (struct btrfs_ioctl_space_args __user *)arg, + sizeof(space_args))) + return -EFAULT; + + space_args.total_spaces = 0; + dest = (struct btrfs_ioctl_space_info *) + (arg + sizeof(struct btrfs_ioctl_space_args)); + + rcu_read_lock(); + list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { + if (!space_args.space_slots) { + space_args.total_spaces++; + continue; + } + if (space_args.total_spaces >= space_args.space_slots) + break; + space.flags = info->flags; + space.total_bytes = info->total_bytes; + space.used_bytes = info->bytes_used; + if (copy_to_user(dest, &space, sizeof(space))) { + ret = -EFAULT; + break; + } + dest++; + space_args.total_spaces++; + } + rcu_read_unlock(); + + if (copy_to_user(arg, &space_args, sizeof(space_args))) + ret = -EFAULT; + + return ret; +} + /* * there are many ways the trans_start and trans_end ioctls can lead * to deadlocks. They should only be used by applications that @@ -1915,6 +1958,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_tree_search(file, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(file, argp); + case BTRFS_IOC_SPACE_INFO: + return btrfs_ioctl_space_info(root, argp); case BTRFS_IOC_SYNC: btrfs_sync_fs(file->f_dentry->d_sb, 1); return 0; -- cgit v1.2.2 From 91748467a5c5884e44ad5cf58630c0c28474f1f6 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 28 Feb 2010 10:59:11 +0000 Subject: btrfs: use memparse Use memparse() instead of its own private implementation. Signed-off-by: Akinobu Mita Cc: Chris Mason Cc: linux-btrfs@vger.kernel.org Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9213d39d36cc..363e209679b6 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -776,7 +776,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, mod = 1; sizestr++; } - new_size = btrfs_parse_size(sizestr); + new_size = memparse(sizestr, NULL); if (new_size == 0) { ret = -EINVAL; goto out_unlock; -- cgit v1.2.2 From 854d2c3531e6d32e76b94ca5e096ea54c7497e40 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 16 Mar 2010 00:02:25 +0000 Subject: Btrfs: fix search_ioctl key advance key->type is u8, not u64. 
fs/btrfs/ioctl.c: In function 'copy_to_sk': fs/btrfs/ioctl.c:1024: warning: comparison is always true due to limited range of data type Signed-off-by: Sage Weil Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 363e209679b6..38a68863390a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1000,7 +1000,7 @@ static noinline int copy_to_sk(struct btrfs_root *root, advance_key: if (key->offset < (u64)-1) key->offset++; - else if (key->type < (u64)-1) + else if (key->type < (u8)-1) key->type++; else if (key->objectid < (u64)-1) key->objectid++; -- cgit v1.2.2 From 7fde62bffb576d384ea49a3aed3403d5609ee5bc Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Tue, 16 Mar 2010 15:40:10 -0400 Subject: Btrfs: buffer results in the space_info ioctl The space_info ioctl was using copy_to_user inside rcu_read_lock. This commit changes things to copy into a buffer first and then dump the result down to userland. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 57 +++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 46 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 38a68863390a..4329610b141b 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1848,39 +1848,74 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) struct btrfs_ioctl_space_args space_args; struct btrfs_ioctl_space_info space; struct btrfs_ioctl_space_info *dest; + struct btrfs_ioctl_space_info *dest_orig; + struct btrfs_ioctl_space_info *user_dest; struct btrfs_space_info *info; + int alloc_size; int ret = 0; + int slot_count = 0; if (copy_from_user(&space_args, (struct btrfs_ioctl_space_args __user *)arg, sizeof(space_args))) return -EFAULT; + /* first we count slots */ + rcu_read_lock(); + list_for_each_entry_rcu(info, &root->fs_info->space_info, list) + slot_count++; + rcu_read_unlock(); + + /* space_slots == 0 means they are asking for a count */ + if (space_args.space_slots == 0) { + space_args.total_spaces = slot_count; + goto out; + } + alloc_size = sizeof(*dest) * slot_count; + /* we generally have at most 6 or so space infos, one for each raid + * level. 
So, a whole page should be more than enough for everyone + */ + if (alloc_size > PAGE_CACHE_SIZE) + return -ENOMEM; + space_args.total_spaces = 0; - dest = (struct btrfs_ioctl_space_info *) - (arg + sizeof(struct btrfs_ioctl_space_args)); + dest = kmalloc(alloc_size, GFP_NOFS); + if (!dest) + return -ENOMEM; + dest_orig = dest; + /* now we have a buffer to copy into */ rcu_read_lock(); list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { - if (!space_args.space_slots) { - space_args.total_spaces++; - continue; - } + /* make sure we don't copy more than we allocated + * in our buffer + */ + if (slot_count == 0) + break; + slot_count--; + + /* make sure userland has enough room in their buffer */ if (space_args.total_spaces >= space_args.space_slots) break; + space.flags = info->flags; space.total_bytes = info->total_bytes; space.used_bytes = info->bytes_used; - if (copy_to_user(dest, &space, sizeof(space))) { - ret = -EFAULT; - break; - } + memcpy(dest, &space, sizeof(space)); dest++; space_args.total_spaces++; } rcu_read_unlock(); - if (copy_to_user(arg, &space_args, sizeof(space_args))) + user_dest = (struct btrfs_ioctl_space_info *) + (arg + sizeof(struct btrfs_ioctl_space_args)); + + if (copy_to_user(user_dest, dest_orig, alloc_size)) + ret = -EFAULT; + + kfree(dest_orig); +out: + if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) ret = -EFAULT; return ret; -- cgit v1.2.2 From abc6e1341bda974e2d0eddb75f57a20ac18e9b33 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:10:08 -0400 Subject: Btrfs: fix key checks and advance in the search ioctl The search ioctl was working well for finding tree roots, but using it for generic searches requires a few changes to how the keys are advanced. This treats the search control min fields for objectid, type and offset more like a key, where we drop the offset to zero once we bump the type, etc. The downside of this is that we are changing the min_type and min_offset fields during the search, and so the ioctl caller needs extra checks to make sure the keys in the result are the ones it wanted. This also changes key_in_sk to use btrfs_comp_cpu_keys, just to make things more readable. 
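For illustration, a hypothetical caller-side filter (not part of this patch; it assumes the btrfs_ioctl_search_* definitions are available to userland) would walk the returned headers and ignore any key type the caller did not actually ask for:

/*
 * Hypothetical caller-side check, not part of this patch: because the
 * advancing min key can step through intermediate types and offsets,
 * the caller re-checks each returned header before trusting the item.
 */
#include <string.h>
#include <btrfs/ioctl.h>	/* assumed userland copy of the ioctl ABI */

static int count_items_of_type(struct btrfs_ioctl_search_args *args,
			       unsigned int wanted_type)
{
	struct btrfs_ioctl_search_header sh;
	unsigned long off = 0;
	unsigned int i;
	int found = 0;

	for (i = 0; i < args->key.nr_items; i++) {
		memcpy(&sh, args->buf + off, sizeof(sh));
		off += sizeof(sh) + sh.len;	/* item bytes follow the header */
		if (sh.type == wanted_type)
			found++;
	}
	return found;
}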
Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 4329610b141b..291aa51ff420 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -914,17 +914,23 @@ out: static noinline int key_in_sk(struct btrfs_key *key, struct btrfs_ioctl_search_key *sk) { - if (key->objectid < sk->min_objectid) - return 0; - if (key->offset < sk->min_offset) - return 0; - if (key->type < sk->min_type) - return 0; - if (key->objectid > sk->max_objectid) - return 0; - if (key->type > sk->max_type) + struct btrfs_key test; + int ret; + + test.objectid = sk->min_objectid; + test.type = sk->min_type; + test.offset = sk->min_offset; + + ret = btrfs_comp_cpu_keys(key, &test); + if (ret < 0) return 0; - if (key->offset > sk->max_offset) + + test.objectid = sk->max_objectid; + test.type = sk->max_type; + test.offset = sk->max_offset; + + ret = btrfs_comp_cpu_keys(key, &test); + if (ret > 0) return 0; return 1; } @@ -998,13 +1004,18 @@ static noinline int copy_to_sk(struct btrfs_root *root, break; } advance_key: - if (key->offset < (u64)-1) + ret = 0; + if (key->offset < (u64)-1 && key->offset < sk->max_offset) key->offset++; - else if (key->type < (u8)-1) + else if (key->type < (u8)-1 && key->type < sk->max_type) { + key->offset = 0; key->type++; - else if (key->objectid < (u64)-1) + } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) { + key->offset = 0; + key->type = 0; key->objectid++; - ret = 0; + } else + ret = 1; overflow: *num_found += found; return ret; -- cgit v1.2.2 From 90fdde147fd32d18a20be5b498d5f26e56cca8a3 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:14:54 -0400 Subject: Btrfs: return keys for large items to the search ioctl The search ioctl was skipping large items entirely (ones that are too big for the results buffer). This changes things to at least copy the item header so that we can send information about the item back to userland. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 291aa51ff420..fd757f576956 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -997,8 +997,8 @@ static noinline int copy_to_sk(struct btrfs_root *root, read_extent_buffer(leaf, p, item_off, item_len); *sk_offset += item_len; - found++; } + found++; if (*num_found >= sk->nr_items) break; -- cgit v1.2.2 From 1b53ac4d1b75b23bdc2b54ace787b8f718a987ef Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:17:05 -0400 Subject: Btrfs: allow treeid==0 in the inode lookup ioctl When a root id of 0 is sent to the inode lookup ioctl, it will use the root of the file we're ioctling and pass the root id back to userland along with the results. This allows userland to do searches based on that root later on. 
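A hypothetical userland sketch of that usage (not part of this patch): pass treeid = 0 together with the inode number of a directory, and the kernel fills in both the owning tree id and the path from the subvolume root. It assumes the btrfs_ioctl_ino_lookup_args definition is available to userland, requires CAP_SYS_ADMIN, and trims error handling.

/*
 * Hypothetical example: resolve a directory's path relative to its
 * subvolume root and learn which tree the open fd belongs to.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/types.h>
#include <btrfs/ioctl.h>	/* assumed userland copy of the ioctl ABI */

static void lookup_dir(int fd, __u64 dir_ino)
{
	struct btrfs_ioctl_ino_lookup_args args;

	memset(&args, 0, sizeof(args));
	args.treeid = 0;		/* 0: use the root of the fd being ioctl'd */
	args.objectid = dir_ino;	/* inode number of a directory */

	if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) == 0)
		printf("tree %llu path %s\n",
		       (unsigned long long)args.treeid, args.name);
}

The returned treeid can then be fed straight into the tree_id field of a subsequent BTRFS_IOC_TREE_SEARCH call.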
Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index fd757f576956..1e462de6556e 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1215,6 +1215,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, } inode = fdentry(file)->d_inode; + if (args->treeid == 0) + args->treeid = BTRFS_I(inode)->root->root_key.objectid; + ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, args->treeid, args->objectid, args->name); -- cgit v1.2.2 From 8ad6fcab564c5bc956bdc3dfa440ab152b6e780f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 18 Mar 2010 12:23:10 -0400 Subject: Btrfs: fix the inode ref searches done by btrfs_search_path_in_tree This is used by the inode lookup ioctl to follow all the backrefs up to the subvol root. But the search being done would sometimes land one past the last item in the leaf instead of finding the backref. This changes the search to look for the highest possible backref and hop back one item. It also fixes a leaked path on failure to find the root. Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1e462de6556e..2845c6ceecd2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1147,12 +1147,13 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, root = btrfs_read_fs_root_no_name(info, &key); if (IS_ERR(root)) { printk(KERN_ERR "could not find root %llu\n", tree_id); - return -ENOENT; + ret = -ENOENT; + goto out; } key.objectid = dirid; key.type = BTRFS_INODE_REF_KEY; - key.offset = 0; + key.offset = (u64)-1; while(1) { ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); @@ -1161,6 +1162,8 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, l = path->nodes[0]; slot = path->slots[0]; + if (ret > 0 && slot > 0) + slot--; btrfs_item_key_to_cpu(l, &key, slot); if (ret > 0 && (key.objectid != dirid || @@ -1184,7 +1187,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, btrfs_release_path(root, path); key.objectid = key.offset; - key.offset = 0; + key.offset = (u64)-1; dirid = key.objectid; } -- cgit v1.2.2 From 5a0e3ad6af8660be21ca98a971cd00f331318c05 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 24 Mar 2010 17:04:11 +0900 Subject: include cleanup: Update gfp.h and slab.h includes to prepare for breaking implicit slab.h inclusion from percpu.h percpu.h is included by sched.h and module.h and thus ends up being included when building most .c files. percpu.h includes slab.h which in turn includes gfp.h making everything defined by the two files universally available and complicating inclusion dependencies. percpu.h -> slab.h dependency is about to be removed. Prepare for this change by updating users of gfp and slab facilities include those headers directly instead of assuming availability. As this conversion needs to touch large number of source files, the following script is used as the basis of conversion. http://userweb.kernel.org/~tj/misc/slabh-sweep.py The script does the followings. * Scan files for gfp and slab usages and update includes such that only the necessary includes are there. ie. if only gfp is used, gfp.h, if slab is used, slab.h. * When the script inserts a new include, it looks at the include blocks and try to put the new include such that its order conforms to its surrounding. 
It's put in the include block which contains core kernel includes, in the same order that the rest are ordered - alphabetical, Christmas tree, rev-Xmas-tree or at the end if there doesn't seem to be any matching order. * If the script can't find a place to put a new include (mostly because the file doesn't have fitting include block), it prints out an error message indicating which .h file needs to be added to the file. The conversion was done in the following steps. 1. The initial automatic conversion of all .c files updated slightly over 4000 files, deleting around 700 includes and adding ~480 gfp.h and ~3000 slab.h inclusions. The script emitted errors for ~400 files. 2. Each error was manually checked. Some didn't need the inclusion, some needed manual addition while adding it to implementation .h or embedding .c file was more appropriate for others. This step added inclusions to around 150 files. 3. The script was run again and the output was compared to the edits from #2 to make sure no file was left behind. 4. Several build tests were done and a couple of problems were fixed. e.g. lib/decompress_*.c used malloc/free() wrappers around slab APIs requiring slab.h to be added manually. 5. The script was run on all .h files but without automatically editing them as sprinkling gfp.h and slab.h inclusions around .h files could easily lead to inclusion dependency hell. Most gfp.h inclusion directives were ignored as stuff from gfp.h was usually wildly available and often used in preprocessor macros. Each slab.h inclusion directive was examined and added manually as necessary. 6. percpu.h was updated not to include slab.h. 7. Build test were done on the following configurations and failures were fixed. CONFIG_GCOV_KERNEL was turned off for all tests (as my distributed build env didn't work with gcov compiles) and a few more options had to be turned off depending on archs to make things build (like ipr on powerpc/64 which failed due to missing writeq). * x86 and x86_64 UP and SMP allmodconfig and a custom test config. * powerpc and powerpc64 SMP allmodconfig * sparc and sparc64 SMP allmodconfig * ia64 SMP allmodconfig * s390 SMP allmodconfig * alpha SMP allmodconfig * um on x86_64 SMP allmodconfig 8. percpu.h modifications were reverted so that it could be applied as a separate patch and serve as bisection point. Given the fact that I had only a couple of failures from tests on step 6, I'm fairly confident about the coverage of this conversion patch. If there is a breakage, it's likely to be something in one of the arch headers which should be easily discoverable easily on most builds of the specific arch. Signed-off-by: Tejun Heo Guess-its-ok-by: Christoph Lameter Cc: Ingo Molnar Cc: Lee Schermerhorn --- fs/btrfs/ioctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2845c6ceecd2..9b3d73a0fdc8 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "compat.h" #include "ctree.h" #include "disk-io.h" -- cgit v1.2.2 From 2f3014fc2ab1e25c36531e19164c48182c168995 Mon Sep 17 00:00:00 2001 From: Andrea Gelmini Date: Thu, 25 Mar 2010 17:22:45 +0000 Subject: Btrfs: remove duplicate include in ioctl.c fs/btrfs/ioctl.c: ctree.h is included more than once. 
Signed-off-by: Andrea Gelmini Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2845c6ceecd2..5c9f8b30608c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -48,7 +48,6 @@ #include "print-tree.h" #include "volumes.h" #include "locking.h" -#include "ctree.h" /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) -- cgit v1.2.2 From 683be16eb6e19a35aca2473668652259ed074094 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:24:48 +0000 Subject: Btrfs: dereferencing freed memory The original code dereferenced range on the next line. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 5c9f8b30608c..874d36e5f167 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1374,6 +1374,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) sizeof(*range))) { ret = -EFAULT; kfree(range); + goto out; } /* compression requires us to start the IO */ if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { -- cgit v1.2.2 From c2b96929e2ca6914cf4a66cd8fe2a34c4a98277f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:24:15 +0000 Subject: Btrfs: handle kmalloc() failure in inode lookup ioctl Return -ENOMEM if kmalloc() fails. Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 874d36e5f167..74d89133f768 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1211,6 +1211,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, return -EPERM; args = kmalloc(sizeof(*args), GFP_KERNEL); + if (!args) + return -ENOMEM; + if (copy_from_user(args, argp, sizeof(*args))) { kfree(args); return -EFAULT; -- cgit v1.2.2 From 6cf8bfbf5e88edfb09a2bf0631a067060f534592 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 20 Mar 2010 11:22:10 +0000 Subject: Btrfs: check btrfs_get_extent return for IS_ERR() btrfs_get_extent() never returns NULL, only a valid pointer or ERR_PTR() Signed-off-by: Dan Carpenter Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 74d89133f768..2b7dd88fc54f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -510,7 +510,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, em = btrfs_get_extent(inode, NULL, 0, start, len, 0); unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); - if (!em) + if (IS_ERR(em)) return 0; } -- cgit v1.2.2 From 5dc6416414fb3ec6e2825fd4d20c8bf1d7fe0395 Mon Sep 17 00:00:00 2001 From: Dan Rosenberg Date: Sat, 15 May 2010 11:27:37 -0400 Subject: Btrfs: check for read permission on src file in the clone ioctl The existing code would have allowed you to clone a file that was only open for writing Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/ioctl.c') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2b7dd88fc54f..9de6c3a75bfb 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1480,12 +1480,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long 
srcfd, ret = -EBADF; goto out_drop_write; } + src = src_file->f_dentry->d_inode; ret = -EINVAL; if (src == inode) goto out_fput; + /* the src must be open for reading */ + if (!(src_file->f_mode & FMODE_READ)) + goto out_fput; + ret = -EISDIR; if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) goto out_fput; -- cgit v1.2.2