aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sage Weil <sage@newdream.net> 2008-06-10 10:07:39 -0400
committer: Chris Mason <chris.mason@oracle.com> 2008-09-25 11:04:03 -0400
commit: 6bf13c0cc833bf5ba013d6aa60379484bf48c4e6 (patch)
tree: ae2b54a1a26a89fece49f6b6d6dff8448efab542
parent: eba12c7bfcb4855fc757357e5e5b0b9a474499ba (diff)
Btrfs: transaction ioctls
These ioctls let a user application hold a transaction open while it performs a series of operations. A final ioctl does a sync on the fs (closing the current transaction). This is the main requirement for Ceph's OSD to be able to keep the data it's storing in a btrfs volume consistent, and AFAICS it works just fine. The application would do something like

    fd = ::open("some/file", O_RDONLY);
    ::ioctl(fd, BTRFS_IOC_TRANS_START);
    /* do a bunch of stuff */
    ::ioctl(fd, BTRFS_IOC_TRANS_END);

or just

    ::close(fd);

And to ensure it commits to disk,

    ::ioctl(fd, BTRFS_IOC_SYNC);

When a transaction is held open, the trans_handle is attached to the struct file (via private_data) so that it will get cleaned up if the process dies unexpectedly. A held transaction is also ended on fsync() to avoid a deadlock.

A misbehaving application could also deliberately hold a transaction open, effectively locking up the FS, so it may make sense to restrict something like this to root or something.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--  fs/btrfs/ctree.h   4
-rw-r--r--  fs/btrfs/file.c    7
-rw-r--r--  fs/btrfs/inode.c  63
-rw-r--r--  fs/btrfs/ioctl.h   9
-rw-r--r--  fs/btrfs/super.c   2
5 files changed, 83 insertions, 2 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ad4eacca7f59..1dcf4fb5b688 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1575,6 +1575,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb);
1575void btrfs_destroy_inode(struct inode *inode); 1575void btrfs_destroy_inode(struct inode *inode);
1576int btrfs_init_cachep(void); 1576int btrfs_init_cachep(void);
1577void btrfs_destroy_cachep(void); 1577void btrfs_destroy_cachep(void);
1578long btrfs_ioctl_trans_end(struct file *file);
1578long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 1579long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1579struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, 1580struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1580 struct btrfs_root *root); 1581 struct btrfs_root *root);
@@ -1595,6 +1596,8 @@ extern struct file_operations btrfs_file_operations;
1595int btrfs_drop_extents(struct btrfs_trans_handle *trans, 1596int btrfs_drop_extents(struct btrfs_trans_handle *trans,
1596 struct btrfs_root *root, struct inode *inode, 1597 struct btrfs_root *root, struct inode *inode,
1597 u64 start, u64 end, u64 inline_limit, u64 *hint_block); 1598 u64 start, u64 end, u64 inline_limit, u64 *hint_block);
1599int btrfs_release_file(struct inode *inode, struct file *file);
1600
1598/* tree-defrag.c */ 1601/* tree-defrag.c */
1599int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 1602int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
1600 struct btrfs_root *root, int cache_only); 1603 struct btrfs_root *root, int cache_only);
@@ -1615,4 +1618,5 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans,
1615u64 btrfs_parse_size(char *str); 1618u64 btrfs_parse_size(char *str);
1616int btrfs_parse_options(char *options, struct btrfs_root *root, 1619int btrfs_parse_options(char *options, struct btrfs_root *root,
1617 char **subvol_name); 1620 char **subvol_name);
1621int btrfs_sync_fs(struct super_block *sb, int wait);
1618#endif 1622#endif
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c4fa4664a456..73c6d085bd90 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -978,9 +978,11 @@ out_nolock:
978 return num_written ? num_written : err; 978 return num_written ? num_written : err;
979} 979}
980 980
981static int btrfs_release_file (struct inode * inode, struct file * filp) 981int btrfs_release_file(struct inode * inode, struct file * filp)
982{ 982{
983 btrfs_del_ordered_inode(inode); 983 btrfs_del_ordered_inode(inode);
984 if (filp->private_data)
985 btrfs_ioctl_trans_end(filp);
984 return 0; 986 return 0;
985} 987}
986 988
@@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file,
1011 /* 1013 /*
1012 * ok we haven't committed the transaction yet, lets do a commit 1014 * ok we haven't committed the transaction yet, lets do a commit
1013 */ 1015 */
1016 if (file->private_data)
1017 btrfs_ioctl_trans_end(file);
1018
1014 trans = btrfs_start_transaction(root, 1); 1019 trans = btrfs_start_transaction(root, 1);
1015 if (!trans) { 1020 if (!trans) {
1016 ret = -ENOMEM; 1021 ret = -ENOMEM;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 31aa4ba06fce..0f14697becef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3336,6 +3336,61 @@ out_fput:
3336 return ret; 3336 return ret;
3337} 3337}
3338 3338
3339/*
3340 * there are many ways the trans_start and trans_end ioctls can lead
3341 * to deadlocks. They should only be used by applications that
3342 * basically own the machine, and have a very in depth understanding
3343 * of all the possible deadlocks and enospc problems.
3344 */
3345long btrfs_ioctl_trans_start(struct file *file)
3346{
3347 struct inode *inode = fdentry(file)->d_inode;
3348 struct btrfs_root *root = BTRFS_I(inode)->root;
3349 struct btrfs_trans_handle *trans;
3350 int ret = 0;
3351
3352 mutex_lock(&root->fs_info->fs_mutex);
3353 if (file->private_data) {
3354 ret = -EINPROGRESS;
3355 goto out;
3356 }
3357 trans = btrfs_start_transaction(root, 0);
3358 if (trans)
3359 file->private_data = trans;
3360 else
3361 ret = -ENOMEM;
3362 /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/
3363out:
3364 mutex_unlock(&root->fs_info->fs_mutex);
3365 return ret;
3366}
3367
3368/*
3369 * there are many ways the trans_start and trans_end ioctls can lead
3370 * to deadlocks. They should only be used by applications that
3371 * basically own the machine, and have a very in depth understanding
3372 * of all the possible deadlocks and enospc problems.
3373 */
3374long btrfs_ioctl_trans_end(struct file *file)
3375{
3376 struct inode *inode = fdentry(file)->d_inode;
3377 struct btrfs_root *root = BTRFS_I(inode)->root;
3378 struct btrfs_trans_handle *trans;
3379 int ret = 0;
3380
3381 mutex_lock(&root->fs_info->fs_mutex);
3382 trans = file->private_data;
3383 if (!trans) {
3384 ret = -EINVAL;
3385 goto out;
3386 }
3387 btrfs_end_transaction(trans, root);
3388 file->private_data = 0;
3389out:
3390 mutex_unlock(&root->fs_info->fs_mutex);
3391 return ret;
3392}
3393
3339long btrfs_ioctl(struct file *file, unsigned int 3394long btrfs_ioctl(struct file *file, unsigned int
3340 cmd, unsigned long arg) 3395 cmd, unsigned long arg)
3341{ 3396{
@@ -3356,6 +3411,13 @@ long btrfs_ioctl(struct file *file, unsigned int
3356 return btrfs_balance(root->fs_info->dev_root); 3411 return btrfs_balance(root->fs_info->dev_root);
3357 case BTRFS_IOC_CLONE: 3412 case BTRFS_IOC_CLONE:
3358 return btrfs_ioctl_clone(file, arg); 3413 return btrfs_ioctl_clone(file, arg);
3414 case BTRFS_IOC_TRANS_START:
3415 return btrfs_ioctl_trans_start(file);
3416 case BTRFS_IOC_TRANS_END:
3417 return btrfs_ioctl_trans_end(file);
3418 case BTRFS_IOC_SYNC:
3419 btrfs_sync_fs(file->f_dentry->d_sb, 1);
3420 return 0;
3359 } 3421 }
3360 3422
3361 return -ENOTTY; 3423 return -ENOTTY;
@@ -3679,6 +3741,7 @@ static struct file_operations btrfs_dir_file_operations = {
3679#ifdef CONFIG_COMPAT 3741#ifdef CONFIG_COMPAT
3680 .compat_ioctl = btrfs_ioctl, 3742 .compat_ioctl = btrfs_ioctl,
3681#endif 3743#endif
3744 .release = btrfs_release_file,
3682}; 3745};
3683 3746
3684static struct extent_io_ops btrfs_extent_io_ops = { 3747static struct extent_io_ops btrfs_extent_io_ops = {
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index b0e73f51d636..85ed35a775b1 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -36,6 +36,14 @@ struct btrfs_ioctl_vol_args {
36 struct btrfs_ioctl_vol_args) 36 struct btrfs_ioctl_vol_args)
37#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ 37#define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \
38 struct btrfs_ioctl_vol_args) 38 struct btrfs_ioctl_vol_args)
39/* trans start and trans end are dangerous, and only for
40 * use by applications that know how to avoid the
41 * resulting deadlocks
42 */
43#define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6)
44#define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7)
45#define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8)
46
39#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) 47#define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int)
40#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ 48#define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \
41 struct btrfs_ioctl_vol_args) 49 struct btrfs_ioctl_vol_args)
@@ -43,4 +51,5 @@ struct btrfs_ioctl_vol_args {
43 struct btrfs_ioctl_vol_args) 51 struct btrfs_ioctl_vol_args)
44#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ 52#define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \
45 struct btrfs_ioctl_vol_args) 53 struct btrfs_ioctl_vol_args)
54
46#endif 55#endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 77f44494e229..39bb86945ed0 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -293,7 +293,7 @@ fail_close:
293 return err; 293 return err;
294} 294}
295 295
296static int btrfs_sync_fs(struct super_block *sb, int wait) 296int btrfs_sync_fs(struct super_block *sb, int wait)
297{ 297{
298 struct btrfs_trans_handle *trans; 298 struct btrfs_trans_handle *trans;
299 struct btrfs_root *root; 299 struct btrfs_root *root;