diff options
author | Sage Weil <sage@newdream.net> | 2008-06-10 10:07:39 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:03 -0400 |
commit | 6bf13c0cc833bf5ba013d6aa60379484bf48c4e6 (patch) | |
tree | ae2b54a1a26a89fece49f6b6d6dff8448efab542 | |
parent | eba12c7bfcb4855fc757357e5e5b0b9a474499ba (diff) |
Btrfs: transaction ioctls
These ioctls let a user application hold a transaction open while it
performs a series of operations. A final ioctl does a sync on the fs
(closing the current transaction). This is the main requirement for
Ceph's OSD to be able to keep the data it's storing in a btrfs volume
consistent, and as far as I can see it works just fine. The application
would do something like:
fd = ::open("some/file", O_RDONLY);
::ioctl(fd, BTRFS_IOC_TRANS_START);
/* do a bunch of stuff */
::ioctl(fd, BTRFS_IOC_TRANS_END);
or, instead of the explicit BTRFS_IOC_TRANS_END ioctl, just
::close(fd);
And to ensure it commits to disk,
::ioctl(fd, BTRFS_IOC_SYNC);
When a transaction is held open, the trans_handle is attached to the
struct file (via private_data) so that it will get cleaned up if the
process dies unexpectedly. A held transaction is also ended on fsync() to
avoid a deadlock.
A misbehaving application could also deliberately hold a transaction open,
effectively locking up the FS, so it may make sense to restrict these
ioctls to root or another suitably privileged user.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/btrfs/ctree.h | 4 | ||||
-rw-r--r-- | fs/btrfs/file.c | 7 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 63 | ||||
-rw-r--r-- | fs/btrfs/ioctl.h | 9 | ||||
-rw-r--r-- | fs/btrfs/super.c | 2 |
5 files changed, 83 insertions, 2 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index ad4eacca7f59..1dcf4fb5b688 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -1575,6 +1575,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb); | |||
1575 | void btrfs_destroy_inode(struct inode *inode); | 1575 | void btrfs_destroy_inode(struct inode *inode); |
1576 | int btrfs_init_cachep(void); | 1576 | int btrfs_init_cachep(void); |
1577 | void btrfs_destroy_cachep(void); | 1577 | void btrfs_destroy_cachep(void); |
1578 | long btrfs_ioctl_trans_end(struct file *file); | ||
1578 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | 1579 | long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); |
1579 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, | 1580 | struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid, |
1580 | struct btrfs_root *root); | 1581 | struct btrfs_root *root); |
@@ -1595,6 +1596,8 @@ extern struct file_operations btrfs_file_operations; | |||
1595 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, | 1596 | int btrfs_drop_extents(struct btrfs_trans_handle *trans, |
1596 | struct btrfs_root *root, struct inode *inode, | 1597 | struct btrfs_root *root, struct inode *inode, |
1597 | u64 start, u64 end, u64 inline_limit, u64 *hint_block); | 1598 | u64 start, u64 end, u64 inline_limit, u64 *hint_block); |
1599 | int btrfs_release_file(struct inode *inode, struct file *file); | ||
1600 | |||
1598 | /* tree-defrag.c */ | 1601 | /* tree-defrag.c */ |
1599 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | 1602 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, |
1600 | struct btrfs_root *root, int cache_only); | 1603 | struct btrfs_root *root, int cache_only); |
@@ -1615,4 +1618,5 @@ int btrfs_delete_xattrs(struct btrfs_trans_handle *trans, | |||
1615 | u64 btrfs_parse_size(char *str); | 1618 | u64 btrfs_parse_size(char *str); |
1616 | int btrfs_parse_options(char *options, struct btrfs_root *root, | 1619 | int btrfs_parse_options(char *options, struct btrfs_root *root, |
1617 | char **subvol_name); | 1620 | char **subvol_name); |
1621 | int btrfs_sync_fs(struct super_block *sb, int wait); | ||
1618 | #endif | 1622 | #endif |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index c4fa4664a456..73c6d085bd90 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -978,9 +978,11 @@ out_nolock: | |||
978 | return num_written ? num_written : err; | 978 | return num_written ? num_written : err; |
979 | } | 979 | } |
980 | 980 | ||
981 | static int btrfs_release_file (struct inode * inode, struct file * filp) | 981 | int btrfs_release_file(struct inode * inode, struct file * filp) |
982 | { | 982 | { |
983 | btrfs_del_ordered_inode(inode); | 983 | btrfs_del_ordered_inode(inode); |
984 | if (filp->private_data) | ||
985 | btrfs_ioctl_trans_end(filp); | ||
984 | return 0; | 986 | return 0; |
985 | } | 987 | } |
986 | 988 | ||
@@ -1011,6 +1013,9 @@ static int btrfs_sync_file(struct file *file, | |||
1011 | /* | 1013 | /* |
1012 | * ok we haven't committed the transaction yet, lets do a commit | 1014 | * ok we haven't committed the transaction yet, lets do a commit |
1013 | */ | 1015 | */ |
1016 | if (file->private_data) | ||
1017 | btrfs_ioctl_trans_end(file); | ||
1018 | |||
1014 | trans = btrfs_start_transaction(root, 1); | 1019 | trans = btrfs_start_transaction(root, 1); |
1015 | if (!trans) { | 1020 | if (!trans) { |
1016 | ret = -ENOMEM; | 1021 | ret = -ENOMEM; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 31aa4ba06fce..0f14697becef 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -3336,6 +3336,61 @@ out_fput: | |||
3336 | return ret; | 3336 | return ret; |
3337 | } | 3337 | } |
3338 | 3338 | ||
3339 | /* | ||
3340 | * there are many ways the trans_start and trans_end ioctls can lead | ||
3341 | * to deadlocks. They should only be used by applications that | ||
3342 | * basically own the machine, and have a very in depth understanding | ||
3343 | * of all the possible deadlocks and enospc problems. | ||
3344 | */ | ||
3345 | long btrfs_ioctl_trans_start(struct file *file) | ||
3346 | { | ||
3347 | struct inode *inode = fdentry(file)->d_inode; | ||
3348 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3349 | struct btrfs_trans_handle *trans; | ||
3350 | int ret = 0; | ||
3351 | |||
3352 | mutex_lock(&root->fs_info->fs_mutex); | ||
3353 | if (file->private_data) { | ||
3354 | ret = -EINPROGRESS; | ||
3355 | goto out; | ||
3356 | } | ||
3357 | trans = btrfs_start_transaction(root, 0); | ||
3358 | if (trans) | ||
3359 | file->private_data = trans; | ||
3360 | else | ||
3361 | ret = -ENOMEM; | ||
3362 | /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/ | ||
3363 | out: | ||
3364 | mutex_unlock(&root->fs_info->fs_mutex); | ||
3365 | return ret; | ||
3366 | } | ||
3367 | |||
3368 | /* | ||
3369 | * there are many ways the trans_start and trans_end ioctls can lead | ||
3370 | * to deadlocks. They should only be used by applications that | ||
3371 | * basically own the machine, and have a very in depth understanding | ||
3372 | * of all the possible deadlocks and enospc problems. | ||
3373 | */ | ||
3374 | long btrfs_ioctl_trans_end(struct file *file) | ||
3375 | { | ||
3376 | struct inode *inode = fdentry(file)->d_inode; | ||
3377 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3378 | struct btrfs_trans_handle *trans; | ||
3379 | int ret = 0; | ||
3380 | |||
3381 | mutex_lock(&root->fs_info->fs_mutex); | ||
3382 | trans = file->private_data; | ||
3383 | if (!trans) { | ||
3384 | ret = -EINVAL; | ||
3385 | goto out; | ||
3386 | } | ||
3387 | btrfs_end_transaction(trans, root); | ||
3388 | file->private_data = 0; | ||
3389 | out: | ||
3390 | mutex_unlock(&root->fs_info->fs_mutex); | ||
3391 | return ret; | ||
3392 | } | ||
3393 | |||
3339 | long btrfs_ioctl(struct file *file, unsigned int | 3394 | long btrfs_ioctl(struct file *file, unsigned int |
3340 | cmd, unsigned long arg) | 3395 | cmd, unsigned long arg) |
3341 | { | 3396 | { |
@@ -3356,6 +3411,13 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
3356 | return btrfs_balance(root->fs_info->dev_root); | 3411 | return btrfs_balance(root->fs_info->dev_root); |
3357 | case BTRFS_IOC_CLONE: | 3412 | case BTRFS_IOC_CLONE: |
3358 | return btrfs_ioctl_clone(file, arg); | 3413 | return btrfs_ioctl_clone(file, arg); |
3414 | case BTRFS_IOC_TRANS_START: | ||
3415 | return btrfs_ioctl_trans_start(file); | ||
3416 | case BTRFS_IOC_TRANS_END: | ||
3417 | return btrfs_ioctl_trans_end(file); | ||
3418 | case BTRFS_IOC_SYNC: | ||
3419 | btrfs_sync_fs(file->f_dentry->d_sb, 1); | ||
3420 | return 0; | ||
3359 | } | 3421 | } |
3360 | 3422 | ||
3361 | return -ENOTTY; | 3423 | return -ENOTTY; |
@@ -3679,6 +3741,7 @@ static struct file_operations btrfs_dir_file_operations = { | |||
3679 | #ifdef CONFIG_COMPAT | 3741 | #ifdef CONFIG_COMPAT |
3680 | .compat_ioctl = btrfs_ioctl, | 3742 | .compat_ioctl = btrfs_ioctl, |
3681 | #endif | 3743 | #endif |
3744 | .release = btrfs_release_file, | ||
3682 | }; | 3745 | }; |
3683 | 3746 | ||
3684 | static struct extent_io_ops btrfs_extent_io_ops = { | 3747 | static struct extent_io_ops btrfs_extent_io_ops = { |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index b0e73f51d636..85ed35a775b1 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
@@ -36,6 +36,14 @@ struct btrfs_ioctl_vol_args { | |||
36 | struct btrfs_ioctl_vol_args) | 36 | struct btrfs_ioctl_vol_args) |
37 | #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ | 37 | #define BTRFS_IOC_SCAN_DEV _IOW(BTRFS_IOCTL_MAGIC, 4, \ |
38 | struct btrfs_ioctl_vol_args) | 38 | struct btrfs_ioctl_vol_args) |
39 | /* trans start and trans end are dangerous, and only for | ||
40 | * use by applications that know how to avoid the | ||
41 | * resulting deadlocks | ||
42 | */ | ||
43 | #define BTRFS_IOC_TRANS_START _IO(BTRFS_IOCTL_MAGIC, 6) | ||
44 | #define BTRFS_IOC_TRANS_END _IO(BTRFS_IOCTL_MAGIC, 7) | ||
45 | #define BTRFS_IOC_SYNC _IO(BTRFS_IOCTL_MAGIC, 8) | ||
46 | |||
39 | #define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) | 47 | #define BTRFS_IOC_CLONE _IOW(BTRFS_IOCTL_MAGIC, 9, int) |
40 | #define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ | 48 | #define BTRFS_IOC_ADD_DEV _IOW(BTRFS_IOCTL_MAGIC, 10, \ |
41 | struct btrfs_ioctl_vol_args) | 49 | struct btrfs_ioctl_vol_args) |
@@ -43,4 +51,5 @@ struct btrfs_ioctl_vol_args { | |||
43 | struct btrfs_ioctl_vol_args) | 51 | struct btrfs_ioctl_vol_args) |
44 | #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ | 52 | #define BTRFS_IOC_BALANCE _IOW(BTRFS_IOCTL_MAGIC, 12, \ |
45 | struct btrfs_ioctl_vol_args) | 53 | struct btrfs_ioctl_vol_args) |
54 | |||
46 | #endif | 55 | #endif |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 77f44494e229..39bb86945ed0 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -293,7 +293,7 @@ fail_close: | |||
293 | return err; | 293 | return err; |
294 | } | 294 | } |
295 | 295 | ||
296 | static int btrfs_sync_fs(struct super_block *sb, int wait) | 296 | int btrfs_sync_fs(struct super_block *sb, int wait) |
297 | { | 297 | { |
298 | struct btrfs_trans_handle *trans; | 298 | struct btrfs_trans_handle *trans; |
299 | struct btrfs_root *root; | 299 | struct btrfs_root *root; |