diff options
author | Ilya Dryomov <idryomov@gmail.com> | 2013-01-20 08:57:57 -0500 |
---|---|---|
committer | Ilya Dryomov <idryomov@gmail.com> | 2013-01-20 09:21:17 -0500 |
commit | ed0fb78fb6aa294a719f8f5654fdff0ec8bc00bc (patch) | |
tree | 27675574e1f79775a5ab03f84e27b81266be4e21 /fs | |
parent | 3972f2603d8570effaf633cea52b12c7c2773c11 (diff) |
Btrfs: bring back balance pause/resume logic
Balance pause/resume logic got broken by 5ac00add (went into 3.8-rc1
as part of the dev-replace merge). The offending commit took a stab at making
mutually exclusive volume operations (add_dev, rm_dev, resize, balance,
replace_dev) not block behind volume_mutex if another such operation is
in progress and instead return an error right away. The balancing front-end
relied on the blocking behaviour, so the fix is ugly, but short of a
complete rework, it's the best we can do.
Reported-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ioctl.c | 78 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 10 |
2 files changed, 71 insertions, 17 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 982c0b9ceea5..77d8273e394c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -3440,8 +3440,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
3440 | struct btrfs_fs_info *fs_info = root->fs_info; | 3440 | struct btrfs_fs_info *fs_info = root->fs_info; |
3441 | struct btrfs_ioctl_balance_args *bargs; | 3441 | struct btrfs_ioctl_balance_args *bargs; |
3442 | struct btrfs_balance_control *bctl; | 3442 | struct btrfs_balance_control *bctl; |
3443 | bool need_unlock; /* for mut. excl. ops lock */ | ||
3443 | int ret; | 3444 | int ret; |
3444 | int need_to_clear_lock = 0; | ||
3445 | 3445 | ||
3446 | if (!capable(CAP_SYS_ADMIN)) | 3446 | if (!capable(CAP_SYS_ADMIN)) |
3447 | return -EPERM; | 3447 | return -EPERM; |
@@ -3450,14 +3450,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
3450 | if (ret) | 3450 | if (ret) |
3451 | return ret; | 3451 | return ret; |
3452 | 3452 | ||
3453 | mutex_lock(&fs_info->volume_mutex); | 3453 | again: |
3454 | if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { | ||
3455 | mutex_lock(&fs_info->volume_mutex); | ||
3456 | mutex_lock(&fs_info->balance_mutex); | ||
3457 | need_unlock = true; | ||
3458 | goto locked; | ||
3459 | } | ||
3460 | |||
3461 | /* | ||
3462 | * mut. excl. ops lock is locked. Three possibilities: | ||
3463 | * (1) some other op is running | ||
3464 | * (2) balance is running | ||
3465 | * (3) balance is paused -- special case (think resume) | ||
3466 | */ | ||
3454 | mutex_lock(&fs_info->balance_mutex); | 3467 | mutex_lock(&fs_info->balance_mutex); |
3468 | if (fs_info->balance_ctl) { | ||
3469 | /* this is either (2) or (3) */ | ||
3470 | if (!atomic_read(&fs_info->balance_running)) { | ||
3471 | mutex_unlock(&fs_info->balance_mutex); | ||
3472 | if (!mutex_trylock(&fs_info->volume_mutex)) | ||
3473 | goto again; | ||
3474 | mutex_lock(&fs_info->balance_mutex); | ||
3475 | |||
3476 | if (fs_info->balance_ctl && | ||
3477 | !atomic_read(&fs_info->balance_running)) { | ||
3478 | /* this is (3) */ | ||
3479 | need_unlock = false; | ||
3480 | goto locked; | ||
3481 | } | ||
3482 | |||
3483 | mutex_unlock(&fs_info->balance_mutex); | ||
3484 | mutex_unlock(&fs_info->volume_mutex); | ||
3485 | goto again; | ||
3486 | } else { | ||
3487 | /* this is (2) */ | ||
3488 | mutex_unlock(&fs_info->balance_mutex); | ||
3489 | ret = -EINPROGRESS; | ||
3490 | goto out; | ||
3491 | } | ||
3492 | } else { | ||
3493 | /* this is (1) */ | ||
3494 | mutex_unlock(&fs_info->balance_mutex); | ||
3495 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
3496 | ret = -EINVAL; | ||
3497 | goto out; | ||
3498 | } | ||
3499 | |||
3500 | locked: | ||
3501 | BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running)); | ||
3455 | 3502 | ||
3456 | if (arg) { | 3503 | if (arg) { |
3457 | bargs = memdup_user(arg, sizeof(*bargs)); | 3504 | bargs = memdup_user(arg, sizeof(*bargs)); |
3458 | if (IS_ERR(bargs)) { | 3505 | if (IS_ERR(bargs)) { |
3459 | ret = PTR_ERR(bargs); | 3506 | ret = PTR_ERR(bargs); |
3460 | goto out; | 3507 | goto out_unlock; |
3461 | } | 3508 | } |
3462 | 3509 | ||
3463 | if (bargs->flags & BTRFS_BALANCE_RESUME) { | 3510 | if (bargs->flags & BTRFS_BALANCE_RESUME) { |
@@ -3477,13 +3524,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
3477 | bargs = NULL; | 3524 | bargs = NULL; |
3478 | } | 3525 | } |
3479 | 3526 | ||
3480 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, | 3527 | if (fs_info->balance_ctl) { |
3481 | 1)) { | ||
3482 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
3483 | ret = -EINPROGRESS; | 3528 | ret = -EINPROGRESS; |
3484 | goto out_bargs; | 3529 | goto out_bargs; |
3485 | } | 3530 | } |
3486 | need_to_clear_lock = 1; | ||
3487 | 3531 | ||
3488 | bctl = kzalloc(sizeof(*bctl), GFP_NOFS); | 3532 | bctl = kzalloc(sizeof(*bctl), GFP_NOFS); |
3489 | if (!bctl) { | 3533 | if (!bctl) { |
@@ -3504,11 +3548,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
3504 | } | 3548 | } |
3505 | 3549 | ||
3506 | do_balance: | 3550 | do_balance: |
3507 | ret = btrfs_balance(bctl, bargs); | ||
3508 | /* | 3551 | /* |
3509 | * bctl is freed in __cancel_balance or in free_fs_info if | 3552 | * Ownership of bctl and mutually_exclusive_operation_running |
3510 | * restriper was paused all the way until unmount | 3553 | * goes to btrfs_balance. bctl is freed in __cancel_balance, |
3554 | * or, if restriper was paused all the way until unmount, in | ||
3555 | * free_fs_info. mutually_exclusive_operation_running is | ||
3556 | * cleared in __cancel_balance. | ||
3511 | */ | 3557 | */ |
3558 | need_unlock = false; | ||
3559 | |||
3560 | ret = btrfs_balance(bctl, bargs); | ||
3561 | |||
3512 | if (arg) { | 3562 | if (arg) { |
3513 | if (copy_to_user(arg, bargs, sizeof(*bargs))) | 3563 | if (copy_to_user(arg, bargs, sizeof(*bargs))) |
3514 | ret = -EFAULT; | 3564 | ret = -EFAULT; |
@@ -3516,12 +3566,12 @@ do_balance: | |||
3516 | 3566 | ||
3517 | out_bargs: | 3567 | out_bargs: |
3518 | kfree(bargs); | 3568 | kfree(bargs); |
3519 | out: | 3569 | out_unlock: |
3520 | if (need_to_clear_lock) | ||
3521 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, | ||
3522 | 0); | ||
3523 | mutex_unlock(&fs_info->balance_mutex); | 3570 | mutex_unlock(&fs_info->balance_mutex); |
3524 | mutex_unlock(&fs_info->volume_mutex); | 3571 | mutex_unlock(&fs_info->volume_mutex); |
3572 | if (need_unlock) | ||
3573 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
3574 | out: | ||
3525 | mnt_drop_write_file(file); | 3575 | mnt_drop_write_file(file); |
3526 | return ret; | 3576 | return ret; |
3527 | } | 3577 | } |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 86279c37de64..9c84dbe64f18 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -2959,6 +2959,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) | |||
2959 | unset_balance_control(fs_info); | 2959 | unset_balance_control(fs_info); |
2960 | ret = del_balance_item(fs_info->tree_root); | 2960 | ret = del_balance_item(fs_info->tree_root); |
2961 | BUG_ON(ret); | 2961 | BUG_ON(ret); |
2962 | |||
2963 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
2962 | } | 2964 | } |
2963 | 2965 | ||
2964 | void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, | 2966 | void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, |
@@ -3138,8 +3140,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
3138 | out: | 3140 | out: |
3139 | if (bctl->flags & BTRFS_BALANCE_RESUME) | 3141 | if (bctl->flags & BTRFS_BALANCE_RESUME) |
3140 | __cancel_balance(fs_info); | 3142 | __cancel_balance(fs_info); |
3141 | else | 3143 | else { |
3142 | kfree(bctl); | 3144 | kfree(bctl); |
3145 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
3146 | } | ||
3143 | return ret; | 3147 | return ret; |
3144 | } | 3148 | } |
3145 | 3149 | ||
@@ -3156,7 +3160,6 @@ static int balance_kthread(void *data) | |||
3156 | ret = btrfs_balance(fs_info->balance_ctl, NULL); | 3160 | ret = btrfs_balance(fs_info->balance_ctl, NULL); |
3157 | } | 3161 | } |
3158 | 3162 | ||
3159 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
3160 | mutex_unlock(&fs_info->balance_mutex); | 3163 | mutex_unlock(&fs_info->balance_mutex); |
3161 | mutex_unlock(&fs_info->volume_mutex); | 3164 | mutex_unlock(&fs_info->volume_mutex); |
3162 | 3165 | ||
@@ -3179,7 +3182,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) | |||
3179 | return 0; | 3182 | return 0; |
3180 | } | 3183 | } |
3181 | 3184 | ||
3182 | WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); | ||
3183 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); | 3185 | tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); |
3184 | if (IS_ERR(tsk)) | 3186 | if (IS_ERR(tsk)) |
3185 | return PTR_ERR(tsk); | 3187 | return PTR_ERR(tsk); |
@@ -3233,6 +3235,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) | |||
3233 | btrfs_balance_sys(leaf, item, &disk_bargs); | 3235 | btrfs_balance_sys(leaf, item, &disk_bargs); |
3234 | btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); | 3236 | btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); |
3235 | 3237 | ||
3238 | WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); | ||
3239 | |||
3236 | mutex_lock(&fs_info->volume_mutex); | 3240 | mutex_lock(&fs_info->volume_mutex); |
3237 | mutex_lock(&fs_info->balance_mutex); | 3241 | mutex_lock(&fs_info->balance_mutex); |
3238 | 3242 | ||