aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/dev-replace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/dev-replace.c')
-rw-r--r--fs/btrfs/dev-replace.c134
1 files changed, 72 insertions, 62 deletions
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index cbb7dbfb3fff..a1d6652e0c47 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
202 struct btrfs_dev_replace_item *ptr; 202 struct btrfs_dev_replace_item *ptr;
203 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 203 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
204 204
205 btrfs_dev_replace_lock(dev_replace); 205 btrfs_dev_replace_lock(dev_replace, 0);
206 if (!dev_replace->is_valid || 206 if (!dev_replace->is_valid ||
207 !dev_replace->item_needs_writeback) { 207 !dev_replace->item_needs_writeback) {
208 btrfs_dev_replace_unlock(dev_replace); 208 btrfs_dev_replace_unlock(dev_replace, 0);
209 return 0; 209 return 0;
210 } 210 }
211 btrfs_dev_replace_unlock(dev_replace); 211 btrfs_dev_replace_unlock(dev_replace, 0);
212 212
213 key.objectid = 0; 213 key.objectid = 0;
214 key.type = BTRFS_DEV_REPLACE_KEY; 214 key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
264 ptr = btrfs_item_ptr(eb, path->slots[0], 264 ptr = btrfs_item_ptr(eb, path->slots[0],
265 struct btrfs_dev_replace_item); 265 struct btrfs_dev_replace_item);
266 266
267 btrfs_dev_replace_lock(dev_replace); 267 btrfs_dev_replace_lock(dev_replace, 1);
268 if (dev_replace->srcdev) 268 if (dev_replace->srcdev)
269 btrfs_set_dev_replace_src_devid(eb, ptr, 269 btrfs_set_dev_replace_src_devid(eb, ptr,
270 dev_replace->srcdev->devid); 270 dev_replace->srcdev->devid);
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
287 btrfs_set_dev_replace_cursor_right(eb, ptr, 287 btrfs_set_dev_replace_cursor_right(eb, ptr,
288 dev_replace->cursor_right); 288 dev_replace->cursor_right);
289 dev_replace->item_needs_writeback = 0; 289 dev_replace->item_needs_writeback = 0;
290 btrfs_dev_replace_unlock(dev_replace); 290 btrfs_dev_replace_unlock(dev_replace, 1);
291 291
292 btrfs_mark_buffer_dirty(eb); 292 btrfs_mark_buffer_dirty(eb);
293 293
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
356 return PTR_ERR(trans); 356 return PTR_ERR(trans);
357 } 357 }
358 358
359 btrfs_dev_replace_lock(dev_replace); 359 btrfs_dev_replace_lock(dev_replace, 1);
360 switch (dev_replace->replace_state) { 360 switch (dev_replace->replace_state) {
361 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 361 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
362 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 362 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -395,7 +395,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
395 dev_replace->is_valid = 1; 395 dev_replace->is_valid = 1;
396 dev_replace->item_needs_writeback = 1; 396 dev_replace->item_needs_writeback = 1;
397 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 397 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
398 btrfs_dev_replace_unlock(dev_replace); 398 btrfs_dev_replace_unlock(dev_replace, 1);
399 399
400 ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device); 400 ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
401 if (ret) 401 if (ret)
@@ -407,7 +407,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
407 trans = btrfs_start_transaction(root, 0); 407 trans = btrfs_start_transaction(root, 0);
408 if (IS_ERR(trans)) { 408 if (IS_ERR(trans)) {
409 ret = PTR_ERR(trans); 409 ret = PTR_ERR(trans);
410 btrfs_dev_replace_lock(dev_replace); 410 btrfs_dev_replace_lock(dev_replace, 1);
411 goto leave; 411 goto leave;
412 } 412 }
413 413
@@ -433,7 +433,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
433leave: 433leave:
434 dev_replace->srcdev = NULL; 434 dev_replace->srcdev = NULL;
435 dev_replace->tgtdev = NULL; 435 dev_replace->tgtdev = NULL;
436 btrfs_dev_replace_unlock(dev_replace); 436 btrfs_dev_replace_unlock(dev_replace, 1);
437 btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); 437 btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
438 return ret; 438 return ret;
439} 439}
@@ -471,18 +471,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
471 /* don't allow cancel or unmount to disturb the finishing procedure */ 471 /* don't allow cancel or unmount to disturb the finishing procedure */
472 mutex_lock(&dev_replace->lock_finishing_cancel_unmount); 472 mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
473 473
474 btrfs_dev_replace_lock(dev_replace); 474 btrfs_dev_replace_lock(dev_replace, 0);
475 /* was the operation canceled, or is it finished? */ 475 /* was the operation canceled, or is it finished? */
476 if (dev_replace->replace_state != 476 if (dev_replace->replace_state !=
477 BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) { 477 BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
478 btrfs_dev_replace_unlock(dev_replace); 478 btrfs_dev_replace_unlock(dev_replace, 0);
479 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 479 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
480 return 0; 480 return 0;
481 } 481 }
482 482
483 tgt_device = dev_replace->tgtdev; 483 tgt_device = dev_replace->tgtdev;
484 src_device = dev_replace->srcdev; 484 src_device = dev_replace->srcdev;
485 btrfs_dev_replace_unlock(dev_replace); 485 btrfs_dev_replace_unlock(dev_replace, 0);
486 486
487 /* 487 /*
488 * flush all outstanding I/O and inode extent mappings before the 488 * flush all outstanding I/O and inode extent mappings before the
@@ -507,7 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
507 /* keep away write_all_supers() during the finishing procedure */ 507 /* keep away write_all_supers() during the finishing procedure */
508 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 508 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
509 mutex_lock(&root->fs_info->chunk_mutex); 509 mutex_lock(&root->fs_info->chunk_mutex);
510 btrfs_dev_replace_lock(dev_replace); 510 btrfs_dev_replace_lock(dev_replace, 1);
511 dev_replace->replace_state = 511 dev_replace->replace_state =
512 scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 512 scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
513 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; 513 : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -528,7 +528,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
528 rcu_str_deref(src_device->name), 528 rcu_str_deref(src_device->name),
529 src_device->devid, 529 src_device->devid,
530 rcu_str_deref(tgt_device->name), scrub_ret); 530 rcu_str_deref(tgt_device->name), scrub_ret);
531 btrfs_dev_replace_unlock(dev_replace); 531 btrfs_dev_replace_unlock(dev_replace, 1);
532 mutex_unlock(&root->fs_info->chunk_mutex); 532 mutex_unlock(&root->fs_info->chunk_mutex);
533 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); 533 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
534 mutex_unlock(&uuid_mutex); 534 mutex_unlock(&uuid_mutex);
@@ -565,7 +565,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
565 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 565 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
566 fs_info->fs_devices->rw_devices++; 566 fs_info->fs_devices->rw_devices++;
567 567
568 btrfs_dev_replace_unlock(dev_replace); 568 btrfs_dev_replace_unlock(dev_replace, 1);
569 569
570 btrfs_rm_dev_replace_blocked(fs_info); 570 btrfs_rm_dev_replace_blocked(fs_info);
571 571
@@ -649,7 +649,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
649 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 649 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
650 struct btrfs_device *srcdev; 650 struct btrfs_device *srcdev;
651 651
652 btrfs_dev_replace_lock(dev_replace); 652 btrfs_dev_replace_lock(dev_replace, 0);
653 /* even if !dev_replace_is_valid, the values are good enough for 653 /* even if !dev_replace_is_valid, the values are good enough for
654 * the replace_status ioctl */ 654 * the replace_status ioctl */
655 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 655 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -675,7 +675,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
675 div_u64(btrfs_device_get_total_bytes(srcdev), 1000)); 675 div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
676 break; 676 break;
677 } 677 }
678 btrfs_dev_replace_unlock(dev_replace); 678 btrfs_dev_replace_unlock(dev_replace, 0);
679} 679}
680 680
681int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, 681int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
@@ -698,13 +698,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
698 return -EROFS; 698 return -EROFS;
699 699
700 mutex_lock(&dev_replace->lock_finishing_cancel_unmount); 700 mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
701 btrfs_dev_replace_lock(dev_replace); 701 btrfs_dev_replace_lock(dev_replace, 1);
702 switch (dev_replace->replace_state) { 702 switch (dev_replace->replace_state) {
703 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 703 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
704 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 704 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
705 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: 705 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
706 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; 706 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
707 btrfs_dev_replace_unlock(dev_replace); 707 btrfs_dev_replace_unlock(dev_replace, 1);
708 goto leave; 708 goto leave;
709 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 709 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
710 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 710 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
@@ -717,7 +717,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
717 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; 717 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
718 dev_replace->time_stopped = get_seconds(); 718 dev_replace->time_stopped = get_seconds();
719 dev_replace->item_needs_writeback = 1; 719 dev_replace->item_needs_writeback = 1;
720 btrfs_dev_replace_unlock(dev_replace); 720 btrfs_dev_replace_unlock(dev_replace, 1);
721 btrfs_scrub_cancel(fs_info); 721 btrfs_scrub_cancel(fs_info);
722 722
723 trans = btrfs_start_transaction(root, 0); 723 trans = btrfs_start_transaction(root, 0);
@@ -740,7 +740,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
740 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 740 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
741 741
742 mutex_lock(&dev_replace->lock_finishing_cancel_unmount); 742 mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
743 btrfs_dev_replace_lock(dev_replace); 743 btrfs_dev_replace_lock(dev_replace, 1);
744 switch (dev_replace->replace_state) { 744 switch (dev_replace->replace_state) {
745 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 745 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
746 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 746 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -756,7 +756,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
756 break; 756 break;
757 } 757 }
758 758
759 btrfs_dev_replace_unlock(dev_replace); 759 btrfs_dev_replace_unlock(dev_replace, 1);
760 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 760 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
761} 761}
762 762
@@ -766,12 +766,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
766 struct task_struct *task; 766 struct task_struct *task;
767 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 767 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
768 768
769 btrfs_dev_replace_lock(dev_replace); 769 btrfs_dev_replace_lock(dev_replace, 1);
770 switch (dev_replace->replace_state) { 770 switch (dev_replace->replace_state) {
771 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: 771 case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
772 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: 772 case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
773 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: 773 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
774 btrfs_dev_replace_unlock(dev_replace); 774 btrfs_dev_replace_unlock(dev_replace, 1);
775 return 0; 775 return 0;
776 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 776 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
777 break; 777 break;
@@ -784,10 +784,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
784 btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); 784 btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
785 btrfs_info(fs_info, 785 btrfs_info(fs_info,
786 "you may cancel the operation after 'mount -o degraded'"); 786 "you may cancel the operation after 'mount -o degraded'");
787 btrfs_dev_replace_unlock(dev_replace); 787 btrfs_dev_replace_unlock(dev_replace, 1);
788 return 0; 788 return 0;
789 } 789 }
790 btrfs_dev_replace_unlock(dev_replace); 790 btrfs_dev_replace_unlock(dev_replace, 1);
791 791
792 WARN_ON(atomic_xchg( 792 WARN_ON(atomic_xchg(
793 &fs_info->mutually_exclusive_operation_running, 1)); 793 &fs_info->mutually_exclusive_operation_running, 1));
@@ -802,7 +802,7 @@ static int btrfs_dev_replace_kthread(void *data)
802 struct btrfs_ioctl_dev_replace_args *status_args; 802 struct btrfs_ioctl_dev_replace_args *status_args;
803 u64 progress; 803 u64 progress;
804 804
805 status_args = kzalloc(sizeof(*status_args), GFP_NOFS); 805 status_args = kzalloc(sizeof(*status_args), GFP_KERNEL);
806 if (status_args) { 806 if (status_args) {
807 btrfs_dev_replace_status(fs_info, status_args); 807 btrfs_dev_replace_status(fs_info, status_args);
808 progress = status_args->status.progress_1000; 808 progress = status_args->status.progress_1000;
@@ -858,55 +858,65 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
858 * not called and the the filesystem is remounted 858 * not called and the the filesystem is remounted
859 * in degraded state. This does not stop the 859 * in degraded state. This does not stop the
860 * dev_replace procedure. It needs to be canceled 860 * dev_replace procedure. It needs to be canceled
861 * manually if the cancelation is wanted. 861 * manually if the cancellation is wanted.
862 */ 862 */
863 break; 863 break;
864 } 864 }
865 return 1; 865 return 1;
866} 866}
867 867
868void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace) 868void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
869{ 869{
870 /* the beginning is just an optimization for the typical case */ 870 if (rw == 1) {
871 if (atomic_read(&dev_replace->nesting_level) == 0) { 871 /* write */
872acquire_lock: 872again:
873 /* this is not a nested case where the same thread 873 wait_event(dev_replace->read_lock_wq,
874 * is trying to acqurire the same lock twice */ 874 atomic_read(&dev_replace->blocking_readers) == 0);
875 mutex_lock(&dev_replace->lock); 875 write_lock(&dev_replace->lock);
876 mutex_lock(&dev_replace->lock_management_lock); 876 if (atomic_read(&dev_replace->blocking_readers)) {
877 dev_replace->lock_owner = current->pid; 877 write_unlock(&dev_replace->lock);
878 atomic_inc(&dev_replace->nesting_level); 878 goto again;
879 mutex_unlock(&dev_replace->lock_management_lock); 879 }
880 return; 880 } else {
881 read_lock(&dev_replace->lock);
882 atomic_inc(&dev_replace->read_locks);
881 } 883 }
884}
882 885
883 mutex_lock(&dev_replace->lock_management_lock); 886void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
884 if (atomic_read(&dev_replace->nesting_level) > 0 && 887{
885 dev_replace->lock_owner == current->pid) { 888 if (rw == 1) {
886 WARN_ON(!mutex_is_locked(&dev_replace->lock)); 889 /* write */
887 atomic_inc(&dev_replace->nesting_level); 890 ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
888 mutex_unlock(&dev_replace->lock_management_lock); 891 write_unlock(&dev_replace->lock);
889 return; 892 } else {
893 ASSERT(atomic_read(&dev_replace->read_locks) > 0);
894 atomic_dec(&dev_replace->read_locks);
895 read_unlock(&dev_replace->lock);
890 } 896 }
897}
891 898
892 mutex_unlock(&dev_replace->lock_management_lock); 899/* inc blocking cnt and release read lock */
893 goto acquire_lock; 900void btrfs_dev_replace_set_lock_blocking(
901 struct btrfs_dev_replace *dev_replace)
902{
903 /* only set blocking for read lock */
904 ASSERT(atomic_read(&dev_replace->read_locks) > 0);
905 atomic_inc(&dev_replace->blocking_readers);
906 read_unlock(&dev_replace->lock);
894} 907}
895 908
896void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) 909/* acquire read lock and dec blocking cnt */
910void btrfs_dev_replace_clear_lock_blocking(
911 struct btrfs_dev_replace *dev_replace)
897{ 912{
898 WARN_ON(!mutex_is_locked(&dev_replace->lock)); 913 /* only set blocking for read lock */
899 mutex_lock(&dev_replace->lock_management_lock); 914 ASSERT(atomic_read(&dev_replace->read_locks) > 0);
900 WARN_ON(atomic_read(&dev_replace->nesting_level) < 1); 915 ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
901 WARN_ON(dev_replace->lock_owner != current->pid); 916 read_lock(&dev_replace->lock);
902 atomic_dec(&dev_replace->nesting_level); 917 if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
903 if (atomic_read(&dev_replace->nesting_level) == 0) { 918 waitqueue_active(&dev_replace->read_lock_wq))
904 dev_replace->lock_owner = 0; 919 wake_up(&dev_replace->read_lock_wq);
905 mutex_unlock(&dev_replace->lock_management_lock);
906 mutex_unlock(&dev_replace->lock);
907 } else {
908 mutex_unlock(&dev_replace->lock_management_lock);
909 }
910} 920}
911 921
912void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) 922void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)