aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/dev-replace.c
diff options
context:
space:
mode:
authorMiao Xie <miaox@cn.fujitsu.com>2014-01-30 03:46:55 -0500
committerJosef Bacik <jbacik@fb.com>2014-03-10 15:15:39 -0400
commitc404e0dc2c843b154f9a36c3aec10d0a715d88eb (patch)
tree643a2ab96708ef72c50679dd8da28e5d519fcf72 /fs/btrfs/dev-replace.c
parent391cd9df81ac07ce7e66ac8fb13e56693061a6e6 (diff)
Btrfs: fix use-after-free in the finishing procedure of the device replace
During device replace test, we hit a null pointer deference (It was very easy to reproduce it by running xfstests' btrfs/011 on the devices with the virtio scsi driver). There were two bugs that caused this problem: - We might allocate new chunks on the replaced device after we updated the mapping tree. And we forgot to replace the source device in those mapping of the new chunks. - We might get the mapping information which including the source device before the mapping information update. And then submit the bio which was based on that mapping information after we freed the source device. For the first bug, we can fix it by doing mapping tree update and source device remove in the same context of the chunk mutex. The chunk mutex is used to protect the allocable device list, the above method can avoid the new chunk allocation, and after we remove the source device, all the new chunks will be allocated on the new device. So it can fix the first bug. For the second bug, we need make sure all flighting bios are finished and no new bios are produced during we are removing the source device. To fix this problem, we introduced a global @bio_counter, we not only inc/dec @bio_counter outsize of map_blocks, but also inc it before submitting bio and dec @bio_counter when ending bios. Since Raid56 is a little different and device replace dosen't support raid56 yet, it is not addressed in the patch and I add comments to make sure we will fix it in the future. Reported-by: Qu Wenruo <quwenruo@cn.fujitsu.com> Signed-off-by: Wang Shilong <wangsl.fnst@cn.fujitsu.com> Signed-off-by: Miao Xie <miaox@cn.fujitsu.com> Signed-off-by: Josef Bacik <jbacik@fb.com>
Diffstat (limited to 'fs/btrfs/dev-replace.c')
-rw-r--r--fs/btrfs/dev-replace.c74
1 files changed, 67 insertions, 7 deletions
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index b20d59e5e5dd..ec1c3f3a775d 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -431,6 +431,35 @@ leave_no_lock:
431 return ret; 431 return ret;
432} 432}
433 433
434/*
435 * blocked until all flighting bios are finished.
436 */
437static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info)
438{
439 s64 writers;
440 DEFINE_WAIT(wait);
441
442 set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
443 do {
444 prepare_to_wait(&fs_info->replace_wait, &wait,
445 TASK_UNINTERRUPTIBLE);
446 writers = percpu_counter_sum(&fs_info->bio_counter);
447 if (writers)
448 schedule();
449 finish_wait(&fs_info->replace_wait, &wait);
450 } while (writers);
451}
452
453/*
454 * we have removed target device, it is safe to allow new bios request.
455 */
456static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info)
457{
458 clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state);
459 if (waitqueue_active(&fs_info->replace_wait))
460 wake_up(&fs_info->replace_wait);
461}
462
434static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, 463static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
435 int scrub_ret) 464 int scrub_ret)
436{ 465{
@@ -458,12 +487,6 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
458 src_device = dev_replace->srcdev; 487 src_device = dev_replace->srcdev;
459 btrfs_dev_replace_unlock(dev_replace); 488 btrfs_dev_replace_unlock(dev_replace);
460 489
461 /* replace old device with new one in mapping tree */
462 if (!scrub_ret)
463 btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
464 src_device,
465 tgt_device);
466
467 /* 490 /*
468 * flush all outstanding I/O and inode extent mappings before the 491 * flush all outstanding I/O and inode extent mappings before the
469 * copy operation is declared as being finished 492 * copy operation is declared as being finished
@@ -495,7 +518,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
495 dev_replace->time_stopped = get_seconds(); 518 dev_replace->time_stopped = get_seconds();
496 dev_replace->item_needs_writeback = 1; 519 dev_replace->item_needs_writeback = 1;
497 520
498 if (scrub_ret) { 521 /* replace old device with new one in mapping tree */
522 if (!scrub_ret) {
523 btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
524 src_device,
525 tgt_device);
526 } else {
499 printk_in_rcu(KERN_ERR 527 printk_in_rcu(KERN_ERR
500 "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", 528 "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
501 src_device->missing ? "<missing disk>" : 529 src_device->missing ? "<missing disk>" :
@@ -534,8 +562,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
534 fs_info->fs_devices->latest_bdev = tgt_device->bdev; 562 fs_info->fs_devices->latest_bdev = tgt_device->bdev;
535 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); 563 list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
536 564
565 btrfs_rm_dev_replace_blocked(fs_info);
566
537 btrfs_rm_dev_replace_srcdev(fs_info, src_device); 567 btrfs_rm_dev_replace_srcdev(fs_info, src_device);
538 568
569 btrfs_rm_dev_replace_unblocked(fs_info);
570
539 /* 571 /*
540 * this is again a consistent state where no dev_replace procedure 572 * this is again a consistent state where no dev_replace procedure
541 * is running, the target device is part of the filesystem, the 573 * is running, the target device is part of the filesystem, the
@@ -865,3 +897,31 @@ void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
865 mutex_unlock(&dev_replace->lock_management_lock); 897 mutex_unlock(&dev_replace->lock_management_lock);
866 } 898 }
867} 899}
900
901void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
902{
903 percpu_counter_inc(&fs_info->bio_counter);
904}
905
906void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info)
907{
908 percpu_counter_dec(&fs_info->bio_counter);
909
910 if (waitqueue_active(&fs_info->replace_wait))
911 wake_up(&fs_info->replace_wait);
912}
913
914void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info)
915{
916 DEFINE_WAIT(wait);
917again:
918 percpu_counter_inc(&fs_info->bio_counter);
919 if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) {
920 btrfs_bio_counter_dec(fs_info);
921 wait_event(fs_info->replace_wait,
922 !test_bit(BTRFS_FS_STATE_DEV_REPLACING,
923 &fs_info->fs_state));
924 goto again;
925 }
926
927}