aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/dev-replace.c
diff options
context:
space:
mode:
authorAnand Jain <anand.jain@oracle.com>2018-11-14 00:50:26 -0500
committerDavid Sterba <dsterba@suse.com>2018-12-17 08:51:35 -0500
commitd189dd70e2556181732598956d808ea53cc8774e (patch)
treec4a6a54e817f27e7d44cd9c6678390287c1cca46 /fs/btrfs/dev-replace.c
parent05c49e6bc1e8866ecfd674ebeeb58cdbff9145c2 (diff)
btrfs: fix use-after-free due to race between replace start and cancel
The device replace cancel thread can race with the replace start thread, and if fs_info::scrubs_running is not yet set, btrfs_scrub_cancel() will fail to stop the scrub thread. The scrub thread then continues with the scrub for replace and tries to write to the target device, which has already been freed by the cancel thread. scrub_setup_ctx() warns because tgtdev is NULL: struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) { ... if (is_dev_replace) { WARN_ON(!fs_info->dev_replace.tgtdev); <=== sctx->pages_per_wr_bio = SCRUB_PAGES_PER_WR_BIO; sctx->wr_tgtdev = fs_info->dev_replace.tgtdev; sctx->flush_all_writes = false; } [ 6724.497655] BTRFS info (device sdb): dev_replace from /dev/sdb (devid 1) to /dev/sdc started [ 6753.945017] BTRFS info (device sdb): dev_replace from /dev/sdb (devid 1) to /dev/sdc canceled [ 6852.426700] WARNING: CPU: 0 PID: 4494 at fs/btrfs/scrub.c:622 scrub_setup_ctx.isra.19+0x220/0x230 [btrfs] ... [ 6852.428928] RIP: 0010:scrub_setup_ctx.isra.19+0x220/0x230 [btrfs] ... [ 6852.432970] Call Trace: [ 6852.433202] btrfs_scrub_dev+0x19b/0x5c0 [btrfs] [ 6852.433471] btrfs_dev_replace_start+0x48c/0x6a0 [btrfs] [ 6852.433800] btrfs_dev_replace_by_ioctl+0x3a/0x60 [btrfs] [ 6852.434097] btrfs_ioctl+0x2476/0x2d20 [btrfs] [ 6852.434365] ? do_sigaction+0x7d/0x1e0 [ 6852.434623] do_vfs_ioctl+0xa9/0x6c0 [ 6852.434865] ? syscall_trace_enter+0x1c8/0x310 [ 6852.435124] ? syscall_trace_enter+0x1c8/0x310 [ 6852.435387] ksys_ioctl+0x60/0x90 [ 6852.435663] __x64_sys_ioctl+0x16/0x20 [ 6852.435907] do_syscall_64+0x50/0x180 [ 6852.436150] entry_SYSCALL_64_after_hwframe+0x49/0xbe Further, when the replace thread enters scrub_write_page_to_dev_replace() without the target device, it panics: static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, struct scrub_page *spage) { ... bio_set_dev(bio, sbio->dev->bdev); <====== [ 6929.715145] BUG: unable to handle kernel NULL pointer dereference at 00000000000000a0 ..
[ 6929.717106] Workqueue: btrfs-scrub btrfs_scrub_helper [btrfs] [ 6929.717420] RIP: 0010:scrub_write_page_to_dev_replace+0xb4/0x260 [btrfs] .. [ 6929.721430] Call Trace: [ 6929.721663] scrub_write_block_to_dev_replace+0x3f/0x60 [btrfs] [ 6929.721975] scrub_bio_end_io_worker+0x1af/0x490 [btrfs] [ 6929.722277] normal_work_helper+0xf0/0x4c0 [btrfs] [ 6929.722552] process_one_work+0x1f4/0x520 [ 6929.722805] ? process_one_work+0x16e/0x520 [ 6929.723063] worker_thread+0x46/0x3d0 [ 6929.723313] kthread+0xf8/0x130 [ 6929.723544] ? process_one_work+0x520/0x520 [ 6929.723800] ? kthread_delayed_work_timer_fn+0x80/0x80 [ 6929.724081] ret_from_fork+0x3a/0x50 Fix this by letting btrfs_dev_replace_finishing() do the cleanup after the cancel, including freeing the target device. btrfs_dev_replace_finishing() is called with the scrub return status when btrfs_scrub_dev() returns. Signed-off-by: Anand Jain <anand.jain@oracle.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/dev-replace.c')
-rw-r--r--fs/btrfs/dev-replace.c63
1 files changed, 41 insertions, 22 deletions
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 33d07c426c59..08092d329f66 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -803,39 +803,58 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
803 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: 803 case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
804 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED; 804 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
805 btrfs_dev_replace_write_unlock(dev_replace); 805 btrfs_dev_replace_write_unlock(dev_replace);
806 goto leave; 806 break;
807 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: 807 case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
808 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
809 tgt_device = dev_replace->tgtdev;
810 src_device = dev_replace->srcdev;
811 btrfs_dev_replace_write_unlock(dev_replace);
812 btrfs_scrub_cancel(fs_info);
813 /* btrfs_dev_replace_finishing() will handle the cleanup part */
814 btrfs_info_in_rcu(fs_info,
815 "dev_replace from %s (devid %llu) to %s canceled",
816 btrfs_dev_name(src_device), src_device->devid,
817 btrfs_dev_name(tgt_device));
818 break;
808 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: 819 case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
820 /*
821 * Scrub doing the replace isn't running so we need to do the
822 * cleanup step of btrfs_dev_replace_finishing() here
823 */
809 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 824 result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
810 tgt_device = dev_replace->tgtdev; 825 tgt_device = dev_replace->tgtdev;
811 src_device = dev_replace->srcdev; 826 src_device = dev_replace->srcdev;
812 dev_replace->tgtdev = NULL; 827 dev_replace->tgtdev = NULL;
813 dev_replace->srcdev = NULL; 828 dev_replace->srcdev = NULL;
814 break; 829 dev_replace->replace_state =
815 } 830 BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
816 dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; 831 dev_replace->time_stopped = ktime_get_real_seconds();
817 dev_replace->time_stopped = ktime_get_real_seconds(); 832 dev_replace->item_needs_writeback = 1;
818 dev_replace->item_needs_writeback = 1;
819 btrfs_dev_replace_write_unlock(dev_replace);
820 btrfs_scrub_cancel(fs_info);
821 833
822 trans = btrfs_start_transaction(root, 0); 834 btrfs_dev_replace_write_unlock(dev_replace);
823 if (IS_ERR(trans)) {
824 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
825 return PTR_ERR(trans);
826 }
827 ret = btrfs_commit_transaction(trans);
828 WARN_ON(ret);
829 835
830 btrfs_info_in_rcu(fs_info, 836 btrfs_scrub_cancel(fs_info);
831 "dev_replace from %s (devid %llu) to %s canceled", 837
832 btrfs_dev_name(src_device), src_device->devid, 838 trans = btrfs_start_transaction(root, 0);
833 btrfs_dev_name(tgt_device)); 839 if (IS_ERR(trans)) {
840 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
841 return PTR_ERR(trans);
842 }
843 ret = btrfs_commit_transaction(trans);
844 WARN_ON(ret);
834 845
835 if (tgt_device) 846 btrfs_info_in_rcu(fs_info,
836 btrfs_destroy_dev_replace_tgtdev(tgt_device); 847 "suspended dev_replace from %s (devid %llu) to %s canceled",
848 btrfs_dev_name(src_device), src_device->devid,
849 btrfs_dev_name(tgt_device));
850
851 if (tgt_device)
852 btrfs_destroy_dev_replace_tgtdev(tgt_device);
853 break;
854 default:
855 result = -EINVAL;
856 }
837 857
838leave:
839 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 858 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
840 return result; 859 return result;
841} 860}