Diffstat (limited to 'fs/btrfs/volumes.c')
 -rw-r--r--  fs/btrfs/volumes.c | 966
 1 file changed, 777 insertions(+), 189 deletions(-)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0f5ebb72a5ea..5cce6aa74012 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -25,7 +25,6 @@
 #include <linux/capability.h>
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
-#include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
 #include "extent_map.h"
@@ -36,6 +35,8 @@
 #include "async-thread.h"
 #include "check-integrity.h"
 #include "rcu-string.h"
+#include "math.h"
+#include "dev-replace.h"
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
@@ -71,6 +72,19 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
 	kfree(fs_devices);
 }
 
+static void btrfs_kobject_uevent(struct block_device *bdev,
+				 enum kobject_action action)
+{
+	int ret;
+
+	ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
+	if (ret)
+		pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n",
+			action,
+			kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
+			&disk_to_dev(bdev->bd_disk)->kobj);
+}
+
 void btrfs_cleanup_fs_uuids(void)
 {
 	struct btrfs_fs_devices *fs_devices;
@@ -108,6 +122,44 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
 	return NULL;
 }
 
+static int
+btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
+		      int flush, struct block_device **bdev,
+		      struct buffer_head **bh)
+{
+	int ret;
+
+	*bdev = blkdev_get_by_path(device_path, flags, holder);
+
+	if (IS_ERR(*bdev)) {
+		ret = PTR_ERR(*bdev);
+		printk(KERN_INFO "btrfs: open %s failed\n", device_path);
+		goto error;
+	}
+
+	if (flush)
+		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
+	ret = set_blocksize(*bdev, 4096);
+	if (ret) {
+		blkdev_put(*bdev, flags);
+		goto error;
+	}
+	invalidate_bdev(*bdev);
+	*bh = btrfs_read_dev_super(*bdev);
+	if (!*bh) {
+		ret = -EINVAL;
+		blkdev_put(*bdev, flags);
+		goto error;
+	}
+
+	return 0;
+
+error:
+	*bdev = NULL;
+	*bh = NULL;
+	return ret;
+}
+
 static void requeue_list(struct btrfs_pending_bios *pending_bios,
 			 struct bio *head, struct bio *tail)
 {
@@ -467,7 +519,8 @@ error:
 	return ERR_PTR(-ENOMEM);
 }
 
-void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
+void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
+			       struct btrfs_fs_devices *fs_devices, int step)
 {
 	struct btrfs_device *device, *next;
 
@@ -480,8 +533,9 @@ again:
 	/* This is the initialized path, it is safe to release the devices. */
 	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
 		if (device->in_fs_metadata) {
-			if (!latest_transid ||
-			    device->generation > latest_transid) {
+			if (!device->is_tgtdev_for_dev_replace &&
+			    (!latest_transid ||
+			     device->generation > latest_transid)) {
 				latest_devid = device->devid;
 				latest_transid = device->generation;
 				latest_bdev = device->bdev;
@@ -489,6 +543,21 @@ again:
 			continue;
 		}
 
+		if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
+			/*
+			 * In the first step, keep the device which has
+			 * the correct fsid and the devid that is used
+			 * for the dev_replace procedure.
+			 * In the second step, the dev_replace state is
+			 * read from the device tree and it is known
+			 * whether the procedure is really active or
+			 * not, which means whether this device is
+			 * used or whether it should be removed.
+			 */
+			if (step == 0 || device->is_tgtdev_for_dev_replace) {
+				continue;
+			}
+		}
 		if (device->bdev) {
 			blkdev_put(device->bdev, device->mode);
 			device->bdev = NULL;
@@ -497,7 +566,8 @@ again:
 		if (device->writeable) {
 			list_del_init(&device->dev_alloc_list);
 			device->writeable = 0;
-			fs_devices->rw_devices--;
+			if (!device->is_tgtdev_for_dev_replace)
+				fs_devices->rw_devices--;
 		}
 		list_del_init(&device->dev_list);
 		fs_devices->num_devices--;
@@ -555,7 +625,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 		if (device->bdev)
 			fs_devices->open_devices--;
 
-		if (device->writeable) {
+		if (device->writeable && !device->is_tgtdev_for_dev_replace) {
 			list_del_init(&device->dev_alloc_list);
 			fs_devices->rw_devices--;
 		}
@@ -637,18 +707,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 		if (!device->name)
 			continue;
 
-		bdev = blkdev_get_by_path(device->name->str, flags, holder);
-		if (IS_ERR(bdev)) {
-			printk(KERN_INFO "btrfs: open %s failed\n", device->name->str);
-			goto error;
-		}
-		filemap_write_and_wait(bdev->bd_inode->i_mapping);
-		invalidate_bdev(bdev);
-		set_blocksize(bdev, 4096);
-
-		bh = btrfs_read_dev_super(bdev);
-		if (!bh)
-			goto error_close;
+		ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
+					    &bdev, &bh);
+		if (ret)
+			continue;
 
 		disk_super = (struct btrfs_super_block *)bh->b_data;
 		devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -687,7 +749,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 			fs_devices->rotating = 1;
 
 		fs_devices->open_devices++;
-		if (device->writeable) {
+		if (device->writeable && !device->is_tgtdev_for_dev_replace) {
 			fs_devices->rw_devices++;
 			list_add(&device->dev_alloc_list,
 				 &fs_devices->alloc_list);
@@ -697,9 +759,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 
 error_brelse:
 		brelse(bh);
-error_close:
 		blkdev_put(bdev, flags);
-error:
 		continue;
 	}
 	if (fs_devices->open_devices == 0) {
@@ -744,40 +804,30 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
 	u64 total_devices;
 
 	flags |= FMODE_EXCL;
-	bdev = blkdev_get_by_path(path, flags, holder);
-
-	if (IS_ERR(bdev)) {
-		ret = PTR_ERR(bdev);
-		goto error;
-	}
-
 	mutex_lock(&uuid_mutex);
-	ret = set_blocksize(bdev, 4096);
+	ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh);
 	if (ret)
-		goto error_close;
-	bh = btrfs_read_dev_super(bdev);
-	if (!bh) {
-		ret = -EINVAL;
-		goto error_close;
-	}
+		goto error;
 	disk_super = (struct btrfs_super_block *)bh->b_data;
 	devid = btrfs_stack_device_id(&disk_super->dev_item);
 	transid = btrfs_super_generation(disk_super);
 	total_devices = btrfs_super_num_devices(disk_super);
-	if (disk_super->label[0])
+	if (disk_super->label[0]) {
+		if (disk_super->label[BTRFS_LABEL_SIZE - 1])
+			disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
 		printk(KERN_INFO "device label %s ", disk_super->label);
-	else
+	} else {
 		printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
+	}
 	printk(KERN_CONT "devid %llu transid %llu %s\n",
 	       (unsigned long long)devid, (unsigned long long)transid, path);
 	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
 	if (!ret && fs_devices_ret)
 		(*fs_devices_ret)->total_devices = total_devices;
 	brelse(bh);
-error_close:
-	mutex_unlock(&uuid_mutex);
 	blkdev_put(bdev, flags);
 error:
+	mutex_unlock(&uuid_mutex);
 	return ret;
 }
 
@@ -796,7 +846,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
 
 	*length = 0;
 
-	if (start >= device->total_bytes)
+	if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace)
 		return 0;
 
 	path = btrfs_alloc_path();
@@ -913,7 +963,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
 	max_hole_size = 0;
 	hole_size = 0;
 
-	if (search_start >= search_end) {
+	if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
 		ret = -ENOSPC;
 		goto error;
 	}
@@ -1096,6 +1146,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
 	struct btrfs_key key;
 
 	WARN_ON(!device->in_fs_metadata);
+	WARN_ON(device->is_tgtdev_for_dev_replace);
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -1330,16 +1381,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		root->fs_info->avail_system_alloc_bits |
 		root->fs_info->avail_metadata_alloc_bits;
 
-	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
-	    root->fs_info->fs_devices->num_devices <= 4) {
+	num_devices = root->fs_info->fs_devices->num_devices;
+	btrfs_dev_replace_lock(&root->fs_info->dev_replace);
+	if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
+		WARN_ON(num_devices < 1);
+		num_devices--;
+	}
+	btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
+
+	if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
 		printk(KERN_ERR "btrfs: unable to go below four devices "
 		       "on raid10\n");
 		ret = -EINVAL;
 		goto out;
 	}
 
-	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
-	    root->fs_info->fs_devices->num_devices <= 2) {
+	if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
 		printk(KERN_ERR "btrfs: unable to go below two "
 		       "devices on raid1\n");
 		ret = -EINVAL;
@@ -1357,7 +1414,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	 * is held.
 	 */
 	list_for_each_entry(tmp, devices, dev_list) {
-		if (tmp->in_fs_metadata && !tmp->bdev) {
+		if (tmp->in_fs_metadata &&
+		    !tmp->is_tgtdev_for_dev_replace &&
+		    !tmp->bdev) {
 			device = tmp;
 			break;
 		}
@@ -1371,24 +1430,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 			goto out;
 		}
 	} else {
-		bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL,
-					  root->fs_info->bdev_holder);
-		if (IS_ERR(bdev)) {
-			ret = PTR_ERR(bdev);
+		ret = btrfs_get_bdev_and_sb(device_path,
+					    FMODE_READ | FMODE_EXCL,
+					    root->fs_info->bdev_holder, 0,
+					    &bdev, &bh);
+		if (ret)
 			goto out;
-		}
-
-		set_blocksize(bdev, 4096);
-		invalidate_bdev(bdev);
-		bh = btrfs_read_dev_super(bdev);
-		if (!bh) {
-			ret = -EINVAL;
-			goto error_close;
-		}
 		disk_super = (struct btrfs_super_block *)bh->b_data;
 		devid = btrfs_stack_device_id(&disk_super->dev_item);
 		dev_uuid = disk_super->dev_item.uuid;
-		device = btrfs_find_device(root, devid, dev_uuid,
+		device = btrfs_find_device(root->fs_info, devid, dev_uuid,
 					   disk_super->fsid);
 		if (!device) {
 			ret = -ENOENT;
@@ -1396,6 +1447,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 		}
 	}
 
+	if (device->is_tgtdev_for_dev_replace) {
+		pr_err("btrfs: unable to remove the dev_replace target dev\n");
+		ret = -EINVAL;
+		goto error_brelse;
+	}
+
 	if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
 		printk(KERN_ERR "btrfs: unable to remove the only writeable "
 		       "device\n");
@@ -1415,6 +1472,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	if (ret)
 		goto error_undo;
 
+	/*
+	 * TODO: the superblock still includes this device in its num_devices
+	 * counter although write_all_supers() is not locked out. This
+	 * could give a filesystem state which requires a degraded mount.
+	 */
 	ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
 	if (ret)
 		goto error_undo;
@@ -1425,7 +1487,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	spin_unlock(&root->fs_info->free_chunk_lock);
 
 	device->in_fs_metadata = 0;
-	btrfs_scrub_cancel_dev(root, device);
+	btrfs_scrub_cancel_dev(root->fs_info, device);
 
 	/*
 	 * the device list mutex makes sure that we don't change
@@ -1482,7 +1544,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 	 * at this point, the device is zero sized. We want to
 	 * remove it from the devices list and zero out the old super
 	 */
-	if (clear_super) {
+	if (clear_super && disk_super) {
 		/* make sure this device isn't detected as part of
 		 * the FS anymore
 		 */
@@ -1493,9 +1555,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
 	ret = 0;
 
+	/* Notify udev that device has changed */
+	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+
 error_brelse:
 	brelse(bh);
-error_close:
 	if (bdev)
 		blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
 out:
@@ -1512,6 +1576,112 @@ error_undo:
 	goto error_brelse;
 }
 
+void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
+				 struct btrfs_device *srcdev)
+{
+	WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
+	list_del_rcu(&srcdev->dev_list);
+	list_del_rcu(&srcdev->dev_alloc_list);
+	fs_info->fs_devices->num_devices--;
+	if (srcdev->missing) {
+		fs_info->fs_devices->missing_devices--;
+		fs_info->fs_devices->rw_devices++;
+	}
+	if (srcdev->can_discard)
+		fs_info->fs_devices->num_can_discard--;
+	if (srcdev->bdev)
+		fs_info->fs_devices->open_devices--;
+
+	call_rcu(&srcdev->rcu, free_device);
+}
+
+void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
+				      struct btrfs_device *tgtdev)
+{
+	struct btrfs_device *next_device;
+
+	WARN_ON(!tgtdev);
+	mutex_lock(&fs_info->fs_devices->device_list_mutex);
+	if (tgtdev->bdev) {
+		btrfs_scratch_superblock(tgtdev);
+		fs_info->fs_devices->open_devices--;
+	}
+	fs_info->fs_devices->num_devices--;
+	if (tgtdev->can_discard)
+		fs_info->fs_devices->num_can_discard++;
+
+	next_device = list_entry(fs_info->fs_devices->devices.next,
+				 struct btrfs_device, dev_list);
+	if (tgtdev->bdev == fs_info->sb->s_bdev)
+		fs_info->sb->s_bdev = next_device->bdev;
+	if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
+		fs_info->fs_devices->latest_bdev = next_device->bdev;
+	list_del_rcu(&tgtdev->dev_list);
+
+	call_rcu(&tgtdev->rcu, free_device);
+
+	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+}
+
+int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
+			      struct btrfs_device **device)
+{
+	int ret = 0;
+	struct btrfs_super_block *disk_super;
+	u64 devid;
+	u8 *dev_uuid;
+	struct block_device *bdev;
+	struct buffer_head *bh;
+
+	*device = NULL;
+	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
+				    root->fs_info->bdev_holder, 0, &bdev, &bh);
+	if (ret)
+		return ret;
+	disk_super = (struct btrfs_super_block *)bh->b_data;
+	devid = btrfs_stack_device_id(&disk_super->dev_item);
+	dev_uuid = disk_super->dev_item.uuid;
+	*device = btrfs_find_device(root->fs_info, devid, dev_uuid,
+				    disk_super->fsid);
+	brelse(bh);
+	if (!*device)
+		ret = -ENOENT;
+	blkdev_put(bdev, FMODE_READ);
+	return ret;
+}
+
+int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
+					 char *device_path,
+					 struct btrfs_device **device)
+{
+	*device = NULL;
+	if (strcmp(device_path, "missing") == 0) {
+		struct list_head *devices;
+		struct btrfs_device *tmp;
+
+		devices = &root->fs_info->fs_devices->devices;
+		/*
+		 * It is safe to read the devices since the volume_mutex
+		 * is held by the caller.
+		 */
+		list_for_each_entry(tmp, devices, dev_list) {
+			if (tmp->in_fs_metadata && !tmp->bdev) {
+				*device = tmp;
+				break;
+			}
+		}
+
+		if (!*device) {
+			pr_err("btrfs: no missing device found\n");
+			return -ENOENT;
+		}
+
+		return 0;
+	} else {
+		return btrfs_find_device_by_path(root, device_path, device);
+	}
+}
+
 /*
  * does all the dirty work required for changing file system's UUID.
  */
@@ -1630,7 +1800,8 @@ next_slot:
 		read_extent_buffer(leaf, fs_uuid,
 				   (unsigned long)btrfs_device_fsid(dev_item),
 				   BTRFS_UUID_SIZE);
-		device = btrfs_find_device(root, devid, dev_uuid, fs_uuid);
+		device = btrfs_find_device(root->fs_info, devid, dev_uuid,
+					   fs_uuid);
 		BUG_ON(!device); /* Logic error */
 
 		if (device->fs_devices->seeding) {
@@ -1678,16 +1849,17 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	filemap_write_and_wait(bdev->bd_inode->i_mapping);
 
 	devices = &root->fs_info->fs_devices->devices;
-	/*
-	 * we have the volume lock, so we don't need the extra
-	 * device list mutex while reading the list here.
-	 */
+
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
 	list_for_each_entry(device, devices, dev_list) {
 		if (device->bdev == bdev) {
 			ret = -EEXIST;
+			mutex_unlock(
+				&root->fs_info->fs_devices->device_list_mutex);
 			goto error;
 		}
 	}
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
 	device = kzalloc(sizeof(*device), GFP_NOFS);
 	if (!device) {
@@ -1737,6 +1909,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 	device->dev_root = root->fs_info->dev_root;
 	device->bdev = bdev;
 	device->in_fs_metadata = 1;
+	device->is_tgtdev_for_dev_replace = 0;
 	device->mode = FMODE_EXCL;
 	set_blocksize(device->bdev, 4096);
 
@@ -1844,6 +2017,98 @@ error:
 	return ret;
 }
 
+int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
+				  struct btrfs_device **device_out)
+{
+	struct request_queue *q;
+	struct btrfs_device *device;
+	struct block_device *bdev;
+	struct btrfs_fs_info *fs_info = root->fs_info;
+	struct list_head *devices;
+	struct rcu_string *name;
+	int ret = 0;
+
+	*device_out = NULL;
+	if (fs_info->fs_devices->seeding)
+		return -EINVAL;
+
+	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
+				  fs_info->bdev_holder);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+
+	filemap_write_and_wait(bdev->bd_inode->i_mapping);
+
+	devices = &fs_info->fs_devices->devices;
+	list_for_each_entry(device, devices, dev_list) {
+		if (device->bdev == bdev) {
+			ret = -EEXIST;
+			goto error;
+		}
+	}
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device) {
+		ret = -ENOMEM;
+		goto error;
+	}
+
+	name = rcu_string_strdup(device_path, GFP_NOFS);
+	if (!name) {
+		kfree(device);
+		ret = -ENOMEM;
+		goto error;
+	}
+	rcu_assign_pointer(device->name, name);
+
+	q = bdev_get_queue(bdev);
+	if (blk_queue_discard(q))
+		device->can_discard = 1;
+	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
+	device->writeable = 1;
+	device->work.func = pending_bios_fn;
+	generate_random_uuid(device->uuid);
+	device->devid = BTRFS_DEV_REPLACE_DEVID;
+	spin_lock_init(&device->io_lock);
+	device->generation = 0;
+	device->io_width = root->sectorsize;
+	device->io_align = root->sectorsize;
+	device->sector_size = root->sectorsize;
+	device->total_bytes = i_size_read(bdev->bd_inode);
+	device->disk_total_bytes = device->total_bytes;
+	device->dev_root = fs_info->dev_root;
+	device->bdev = bdev;
+	device->in_fs_metadata = 1;
+	device->is_tgtdev_for_dev_replace = 1;
+	device->mode = FMODE_EXCL;
+	set_blocksize(device->bdev, 4096);
+	device->fs_devices = fs_info->fs_devices;
+	list_add(&device->dev_list, &fs_info->fs_devices->devices);
+	fs_info->fs_devices->num_devices++;
+	fs_info->fs_devices->open_devices++;
+	if (device->can_discard)
+		fs_info->fs_devices->num_can_discard++;
+	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
+	*device_out = device;
+	return ret;
+
+error:
+	blkdev_put(bdev, FMODE_EXCL);
+	return ret;
+}
+
+void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
+					      struct btrfs_device *tgtdev)
+{
+	WARN_ON(fs_info->fs_devices->rw_devices == 0);
+	tgtdev->io_width = fs_info->dev_root->sectorsize;
+	tgtdev->io_align = fs_info->dev_root->sectorsize;
+	tgtdev->sector_size = fs_info->dev_root->sectorsize;
+	tgtdev->dev_root = fs_info->dev_root;
+	tgtdev->in_fs_metadata = 1;
+}
+
 static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
 					struct btrfs_device *device)
 {
@@ -1900,7 +2165,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
 
 	if (!device->writeable)
 		return -EACCES;
-	if (new_size <= device->total_bytes)
+	if (new_size <= device->total_bytes ||
+	    device->is_tgtdev_for_dev_replace)
 		return -EINVAL;
 
 	btrfs_set_super_total_bytes(super_copy, old_total + diff);
@@ -2338,18 +2604,6 @@ static int chunk_profiles_filter(u64 chunk_type,
 	return 1;
 }
 
-static u64 div_factor_fine(u64 num, int factor)
-{
-	if (factor <= 0)
-		return 0;
-	if (factor >= 100)
-		return num;
-
-	num *= factor;
-	do_div(num, 100);
-	return num;
-}
-
 static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
 			      struct btrfs_balance_args *bargs)
 {
@@ -2514,15 +2768,6 @@ static int should_balance_chunk(struct btrfs_root *root,
 	return 1;
 }
 
-static u64 div_factor(u64 num, int factor)
-{
-	if (factor == 10)
-		return num;
-	num *= factor;
-	do_div(num, 10);
-	return num;
-}
-
 static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 {
 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
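The two helpers removed in this hunk and the previous one are not lost: the hunk at the top of this diff adds #include "math.h", which is presumably where div_factor() and div_factor_fine() now live. Their arithmetic is plain integer scaling; the following stand-alone user-space sketch (ordinary C, with do_div() replaced by the / operator) mirrors the two deleted bodies and is only an illustration, not kernel code:

#include <stdint.h>
#include <stdio.h>

/* num scaled by factor/100, factor clamped to [0, 100] (mirrors div_factor_fine) */
static uint64_t div_factor_fine(uint64_t num, int factor)
{
	if (factor <= 0)
		return 0;
	if (factor >= 100)
		return num;
	return num * factor / 100;
}

/* num scaled by factor/10, where factor == 10 means "unchanged" (mirrors div_factor) */
static uint64_t div_factor(uint64_t num, int factor)
{
	if (factor == 10)
		return num;
	return num * factor / 10;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)div_factor_fine(1000, 75));	/* 750 */
	printf("%llu\n", (unsigned long long)div_factor(4096, 1));		/* 409 */
	return 0;
}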
@@ -2550,7 +2795,8 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 		size_to_free = div_factor(old_size, 1);
 		size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
 		if (!device->writeable ||
-		    device->total_bytes - device->bytes_used > size_to_free)
+		    device->total_bytes - device->bytes_used > size_to_free ||
+		    device->is_tgtdev_for_dev_replace)
 			continue;
 
 		ret = btrfs_shrink_device(device, old_size - size_to_free);
@@ -2728,6 +2974,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 	u64 allowed;
 	int mixed = 0;
 	int ret;
+	u64 num_devices;
 
 	if (btrfs_fs_closing(fs_info) ||
 	    atomic_read(&fs_info->balance_pause_req) ||
@@ -2756,10 +3003,17 @@
 		}
 	}
 
+	num_devices = fs_info->fs_devices->num_devices;
+	btrfs_dev_replace_lock(&fs_info->dev_replace);
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
+		BUG_ON(num_devices < 1);
+		num_devices--;
+	}
+	btrfs_dev_replace_unlock(&fs_info->dev_replace);
 	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
-	if (fs_info->fs_devices->num_devices == 1)
+	if (num_devices == 1)
 		allowed |= BTRFS_BLOCK_GROUP_DUP;
-	else if (fs_info->fs_devices->num_devices < 4)
+	else if (num_devices < 4)
 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
 	else
 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
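As in btrfs_rm_device() earlier in this diff, the balance path now reasons about an effective device count that excludes the replace target, since that device adds no redundancy of its own. A trivial stand-alone expression of the adjustment (illustration only, not part of the patch):

#include <stdio.h>

/* The replace target device is not usable for redundancy decisions,
 * so subtract it from the raw count while a replace is running. */
static unsigned long long effective_num_devices(unsigned long long raw,
						int replace_ongoing)
{
	return (replace_ongoing && raw >= 1) ? raw - 1 : raw;
}

int main(void)
{
	printf("%llu\n", effective_num_devices(3, 1));	/* 2 */
	printf("%llu\n", effective_num_devices(3, 0));	/* 3 */
	return 0;
}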
@@ -2902,6 +3156,7 @@ static int balance_kthread(void *data)
 		ret = btrfs_balance(fs_info->balance_ctl, NULL);
 	}
 
+	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 	mutex_unlock(&fs_info->balance_mutex);
 	mutex_unlock(&fs_info->volume_mutex);
 
@@ -2924,6 +3179,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
 		return 0;
 	}
 
+	WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
 	tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
 	if (IS_ERR(tsk))
 		return PTR_ERR(tsk);
@@ -3080,7 +3336,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
 	u64 old_size = device->total_bytes;
 	u64 diff = device->total_bytes - new_size;
 
-	if (new_size >= device->total_bytes)
+	if (device->is_tgtdev_for_dev_replace)
 		return -EINVAL;
 
 	path = btrfs_alloc_path();
@@ -3235,6 +3491,14 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
 	return 0;
 }
 
+struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
+	{ 2, 1, 0, 4, 2, 2 /* raid10 */ },
+	{ 1, 1, 2, 2, 2, 2 /* raid1 */ },
+	{ 1, 2, 1, 1, 1, 2 /* dup */ },
+	{ 1, 1, 0, 2, 1, 1 /* raid0 */ },
+	{ 1, 1, 0, 1, 1, 1 /* single */ },
+};
+
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 			       struct btrfs_root *extent_root,
 			       struct map_lookup **map_ret,
@@ -3264,43 +3528,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	int ndevs;
 	int i;
 	int j;
+	int index;
 
 	BUG_ON(!alloc_profile_is_valid(type, 0));
 
 	if (list_empty(&fs_devices->alloc_list))
 		return -ENOSPC;
 
-	sub_stripes = 1;
-	dev_stripes = 1;
-	devs_increment = 1;
-	ncopies = 1;
-	devs_max = 0;	/* 0 == as many as possible */
-	devs_min = 1;
+	index = __get_raid_index(type);
 
-	/*
-	 * define the properties of each RAID type.
-	 * FIXME: move this to a global table and use it in all RAID
-	 * calculation code
-	 */
-	if (type & (BTRFS_BLOCK_GROUP_DUP)) {
-		dev_stripes = 2;
-		ncopies = 2;
-		devs_max = 1;
-	} else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
-		devs_min = 2;
-	} else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
-		devs_increment = 2;
-		ncopies = 2;
-		devs_max = 2;
-		devs_min = 2;
-	} else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
-		sub_stripes = 2;
-		devs_increment = 2;
-		ncopies = 2;
-		devs_min = 4;
-	} else {
-		devs_max = 1;
-	}
+	sub_stripes = btrfs_raid_array[index].sub_stripes;
+	dev_stripes = btrfs_raid_array[index].dev_stripes;
+	devs_max = btrfs_raid_array[index].devs_max;
+	devs_min = btrfs_raid_array[index].devs_min;
+	devs_increment = btrfs_raid_array[index].devs_increment;
+	ncopies = btrfs_raid_array[index].ncopies;
 
 	if (type & BTRFS_BLOCK_GROUP_DATA) {
 		max_stripe_size = 1024 * 1024 * 1024;
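The btrfs_raid_array table added above replaces the open-coded per-profile branches deleted in the same hunk. Matching the six assignments against those deleted branches gives the column order sub_stripes, dev_stripes, devs_max, devs_min, devs_increment, ncopies. The stand-alone model below illustrates that reading of the table; the struct and enum names are local stand-ins, not the btrfs definitions:

#include <stdio.h>

/* Field order inferred from the assignments in the hunk above. */
struct raid_attr {
	int sub_stripes;	/* sub-stripes per stripe (RAID10) */
	int dev_stripes;	/* stripes placed on one device (DUP) */
	int devs_max;		/* max devices used, 0 == as many as possible */
	int devs_min;		/* minimum devices required */
	int devs_increment;	/* devices are picked in groups of this size */
	int ncopies;		/* how many copies of the data exist */
};

enum raid_index { RAID10, RAID1, DUP, RAID0, SINGLE, NR_RAID_TYPES };

static const struct raid_attr raid_array[NR_RAID_TYPES] = {
	[RAID10] = { 2, 1, 0, 4, 2, 2 },
	[RAID1]  = { 1, 1, 2, 2, 2, 2 },
	[DUP]    = { 1, 2, 1, 1, 1, 2 },
	[RAID0]  = { 1, 1, 0, 2, 1, 1 },
	[SINGLE] = { 1, 1, 0, 1, 1, 1 },
};

int main(void)
{
	const struct raid_attr *a = &raid_array[RAID10];

	/* RAID10 needs at least 4 devices and keeps 2 copies of the data. */
	printf("raid10: devs_min=%d ncopies=%d sub_stripes=%d\n",
	       a->devs_min, a->ncopies, a->sub_stripes);
	return 0;
}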
@@ -3347,13 +3589,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		cur = cur->next;
 
 		if (!device->writeable) {
-			printk(KERN_ERR
+			WARN(1, KERN_ERR
 			       "btrfs: read-only device in alloc_list\n");
-			WARN_ON(1);
 			continue;
 		}
 
-		if (!device->in_fs_metadata)
+		if (!device->in_fs_metadata ||
+		    device->is_tgtdev_for_dev_replace)
 			continue;
 
 		if (device->total_bytes > device->bytes_used)
@@ -3382,6 +3624,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 		devices_info[ndevs].total_avail = total_avail;
 		devices_info[ndevs].dev = device;
 		++ndevs;
+		WARN_ON(ndevs > fs_devices->rw_devices);
 	}
 
 	/*
@@ -3740,8 +3983,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
 	}
 }
 
-int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
+int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
 {
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct extent_map *em;
 	struct map_lookup *map;
 	struct extent_map_tree *em_tree = &map_tree->map_tree;
@@ -3761,32 +4005,60 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
 	else
 		ret = 1;
 	free_extent_map(em);
+
+	btrfs_dev_replace_lock(&fs_info->dev_replace);
+	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
+		ret++;
+	btrfs_dev_replace_unlock(&fs_info->dev_replace);
+
 	return ret;
 }
 
-static int find_live_mirror(struct map_lookup *map, int first, int num,
-			    int optimal)
+static int find_live_mirror(struct btrfs_fs_info *fs_info,
+			    struct map_lookup *map, int first, int num,
+			    int optimal, int dev_replace_is_ongoing)
 {
 	int i;
-	if (map->stripes[optimal].dev->bdev)
-		return optimal;
-	for (i = first; i < first + num; i++) {
-		if (map->stripes[i].dev->bdev)
-			return i;
+	int tolerance;
+	struct btrfs_device *srcdev;
+
+	if (dev_replace_is_ongoing &&
+	    fs_info->dev_replace.cont_reading_from_srcdev_mode ==
+	     BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
+		srcdev = fs_info->dev_replace.srcdev;
+	else
+		srcdev = NULL;
+
+	/*
+	 * try to avoid the drive that is the source drive for a
+	 * dev-replace procedure, only choose it if no other non-missing
+	 * mirror is available
+	 */
+	for (tolerance = 0; tolerance < 2; tolerance++) {
+		if (map->stripes[optimal].dev->bdev &&
+		    (tolerance || map->stripes[optimal].dev != srcdev))
+			return optimal;
+		for (i = first; i < first + num; i++) {
+			if (map->stripes[i].dev->bdev &&
+			    (tolerance || map->stripes[i].dev != srcdev))
+				return i;
+		}
 	}
+
 	/* we couldn't find one that doesn't fail. Just return something
 	 * and the io error handling code will clean up eventually
 	 */
 	return optimal;
 }
 
-static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
+static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 			     u64 logical, u64 *length,
 			     struct btrfs_bio **bbio_ret,
 			     int mirror_num)
 {
 	struct extent_map *em;
 	struct map_lookup *map;
+	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
 	struct extent_map_tree *em_tree = &map_tree->map_tree;
 	u64 offset;
 	u64 stripe_offset;
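The rewritten find_live_mirror() above turns mirror selection into a two-pass search: pass one (tolerance == 0) refuses the dev-replace source drive, pass two accepts it if nothing else has a usable bdev. The following user-space model of that loop uses simplified stand-in structs rather than the btrfs types, and is only an illustration:

#include <stdio.h>

struct fake_dev { int has_bdev; int is_replace_src; };
struct fake_stripe { struct fake_dev *dev; };

/* Prefer 'optimal', then scan [first, first+num); avoid the replace source
 * drive on the first pass and only fall back to it on the second pass. */
static int pick_live_mirror(struct fake_stripe *stripes, int first, int num,
			    int optimal)
{
	int tolerance, i;

	for (tolerance = 0; tolerance < 2; tolerance++) {
		if (stripes[optimal].dev->has_bdev &&
		    (tolerance || !stripes[optimal].dev->is_replace_src))
			return optimal;
		for (i = first; i < first + num; i++) {
			if (stripes[i].dev->has_bdev &&
			    (tolerance || !stripes[i].dev->is_replace_src))
				return i;
		}
	}
	/* nothing usable found; the caller's I/O error handling deals with it */
	return optimal;
}

int main(void)
{
	struct fake_dev src = { 1, 1 }, other = { 1, 0 };
	struct fake_stripe stripes[2] = { { &src }, { &other } };

	/* mirror 0 is the replace source, so mirror 1 wins on the first pass */
	printf("chosen mirror: %d\n", pick_live_mirror(stripes, 0, 2, 0));
	return 0;
}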
@@ -3800,6 +4072,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 	int num_stripes;
 	int max_errors = 0;
 	struct btrfs_bio *bbio = NULL;
+	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
+	int dev_replace_is_ongoing = 0;
+	int num_alloc_stripes;
+	int patch_the_first_stripe_for_dev_replace = 0;
+	u64 physical_to_patch_in_first_stripe = 0;
 
 	read_lock(&em_tree->lock);
 	em = lookup_extent_mapping(em_tree, logical, *length);
@@ -3816,9 +4093,6 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 	map = (struct map_lookup *)em->bdev;
 	offset = logical - em->start;
 
-	if (mirror_num > map->num_stripes)
-		mirror_num = 0;
-
 	stripe_nr = offset;
 	/*
 	 * stripe_nr counts the total number of stripes we have to stride
@@ -3845,6 +4119,93 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 	if (!bbio_ret)
 		goto out;
 
+	btrfs_dev_replace_lock(dev_replace);
+	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
+	if (!dev_replace_is_ongoing)
+		btrfs_dev_replace_unlock(dev_replace);
+
+	if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
+	    !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
+	    dev_replace->tgtdev != NULL) {
+		/*
+		 * in dev-replace case, for repair case (that's the only
+		 * case where the mirror is selected explicitly when
+		 * calling btrfs_map_block), blocks left of the left cursor
+		 * can also be read from the target drive.
+		 * For REQ_GET_READ_MIRRORS, the target drive is added as
+		 * the last one to the array of stripes. For READ, it also
+		 * needs to be supported using the same mirror number.
+		 * If the requested block is not left of the left cursor,
+		 * EIO is returned. This can happen because btrfs_num_copies()
+		 * returns one more in the dev-replace case.
+		 */
+		u64 tmp_length = *length;
+		struct btrfs_bio *tmp_bbio = NULL;
+		int tmp_num_stripes;
+		u64 srcdev_devid = dev_replace->srcdev->devid;
+		int index_srcdev = 0;
+		int found = 0;
+		u64 physical_of_found = 0;
+
+		ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
+			     logical, &tmp_length, &tmp_bbio, 0);
+		if (ret) {
+			WARN_ON(tmp_bbio != NULL);
+			goto out;
+		}
+
+		tmp_num_stripes = tmp_bbio->num_stripes;
+		if (mirror_num > tmp_num_stripes) {
+			/*
+			 * REQ_GET_READ_MIRRORS does not contain this
+			 * mirror, that means that the requested area
+			 * is not left of the left cursor
+			 */
+			ret = -EIO;
+			kfree(tmp_bbio);
+			goto out;
+		}
+
+		/*
+		 * process the rest of the function using the mirror_num
+		 * of the source drive. Therefore look it up first.
+		 * At the end, patch the device pointer to the one of the
+		 * target drive.
+		 */
+		for (i = 0; i < tmp_num_stripes; i++) {
+			if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
+				/*
+				 * In case of DUP, in order to keep it
+				 * simple, only add the mirror with the
+				 * lowest physical address
+				 */
+				if (found &&
+				    physical_of_found <=
+				     tmp_bbio->stripes[i].physical)
+					continue;
+				index_srcdev = i;
+				found = 1;
+				physical_of_found =
+					tmp_bbio->stripes[i].physical;
+			}
+		}
+
+		if (found) {
+			mirror_num = index_srcdev + 1;
+			patch_the_first_stripe_for_dev_replace = 1;
+			physical_to_patch_in_first_stripe = physical_of_found;
+		} else {
+			WARN_ON(1);
+			ret = -EIO;
+			kfree(tmp_bbio);
+			goto out;
+		}
+
+		kfree(tmp_bbio);
+	} else if (mirror_num > map->num_stripes) {
+		mirror_num = 0;
+	}
+
 	num_stripes = 1;
 	stripe_index = 0;
 	stripe_nr_orig = stripe_nr;
@@ -3859,19 +4220,20 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 					stripe_nr_end - stripe_nr_orig);
 		stripe_index = do_div(stripe_nr, map->num_stripes);
 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
-		if (rw & (REQ_WRITE | REQ_DISCARD))
+		if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))
 			num_stripes = map->num_stripes;
 		else if (mirror_num)
 			stripe_index = mirror_num - 1;
 		else {
-			stripe_index = find_live_mirror(map, 0,
+			stripe_index = find_live_mirror(fs_info, map, 0,
 					    map->num_stripes,
-					    current->pid % map->num_stripes);
+					    current->pid % map->num_stripes,
+					    dev_replace_is_ongoing);
 			mirror_num = stripe_index + 1;
 		}
 
 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
-		if (rw & (REQ_WRITE | REQ_DISCARD)) {
+		if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) {
 			num_stripes = map->num_stripes;
 		} else if (mirror_num) {
 			stripe_index = mirror_num - 1;
@@ -3885,7 +4247,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 		stripe_index = do_div(stripe_nr, factor);
 		stripe_index *= map->sub_stripes;
 
-		if (rw & REQ_WRITE)
+		if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
 			num_stripes = map->sub_stripes;
 		else if (rw & REQ_DISCARD)
 			num_stripes = min_t(u64, map->sub_stripes *
@@ -3895,9 +4257,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 			stripe_index += mirror_num - 1;
 		else {
 			int old_stripe_index = stripe_index;
-			stripe_index = find_live_mirror(map, stripe_index,
+			stripe_index = find_live_mirror(fs_info, map,
+					      stripe_index,
 					      map->sub_stripes, stripe_index +
-					      current->pid % map->sub_stripes);
+					      current->pid % map->sub_stripes,
+					      dev_replace_is_ongoing);
 			mirror_num = stripe_index - old_stripe_index + 1;
 		}
 	} else {
@@ -3911,7 +4275,14 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
 	}
 	BUG_ON(stripe_index >= map->num_stripes);
 
-	bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS);
+	num_alloc_stripes = num_stripes;
+	if (dev_replace_is_ongoing) {
+		if (rw & (REQ_WRITE | REQ_DISCARD))
+			num_alloc_stripes <<= 1;
+		if (rw & REQ_GET_READ_MIRRORS)
+			num_alloc_stripes++;
+	}
+	bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
 	if (!bbio) {
 		ret = -ENOMEM;
 		goto out;
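The allocation above is sized for the worst case while a replace runs: writes and discards may later be duplicated onto the target drive (hence twice the stripes), and REQ_GET_READ_MIRRORS may get the target appended as one extra mirror. A small stand-alone check of that sizing rule; the FAKE_* flags are placeholders, not the kernel's REQ_* bits:

#include <stdio.h>

#define FAKE_WRITE		(1 << 0)	/* placeholder for REQ_WRITE */
#define FAKE_DISCARD		(1 << 1)	/* placeholder for REQ_DISCARD */
#define FAKE_GET_READ_MIRRORS	(1 << 2)	/* placeholder for REQ_GET_READ_MIRRORS */

static int alloc_stripes(int num_stripes, int rw, int replace_ongoing)
{
	int num_alloc_stripes = num_stripes;

	if (replace_ongoing) {
		if (rw & (FAKE_WRITE | FAKE_DISCARD))
			num_alloc_stripes <<= 1;	/* room for duplicated writes */
		if (rw & FAKE_GET_READ_MIRRORS)
			num_alloc_stripes++;		/* room for the target as extra mirror */
	}
	return num_alloc_stripes;
}

int main(void)
{
	printf("%d\n", alloc_stripes(2, FAKE_WRITE, 1));		/* 4 */
	printf("%d\n", alloc_stripes(2, FAKE_GET_READ_MIRRORS, 1));	/* 3 */
	printf("%d\n", alloc_stripes(2, FAKE_WRITE, 0));		/* 2 */
	return 0;
}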
@@ -3998,7 +4369,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
3998 | } | 4369 | } |
3999 | } | 4370 | } |
4000 | 4371 | ||
4001 | if (rw & REQ_WRITE) { | 4372 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) { |
4002 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 4373 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
4003 | BTRFS_BLOCK_GROUP_RAID10 | | 4374 | BTRFS_BLOCK_GROUP_RAID10 | |
4004 | BTRFS_BLOCK_GROUP_DUP)) { | 4375 | BTRFS_BLOCK_GROUP_DUP)) { |
@@ -4006,20 +4377,115 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
4006 | } | 4377 | } |
4007 | } | 4378 | } |
4008 | 4379 | ||
4380 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
4381 | dev_replace->tgtdev != NULL) { | ||
4382 | int index_where_to_add; | ||
4383 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
4384 | |||
4385 | /* | ||
4386 | * duplicate the write operations while the dev replace | ||
4387 | * procedure is running. Since the copying of the old disk | ||
4388 | * to the new disk takes place at run time while the | ||
4389 | * filesystem is mounted writable, the regular write | ||
4390 | * operations to the old disk have to be duplicated to go | ||
4391 | * to the new disk as well. | ||
4392 | * Note that device->missing is handled by the caller, and | ||
4393 | * that the write to the old disk is already set up in the | ||
4394 | * stripes array. | ||
4395 | */ | ||
4396 | index_where_to_add = num_stripes; | ||
4397 | for (i = 0; i < num_stripes; i++) { | ||
4398 | if (bbio->stripes[i].dev->devid == srcdev_devid) { | ||
4399 | /* write to new disk, too */ | ||
4400 | struct btrfs_bio_stripe *new = | ||
4401 | bbio->stripes + index_where_to_add; | ||
4402 | struct btrfs_bio_stripe *old = | ||
4403 | bbio->stripes + i; | ||
4404 | |||
4405 | new->physical = old->physical; | ||
4406 | new->length = old->length; | ||
4407 | new->dev = dev_replace->tgtdev; | ||
4408 | index_where_to_add++; | ||
4409 | max_errors++; | ||
4410 | } | ||
4411 | } | ||
4412 | num_stripes = index_where_to_add; | ||
4413 | } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) && | ||
4414 | dev_replace->tgtdev != NULL) { | ||
4415 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
4416 | int index_srcdev = 0; | ||
4417 | int found = 0; | ||
4418 | u64 physical_of_found = 0; | ||
4419 | |||
4420 | /* | ||
4421 | * During the dev-replace procedure, the target drive can | ||
4422 | * also be used to read data in case it is needed to repair | ||
4423 | * a corrupt block elsewhere. This is possible if the | ||
4424 | * requested area is left of the left cursor. In this area, | ||
4425 | * the target drive is a full copy of the source drive. | ||
4426 | */ | ||
4427 | for (i = 0; i < num_stripes; i++) { | ||
4428 | if (bbio->stripes[i].dev->devid == srcdev_devid) { | ||
4429 | /* | ||
4430 | * In case of DUP, in order to keep it | ||
4431 | * simple, only add the mirror with the | ||
4432 | * lowest physical address | ||
4433 | */ | ||
4434 | if (found && | ||
4435 | physical_of_found <= | ||
4436 | bbio->stripes[i].physical) | ||
4437 | continue; | ||
4438 | index_srcdev = i; | ||
4439 | found = 1; | ||
4440 | physical_of_found = bbio->stripes[i].physical; | ||
4441 | } | ||
4442 | } | ||
4443 | if (found) { | ||
4444 | u64 length = map->stripe_len; | ||
4445 | |||
4446 | if (physical_of_found + length <= | ||
4447 | dev_replace->cursor_left) { | ||
4448 | struct btrfs_bio_stripe *tgtdev_stripe = | ||
4449 | bbio->stripes + num_stripes; | ||
4450 | |||
4451 | tgtdev_stripe->physical = physical_of_found; | ||
4452 | tgtdev_stripe->length = | ||
4453 | bbio->stripes[index_srcdev].length; | ||
4454 | tgtdev_stripe->dev = dev_replace->tgtdev; | ||
4455 | |||
4456 | num_stripes++; | ||
4457 | } | ||
4458 | } | ||
4459 | } | ||
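The two dev-replace branches added above do the heavy lifting in __btrfs_map_block(): writes that land on the source device get an extra stripe aimed at dev_replace->tgtdev (bumping max_errors accordingly), and REQ_GET_READ_MIRRORS lookups may add the target as one more read mirror when the requested range lies below cursor_left. The following is a minimal user-space sketch of just the write-duplication loop; struct stripe, the devids and the helper name are simplified stand-ins for illustration, not the kernel types.

#include <stdio.h>

/* simplified stand-in for btrfs_bio_stripe plus the dev-replace state */
struct stripe { unsigned long long devid; unsigned long long physical; };

/*
 * Append a duplicate stripe aimed at the replacement device for every
 * stripe that targets the source device, mirroring the loop in the patch.
 * Returns the new stripe count; max_errors grows by one per duplicate.
 */
static int duplicate_stripes_for_replace(struct stripe *stripes, int num_stripes,
					 unsigned long long srcdev_devid,
					 unsigned long long tgtdev_devid,
					 int *max_errors)
{
	int index_where_to_add = num_stripes;
	int i;

	for (i = 0; i < num_stripes; i++) {
		if (stripes[i].devid == srcdev_devid) {
			stripes[index_where_to_add] = stripes[i];
			stripes[index_where_to_add].devid = tgtdev_devid;
			index_where_to_add++;
			(*max_errors)++;
		}
	}
	return index_where_to_add;
}

int main(void)
{
	/* RAID1 example: one copy on devid 1 (being replaced), one on devid 2 */
	struct stripe stripes[4] = { { 1, 1048576 }, { 2, 1048576 } };
	int max_errors = 1;
	int n = duplicate_stripes_for_replace(stripes, 2, 1, 100, &max_errors);

	printf("num_stripes=%d max_errors=%d last devid=%llu\n",
	       n, max_errors, stripes[n - 1].devid);
	return 0;
}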
4460 | |||
4009 | *bbio_ret = bbio; | 4461 | *bbio_ret = bbio; |
4010 | bbio->num_stripes = num_stripes; | 4462 | bbio->num_stripes = num_stripes; |
4011 | bbio->max_errors = max_errors; | 4463 | bbio->max_errors = max_errors; |
4012 | bbio->mirror_num = mirror_num; | 4464 | bbio->mirror_num = mirror_num; |
4465 | |||
4466 | /* | ||
4467 | * this is the case that REQ_READ && dev_replace_is_ongoing && | ||
4468 | * mirror_num == num_stripes + 1 && dev_replace target drive is | ||
4469 | * available as a mirror | ||
4470 | */ | ||
4471 | if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) { | ||
4472 | WARN_ON(num_stripes > 1); | ||
4473 | bbio->stripes[0].dev = dev_replace->tgtdev; | ||
4474 | bbio->stripes[0].physical = physical_to_patch_in_first_stripe; | ||
4475 | bbio->mirror_num = map->num_stripes + 1; | ||
4476 | } | ||
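The patch_the_first_stripe_for_dev_replace fixup above handles the read path where the caller explicitly asked for the extra mirror (mirror_num == num_stripes + 1): the single returned stripe is redirected to the replacement target at the physical offset computed earlier. A compact sketch of that fixup, reusing the same simplified struct stripe as in the previous example (names and shapes are illustrative only):

/* simplified stand-in, same shape as in the sketch above */
struct stripe { unsigned long long devid; unsigned long long physical; };

/*
 * When the "extra" mirror was requested, point the single returned stripe
 * at the replacement target and report mirror_num as num_stripes + 1.
 */
static void patch_first_stripe(struct stripe *stripes, int num_stripes,
			       unsigned long long tgtdev_devid,
			       unsigned long long physical_to_patch,
			       int map_num_stripes, int *mirror_num)
{
	if (num_stripes > 0) {
		stripes[0].devid = tgtdev_devid;
		stripes[0].physical = physical_to_patch;
		*mirror_num = map_num_stripes + 1;
	}
}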
4013 | out: | 4477 | out: |
4478 | if (dev_replace_is_ongoing) | ||
4479 | btrfs_dev_replace_unlock(dev_replace); | ||
4014 | free_extent_map(em); | 4480 | free_extent_map(em); |
4015 | return ret; | 4481 | return ret; |
4016 | } | 4482 | } |
4017 | 4483 | ||
4018 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 4484 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
4019 | u64 logical, u64 *length, | 4485 | u64 logical, u64 *length, |
4020 | struct btrfs_bio **bbio_ret, int mirror_num) | 4486 | struct btrfs_bio **bbio_ret, int mirror_num) |
4021 | { | 4487 | { |
4022 | return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret, | 4488 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, |
4023 | mirror_num); | 4489 | mirror_num); |
4024 | } | 4490 | } |
4025 | 4491 | ||
@@ -4238,10 +4704,116 @@ static noinline void schedule_bio(struct btrfs_root *root, | |||
4238 | &device->work); | 4704 | &device->work); |
4239 | } | 4705 | } |
4240 | 4706 | ||
4707 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | ||
4708 | sector_t sector) | ||
4709 | { | ||
4710 | struct bio_vec *prev; | ||
4711 | struct request_queue *q = bdev_get_queue(bdev); | ||
4712 | unsigned short max_sectors = queue_max_sectors(q); | ||
4713 | struct bvec_merge_data bvm = { | ||
4714 | .bi_bdev = bdev, | ||
4715 | .bi_sector = sector, | ||
4716 | .bi_rw = bio->bi_rw, | ||
4717 | }; | ||
4718 | |||
4719 | if (bio->bi_vcnt == 0) { | ||
4720 | WARN_ON(1); | ||
4721 | return 1; | ||
4722 | } | ||
4723 | |||
4724 | prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; | ||
4725 | if ((bio->bi_size >> 9) > max_sectors) | ||
4726 | return 0; | ||
4727 | |||
4728 | if (!q->merge_bvec_fn) | ||
4729 | return 1; | ||
4730 | |||
4731 | bvm.bi_size = bio->bi_size - prev->bv_len; | ||
4732 | if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) | ||
4733 | return 0; | ||
4734 | return 1; | ||
4735 | } | ||
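bio_size_ok() answers a simple question before a stripe is submitted: does this bio, at this sector on this block device, still fit what the queue will accept? It compares the bio's payload (in 512-byte sectors) against queue_max_sectors() and, if the driver provides a merge_bvec_fn, asks whether the last segment would still merge. The sketch below covers only the size comparison; the merge_bvec_fn consultation is omitted and the numbers are hypothetical.

#include <stdio.h>

/*
 * A bio "fits" if its total payload, expressed in 512-byte sectors,
 * does not exceed the queue's max_sectors limit.
 */
static int bio_fits_queue(unsigned int bio_bytes, unsigned short max_sectors)
{
	return (bio_bytes >> 9) <= max_sectors;
}

int main(void)
{
	/* e.g. a 1 MiB bio against a hypothetical 1024-sector (512 KiB) limit */
	printf("1 MiB vs 1024 sectors: %s\n",
	       bio_fits_queue(1024 * 1024, 1024) ? "ok" : "too big");
	printf("256 KiB vs 1024 sectors: %s\n",
	       bio_fits_queue(256 * 1024, 1024) ? "ok" : "too big");
	return 0;
}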
4736 | |||
4737 | static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | ||
4738 | struct bio *bio, u64 physical, int dev_nr, | ||
4739 | int rw, int async) | ||
4740 | { | ||
4741 | struct btrfs_device *dev = bbio->stripes[dev_nr].dev; | ||
4742 | |||
4743 | bio->bi_private = bbio; | ||
4744 | bio->bi_private = merge_stripe_index_into_bio_private( | ||
4745 | bio->bi_private, (unsigned int)dev_nr); | ||
4746 | bio->bi_end_io = btrfs_end_bio; | ||
4747 | bio->bi_sector = physical >> 9; | ||
4748 | #ifdef DEBUG | ||
4749 | { | ||
4750 | struct rcu_string *name; | ||
4751 | |||
4752 | rcu_read_lock(); | ||
4753 | name = rcu_dereference(dev->name); | ||
4754 | pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " | ||
4755 | "(%s id %llu), size=%u\n", rw, | ||
4756 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | ||
4757 | name->str, dev->devid, bio->bi_size); | ||
4758 | rcu_read_unlock(); | ||
4759 | } | ||
4760 | #endif | ||
4761 | bio->bi_bdev = dev->bdev; | ||
4762 | if (async) | ||
4763 | schedule_bio(root, dev, rw, bio); | ||
4764 | else | ||
4765 | btrfsic_submit_bio(rw, bio); | ||
4766 | } | ||
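submit_stripe_bio() stashes the stripe index into bi_private via merge_stripe_index_into_bio_private(), so the completion handler can recover both the bbio pointer and which stripe finished from a single field. The general technique is pointer tagging: because the bbio allocation is aligned, its low bits are free to carry a small integer. The helper names and the 3-bit width below are assumptions for illustration, not the kernel's definitions.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define STRIPE_INDEX_BITS 3
#define STRIPE_INDEX_MASK ((1UL << STRIPE_INDEX_BITS) - 1)

/* pack a small stripe index into the low bits of an aligned pointer */
static void *pack_stripe_index(void *ptr, unsigned int index)
{
	assert(((uintptr_t)ptr & STRIPE_INDEX_MASK) == 0);
	assert(index <= STRIPE_INDEX_MASK);
	return (void *)((uintptr_t)ptr | index);
}

static void *unpack_ptr(void *tagged)
{
	return (void *)((uintptr_t)tagged & ~(uintptr_t)STRIPE_INDEX_MASK);
}

static unsigned int unpack_stripe_index(void *tagged)
{
	return (uintptr_t)tagged & STRIPE_INDEX_MASK;
}

int main(void)
{
	void *bbio = aligned_alloc(8, 64);	/* stand-in for a btrfs_bio */
	void *tagged = pack_stripe_index(bbio, 5);

	printf("pointer recovered: %d, stripe index: %u\n",
	       unpack_ptr(tagged) == bbio, unpack_stripe_index(tagged));
	free(bbio);
	return 0;
}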
4767 | |||
4768 | static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | ||
4769 | struct bio *first_bio, struct btrfs_device *dev, | ||
4770 | int dev_nr, int rw, int async) | ||
4771 | { | ||
4772 | struct bio_vec *bvec = first_bio->bi_io_vec; | ||
4773 | struct bio *bio; | ||
4774 | int nr_vecs = bio_get_nr_vecs(dev->bdev); | ||
4775 | u64 physical = bbio->stripes[dev_nr].physical; | ||
4776 | |||
4777 | again: | ||
4778 | bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS); | ||
4779 | if (!bio) | ||
4780 | return -ENOMEM; | ||
4781 | |||
4782 | while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { | ||
4783 | if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
4784 | bvec->bv_offset) < bvec->bv_len) { | ||
4785 | u64 len = bio->bi_size; | ||
4786 | |||
4787 | atomic_inc(&bbio->stripes_pending); | ||
4788 | submit_stripe_bio(root, bbio, bio, physical, dev_nr, | ||
4789 | rw, async); | ||
4790 | physical += len; | ||
4791 | goto again; | ||
4792 | } | ||
4793 | bvec++; | ||
4794 | } | ||
4795 | |||
4796 | submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async); | ||
4797 | return 0; | ||
4798 | } | ||
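breakup_stripe_bio() walks the pages of the original bio and, whenever bio_add_page() refuses another page, submits what has been accumulated so far, advances the physical offset by the submitted length, and retries the same page in a fresh bio. The user-space sketch below models that loop with byte-length segments and a hypothetical per-batch limit; it assumes each individual segment fits in an empty batch, just as bio_add_page() always accepts at least one page into an empty bio.

#include <stdio.h>

#define MAX_BATCH_BYTES (128 * 1024)	/* hypothetical per-bio limit */

/* stand-in for submitting one bio covering [physical, physical + len) */
static void submit_batch(unsigned long long physical, unsigned int len, int nsegs)
{
	printf("submit: physical=%llu len=%u segs=%d\n", physical, len, nsegs);
}

static void breakup(const unsigned int *seg_len, int nsegs, unsigned long long physical)
{
	unsigned int batch_len = 0;
	int batch_segs = 0;
	int i = 0;

	while (i < nsegs) {
		if (batch_len + seg_len[i] > MAX_BATCH_BYTES) {
			/* current batch is full: submit and retry this segment */
			submit_batch(physical, batch_len, batch_segs);
			physical += batch_len;
			batch_len = 0;
			batch_segs = 0;
			continue;
		}
		batch_len += seg_len[i];
		batch_segs++;
		i++;
	}
	submit_batch(physical, batch_len, batch_segs);	/* final, possibly partial batch */
}

int main(void)
{
	unsigned int segs[] = { 65536, 65536, 65536, 4096 };

	breakup(segs, 4, 0);
	return 0;
}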
4799 | |||
4800 | static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) | ||
4801 | { | ||
4802 | atomic_inc(&bbio->error); | ||
4803 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | ||
4804 | bio->bi_private = bbio->private; | ||
4805 | bio->bi_end_io = bbio->end_io; | ||
4806 | bio->bi_bdev = (struct block_device *) | ||
4807 | (unsigned long)bbio->mirror_num; | ||
4808 | bio->bi_sector = logical >> 9; | ||
4809 | kfree(bbio); | ||
4810 | bio_endio(bio, -EIO); | ||
4811 | } | ||
4812 | } | ||
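bbio_error() records the failure and then relies on the same completion accounting as the normal end_io path: every stripe, successful or not, decrements stripes_pending, and whichever caller takes the counter to zero restores the original bio's fields and ends it. A small C11-atomics sketch of that pattern (the struct and function names here are stand-ins, not the kernel's):

#include <stdatomic.h>
#include <stdio.h>

struct bbio_sketch {
	atomic_int stripes_pending;
	atomic_int error;
};

/*
 * Every stripe drops stripes_pending once; the caller that sees the old
 * value 1 was the last one and owns the final completion.
 */
static void stripe_done(struct bbio_sketch *bbio, int failed)
{
	if (failed)
		atomic_fetch_add(&bbio->error, 1);
	if (atomic_fetch_sub(&bbio->stripes_pending, 1) == 1)
		printf("complete original bio, errors=%d\n",
		       atomic_load(&bbio->error));
}

int main(void)
{
	struct bbio_sketch bbio = { 3, 0 };

	stripe_done(&bbio, 0);
	stripe_done(&bbio, 1);	/* e.g. a missing or read-only device */
	stripe_done(&bbio, 0);	/* last one runs the completion */
	return 0;
}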
4813 | |||
4241 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | 4814 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, |
4242 | int mirror_num, int async_submit) | 4815 | int mirror_num, int async_submit) |
4243 | { | 4816 | { |
4244 | struct btrfs_mapping_tree *map_tree; | ||
4245 | struct btrfs_device *dev; | 4817 | struct btrfs_device *dev; |
4246 | struct bio *first_bio = bio; | 4818 | struct bio *first_bio = bio; |
4247 | u64 logical = (u64)bio->bi_sector << 9; | 4819 | u64 logical = (u64)bio->bi_sector << 9; |
@@ -4253,12 +4825,11 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
4253 | struct btrfs_bio *bbio = NULL; | 4825 | struct btrfs_bio *bbio = NULL; |
4254 | 4826 | ||
4255 | length = bio->bi_size; | 4827 | length = bio->bi_size; |
4256 | map_tree = &root->fs_info->mapping_tree; | ||
4257 | map_length = length; | 4828 | map_length = length; |
4258 | 4829 | ||
4259 | ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, | 4830 | ret = btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
4260 | mirror_num); | 4831 | mirror_num); |
4261 | if (ret) /* -ENOMEM */ | 4832 | if (ret) |
4262 | return ret; | 4833 | return ret; |
4263 | 4834 | ||
4264 | total_devs = bbio->num_stripes; | 4835 | total_devs = bbio->num_stripes; |
@@ -4276,52 +4847,48 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
4276 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 4847 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
4277 | 4848 | ||
4278 | while (dev_nr < total_devs) { | 4849 | while (dev_nr < total_devs) { |
4850 | dev = bbio->stripes[dev_nr].dev; | ||
4851 | if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { | ||
4852 | bbio_error(bbio, first_bio, logical); | ||
4853 | dev_nr++; | ||
4854 | continue; | ||
4855 | } | ||
4856 | |||
4857 | /* | ||
4858 | * Check and see if we're ok with this bio based on its size | ||
4859 | * and offset with the given device. | ||
4860 | */ | ||
4861 | if (!bio_size_ok(dev->bdev, first_bio, | ||
4862 | bbio->stripes[dev_nr].physical >> 9)) { | ||
4863 | ret = breakup_stripe_bio(root, bbio, first_bio, dev, | ||
4864 | dev_nr, rw, async_submit); | ||
4865 | BUG_ON(ret); | ||
4866 | dev_nr++; | ||
4867 | continue; | ||
4868 | } | ||
4869 | |||
4279 | if (dev_nr < total_devs - 1) { | 4870 | if (dev_nr < total_devs - 1) { |
4280 | bio = bio_clone(first_bio, GFP_NOFS); | 4871 | bio = bio_clone(first_bio, GFP_NOFS); |
4281 | BUG_ON(!bio); /* -ENOMEM */ | 4872 | BUG_ON(!bio); /* -ENOMEM */ |
4282 | } else { | 4873 | } else { |
4283 | bio = first_bio; | 4874 | bio = first_bio; |
4284 | } | 4875 | } |
4285 | bio->bi_private = bbio; | 4876 | |
4286 | bio->bi_private = merge_stripe_index_into_bio_private( | 4877 | submit_stripe_bio(root, bbio, bio, |
4287 | bio->bi_private, (unsigned int)dev_nr); | 4878 | bbio->stripes[dev_nr].physical, dev_nr, rw, |
4288 | bio->bi_end_io = btrfs_end_bio; | 4879 | async_submit); |
4289 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | ||
4290 | dev = bbio->stripes[dev_nr].dev; | ||
4291 | if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { | ||
4292 | #ifdef DEBUG | ||
4293 | struct rcu_string *name; | ||
4294 | |||
4295 | rcu_read_lock(); | ||
4296 | name = rcu_dereference(dev->name); | ||
4297 | pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " | ||
4298 | "(%s id %llu), size=%u\n", rw, | ||
4299 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | ||
4300 | name->str, dev->devid, bio->bi_size); | ||
4301 | rcu_read_unlock(); | ||
4302 | #endif | ||
4303 | bio->bi_bdev = dev->bdev; | ||
4304 | if (async_submit) | ||
4305 | schedule_bio(root, dev, rw, bio); | ||
4306 | else | ||
4307 | btrfsic_submit_bio(rw, bio); | ||
4308 | } else { | ||
4309 | bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; | ||
4310 | bio->bi_sector = logical >> 9; | ||
4311 | bio_endio(bio, -EIO); | ||
4312 | } | ||
4313 | dev_nr++; | 4880 | dev_nr++; |
4314 | } | 4881 | } |
4315 | return 0; | 4882 | return 0; |
4316 | } | 4883 | } |
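The reworked submission loop in btrfs_map_bio() now handles missing or read-only devices via bbio_error() and oversized bios via breakup_stripe_bio(), but it keeps one long-standing pattern: every stripe except the last gets a clone of the incoming bio, and the last stripe reuses the original so no clone is wasted. The sketch below shows only that fan-out idea, with strings standing in for bios; it is not the kernel API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* stand-ins: a "bio" is just a string, a clone is a copy of it */
static char *clone_bio(const char *bio)
{
	return strdup(bio);
}

static void submit(const char *bio, int stripe)
{
	printf("stripe %d -> %s\n", stripe, bio);
}

int main(void)
{
	char first_bio[] = "original-bio";
	int total_devs = 3;
	int dev_nr;

	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
		char *bio;

		if (dev_nr < total_devs - 1)
			bio = clone_bio(first_bio);	/* copies for the extra mirrors */
		else
			bio = first_bio;		/* last stripe keeps the original */

		submit(bio, dev_nr);
		if (bio != first_bio)
			free(bio);
	}
	return 0;
}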
4317 | 4884 | ||
4318 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | 4885 | struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, |
4319 | u8 *uuid, u8 *fsid) | 4886 | u8 *uuid, u8 *fsid) |
4320 | { | 4887 | { |
4321 | struct btrfs_device *device; | 4888 | struct btrfs_device *device; |
4322 | struct btrfs_fs_devices *cur_devices; | 4889 | struct btrfs_fs_devices *cur_devices; |
4323 | 4890 | ||
4324 | cur_devices = root->fs_info->fs_devices; | 4891 | cur_devices = fs_info->fs_devices; |
4325 | while (cur_devices) { | 4892 | while (cur_devices) { |
4326 | if (!fsid || | 4893 | if (!fsid || |
4327 | !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { | 4894 | !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { |
@@ -4402,6 +4969,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
4402 | em->bdev = (struct block_device *)map; | 4969 | em->bdev = (struct block_device *)map; |
4403 | em->start = logical; | 4970 | em->start = logical; |
4404 | em->len = length; | 4971 | em->len = length; |
4972 | em->orig_start = 0; | ||
4405 | em->block_start = 0; | 4973 | em->block_start = 0; |
4406 | em->block_len = em->len; | 4974 | em->block_len = em->len; |
4407 | 4975 | ||
@@ -4419,8 +4987,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
4419 | read_extent_buffer(leaf, uuid, (unsigned long) | 4987 | read_extent_buffer(leaf, uuid, (unsigned long) |
4420 | btrfs_stripe_dev_uuid_nr(chunk, i), | 4988 | btrfs_stripe_dev_uuid_nr(chunk, i), |
4421 | BTRFS_UUID_SIZE); | 4989 | BTRFS_UUID_SIZE); |
4422 | map->stripes[i].dev = btrfs_find_device(root, devid, uuid, | 4990 | map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, |
4423 | NULL); | 4991 | uuid, NULL); |
4424 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { | 4992 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { |
4425 | kfree(map); | 4993 | kfree(map); |
4426 | free_extent_map(em); | 4994 | free_extent_map(em); |
@@ -4461,6 +5029,8 @@ static void fill_device_from_item(struct extent_buffer *leaf, | |||
4461 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 5029 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
4462 | device->io_width = btrfs_device_io_width(leaf, dev_item); | 5030 | device->io_width = btrfs_device_io_width(leaf, dev_item); |
4463 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); | 5031 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); |
5032 | WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); | ||
5033 | device->is_tgtdev_for_dev_replace = 0; | ||
4464 | 5034 | ||
4465 | ptr = (unsigned long)btrfs_device_uuid(dev_item); | 5035 | ptr = (unsigned long)btrfs_device_uuid(dev_item); |
4466 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | 5036 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); |
@@ -4538,7 +5108,7 @@ static int read_one_dev(struct btrfs_root *root, | |||
4538 | return ret; | 5108 | return ret; |
4539 | } | 5109 | } |
4540 | 5110 | ||
4541 | device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); | 5111 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); |
4542 | if (!device || !device->bdev) { | 5112 | if (!device || !device->bdev) { |
4543 | if (!btrfs_test_opt(root, DEGRADED)) | 5113 | if (!btrfs_test_opt(root, DEGRADED)) |
4544 | return -EIO; | 5114 | return -EIO; |
@@ -4571,7 +5141,7 @@ static int read_one_dev(struct btrfs_root *root, | |||
4571 | fill_device_from_item(leaf, dev_item, device); | 5141 | fill_device_from_item(leaf, dev_item, device); |
4572 | device->dev_root = root->fs_info->dev_root; | 5142 | device->dev_root = root->fs_info->dev_root; |
4573 | device->in_fs_metadata = 1; | 5143 | device->in_fs_metadata = 1; |
4574 | if (device->writeable) { | 5144 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
4575 | device->fs_devices->total_rw_bytes += device->total_bytes; | 5145 | device->fs_devices->total_rw_bytes += device->total_bytes; |
4576 | spin_lock(&root->fs_info->free_chunk_lock); | 5146 | spin_lock(&root->fs_info->free_chunk_lock); |
4577 | root->fs_info->free_chunk_space += device->total_bytes - | 5147 | root->fs_info->free_chunk_space += device->total_bytes - |
@@ -4930,7 +5500,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
4930 | int i; | 5500 | int i; |
4931 | 5501 | ||
4932 | mutex_lock(&fs_devices->device_list_mutex); | 5502 | mutex_lock(&fs_devices->device_list_mutex); |
4933 | dev = btrfs_find_device(root, stats->devid, NULL, NULL); | 5503 | dev = btrfs_find_device(root->fs_info, stats->devid, NULL, NULL); |
4934 | mutex_unlock(&fs_devices->device_list_mutex); | 5504 | mutex_unlock(&fs_devices->device_list_mutex); |
4935 | 5505 | ||
4936 | if (!dev) { | 5506 | if (!dev) { |
@@ -4958,3 +5528,21 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
4958 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; | 5528 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; |
4959 | return 0; | 5529 | return 0; |
4960 | } | 5530 | } |
5531 | |||
5532 | int btrfs_scratch_superblock(struct btrfs_device *device) | ||
5533 | { | ||
5534 | struct buffer_head *bh; | ||
5535 | struct btrfs_super_block *disk_super; | ||
5536 | |||
5537 | bh = btrfs_read_dev_super(device->bdev); | ||
5538 | if (!bh) | ||
5539 | return -EINVAL; | ||
5540 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
5541 | |||
5542 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | ||
5543 | set_buffer_dirty(bh); | ||
5544 | sync_dirty_buffer(bh); | ||
5545 | brelse(bh); | ||
5546 | |||
5547 | return 0; | ||
5548 | } | ||
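btrfs_scratch_superblock() reads the device's primary super block, zeroes its magic and writes it back, so the device no longer probes as btrfs once it has been retired (for example after a completed replace). Below is a user-space sketch of the same effect; the offsets (64 KiB primary super block copy, magic 8 bytes long at byte 64 within it) reflect the on-disk format as I understand it and should be verified, and in practice wipefs is the usual tool for this.

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

#define PRIMARY_SUPER_OFFSET	(64 * 1024)	/* assumed primary copy location */
#define MAGIC_OFFSET_IN_SUPER	64		/* assumed offset of the magic */
#define MAGIC_LEN		8

int main(int argc, char **argv)
{
	unsigned char zeros[MAGIC_LEN] = { 0 };
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <device-or-image>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* overwrite the magic so the filesystem is no longer recognized */
	if (pwrite(fd, zeros, MAGIC_LEN,
		   PRIMARY_SUPER_OFFSET + MAGIC_OFFSET_IN_SUPER) != MAGIC_LEN) {
		perror("pwrite");
		close(fd);
		return 1;
	}
	fsync(fd);
	close(fd);
	return 0;
}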