aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r--fs/btrfs/volumes.c966
1 files changed, 777 insertions, 189 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0f5ebb72a5ea..5cce6aa74012 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -25,7 +25,6 @@
25#include <linux/capability.h> 25#include <linux/capability.h>
26#include <linux/ratelimit.h> 26#include <linux/ratelimit.h>
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <asm/div64.h>
29#include "compat.h" 28#include "compat.h"
30#include "ctree.h" 29#include "ctree.h"
31#include "extent_map.h" 30#include "extent_map.h"
@@ -36,6 +35,8 @@
36#include "async-thread.h" 35#include "async-thread.h"
37#include "check-integrity.h" 36#include "check-integrity.h"
38#include "rcu-string.h" 37#include "rcu-string.h"
38#include "math.h"
39#include "dev-replace.h"
39 40
40static int init_first_rw_device(struct btrfs_trans_handle *trans, 41static int init_first_rw_device(struct btrfs_trans_handle *trans,
41 struct btrfs_root *root, 42 struct btrfs_root *root,
@@ -71,6 +72,19 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
71 kfree(fs_devices); 72 kfree(fs_devices);
72} 73}
73 74
75static void btrfs_kobject_uevent(struct block_device *bdev,
76 enum kobject_action action)
77{
78 int ret;
79
80 ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
81 if (ret)
82 pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n",
83 action,
84 kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
85 &disk_to_dev(bdev->bd_disk)->kobj);
86}
87
74void btrfs_cleanup_fs_uuids(void) 88void btrfs_cleanup_fs_uuids(void)
75{ 89{
76 struct btrfs_fs_devices *fs_devices; 90 struct btrfs_fs_devices *fs_devices;
@@ -108,6 +122,44 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
108 return NULL; 122 return NULL;
109} 123}
110 124
125static int
126btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
127 int flush, struct block_device **bdev,
128 struct buffer_head **bh)
129{
130 int ret;
131
132 *bdev = blkdev_get_by_path(device_path, flags, holder);
133
134 if (IS_ERR(*bdev)) {
135 ret = PTR_ERR(*bdev);
136 printk(KERN_INFO "btrfs: open %s failed\n", device_path);
137 goto error;
138 }
139
140 if (flush)
141 filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
142 ret = set_blocksize(*bdev, 4096);
143 if (ret) {
144 blkdev_put(*bdev, flags);
145 goto error;
146 }
147 invalidate_bdev(*bdev);
148 *bh = btrfs_read_dev_super(*bdev);
149 if (!*bh) {
150 ret = -EINVAL;
151 blkdev_put(*bdev, flags);
152 goto error;
153 }
154
155 return 0;
156
157error:
158 *bdev = NULL;
159 *bh = NULL;
160 return ret;
161}
162
111static void requeue_list(struct btrfs_pending_bios *pending_bios, 163static void requeue_list(struct btrfs_pending_bios *pending_bios,
112 struct bio *head, struct bio *tail) 164 struct bio *head, struct bio *tail)
113{ 165{
@@ -467,7 +519,8 @@ error:
467 return ERR_PTR(-ENOMEM); 519 return ERR_PTR(-ENOMEM);
468} 520}
469 521
470void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) 522void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
523 struct btrfs_fs_devices *fs_devices, int step)
471{ 524{
472 struct btrfs_device *device, *next; 525 struct btrfs_device *device, *next;
473 526
@@ -480,8 +533,9 @@ again:
480 /* This is the initialized path, it is safe to release the devices. */ 533 /* This is the initialized path, it is safe to release the devices. */
481 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 534 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
482 if (device->in_fs_metadata) { 535 if (device->in_fs_metadata) {
483 if (!latest_transid || 536 if (!device->is_tgtdev_for_dev_replace &&
484 device->generation > latest_transid) { 537 (!latest_transid ||
538 device->generation > latest_transid)) {
485 latest_devid = device->devid; 539 latest_devid = device->devid;
486 latest_transid = device->generation; 540 latest_transid = device->generation;
487 latest_bdev = device->bdev; 541 latest_bdev = device->bdev;
@@ -489,6 +543,21 @@ again:
489 continue; 543 continue;
490 } 544 }
491 545
546 if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
547 /*
548 * In the first step, keep the device which has
549 * the correct fsid and the devid that is used
550 * for the dev_replace procedure.
551 * In the second step, the dev_replace state is
552 * read from the device tree and it is known
553 * whether the procedure is really active or
554 * not, which means whether this device is
555 * used or whether it should be removed.
556 */
557 if (step == 0 || device->is_tgtdev_for_dev_replace) {
558 continue;
559 }
560 }
492 if (device->bdev) { 561 if (device->bdev) {
493 blkdev_put(device->bdev, device->mode); 562 blkdev_put(device->bdev, device->mode);
494 device->bdev = NULL; 563 device->bdev = NULL;
@@ -497,7 +566,8 @@ again:
497 if (device->writeable) { 566 if (device->writeable) {
498 list_del_init(&device->dev_alloc_list); 567 list_del_init(&device->dev_alloc_list);
499 device->writeable = 0; 568 device->writeable = 0;
500 fs_devices->rw_devices--; 569 if (!device->is_tgtdev_for_dev_replace)
570 fs_devices->rw_devices--;
501 } 571 }
502 list_del_init(&device->dev_list); 572 list_del_init(&device->dev_list);
503 fs_devices->num_devices--; 573 fs_devices->num_devices--;
@@ -555,7 +625,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
555 if (device->bdev) 625 if (device->bdev)
556 fs_devices->open_devices--; 626 fs_devices->open_devices--;
557 627
558 if (device->writeable) { 628 if (device->writeable && !device->is_tgtdev_for_dev_replace) {
559 list_del_init(&device->dev_alloc_list); 629 list_del_init(&device->dev_alloc_list);
560 fs_devices->rw_devices--; 630 fs_devices->rw_devices--;
561 } 631 }
@@ -637,18 +707,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
637 if (!device->name) 707 if (!device->name)
638 continue; 708 continue;
639 709
640 bdev = blkdev_get_by_path(device->name->str, flags, holder); 710 ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
641 if (IS_ERR(bdev)) { 711 &bdev, &bh);
642 printk(KERN_INFO "btrfs: open %s failed\n", device->name->str); 712 if (ret)
643 goto error; 713 continue;
644 }
645 filemap_write_and_wait(bdev->bd_inode->i_mapping);
646 invalidate_bdev(bdev);
647 set_blocksize(bdev, 4096);
648
649 bh = btrfs_read_dev_super(bdev);
650 if (!bh)
651 goto error_close;
652 714
653 disk_super = (struct btrfs_super_block *)bh->b_data; 715 disk_super = (struct btrfs_super_block *)bh->b_data;
654 devid = btrfs_stack_device_id(&disk_super->dev_item); 716 devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -687,7 +749,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
687 fs_devices->rotating = 1; 749 fs_devices->rotating = 1;
688 750
689 fs_devices->open_devices++; 751 fs_devices->open_devices++;
690 if (device->writeable) { 752 if (device->writeable && !device->is_tgtdev_for_dev_replace) {
691 fs_devices->rw_devices++; 753 fs_devices->rw_devices++;
692 list_add(&device->dev_alloc_list, 754 list_add(&device->dev_alloc_list,
693 &fs_devices->alloc_list); 755 &fs_devices->alloc_list);
@@ -697,9 +759,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
697 759
698error_brelse: 760error_brelse:
699 brelse(bh); 761 brelse(bh);
700error_close:
701 blkdev_put(bdev, flags); 762 blkdev_put(bdev, flags);
702error:
703 continue; 763 continue;
704 } 764 }
705 if (fs_devices->open_devices == 0) { 765 if (fs_devices->open_devices == 0) {
@@ -744,40 +804,30 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
744 u64 total_devices; 804 u64 total_devices;
745 805
746 flags |= FMODE_EXCL; 806 flags |= FMODE_EXCL;
747 bdev = blkdev_get_by_path(path, flags, holder);
748
749 if (IS_ERR(bdev)) {
750 ret = PTR_ERR(bdev);
751 goto error;
752 }
753
754 mutex_lock(&uuid_mutex); 807 mutex_lock(&uuid_mutex);
755 ret = set_blocksize(bdev, 4096); 808 ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh);
756 if (ret) 809 if (ret)
757 goto error_close; 810 goto error;
758 bh = btrfs_read_dev_super(bdev);
759 if (!bh) {
760 ret = -EINVAL;
761 goto error_close;
762 }
763 disk_super = (struct btrfs_super_block *)bh->b_data; 811 disk_super = (struct btrfs_super_block *)bh->b_data;
764 devid = btrfs_stack_device_id(&disk_super->dev_item); 812 devid = btrfs_stack_device_id(&disk_super->dev_item);
765 transid = btrfs_super_generation(disk_super); 813 transid = btrfs_super_generation(disk_super);
766 total_devices = btrfs_super_num_devices(disk_super); 814 total_devices = btrfs_super_num_devices(disk_super);
767 if (disk_super->label[0]) 815 if (disk_super->label[0]) {
816 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
817 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
768 printk(KERN_INFO "device label %s ", disk_super->label); 818 printk(KERN_INFO "device label %s ", disk_super->label);
769 else 819 } else {
770 printk(KERN_INFO "device fsid %pU ", disk_super->fsid); 820 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
821 }
771 printk(KERN_CONT "devid %llu transid %llu %s\n", 822 printk(KERN_CONT "devid %llu transid %llu %s\n",
772 (unsigned long long)devid, (unsigned long long)transid, path); 823 (unsigned long long)devid, (unsigned long long)transid, path);
773 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 824 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
774 if (!ret && fs_devices_ret) 825 if (!ret && fs_devices_ret)
775 (*fs_devices_ret)->total_devices = total_devices; 826 (*fs_devices_ret)->total_devices = total_devices;
776 brelse(bh); 827 brelse(bh);
777error_close:
778 mutex_unlock(&uuid_mutex);
779 blkdev_put(bdev, flags); 828 blkdev_put(bdev, flags);
780error: 829error:
830 mutex_unlock(&uuid_mutex);
781 return ret; 831 return ret;
782} 832}
783 833
@@ -796,7 +846,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
796 846
797 *length = 0; 847 *length = 0;
798 848
799 if (start >= device->total_bytes) 849 if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace)
800 return 0; 850 return 0;
801 851
802 path = btrfs_alloc_path(); 852 path = btrfs_alloc_path();
@@ -913,7 +963,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
913 max_hole_size = 0; 963 max_hole_size = 0;
914 hole_size = 0; 964 hole_size = 0;
915 965
916 if (search_start >= search_end) { 966 if (search_start >= search_end || device->is_tgtdev_for_dev_replace) {
917 ret = -ENOSPC; 967 ret = -ENOSPC;
918 goto error; 968 goto error;
919 } 969 }
@@ -1096,6 +1146,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
1096 struct btrfs_key key; 1146 struct btrfs_key key;
1097 1147
1098 WARN_ON(!device->in_fs_metadata); 1148 WARN_ON(!device->in_fs_metadata);
1149 WARN_ON(device->is_tgtdev_for_dev_replace);
1099 path = btrfs_alloc_path(); 1150 path = btrfs_alloc_path();
1100 if (!path) 1151 if (!path)
1101 return -ENOMEM; 1152 return -ENOMEM;
@@ -1330,16 +1381,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1330 root->fs_info->avail_system_alloc_bits | 1381 root->fs_info->avail_system_alloc_bits |
1331 root->fs_info->avail_metadata_alloc_bits; 1382 root->fs_info->avail_metadata_alloc_bits;
1332 1383
1333 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && 1384 num_devices = root->fs_info->fs_devices->num_devices;
1334 root->fs_info->fs_devices->num_devices <= 4) { 1385 btrfs_dev_replace_lock(&root->fs_info->dev_replace);
1386 if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
1387 WARN_ON(num_devices < 1);
1388 num_devices--;
1389 }
1390 btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
1391
1392 if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
1335 printk(KERN_ERR "btrfs: unable to go below four devices " 1393 printk(KERN_ERR "btrfs: unable to go below four devices "
1336 "on raid10\n"); 1394 "on raid10\n");
1337 ret = -EINVAL; 1395 ret = -EINVAL;
1338 goto out; 1396 goto out;
1339 } 1397 }
1340 1398
1341 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && 1399 if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) {
1342 root->fs_info->fs_devices->num_devices <= 2) {
1343 printk(KERN_ERR "btrfs: unable to go below two " 1400 printk(KERN_ERR "btrfs: unable to go below two "
1344 "devices on raid1\n"); 1401 "devices on raid1\n");
1345 ret = -EINVAL; 1402 ret = -EINVAL;
@@ -1357,7 +1414,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1357 * is held. 1414 * is held.
1358 */ 1415 */
1359 list_for_each_entry(tmp, devices, dev_list) { 1416 list_for_each_entry(tmp, devices, dev_list) {
1360 if (tmp->in_fs_metadata && !tmp->bdev) { 1417 if (tmp->in_fs_metadata &&
1418 !tmp->is_tgtdev_for_dev_replace &&
1419 !tmp->bdev) {
1361 device = tmp; 1420 device = tmp;
1362 break; 1421 break;
1363 } 1422 }
@@ -1371,24 +1430,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1371 goto out; 1430 goto out;
1372 } 1431 }
1373 } else { 1432 } else {
1374 bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, 1433 ret = btrfs_get_bdev_and_sb(device_path,
1375 root->fs_info->bdev_holder); 1434 FMODE_READ | FMODE_EXCL,
1376 if (IS_ERR(bdev)) { 1435 root->fs_info->bdev_holder, 0,
1377 ret = PTR_ERR(bdev); 1436 &bdev, &bh);
1437 if (ret)
1378 goto out; 1438 goto out;
1379 }
1380
1381 set_blocksize(bdev, 4096);
1382 invalidate_bdev(bdev);
1383 bh = btrfs_read_dev_super(bdev);
1384 if (!bh) {
1385 ret = -EINVAL;
1386 goto error_close;
1387 }
1388 disk_super = (struct btrfs_super_block *)bh->b_data; 1439 disk_super = (struct btrfs_super_block *)bh->b_data;
1389 devid = btrfs_stack_device_id(&disk_super->dev_item); 1440 devid = btrfs_stack_device_id(&disk_super->dev_item);
1390 dev_uuid = disk_super->dev_item.uuid; 1441 dev_uuid = disk_super->dev_item.uuid;
1391 device = btrfs_find_device(root, devid, dev_uuid, 1442 device = btrfs_find_device(root->fs_info, devid, dev_uuid,
1392 disk_super->fsid); 1443 disk_super->fsid);
1393 if (!device) { 1444 if (!device) {
1394 ret = -ENOENT; 1445 ret = -ENOENT;
@@ -1396,6 +1447,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1396 } 1447 }
1397 } 1448 }
1398 1449
1450 if (device->is_tgtdev_for_dev_replace) {
1451 pr_err("btrfs: unable to remove the dev_replace target dev\n");
1452 ret = -EINVAL;
1453 goto error_brelse;
1454 }
1455
1399 if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { 1456 if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) {
1400 printk(KERN_ERR "btrfs: unable to remove the only writeable " 1457 printk(KERN_ERR "btrfs: unable to remove the only writeable "
1401 "device\n"); 1458 "device\n");
@@ -1415,6 +1472,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1415 if (ret) 1472 if (ret)
1416 goto error_undo; 1473 goto error_undo;
1417 1474
1475 /*
1476 * TODO: the superblock still includes this device in its num_devices
1477 * counter although write_all_supers() is not locked out. This
1478 * could give a filesystem state which requires a degraded mount.
1479 */
1418 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); 1480 ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
1419 if (ret) 1481 if (ret)
1420 goto error_undo; 1482 goto error_undo;
@@ -1425,7 +1487,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1425 spin_unlock(&root->fs_info->free_chunk_lock); 1487 spin_unlock(&root->fs_info->free_chunk_lock);
1426 1488
1427 device->in_fs_metadata = 0; 1489 device->in_fs_metadata = 0;
1428 btrfs_scrub_cancel_dev(root, device); 1490 btrfs_scrub_cancel_dev(root->fs_info, device);
1429 1491
1430 /* 1492 /*
1431 * the device list mutex makes sure that we don't change 1493 * the device list mutex makes sure that we don't change
@@ -1482,7 +1544,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1482 * at this point, the device is zero sized. We want to 1544 * at this point, the device is zero sized. We want to
1483 * remove it from the devices list and zero out the old super 1545 * remove it from the devices list and zero out the old super
1484 */ 1546 */
1485 if (clear_super) { 1547 if (clear_super && disk_super) {
1486 /* make sure this device isn't detected as part of 1548 /* make sure this device isn't detected as part of
1487 * the FS anymore 1549 * the FS anymore
1488 */ 1550 */
@@ -1493,9 +1555,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1493 1555
1494 ret = 0; 1556 ret = 0;
1495 1557
1558 /* Notify udev that device has changed */
1559 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
1560
1496error_brelse: 1561error_brelse:
1497 brelse(bh); 1562 brelse(bh);
1498error_close:
1499 if (bdev) 1563 if (bdev)
1500 blkdev_put(bdev, FMODE_READ | FMODE_EXCL); 1564 blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
1501out: 1565out:
@@ -1512,6 +1576,112 @@ error_undo:
1512 goto error_brelse; 1576 goto error_brelse;
1513} 1577}
1514 1578
1579void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info,
1580 struct btrfs_device *srcdev)
1581{
1582 WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
1583 list_del_rcu(&srcdev->dev_list);
1584 list_del_rcu(&srcdev->dev_alloc_list);
1585 fs_info->fs_devices->num_devices--;
1586 if (srcdev->missing) {
1587 fs_info->fs_devices->missing_devices--;
1588 fs_info->fs_devices->rw_devices++;
1589 }
1590 if (srcdev->can_discard)
1591 fs_info->fs_devices->num_can_discard--;
1592 if (srcdev->bdev)
1593 fs_info->fs_devices->open_devices--;
1594
1595 call_rcu(&srcdev->rcu, free_device);
1596}
1597
1598void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
1599 struct btrfs_device *tgtdev)
1600{
1601 struct btrfs_device *next_device;
1602
1603 WARN_ON(!tgtdev);
1604 mutex_lock(&fs_info->fs_devices->device_list_mutex);
1605 if (tgtdev->bdev) {
1606 btrfs_scratch_superblock(tgtdev);
1607 fs_info->fs_devices->open_devices--;
1608 }
1609 fs_info->fs_devices->num_devices--;
1610 if (tgtdev->can_discard)
1611 fs_info->fs_devices->num_can_discard++;
1612
1613 next_device = list_entry(fs_info->fs_devices->devices.next,
1614 struct btrfs_device, dev_list);
1615 if (tgtdev->bdev == fs_info->sb->s_bdev)
1616 fs_info->sb->s_bdev = next_device->bdev;
1617 if (tgtdev->bdev == fs_info->fs_devices->latest_bdev)
1618 fs_info->fs_devices->latest_bdev = next_device->bdev;
1619 list_del_rcu(&tgtdev->dev_list);
1620
1621 call_rcu(&tgtdev->rcu, free_device);
1622
1623 mutex_unlock(&fs_info->fs_devices->device_list_mutex);
1624}
1625
1626int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path,
1627 struct btrfs_device **device)
1628{
1629 int ret = 0;
1630 struct btrfs_super_block *disk_super;
1631 u64 devid;
1632 u8 *dev_uuid;
1633 struct block_device *bdev;
1634 struct buffer_head *bh;
1635
1636 *device = NULL;
1637 ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
1638 root->fs_info->bdev_holder, 0, &bdev, &bh);
1639 if (ret)
1640 return ret;
1641 disk_super = (struct btrfs_super_block *)bh->b_data;
1642 devid = btrfs_stack_device_id(&disk_super->dev_item);
1643 dev_uuid = disk_super->dev_item.uuid;
1644 *device = btrfs_find_device(root->fs_info, devid, dev_uuid,
1645 disk_super->fsid);
1646 brelse(bh);
1647 if (!*device)
1648 ret = -ENOENT;
1649 blkdev_put(bdev, FMODE_READ);
1650 return ret;
1651}
1652
1653int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
1654 char *device_path,
1655 struct btrfs_device **device)
1656{
1657 *device = NULL;
1658 if (strcmp(device_path, "missing") == 0) {
1659 struct list_head *devices;
1660 struct btrfs_device *tmp;
1661
1662 devices = &root->fs_info->fs_devices->devices;
1663 /*
1664 * It is safe to read the devices since the volume_mutex
1665 * is held by the caller.
1666 */
1667 list_for_each_entry(tmp, devices, dev_list) {
1668 if (tmp->in_fs_metadata && !tmp->bdev) {
1669 *device = tmp;
1670 break;
1671 }
1672 }
1673
1674 if (!*device) {
1675 pr_err("btrfs: no missing device found\n");
1676 return -ENOENT;
1677 }
1678
1679 return 0;
1680 } else {
1681 return btrfs_find_device_by_path(root, device_path, device);
1682 }
1683}
1684
1515/* 1685/*
1516 * does all the dirty work required for changing file system's UUID. 1686 * does all the dirty work required for changing file system's UUID.
1517 */ 1687 */
@@ -1630,7 +1800,8 @@ next_slot:
1630 read_extent_buffer(leaf, fs_uuid, 1800 read_extent_buffer(leaf, fs_uuid,
1631 (unsigned long)btrfs_device_fsid(dev_item), 1801 (unsigned long)btrfs_device_fsid(dev_item),
1632 BTRFS_UUID_SIZE); 1802 BTRFS_UUID_SIZE);
1633 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); 1803 device = btrfs_find_device(root->fs_info, devid, dev_uuid,
1804 fs_uuid);
1634 BUG_ON(!device); /* Logic error */ 1805 BUG_ON(!device); /* Logic error */
1635 1806
1636 if (device->fs_devices->seeding) { 1807 if (device->fs_devices->seeding) {
@@ -1678,16 +1849,17 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1678 filemap_write_and_wait(bdev->bd_inode->i_mapping); 1849 filemap_write_and_wait(bdev->bd_inode->i_mapping);
1679 1850
1680 devices = &root->fs_info->fs_devices->devices; 1851 devices = &root->fs_info->fs_devices->devices;
1681 /* 1852
1682 * we have the volume lock, so we don't need the extra 1853 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
1683 * device list mutex while reading the list here.
1684 */
1685 list_for_each_entry(device, devices, dev_list) { 1854 list_for_each_entry(device, devices, dev_list) {
1686 if (device->bdev == bdev) { 1855 if (device->bdev == bdev) {
1687 ret = -EEXIST; 1856 ret = -EEXIST;
1857 mutex_unlock(
1858 &root->fs_info->fs_devices->device_list_mutex);
1688 goto error; 1859 goto error;
1689 } 1860 }
1690 } 1861 }
1862 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
1691 1863
1692 device = kzalloc(sizeof(*device), GFP_NOFS); 1864 device = kzalloc(sizeof(*device), GFP_NOFS);
1693 if (!device) { 1865 if (!device) {
@@ -1737,6 +1909,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
1737 device->dev_root = root->fs_info->dev_root; 1909 device->dev_root = root->fs_info->dev_root;
1738 device->bdev = bdev; 1910 device->bdev = bdev;
1739 device->in_fs_metadata = 1; 1911 device->in_fs_metadata = 1;
1912 device->is_tgtdev_for_dev_replace = 0;
1740 device->mode = FMODE_EXCL; 1913 device->mode = FMODE_EXCL;
1741 set_blocksize(device->bdev, 4096); 1914 set_blocksize(device->bdev, 4096);
1742 1915
@@ -1844,6 +2017,98 @@ error:
1844 return ret; 2017 return ret;
1845} 2018}
1846 2019
2020int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
2021 struct btrfs_device **device_out)
2022{
2023 struct request_queue *q;
2024 struct btrfs_device *device;
2025 struct block_device *bdev;
2026 struct btrfs_fs_info *fs_info = root->fs_info;
2027 struct list_head *devices;
2028 struct rcu_string *name;
2029 int ret = 0;
2030
2031 *device_out = NULL;
2032 if (fs_info->fs_devices->seeding)
2033 return -EINVAL;
2034
2035 bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2036 fs_info->bdev_holder);
2037 if (IS_ERR(bdev))
2038 return PTR_ERR(bdev);
2039
2040 filemap_write_and_wait(bdev->bd_inode->i_mapping);
2041
2042 devices = &fs_info->fs_devices->devices;
2043 list_for_each_entry(device, devices, dev_list) {
2044 if (device->bdev == bdev) {
2045 ret = -EEXIST;
2046 goto error;
2047 }
2048 }
2049
2050 device = kzalloc(sizeof(*device), GFP_NOFS);
2051 if (!device) {
2052 ret = -ENOMEM;
2053 goto error;
2054 }
2055
2056 name = rcu_string_strdup(device_path, GFP_NOFS);
2057 if (!name) {
2058 kfree(device);
2059 ret = -ENOMEM;
2060 goto error;
2061 }
2062 rcu_assign_pointer(device->name, name);
2063
2064 q = bdev_get_queue(bdev);
2065 if (blk_queue_discard(q))
2066 device->can_discard = 1;
2067 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2068 device->writeable = 1;
2069 device->work.func = pending_bios_fn;
2070 generate_random_uuid(device->uuid);
2071 device->devid = BTRFS_DEV_REPLACE_DEVID;
2072 spin_lock_init(&device->io_lock);
2073 device->generation = 0;
2074 device->io_width = root->sectorsize;
2075 device->io_align = root->sectorsize;
2076 device->sector_size = root->sectorsize;
2077 device->total_bytes = i_size_read(bdev->bd_inode);
2078 device->disk_total_bytes = device->total_bytes;
2079 device->dev_root = fs_info->dev_root;
2080 device->bdev = bdev;
2081 device->in_fs_metadata = 1;
2082 device->is_tgtdev_for_dev_replace = 1;
2083 device->mode = FMODE_EXCL;
2084 set_blocksize(device->bdev, 4096);
2085 device->fs_devices = fs_info->fs_devices;
2086 list_add(&device->dev_list, &fs_info->fs_devices->devices);
2087 fs_info->fs_devices->num_devices++;
2088 fs_info->fs_devices->open_devices++;
2089 if (device->can_discard)
2090 fs_info->fs_devices->num_can_discard++;
2091 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
2092
2093 *device_out = device;
2094 return ret;
2095
2096error:
2097 blkdev_put(bdev, FMODE_EXCL);
2098 return ret;
2099}
2100
2101void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
2102 struct btrfs_device *tgtdev)
2103{
2104 WARN_ON(fs_info->fs_devices->rw_devices == 0);
2105 tgtdev->io_width = fs_info->dev_root->sectorsize;
2106 tgtdev->io_align = fs_info->dev_root->sectorsize;
2107 tgtdev->sector_size = fs_info->dev_root->sectorsize;
2108 tgtdev->dev_root = fs_info->dev_root;
2109 tgtdev->in_fs_metadata = 1;
2110}
2111
1847static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, 2112static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
1848 struct btrfs_device *device) 2113 struct btrfs_device *device)
1849{ 2114{
@@ -1900,7 +2165,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
1900 2165
1901 if (!device->writeable) 2166 if (!device->writeable)
1902 return -EACCES; 2167 return -EACCES;
1903 if (new_size <= device->total_bytes) 2168 if (new_size <= device->total_bytes ||
2169 device->is_tgtdev_for_dev_replace)
1904 return -EINVAL; 2170 return -EINVAL;
1905 2171
1906 btrfs_set_super_total_bytes(super_copy, old_total + diff); 2172 btrfs_set_super_total_bytes(super_copy, old_total + diff);
@@ -2338,18 +2604,6 @@ static int chunk_profiles_filter(u64 chunk_type,
2338 return 1; 2604 return 1;
2339} 2605}
2340 2606
2341static u64 div_factor_fine(u64 num, int factor)
2342{
2343 if (factor <= 0)
2344 return 0;
2345 if (factor >= 100)
2346 return num;
2347
2348 num *= factor;
2349 do_div(num, 100);
2350 return num;
2351}
2352
2353static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, 2607static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2354 struct btrfs_balance_args *bargs) 2608 struct btrfs_balance_args *bargs)
2355{ 2609{
@@ -2514,15 +2768,6 @@ static int should_balance_chunk(struct btrfs_root *root,
2514 return 1; 2768 return 1;
2515} 2769}
2516 2770
2517static u64 div_factor(u64 num, int factor)
2518{
2519 if (factor == 10)
2520 return num;
2521 num *= factor;
2522 do_div(num, 10);
2523 return num;
2524}
2525
2526static int __btrfs_balance(struct btrfs_fs_info *fs_info) 2771static int __btrfs_balance(struct btrfs_fs_info *fs_info)
2527{ 2772{
2528 struct btrfs_balance_control *bctl = fs_info->balance_ctl; 2773 struct btrfs_balance_control *bctl = fs_info->balance_ctl;
@@ -2550,7 +2795,8 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
2550 size_to_free = div_factor(old_size, 1); 2795 size_to_free = div_factor(old_size, 1);
2551 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); 2796 size_to_free = min(size_to_free, (u64)1 * 1024 * 1024);
2552 if (!device->writeable || 2797 if (!device->writeable ||
2553 device->total_bytes - device->bytes_used > size_to_free) 2798 device->total_bytes - device->bytes_used > size_to_free ||
2799 device->is_tgtdev_for_dev_replace)
2554 continue; 2800 continue;
2555 2801
2556 ret = btrfs_shrink_device(device, old_size - size_to_free); 2802 ret = btrfs_shrink_device(device, old_size - size_to_free);
@@ -2728,6 +2974,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2728 u64 allowed; 2974 u64 allowed;
2729 int mixed = 0; 2975 int mixed = 0;
2730 int ret; 2976 int ret;
2977 u64 num_devices;
2731 2978
2732 if (btrfs_fs_closing(fs_info) || 2979 if (btrfs_fs_closing(fs_info) ||
2733 atomic_read(&fs_info->balance_pause_req) || 2980 atomic_read(&fs_info->balance_pause_req) ||
@@ -2756,10 +3003,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2756 } 3003 }
2757 } 3004 }
2758 3005
3006 num_devices = fs_info->fs_devices->num_devices;
3007 btrfs_dev_replace_lock(&fs_info->dev_replace);
3008 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
3009 BUG_ON(num_devices < 1);
3010 num_devices--;
3011 }
3012 btrfs_dev_replace_unlock(&fs_info->dev_replace);
2759 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; 3013 allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
2760 if (fs_info->fs_devices->num_devices == 1) 3014 if (num_devices == 1)
2761 allowed |= BTRFS_BLOCK_GROUP_DUP; 3015 allowed |= BTRFS_BLOCK_GROUP_DUP;
2762 else if (fs_info->fs_devices->num_devices < 4) 3016 else if (num_devices < 4)
2763 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); 3017 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
2764 else 3018 else
2765 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | 3019 allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 |
@@ -2902,6 +3156,7 @@ static int balance_kthread(void *data)
2902 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3156 ret = btrfs_balance(fs_info->balance_ctl, NULL);
2903 } 3157 }
2904 3158
3159 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
2905 mutex_unlock(&fs_info->balance_mutex); 3160 mutex_unlock(&fs_info->balance_mutex);
2906 mutex_unlock(&fs_info->volume_mutex); 3161 mutex_unlock(&fs_info->volume_mutex);
2907 3162
@@ -2924,6 +3179,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
2924 return 0; 3179 return 0;
2925 } 3180 }
2926 3181
3182 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
2927 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); 3183 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
2928 if (IS_ERR(tsk)) 3184 if (IS_ERR(tsk))
2929 return PTR_ERR(tsk); 3185 return PTR_ERR(tsk);
@@ -3080,7 +3336,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
3080 u64 old_size = device->total_bytes; 3336 u64 old_size = device->total_bytes;
3081 u64 diff = device->total_bytes - new_size; 3337 u64 diff = device->total_bytes - new_size;
3082 3338
3083 if (new_size >= device->total_bytes) 3339 if (device->is_tgtdev_for_dev_replace)
3084 return -EINVAL; 3340 return -EINVAL;
3085 3341
3086 path = btrfs_alloc_path(); 3342 path = btrfs_alloc_path();
@@ -3235,6 +3491,14 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
3235 return 0; 3491 return 0;
3236} 3492}
3237 3493
3494struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3495 { 2, 1, 0, 4, 2, 2 /* raid10 */ },
3496 { 1, 1, 2, 2, 2, 2 /* raid1 */ },
3497 { 1, 2, 1, 1, 1, 2 /* dup */ },
3498 { 1, 1, 0, 2, 1, 1 /* raid0 */ },
3499 { 1, 1, 0, 1, 1, 1 /* single */ },
3500};
3501
3238static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3502static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3239 struct btrfs_root *extent_root, 3503 struct btrfs_root *extent_root,
3240 struct map_lookup **map_ret, 3504 struct map_lookup **map_ret,
@@ -3264,43 +3528,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3264 int ndevs; 3528 int ndevs;
3265 int i; 3529 int i;
3266 int j; 3530 int j;
3531 int index;
3267 3532
3268 BUG_ON(!alloc_profile_is_valid(type, 0)); 3533 BUG_ON(!alloc_profile_is_valid(type, 0));
3269 3534
3270 if (list_empty(&fs_devices->alloc_list)) 3535 if (list_empty(&fs_devices->alloc_list))
3271 return -ENOSPC; 3536 return -ENOSPC;
3272 3537
3273 sub_stripes = 1; 3538 index = __get_raid_index(type);
3274 dev_stripes = 1;
3275 devs_increment = 1;
3276 ncopies = 1;
3277 devs_max = 0; /* 0 == as many as possible */
3278 devs_min = 1;
3279 3539
3280 /* 3540 sub_stripes = btrfs_raid_array[index].sub_stripes;
3281 * define the properties of each RAID type. 3541 dev_stripes = btrfs_raid_array[index].dev_stripes;
3282 * FIXME: move this to a global table and use it in all RAID 3542 devs_max = btrfs_raid_array[index].devs_max;
3283 * calculation code 3543 devs_min = btrfs_raid_array[index].devs_min;
3284 */ 3544 devs_increment = btrfs_raid_array[index].devs_increment;
3285 if (type & (BTRFS_BLOCK_GROUP_DUP)) { 3545 ncopies = btrfs_raid_array[index].ncopies;
3286 dev_stripes = 2;
3287 ncopies = 2;
3288 devs_max = 1;
3289 } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) {
3290 devs_min = 2;
3291 } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
3292 devs_increment = 2;
3293 ncopies = 2;
3294 devs_max = 2;
3295 devs_min = 2;
3296 } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
3297 sub_stripes = 2;
3298 devs_increment = 2;
3299 ncopies = 2;
3300 devs_min = 4;
3301 } else {
3302 devs_max = 1;
3303 }
3304 3546
3305 if (type & BTRFS_BLOCK_GROUP_DATA) { 3547 if (type & BTRFS_BLOCK_GROUP_DATA) {
3306 max_stripe_size = 1024 * 1024 * 1024; 3548 max_stripe_size = 1024 * 1024 * 1024;
@@ -3347,13 +3589,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3347 cur = cur->next; 3589 cur = cur->next;
3348 3590
3349 if (!device->writeable) { 3591 if (!device->writeable) {
3350 printk(KERN_ERR 3592 WARN(1, KERN_ERR
3351 "btrfs: read-only device in alloc_list\n"); 3593 "btrfs: read-only device in alloc_list\n");
3352 WARN_ON(1);
3353 continue; 3594 continue;
3354 } 3595 }
3355 3596
3356 if (!device->in_fs_metadata) 3597 if (!device->in_fs_metadata ||
3598 device->is_tgtdev_for_dev_replace)
3357 continue; 3599 continue;
3358 3600
3359 if (device->total_bytes > device->bytes_used) 3601 if (device->total_bytes > device->bytes_used)
@@ -3382,6 +3624,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3382 devices_info[ndevs].total_avail = total_avail; 3624 devices_info[ndevs].total_avail = total_avail;
3383 devices_info[ndevs].dev = device; 3625 devices_info[ndevs].dev = device;
3384 ++ndevs; 3626 ++ndevs;
3627 WARN_ON(ndevs > fs_devices->rw_devices);
3385 } 3628 }
3386 3629
3387 /* 3630 /*
@@ -3740,8 +3983,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
3740 } 3983 }
3741} 3984}
3742 3985
3743int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) 3986int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
3744{ 3987{
3988 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
3745 struct extent_map *em; 3989 struct extent_map *em;
3746 struct map_lookup *map; 3990 struct map_lookup *map;
3747 struct extent_map_tree *em_tree = &map_tree->map_tree; 3991 struct extent_map_tree *em_tree = &map_tree->map_tree;
@@ -3761,32 +4005,60 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len)
3761 else 4005 else
3762 ret = 1; 4006 ret = 1;
3763 free_extent_map(em); 4007 free_extent_map(em);
4008
4009 btrfs_dev_replace_lock(&fs_info->dev_replace);
4010 if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
4011 ret++;
4012 btrfs_dev_replace_unlock(&fs_info->dev_replace);
4013
3764 return ret; 4014 return ret;
3765} 4015}
3766 4016
3767static int find_live_mirror(struct map_lookup *map, int first, int num, 4017static int find_live_mirror(struct btrfs_fs_info *fs_info,
3768 int optimal) 4018 struct map_lookup *map, int first, int num,
4019 int optimal, int dev_replace_is_ongoing)
3769{ 4020{
3770 int i; 4021 int i;
3771 if (map->stripes[optimal].dev->bdev) 4022 int tolerance;
3772 return optimal; 4023 struct btrfs_device *srcdev;
3773 for (i = first; i < first + num; i++) { 4024
3774 if (map->stripes[i].dev->bdev) 4025 if (dev_replace_is_ongoing &&
3775 return i; 4026 fs_info->dev_replace.cont_reading_from_srcdev_mode ==
4027 BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
4028 srcdev = fs_info->dev_replace.srcdev;
4029 else
4030 srcdev = NULL;
4031
4032 /*
4033 * try to avoid the drive that is the source drive for a
4034 * dev-replace procedure, only choose it if no other non-missing
4035 * mirror is available
4036 */
4037 for (tolerance = 0; tolerance < 2; tolerance++) {
4038 if (map->stripes[optimal].dev->bdev &&
4039 (tolerance || map->stripes[optimal].dev != srcdev))
4040 return optimal;
4041 for (i = first; i < first + num; i++) {
4042 if (map->stripes[i].dev->bdev &&
4043 (tolerance || map->stripes[i].dev != srcdev))
4044 return i;
4045 }
3776 } 4046 }
4047
3777 /* we couldn't find one that doesn't fail. Just return something 4048 /* we couldn't find one that doesn't fail. Just return something
3778 * and the io error handling code will clean up eventually 4049 * and the io error handling code will clean up eventually
3779 */ 4050 */
3780 return optimal; 4051 return optimal;
3781} 4052}
3782 4053
3783static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, 4054static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
3784 u64 logical, u64 *length, 4055 u64 logical, u64 *length,
3785 struct btrfs_bio **bbio_ret, 4056 struct btrfs_bio **bbio_ret,
3786 int mirror_num) 4057 int mirror_num)
3787{ 4058{
3788 struct extent_map *em; 4059 struct extent_map *em;
3789 struct map_lookup *map; 4060 struct map_lookup *map;
4061 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
3790 struct extent_map_tree *em_tree = &map_tree->map_tree; 4062 struct extent_map_tree *em_tree = &map_tree->map_tree;
3791 u64 offset; 4063 u64 offset;
3792 u64 stripe_offset; 4064 u64 stripe_offset;
@@ -3800,6 +4072,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3800 int num_stripes; 4072 int num_stripes;
3801 int max_errors = 0; 4073 int max_errors = 0;
3802 struct btrfs_bio *bbio = NULL; 4074 struct btrfs_bio *bbio = NULL;
4075 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
4076 int dev_replace_is_ongoing = 0;
4077 int num_alloc_stripes;
4078 int patch_the_first_stripe_for_dev_replace = 0;
4079 u64 physical_to_patch_in_first_stripe = 0;
3803 4080
3804 read_lock(&em_tree->lock); 4081 read_lock(&em_tree->lock);
3805 em = lookup_extent_mapping(em_tree, logical, *length); 4082 em = lookup_extent_mapping(em_tree, logical, *length);
@@ -3816,9 +4093,6 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3816 map = (struct map_lookup *)em->bdev; 4093 map = (struct map_lookup *)em->bdev;
3817 offset = logical - em->start; 4094 offset = logical - em->start;
3818 4095
3819 if (mirror_num > map->num_stripes)
3820 mirror_num = 0;
3821
3822 stripe_nr = offset; 4096 stripe_nr = offset;
3823 /* 4097 /*
3824 * stripe_nr counts the total number of stripes we have to stride 4098 * stripe_nr counts the total number of stripes we have to stride
@@ -3845,6 +4119,93 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3845 if (!bbio_ret) 4119 if (!bbio_ret)
3846 goto out; 4120 goto out;
3847 4121
4122 btrfs_dev_replace_lock(dev_replace);
4123 dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
4124 if (!dev_replace_is_ongoing)
4125 btrfs_dev_replace_unlock(dev_replace);
4126
4127 if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
4128 !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
4129 dev_replace->tgtdev != NULL) {
4130 /*
4131 * in dev-replace case, for repair case (that's the only
4132 * case where the mirror is selected explicitly when
4133 * calling btrfs_map_block), blocks left of the left cursor
4134 * can also be read from the target drive.
4135 * For REQ_GET_READ_MIRRORS, the target drive is added as
4136 * the last one to the array of stripes. For READ, it also
4137 * needs to be supported using the same mirror number.
4138 * If the requested block is not left of the left cursor,
4139 * EIO is returned. This can happen because btrfs_num_copies()
4140 * returns one more in the dev-replace case.
4141 */
4142 u64 tmp_length = *length;
4143 struct btrfs_bio *tmp_bbio = NULL;
4144 int tmp_num_stripes;
4145 u64 srcdev_devid = dev_replace->srcdev->devid;
4146 int index_srcdev = 0;
4147 int found = 0;
4148 u64 physical_of_found = 0;
4149
4150 ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS,
4151 logical, &tmp_length, &tmp_bbio, 0);
4152 if (ret) {
4153 WARN_ON(tmp_bbio != NULL);
4154 goto out;
4155 }
4156
4157 tmp_num_stripes = tmp_bbio->num_stripes;
4158 if (mirror_num > tmp_num_stripes) {
4159 /*
4160 * REQ_GET_READ_MIRRORS does not contain this
4161 * mirror, that means that the requested area
4162 * is not left of the left cursor
4163 */
4164 ret = -EIO;
4165 kfree(tmp_bbio);
4166 goto out;
4167 }
4168
4169 /*
4170 * process the rest of the function using the mirror_num
4171 * of the source drive. Therefore look it up first.
4172 * At the end, patch the device pointer to the one of the
4173 * target drive.
4174 */
4175 for (i = 0; i < tmp_num_stripes; i++) {
4176 if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) {
4177 /*
4178 * In case of DUP, in order to keep it
4179 * simple, only add the mirror with the
4180 * lowest physical address
4181 */
4182 if (found &&
4183 physical_of_found <=
4184 tmp_bbio->stripes[i].physical)
4185 continue;
4186 index_srcdev = i;
4187 found = 1;
4188 physical_of_found =
4189 tmp_bbio->stripes[i].physical;
4190 }
4191 }
4192
4193 if (found) {
4194 mirror_num = index_srcdev + 1;
4195 patch_the_first_stripe_for_dev_replace = 1;
4196 physical_to_patch_in_first_stripe = physical_of_found;
4197 } else {
4198 WARN_ON(1);
4199 ret = -EIO;
4200 kfree(tmp_bbio);
4201 goto out;
4202 }
4203
4204 kfree(tmp_bbio);
4205 } else if (mirror_num > map->num_stripes) {
4206 mirror_num = 0;
4207 }
4208
3848 num_stripes = 1; 4209 num_stripes = 1;
3849 stripe_index = 0; 4210 stripe_index = 0;
3850 stripe_nr_orig = stripe_nr; 4211 stripe_nr_orig = stripe_nr;
@@ -3859,19 +4220,20 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3859 stripe_nr_end - stripe_nr_orig); 4220 stripe_nr_end - stripe_nr_orig);
3860 stripe_index = do_div(stripe_nr, map->num_stripes); 4221 stripe_index = do_div(stripe_nr, map->num_stripes);
3861 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 4222 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
3862 if (rw & (REQ_WRITE | REQ_DISCARD)) 4223 if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS))
3863 num_stripes = map->num_stripes; 4224 num_stripes = map->num_stripes;
3864 else if (mirror_num) 4225 else if (mirror_num)
3865 stripe_index = mirror_num - 1; 4226 stripe_index = mirror_num - 1;
3866 else { 4227 else {
3867 stripe_index = find_live_mirror(map, 0, 4228 stripe_index = find_live_mirror(fs_info, map, 0,
3868 map->num_stripes, 4229 map->num_stripes,
3869 current->pid % map->num_stripes); 4230 current->pid % map->num_stripes,
4231 dev_replace_is_ongoing);
3870 mirror_num = stripe_index + 1; 4232 mirror_num = stripe_index + 1;
3871 } 4233 }
3872 4234
3873 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 4235 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
3874 if (rw & (REQ_WRITE | REQ_DISCARD)) { 4236 if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) {
3875 num_stripes = map->num_stripes; 4237 num_stripes = map->num_stripes;
3876 } else if (mirror_num) { 4238 } else if (mirror_num) {
3877 stripe_index = mirror_num - 1; 4239 stripe_index = mirror_num - 1;
@@ -3885,7 +4247,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3885 stripe_index = do_div(stripe_nr, factor); 4247 stripe_index = do_div(stripe_nr, factor);
3886 stripe_index *= map->sub_stripes; 4248 stripe_index *= map->sub_stripes;
3887 4249
3888 if (rw & REQ_WRITE) 4250 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS))
3889 num_stripes = map->sub_stripes; 4251 num_stripes = map->sub_stripes;
3890 else if (rw & REQ_DISCARD) 4252 else if (rw & REQ_DISCARD)
3891 num_stripes = min_t(u64, map->sub_stripes * 4253 num_stripes = min_t(u64, map->sub_stripes *
@@ -3895,9 +4257,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3895 stripe_index += mirror_num - 1; 4257 stripe_index += mirror_num - 1;
3896 else { 4258 else {
3897 int old_stripe_index = stripe_index; 4259 int old_stripe_index = stripe_index;
3898 stripe_index = find_live_mirror(map, stripe_index, 4260 stripe_index = find_live_mirror(fs_info, map,
4261 stripe_index,
3899 map->sub_stripes, stripe_index + 4262 map->sub_stripes, stripe_index +
3900 current->pid % map->sub_stripes); 4263 current->pid % map->sub_stripes,
4264 dev_replace_is_ongoing);
3901 mirror_num = stripe_index - old_stripe_index + 1; 4265 mirror_num = stripe_index - old_stripe_index + 1;
3902 } 4266 }
3903 } else { 4267 } else {
@@ -3911,7 +4275,14 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3911 } 4275 }
3912 BUG_ON(stripe_index >= map->num_stripes); 4276 BUG_ON(stripe_index >= map->num_stripes);
3913 4277
3914 bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS); 4278 num_alloc_stripes = num_stripes;
4279 if (dev_replace_is_ongoing) {
4280 if (rw & (REQ_WRITE | REQ_DISCARD))
4281 num_alloc_stripes <<= 1;
4282 if (rw & REQ_GET_READ_MIRRORS)
4283 num_alloc_stripes++;
4284 }
4285 bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
3915 if (!bbio) { 4286 if (!bbio) {
3916 ret = -ENOMEM; 4287 ret = -ENOMEM;
3917 goto out; 4288 goto out;
@@ -3998,7 +4369,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
3998 } 4369 }
3999 } 4370 }
4000 4371
4001 if (rw & REQ_WRITE) { 4372 if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) {
4002 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | 4373 if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
4003 BTRFS_BLOCK_GROUP_RAID10 | 4374 BTRFS_BLOCK_GROUP_RAID10 |
4004 BTRFS_BLOCK_GROUP_DUP)) { 4375 BTRFS_BLOCK_GROUP_DUP)) {
@@ -4006,20 +4377,115 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
4006 } 4377 }
4007 } 4378 }
4008 4379
4380 if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) &&
4381 dev_replace->tgtdev != NULL) {
4382 int index_where_to_add;
4383 u64 srcdev_devid = dev_replace->srcdev->devid;
4384
4385 /*
4386 * duplicate the write operations while the dev replace
4387 * procedure is running. Since the copying of the old disk
4388 * to the new disk takes place at run time while the
4389 * filesystem is mounted writable, the regular write
4390 * operations to the old disk have to be duplicated to go
4391 * to the new disk as well.
4392 * Note that device->missing is handled by the caller, and
4393 * that the write to the old disk is already set up in the
4394 * stripes array.
4395 */
4396 index_where_to_add = num_stripes;
4397 for (i = 0; i < num_stripes; i++) {
4398 if (bbio->stripes[i].dev->devid == srcdev_devid) {
4399 /* write to new disk, too */
4400 struct btrfs_bio_stripe *new =
4401 bbio->stripes + index_where_to_add;
4402 struct btrfs_bio_stripe *old =
4403 bbio->stripes + i;
4404
4405 new->physical = old->physical;
4406 new->length = old->length;
4407 new->dev = dev_replace->tgtdev;
4408 index_where_to_add++;
4409 max_errors++;
4410 }
4411 }
4412 num_stripes = index_where_to_add;
4413 } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) &&
4414 dev_replace->tgtdev != NULL) {
4415 u64 srcdev_devid = dev_replace->srcdev->devid;
4416 int index_srcdev = 0;
4417 int found = 0;
4418 u64 physical_of_found = 0;
4419
4420 /*
4421 * During the dev-replace procedure, the target drive can
4422 * also be used to read data in case it is needed to repair
4423 * a corrupt block elsewhere. This is possible if the
4424 * requested area is left of the left cursor. In this area,
4425 * the target drive is a full copy of the source drive.
4426 */
4427 for (i = 0; i < num_stripes; i++) {
4428 if (bbio->stripes[i].dev->devid == srcdev_devid) {
4429 /*
4430 * In case of DUP, in order to keep it
4431 * simple, only add the mirror with the
4432 * lowest physical address
4433 */
4434 if (found &&
4435 physical_of_found <=
4436 bbio->stripes[i].physical)
4437 continue;
4438 index_srcdev = i;
4439 found = 1;
4440 physical_of_found = bbio->stripes[i].physical;
4441 }
4442 }
4443 if (found) {
4444 u64 length = map->stripe_len;
4445
4446 if (physical_of_found + length <=
4447 dev_replace->cursor_left) {
4448 struct btrfs_bio_stripe *tgtdev_stripe =
4449 bbio->stripes + num_stripes;
4450
4451 tgtdev_stripe->physical = physical_of_found;
4452 tgtdev_stripe->length =
4453 bbio->stripes[index_srcdev].length;
4454 tgtdev_stripe->dev = dev_replace->tgtdev;
4455
4456 num_stripes++;
4457 }
4458 }
4459 }
4460
4009 *bbio_ret = bbio; 4461 *bbio_ret = bbio;
4010 bbio->num_stripes = num_stripes; 4462 bbio->num_stripes = num_stripes;
4011 bbio->max_errors = max_errors; 4463 bbio->max_errors = max_errors;
4012 bbio->mirror_num = mirror_num; 4464 bbio->mirror_num = mirror_num;
4465
4466 /*
4467 * this is the case that REQ_READ && dev_replace_is_ongoing &&
4468 * mirror_num == num_stripes + 1 && dev_replace target drive is
4469 * available as a mirror
4470 */
4471 if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
4472 WARN_ON(num_stripes > 1);
4473 bbio->stripes[0].dev = dev_replace->tgtdev;
4474 bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
4475 bbio->mirror_num = map->num_stripes + 1;
4476 }
4013out: 4477out:
4478 if (dev_replace_is_ongoing)
4479 btrfs_dev_replace_unlock(dev_replace);
4014 free_extent_map(em); 4480 free_extent_map(em);
4015 return ret; 4481 return ret;
4016} 4482}
4017 4483
4018int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, 4484int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
4019 u64 logical, u64 *length, 4485 u64 logical, u64 *length,
4020 struct btrfs_bio **bbio_ret, int mirror_num) 4486 struct btrfs_bio **bbio_ret, int mirror_num)
4021{ 4487{
4022 return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret, 4488 return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret,
4023 mirror_num); 4489 mirror_num);
4024} 4490}
4025 4491
@@ -4238,10 +4704,116 @@ static noinline void schedule_bio(struct btrfs_root *root,
4238 &device->work); 4704 &device->work);
4239} 4705}
4240 4706
4707static int bio_size_ok(struct block_device *bdev, struct bio *bio,
4708 sector_t sector)
4709{
4710 struct bio_vec *prev;
4711 struct request_queue *q = bdev_get_queue(bdev);
4712 unsigned short max_sectors = queue_max_sectors(q);
4713 struct bvec_merge_data bvm = {
4714 .bi_bdev = bdev,
4715 .bi_sector = sector,
4716 .bi_rw = bio->bi_rw,
4717 };
4718
4719 if (bio->bi_vcnt == 0) {
4720 WARN_ON(1);
4721 return 1;
4722 }
4723
4724 prev = &bio->bi_io_vec[bio->bi_vcnt - 1];
4725 if ((bio->bi_size >> 9) > max_sectors)
4726 return 0;
4727
4728 if (!q->merge_bvec_fn)
4729 return 1;
4730
4731 bvm.bi_size = bio->bi_size - prev->bv_len;
4732 if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len)
4733 return 0;
4734 return 1;
4735}
4736
4737static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
4738 struct bio *bio, u64 physical, int dev_nr,
4739 int rw, int async)
4740{
4741 struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
4742
4743 bio->bi_private = bbio;
4744 bio->bi_private = merge_stripe_index_into_bio_private(
4745 bio->bi_private, (unsigned int)dev_nr);
4746 bio->bi_end_io = btrfs_end_bio;
4747 bio->bi_sector = physical >> 9;
4748#ifdef DEBUG
4749 {
4750 struct rcu_string *name;
4751
4752 rcu_read_lock();
4753 name = rcu_dereference(dev->name);
4754 pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu "
4755 "(%s id %llu), size=%u\n", rw,
4756 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4757 name->str, dev->devid, bio->bi_size);
4758 rcu_read_unlock();
4759 }
4760#endif
4761 bio->bi_bdev = dev->bdev;
4762 if (async)
4763 schedule_bio(root, dev, rw, bio);
4764 else
4765 btrfsic_submit_bio(rw, bio);
4766}
4767
4768static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio,
4769 struct bio *first_bio, struct btrfs_device *dev,
4770 int dev_nr, int rw, int async)
4771{
4772 struct bio_vec *bvec = first_bio->bi_io_vec;
4773 struct bio *bio;
4774 int nr_vecs = bio_get_nr_vecs(dev->bdev);
4775 u64 physical = bbio->stripes[dev_nr].physical;
4776
4777again:
4778 bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS);
4779 if (!bio)
4780 return -ENOMEM;
4781
4782 while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) {
4783 if (bio_add_page(bio, bvec->bv_page, bvec->bv_len,
4784 bvec->bv_offset) < bvec->bv_len) {
4785 u64 len = bio->bi_size;
4786
4787 atomic_inc(&bbio->stripes_pending);
4788 submit_stripe_bio(root, bbio, bio, physical, dev_nr,
4789 rw, async);
4790 physical += len;
4791 goto again;
4792 }
4793 bvec++;
4794 }
4795
4796 submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async);
4797 return 0;
4798}
4799
4800static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
4801{
4802 atomic_inc(&bbio->error);
4803 if (atomic_dec_and_test(&bbio->stripes_pending)) {
4804 bio->bi_private = bbio->private;
4805 bio->bi_end_io = bbio->end_io;
4806 bio->bi_bdev = (struct block_device *)
4807 (unsigned long)bbio->mirror_num;
4808 bio->bi_sector = logical >> 9;
4809 kfree(bbio);
4810 bio_endio(bio, -EIO);
4811 }
4812}
4813
4241int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, 4814int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4242 int mirror_num, int async_submit) 4815 int mirror_num, int async_submit)
4243{ 4816{
4244 struct btrfs_mapping_tree *map_tree;
4245 struct btrfs_device *dev; 4817 struct btrfs_device *dev;
4246 struct bio *first_bio = bio; 4818 struct bio *first_bio = bio;
4247 u64 logical = (u64)bio->bi_sector << 9; 4819 u64 logical = (u64)bio->bi_sector << 9;
@@ -4253,12 +4825,11 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4253 struct btrfs_bio *bbio = NULL; 4825 struct btrfs_bio *bbio = NULL;
4254 4826
4255 length = bio->bi_size; 4827 length = bio->bi_size;
4256 map_tree = &root->fs_info->mapping_tree;
4257 map_length = length; 4828 map_length = length;
4258 4829
4259 ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, 4830 ret = btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio,
4260 mirror_num); 4831 mirror_num);
4261 if (ret) /* -ENOMEM */ 4832 if (ret)
4262 return ret; 4833 return ret;
4263 4834
4264 total_devs = bbio->num_stripes; 4835 total_devs = bbio->num_stripes;
@@ -4276,52 +4847,48 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
4276 atomic_set(&bbio->stripes_pending, bbio->num_stripes); 4847 atomic_set(&bbio->stripes_pending, bbio->num_stripes);
4277 4848
4278 while (dev_nr < total_devs) { 4849 while (dev_nr < total_devs) {
4850 dev = bbio->stripes[dev_nr].dev;
4851 if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) {
4852 bbio_error(bbio, first_bio, logical);
4853 dev_nr++;
4854 continue;
4855 }
4856
4857 /*
4858 * Check and see if we're ok with this bio based on its size
4859 * and offset with the given device.
4860 */
4861 if (!bio_size_ok(dev->bdev, first_bio,
4862 bbio->stripes[dev_nr].physical >> 9)) {
4863 ret = breakup_stripe_bio(root, bbio, first_bio, dev,
4864 dev_nr, rw, async_submit);
4865 BUG_ON(ret);
4866 dev_nr++;
4867 continue;
4868 }
4869
4279 if (dev_nr < total_devs - 1) { 4870 if (dev_nr < total_devs - 1) {
4280 bio = bio_clone(first_bio, GFP_NOFS); 4871 bio = bio_clone(first_bio, GFP_NOFS);
4281 BUG_ON(!bio); /* -ENOMEM */ 4872 BUG_ON(!bio); /* -ENOMEM */
4282 } else { 4873 } else {
4283 bio = first_bio; 4874 bio = first_bio;
4284 } 4875 }
4285 bio->bi_private = bbio; 4876
4286 bio->bi_private = merge_stripe_index_into_bio_private( 4877 submit_stripe_bio(root, bbio, bio,
4287 bio->bi_private, (unsigned int)dev_nr); 4878 bbio->stripes[dev_nr].physical, dev_nr, rw,
4288 bio->bi_end_io = btrfs_end_bio; 4879 async_submit);
4289 bio->bi_sector = bbio->stripes[dev_nr].physical >> 9;
4290 dev = bbio->stripes[dev_nr].dev;
4291 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
4292#ifdef DEBUG
4293 struct rcu_string *name;
4294
4295 rcu_read_lock();
4296 name = rcu_dereference(dev->name);
4297 pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu "
4298 "(%s id %llu), size=%u\n", rw,
4299 (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev,
4300 name->str, dev->devid, bio->bi_size);
4301 rcu_read_unlock();
4302#endif
4303 bio->bi_bdev = dev->bdev;
4304 if (async_submit)
4305 schedule_bio(root, dev, rw, bio);
4306 else
4307 btrfsic_submit_bio(rw, bio);
4308 } else {
4309 bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
4310 bio->bi_sector = logical >> 9;
4311 bio_endio(bio, -EIO);
4312 }
4313 dev_nr++; 4880 dev_nr++;
4314 } 4881 }
4315 return 0; 4882 return 0;
4316} 4883}
4317 4884
4318struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, 4885struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
4319 u8 *uuid, u8 *fsid) 4886 u8 *uuid, u8 *fsid)
4320{ 4887{
4321 struct btrfs_device *device; 4888 struct btrfs_device *device;
4322 struct btrfs_fs_devices *cur_devices; 4889 struct btrfs_fs_devices *cur_devices;
4323 4890
4324 cur_devices = root->fs_info->fs_devices; 4891 cur_devices = fs_info->fs_devices;
4325 while (cur_devices) { 4892 while (cur_devices) {
4326 if (!fsid || 4893 if (!fsid ||
4327 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { 4894 !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) {
@@ -4402,6 +4969,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
4402 em->bdev = (struct block_device *)map; 4969 em->bdev = (struct block_device *)map;
4403 em->start = logical; 4970 em->start = logical;
4404 em->len = length; 4971 em->len = length;
4972 em->orig_start = 0;
4405 em->block_start = 0; 4973 em->block_start = 0;
4406 em->block_len = em->len; 4974 em->block_len = em->len;
4407 4975
@@ -4419,8 +4987,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
4419 read_extent_buffer(leaf, uuid, (unsigned long) 4987 read_extent_buffer(leaf, uuid, (unsigned long)
4420 btrfs_stripe_dev_uuid_nr(chunk, i), 4988 btrfs_stripe_dev_uuid_nr(chunk, i),
4421 BTRFS_UUID_SIZE); 4989 BTRFS_UUID_SIZE);
4422 map->stripes[i].dev = btrfs_find_device(root, devid, uuid, 4990 map->stripes[i].dev = btrfs_find_device(root->fs_info, devid,
4423 NULL); 4991 uuid, NULL);
4424 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { 4992 if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) {
4425 kfree(map); 4993 kfree(map);
4426 free_extent_map(em); 4994 free_extent_map(em);
@@ -4461,6 +5029,8 @@ static void fill_device_from_item(struct extent_buffer *leaf,
4461 device->io_align = btrfs_device_io_align(leaf, dev_item); 5029 device->io_align = btrfs_device_io_align(leaf, dev_item);
4462 device->io_width = btrfs_device_io_width(leaf, dev_item); 5030 device->io_width = btrfs_device_io_width(leaf, dev_item);
4463 device->sector_size = btrfs_device_sector_size(leaf, dev_item); 5031 device->sector_size = btrfs_device_sector_size(leaf, dev_item);
5032 WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
5033 device->is_tgtdev_for_dev_replace = 0;
4464 5034
4465 ptr = (unsigned long)btrfs_device_uuid(dev_item); 5035 ptr = (unsigned long)btrfs_device_uuid(dev_item);
4466 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); 5036 read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
@@ -4538,7 +5108,7 @@ static int read_one_dev(struct btrfs_root *root,
4538 return ret; 5108 return ret;
4539 } 5109 }
4540 5110
4541 device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); 5111 device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid);
4542 if (!device || !device->bdev) { 5112 if (!device || !device->bdev) {
4543 if (!btrfs_test_opt(root, DEGRADED)) 5113 if (!btrfs_test_opt(root, DEGRADED))
4544 return -EIO; 5114 return -EIO;
@@ -4571,7 +5141,7 @@ static int read_one_dev(struct btrfs_root *root,
4571 fill_device_from_item(leaf, dev_item, device); 5141 fill_device_from_item(leaf, dev_item, device);
4572 device->dev_root = root->fs_info->dev_root; 5142 device->dev_root = root->fs_info->dev_root;
4573 device->in_fs_metadata = 1; 5143 device->in_fs_metadata = 1;
4574 if (device->writeable) { 5144 if (device->writeable && !device->is_tgtdev_for_dev_replace) {
4575 device->fs_devices->total_rw_bytes += device->total_bytes; 5145 device->fs_devices->total_rw_bytes += device->total_bytes;
4576 spin_lock(&root->fs_info->free_chunk_lock); 5146 spin_lock(&root->fs_info->free_chunk_lock);
4577 root->fs_info->free_chunk_space += device->total_bytes - 5147 root->fs_info->free_chunk_space += device->total_bytes -
@@ -4930,7 +5500,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
4930 int i; 5500 int i;
4931 5501
4932 mutex_lock(&fs_devices->device_list_mutex); 5502 mutex_lock(&fs_devices->device_list_mutex);
4933 dev = btrfs_find_device(root, stats->devid, NULL, NULL); 5503 dev = btrfs_find_device(root->fs_info, stats->devid, NULL, NULL);
4934 mutex_unlock(&fs_devices->device_list_mutex); 5504 mutex_unlock(&fs_devices->device_list_mutex);
4935 5505
4936 if (!dev) { 5506 if (!dev) {
@@ -4958,3 +5528,21 @@ int btrfs_get_dev_stats(struct btrfs_root *root,
4958 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; 5528 stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
4959 return 0; 5529 return 0;
4960} 5530}
5531
5532int btrfs_scratch_superblock(struct btrfs_device *device)
5533{
5534 struct buffer_head *bh;
5535 struct btrfs_super_block *disk_super;
5536
5537 bh = btrfs_read_dev_super(device->bdev);
5538 if (!bh)
5539 return -EINVAL;
5540 disk_super = (struct btrfs_super_block *)bh->b_data;
5541
5542 memset(&disk_super->magic, 0, sizeof(disk_super->magic));
5543 set_buffer_dirty(bh);
5544 sync_dirty_buffer(bh);
5545 brelse(bh);
5546
5547 return 0;
5548}