diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 13:49:22 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-05-21 13:49:22 -0400 |
commit | 07be1337b9e8bfcd855c6e9175b5066a30ac609b (patch) | |
tree | e40ad01dc89f6eb17d461939b809fea3387fc2a5 /fs/btrfs/volumes.c | |
parent | 63d222b9d277c4d7bf08afd1631a7f8e327a825c (diff) | |
parent | c315ef8d9db7f1a0ebd023a395ebdfde1c68057e (diff) |
Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This has our merge window series of cleanups and fixes. These target
a wide range of issues, but do include some important fixes for
qgroups, O_DIRECT, and fsync handling. Jeff Mahoney moved around a
few definitions to make them easier for userland to consume.
Also whiteout support is included now that issues with overlayfs have
been cleared up.
I have one more fix pending for page faults during btrfs_copy_from_user,
but I wanted to get this bulk out the door first"
* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (90 commits)
btrfs: fix memory leak during RAID 5/6 device replacement
Btrfs: add semaphore to synchronize direct IO writes with fsync
Btrfs: fix race between block group relocation and nocow writes
Btrfs: fix race between fsync and direct IO writes for prealloc extents
Btrfs: fix number of transaction units for renames with whiteout
Btrfs: pin logs earlier when doing a rename exchange operation
Btrfs: unpin logs if rename exchange operation fails
Btrfs: fix inode leak on failure to setup whiteout inode in rename
btrfs: add support for RENAME_EXCHANGE and RENAME_WHITEOUT
Btrfs: pin log earlier when renaming
Btrfs: unpin log if rename operation fails
Btrfs: don't do unnecessary delalloc flushes when relocating
Btrfs: don't wait for unrelated IO to finish before relocation
Btrfs: fix empty symlink after creating symlink and fsync parent dir
Btrfs: fix for incorrect directory entries after fsync log replay
btrfs: build fixup for qgroup_account_snapshot
btrfs: qgroup: Fix qgroup accounting when creating snapshot
Btrfs: fix fspath error deallocation
btrfs: make find_workspace warn if there are no workspaces
btrfs: make find_workspace always succeed
...
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 454 |
1 files changed, 249 insertions, 205 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bfb80da3e6eb..2b88127bba5b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -118,6 +118,21 @@ const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = { | |||
118 | [BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6, | 118 | [BTRFS_RAID_RAID6] = BTRFS_BLOCK_GROUP_RAID6, |
119 | }; | 119 | }; |
120 | 120 | ||
121 | /* | ||
122 | * Table to convert BTRFS_RAID_* to the error code if minimum number of devices | ||
123 | * condition is not met. Zero means there's no corresponding | ||
124 | * BTRFS_ERROR_DEV_*_NOT_MET value. | ||
125 | */ | ||
126 | const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = { | ||
127 | [BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET, | ||
128 | [BTRFS_RAID_RAID1] = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET, | ||
129 | [BTRFS_RAID_DUP] = 0, | ||
130 | [BTRFS_RAID_RAID0] = 0, | ||
131 | [BTRFS_RAID_SINGLE] = 0, | ||
132 | [BTRFS_RAID_RAID5] = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET, | ||
133 | [BTRFS_RAID_RAID6] = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET, | ||
134 | }; | ||
135 | |||
121 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 136 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
122 | struct btrfs_root *root, | 137 | struct btrfs_root *root, |
123 | struct btrfs_device *device); | 138 | struct btrfs_device *device); |
@@ -699,7 +714,8 @@ static noinline int device_list_add(const char *path, | |||
699 | * if there is new btrfs on an already registered device, | 714 | * if there is new btrfs on an already registered device, |
700 | * then remove the stale device entry. | 715 | * then remove the stale device entry. |
701 | */ | 716 | */ |
702 | btrfs_free_stale_device(device); | 717 | if (ret > 0) |
718 | btrfs_free_stale_device(device); | ||
703 | 719 | ||
704 | *fs_devices_ret = fs_devices; | 720 | *fs_devices_ret = fs_devices; |
705 | 721 | ||
@@ -988,6 +1004,56 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
988 | return ret; | 1004 | return ret; |
989 | } | 1005 | } |
990 | 1006 | ||
1007 | void btrfs_release_disk_super(struct page *page) | ||
1008 | { | ||
1009 | kunmap(page); | ||
1010 | put_page(page); | ||
1011 | } | ||
1012 | |||
1013 | int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, | ||
1014 | struct page **page, struct btrfs_super_block **disk_super) | ||
1015 | { | ||
1016 | void *p; | ||
1017 | pgoff_t index; | ||
1018 | |||
1019 | /* make sure our super fits in the device */ | ||
1020 | if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) | ||
1021 | return 1; | ||
1022 | |||
1023 | /* make sure our super fits in the page */ | ||
1024 | if (sizeof(**disk_super) > PAGE_SIZE) | ||
1025 | return 1; | ||
1026 | |||
1027 | /* make sure our super doesn't straddle pages on disk */ | ||
1028 | index = bytenr >> PAGE_SHIFT; | ||
1029 | if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index) | ||
1030 | return 1; | ||
1031 | |||
1032 | /* pull in the page with our super */ | ||
1033 | *page = read_cache_page_gfp(bdev->bd_inode->i_mapping, | ||
1034 | index, GFP_KERNEL); | ||
1035 | |||
1036 | if (IS_ERR_OR_NULL(*page)) | ||
1037 | return 1; | ||
1038 | |||
1039 | p = kmap(*page); | ||
1040 | |||
1041 | /* align our pointer to the offset of the super block */ | ||
1042 | *disk_super = p + (bytenr & ~PAGE_MASK); | ||
1043 | |||
1044 | if (btrfs_super_bytenr(*disk_super) != bytenr || | ||
1045 | btrfs_super_magic(*disk_super) != BTRFS_MAGIC) { | ||
1046 | btrfs_release_disk_super(*page); | ||
1047 | return 1; | ||
1048 | } | ||
1049 | |||
1050 | if ((*disk_super)->label[0] && | ||
1051 | (*disk_super)->label[BTRFS_LABEL_SIZE - 1]) | ||
1052 | (*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
1053 | |||
1054 | return 0; | ||
1055 | } | ||
1056 | |||
991 | /* | 1057 | /* |
992 | * Look for a btrfs signature on a device. This may be called out of the mount path | 1058 | * Look for a btrfs signature on a device. This may be called out of the mount path |
993 | * and we are not allowed to call set_blocksize during the scan. The superblock | 1059 | * and we are not allowed to call set_blocksize during the scan. The superblock |
@@ -999,13 +1065,11 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
999 | struct btrfs_super_block *disk_super; | 1065 | struct btrfs_super_block *disk_super; |
1000 | struct block_device *bdev; | 1066 | struct block_device *bdev; |
1001 | struct page *page; | 1067 | struct page *page; |
1002 | void *p; | ||
1003 | int ret = -EINVAL; | 1068 | int ret = -EINVAL; |
1004 | u64 devid; | 1069 | u64 devid; |
1005 | u64 transid; | 1070 | u64 transid; |
1006 | u64 total_devices; | 1071 | u64 total_devices; |
1007 | u64 bytenr; | 1072 | u64 bytenr; |
1008 | pgoff_t index; | ||
1009 | 1073 | ||
1010 | /* | 1074 | /* |
1011 | * we would like to check all the supers, but that would make | 1075 | * we would like to check all the supers, but that would make |
@@ -1018,41 +1082,14 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
1018 | mutex_lock(&uuid_mutex); | 1082 | mutex_lock(&uuid_mutex); |
1019 | 1083 | ||
1020 | bdev = blkdev_get_by_path(path, flags, holder); | 1084 | bdev = blkdev_get_by_path(path, flags, holder); |
1021 | |||
1022 | if (IS_ERR(bdev)) { | 1085 | if (IS_ERR(bdev)) { |
1023 | ret = PTR_ERR(bdev); | 1086 | ret = PTR_ERR(bdev); |
1024 | goto error; | 1087 | goto error; |
1025 | } | 1088 | } |
1026 | 1089 | ||
1027 | /* make sure our super fits in the device */ | 1090 | if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) |
1028 | if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode)) | ||
1029 | goto error_bdev_put; | ||
1030 | |||
1031 | /* make sure our super fits in the page */ | ||
1032 | if (sizeof(*disk_super) > PAGE_SIZE) | ||
1033 | goto error_bdev_put; | ||
1034 | |||
1035 | /* make sure our super doesn't straddle pages on disk */ | ||
1036 | index = bytenr >> PAGE_SHIFT; | ||
1037 | if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index) | ||
1038 | goto error_bdev_put; | ||
1039 | |||
1040 | /* pull in the page with our super */ | ||
1041 | page = read_cache_page_gfp(bdev->bd_inode->i_mapping, | ||
1042 | index, GFP_NOFS); | ||
1043 | |||
1044 | if (IS_ERR_OR_NULL(page)) | ||
1045 | goto error_bdev_put; | 1091 | goto error_bdev_put; |
1046 | 1092 | ||
1047 | p = kmap(page); | ||
1048 | |||
1049 | /* align our pointer to the offset of the super block */ | ||
1050 | disk_super = p + (bytenr & ~PAGE_MASK); | ||
1051 | |||
1052 | if (btrfs_super_bytenr(disk_super) != bytenr || | ||
1053 | btrfs_super_magic(disk_super) != BTRFS_MAGIC) | ||
1054 | goto error_unmap; | ||
1055 | |||
1056 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 1093 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
1057 | transid = btrfs_super_generation(disk_super); | 1094 | transid = btrfs_super_generation(disk_super); |
1058 | total_devices = btrfs_super_num_devices(disk_super); | 1095 | total_devices = btrfs_super_num_devices(disk_super); |
@@ -1060,8 +1097,6 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
1060 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 1097 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
1061 | if (ret > 0) { | 1098 | if (ret > 0) { |
1062 | if (disk_super->label[0]) { | 1099 | if (disk_super->label[0]) { |
1063 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) | ||
1064 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
1065 | printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); | 1100 | printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); |
1066 | } else { | 1101 | } else { |
1067 | printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); | 1102 | printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); |
@@ -1073,9 +1108,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
1073 | if (!ret && fs_devices_ret) | 1108 | if (!ret && fs_devices_ret) |
1074 | (*fs_devices_ret)->total_devices = total_devices; | 1109 | (*fs_devices_ret)->total_devices = total_devices; |
1075 | 1110 | ||
1076 | error_unmap: | 1111 | btrfs_release_disk_super(page); |
1077 | kunmap(page); | ||
1078 | put_page(page); | ||
1079 | 1112 | ||
1080 | error_bdev_put: | 1113 | error_bdev_put: |
1081 | blkdev_put(bdev, flags); | 1114 | blkdev_put(bdev, flags); |
@@ -1454,7 +1487,7 @@ again: | |||
1454 | extent = btrfs_item_ptr(leaf, path->slots[0], | 1487 | extent = btrfs_item_ptr(leaf, path->slots[0], |
1455 | struct btrfs_dev_extent); | 1488 | struct btrfs_dev_extent); |
1456 | } else { | 1489 | } else { |
1457 | btrfs_std_error(root->fs_info, ret, "Slot search failed"); | 1490 | btrfs_handle_fs_error(root->fs_info, ret, "Slot search failed"); |
1458 | goto out; | 1491 | goto out; |
1459 | } | 1492 | } |
1460 | 1493 | ||
@@ -1462,7 +1495,7 @@ again: | |||
1462 | 1495 | ||
1463 | ret = btrfs_del_item(trans, root, path); | 1496 | ret = btrfs_del_item(trans, root, path); |
1464 | if (ret) { | 1497 | if (ret) { |
1465 | btrfs_std_error(root->fs_info, ret, | 1498 | btrfs_handle_fs_error(root->fs_info, ret, |
1466 | "Failed to remove dev extent item"); | 1499 | "Failed to remove dev extent item"); |
1467 | } else { | 1500 | } else { |
1468 | set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); | 1501 | set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags); |
@@ -1688,32 +1721,92 @@ out: | |||
1688 | return ret; | 1721 | return ret; |
1689 | } | 1722 | } |
1690 | 1723 | ||
1691 | int btrfs_rm_device(struct btrfs_root *root, char *device_path) | 1724 | /* |
1725 | * Verify that @num_devices satisfies the RAID profile constraints in the whole | ||
1726 | * filesystem. It's up to the caller to adjust that number regarding e.g. device | ||
1727 | * replace. | ||
1728 | */ | ||
1729 | static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info, | ||
1730 | u64 num_devices) | ||
1731 | { | ||
1732 | u64 all_avail; | ||
1733 | unsigned seq; | ||
1734 | int i; | ||
1735 | |||
1736 | do { | ||
1737 | seq = read_seqbegin(&fs_info->profiles_lock); | ||
1738 | |||
1739 | all_avail = fs_info->avail_data_alloc_bits | | ||
1740 | fs_info->avail_system_alloc_bits | | ||
1741 | fs_info->avail_metadata_alloc_bits; | ||
1742 | } while (read_seqretry(&fs_info->profiles_lock, seq)); | ||
1743 | |||
1744 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { | ||
1745 | if (!(all_avail & btrfs_raid_group[i])) | ||
1746 | continue; | ||
1747 | |||
1748 | if (num_devices < btrfs_raid_array[i].devs_min) { | ||
1749 | int ret = btrfs_raid_mindev_error[i]; | ||
1750 | |||
1751 | if (ret) | ||
1752 | return ret; | ||
1753 | } | ||
1754 | } | ||
1755 | |||
1756 | return 0; | ||
1757 | } | ||
1758 | |||
1759 | struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs, | ||
1760 | struct btrfs_device *device) | ||
1692 | { | 1761 | { |
1693 | struct btrfs_device *device; | ||
1694 | struct btrfs_device *next_device; | 1762 | struct btrfs_device *next_device; |
1695 | struct block_device *bdev; | 1763 | |
1696 | struct buffer_head *bh = NULL; | 1764 | list_for_each_entry(next_device, &fs_devs->devices, dev_list) { |
1697 | struct btrfs_super_block *disk_super; | 1765 | if (next_device != device && |
1766 | !next_device->missing && next_device->bdev) | ||
1767 | return next_device; | ||
1768 | } | ||
1769 | |||
1770 | return NULL; | ||
1771 | } | ||
1772 | |||
1773 | /* | ||
1774 | * Helper function to check if the given device is part of s_bdev / latest_bdev | ||
1775 | * and replace it with the provided or the next active device, in the context | ||
1776 | * where this function is called, there should always be another device (or | ||
1777 | * this_dev) which is active. | ||
1778 | */ | ||
1779 | void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info, | ||
1780 | struct btrfs_device *device, struct btrfs_device *this_dev) | ||
1781 | { | ||
1782 | struct btrfs_device *next_device; | ||
1783 | |||
1784 | if (this_dev) | ||
1785 | next_device = this_dev; | ||
1786 | else | ||
1787 | next_device = btrfs_find_next_active_device(fs_info->fs_devices, | ||
1788 | device); | ||
1789 | ASSERT(next_device); | ||
1790 | |||
1791 | if (fs_info->sb->s_bdev && | ||
1792 | (fs_info->sb->s_bdev == device->bdev)) | ||
1793 | fs_info->sb->s_bdev = next_device->bdev; | ||
1794 | |||
1795 | if (fs_info->fs_devices->latest_bdev == device->bdev) | ||
1796 | fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
1797 | } | ||
1798 | |||
1799 | int btrfs_rm_device(struct btrfs_root *root, char *device_path, u64 devid) | ||
1800 | { | ||
1801 | struct btrfs_device *device; | ||
1698 | struct btrfs_fs_devices *cur_devices; | 1802 | struct btrfs_fs_devices *cur_devices; |
1699 | u64 all_avail; | ||
1700 | u64 devid; | ||
1701 | u64 num_devices; | 1803 | u64 num_devices; |
1702 | u8 *dev_uuid; | ||
1703 | unsigned seq; | ||
1704 | int ret = 0; | 1804 | int ret = 0; |
1705 | bool clear_super = false; | 1805 | bool clear_super = false; |
1806 | char *dev_name = NULL; | ||
1706 | 1807 | ||
1707 | mutex_lock(&uuid_mutex); | 1808 | mutex_lock(&uuid_mutex); |
1708 | 1809 | ||
1709 | do { | ||
1710 | seq = read_seqbegin(&root->fs_info->profiles_lock); | ||
1711 | |||
1712 | all_avail = root->fs_info->avail_data_alloc_bits | | ||
1713 | root->fs_info->avail_system_alloc_bits | | ||
1714 | root->fs_info->avail_metadata_alloc_bits; | ||
1715 | } while (read_seqretry(&root->fs_info->profiles_lock, seq)); | ||
1716 | |||
1717 | num_devices = root->fs_info->fs_devices->num_devices; | 1810 | num_devices = root->fs_info->fs_devices->num_devices; |
1718 | btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0); | 1811 | btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0); |
1719 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { | 1812 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { |
@@ -1722,78 +1815,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1722 | } | 1815 | } |
1723 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0); | 1816 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0); |
1724 | 1817 | ||
1725 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { | 1818 | ret = btrfs_check_raid_min_devices(root->fs_info, num_devices - 1); |
1726 | ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET; | 1819 | if (ret) |
1727 | goto out; | ||
1728 | } | ||
1729 | |||
1730 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { | ||
1731 | ret = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET; | ||
1732 | goto out; | 1820 | goto out; |
1733 | } | ||
1734 | 1821 | ||
1735 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID5) && | 1822 | ret = btrfs_find_device_by_devspec(root, devid, device_path, |
1736 | root->fs_info->fs_devices->rw_devices <= 2) { | 1823 | &device); |
1737 | ret = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET; | 1824 | if (ret) |
1738 | goto out; | ||
1739 | } | ||
1740 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID6) && | ||
1741 | root->fs_info->fs_devices->rw_devices <= 3) { | ||
1742 | ret = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET; | ||
1743 | goto out; | 1825 | goto out; |
1744 | } | ||
1745 | |||
1746 | if (strcmp(device_path, "missing") == 0) { | ||
1747 | struct list_head *devices; | ||
1748 | struct btrfs_device *tmp; | ||
1749 | |||
1750 | device = NULL; | ||
1751 | devices = &root->fs_info->fs_devices->devices; | ||
1752 | /* | ||
1753 | * It is safe to read the devices since the volume_mutex | ||
1754 | * is held. | ||
1755 | */ | ||
1756 | list_for_each_entry(tmp, devices, dev_list) { | ||
1757 | if (tmp->in_fs_metadata && | ||
1758 | !tmp->is_tgtdev_for_dev_replace && | ||
1759 | !tmp->bdev) { | ||
1760 | device = tmp; | ||
1761 | break; | ||
1762 | } | ||
1763 | } | ||
1764 | bdev = NULL; | ||
1765 | bh = NULL; | ||
1766 | disk_super = NULL; | ||
1767 | if (!device) { | ||
1768 | ret = BTRFS_ERROR_DEV_MISSING_NOT_FOUND; | ||
1769 | goto out; | ||
1770 | } | ||
1771 | } else { | ||
1772 | ret = btrfs_get_bdev_and_sb(device_path, | ||
1773 | FMODE_WRITE | FMODE_EXCL, | ||
1774 | root->fs_info->bdev_holder, 0, | ||
1775 | &bdev, &bh); | ||
1776 | if (ret) | ||
1777 | goto out; | ||
1778 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
1779 | devid = btrfs_stack_device_id(&disk_super->dev_item); | ||
1780 | dev_uuid = disk_super->dev_item.uuid; | ||
1781 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, | ||
1782 | disk_super->fsid); | ||
1783 | if (!device) { | ||
1784 | ret = -ENOENT; | ||
1785 | goto error_brelse; | ||
1786 | } | ||
1787 | } | ||
1788 | 1826 | ||
1789 | if (device->is_tgtdev_for_dev_replace) { | 1827 | if (device->is_tgtdev_for_dev_replace) { |
1790 | ret = BTRFS_ERROR_DEV_TGT_REPLACE; | 1828 | ret = BTRFS_ERROR_DEV_TGT_REPLACE; |
1791 | goto error_brelse; | 1829 | goto out; |
1792 | } | 1830 | } |
1793 | 1831 | ||
1794 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { | 1832 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { |
1795 | ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; | 1833 | ret = BTRFS_ERROR_DEV_ONLY_WRITABLE; |
1796 | goto error_brelse; | 1834 | goto out; |
1797 | } | 1835 | } |
1798 | 1836 | ||
1799 | if (device->writeable) { | 1837 | if (device->writeable) { |
@@ -1801,6 +1839,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1801 | list_del_init(&device->dev_alloc_list); | 1839 | list_del_init(&device->dev_alloc_list); |
1802 | device->fs_devices->rw_devices--; | 1840 | device->fs_devices->rw_devices--; |
1803 | unlock_chunks(root); | 1841 | unlock_chunks(root); |
1842 | dev_name = kstrdup(device->name->str, GFP_KERNEL); | ||
1843 | if (!dev_name) { | ||
1844 | ret = -ENOMEM; | ||
1845 | goto error_undo; | ||
1846 | } | ||
1804 | clear_super = true; | 1847 | clear_super = true; |
1805 | } | 1848 | } |
1806 | 1849 | ||
@@ -1842,12 +1885,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1842 | if (device->missing) | 1885 | if (device->missing) |
1843 | device->fs_devices->missing_devices--; | 1886 | device->fs_devices->missing_devices--; |
1844 | 1887 | ||
1845 | next_device = list_entry(root->fs_info->fs_devices->devices.next, | 1888 | btrfs_assign_next_active_device(root->fs_info, device, NULL); |
1846 | struct btrfs_device, dev_list); | ||
1847 | if (device->bdev == root->fs_info->sb->s_bdev) | ||
1848 | root->fs_info->sb->s_bdev = next_device->bdev; | ||
1849 | if (device->bdev == root->fs_info->fs_devices->latest_bdev) | ||
1850 | root->fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
1851 | 1889 | ||
1852 | if (device->bdev) { | 1890 | if (device->bdev) { |
1853 | device->fs_devices->open_devices--; | 1891 | device->fs_devices->open_devices--; |
@@ -1883,63 +1921,23 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1883 | * at this point, the device is zero sized. We want to | 1921 | * at this point, the device is zero sized. We want to |
1884 | * remove it from the devices list and zero out the old super | 1922 | * remove it from the devices list and zero out the old super |
1885 | */ | 1923 | */ |
1886 | if (clear_super && disk_super) { | 1924 | if (clear_super) { |
1887 | u64 bytenr; | 1925 | struct block_device *bdev; |
1888 | int i; | 1926 | |
1889 | 1927 | bdev = blkdev_get_by_path(dev_name, FMODE_READ | FMODE_EXCL, | |
1890 | /* make sure this device isn't detected as part of | 1928 | root->fs_info->bdev_holder); |
1891 | * the FS anymore | 1929 | if (!IS_ERR(bdev)) { |
1892 | */ | 1930 | btrfs_scratch_superblocks(bdev, dev_name); |
1893 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | 1931 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
1894 | set_buffer_dirty(bh); | ||
1895 | sync_dirty_buffer(bh); | ||
1896 | |||
1897 | /* clear the mirror copies of super block on the disk | ||
1898 | * being removed, 0th copy is been taken care above and | ||
1899 | * the below would take of the rest | ||
1900 | */ | ||
1901 | for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
1902 | bytenr = btrfs_sb_offset(i); | ||
1903 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= | ||
1904 | i_size_read(bdev->bd_inode)) | ||
1905 | break; | ||
1906 | |||
1907 | brelse(bh); | ||
1908 | bh = __bread(bdev, bytenr / 4096, | ||
1909 | BTRFS_SUPER_INFO_SIZE); | ||
1910 | if (!bh) | ||
1911 | continue; | ||
1912 | |||
1913 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
1914 | |||
1915 | if (btrfs_super_bytenr(disk_super) != bytenr || | ||
1916 | btrfs_super_magic(disk_super) != BTRFS_MAGIC) { | ||
1917 | continue; | ||
1918 | } | ||
1919 | memset(&disk_super->magic, 0, | ||
1920 | sizeof(disk_super->magic)); | ||
1921 | set_buffer_dirty(bh); | ||
1922 | sync_dirty_buffer(bh); | ||
1923 | } | 1932 | } |
1924 | } | 1933 | } |
1925 | 1934 | ||
1926 | ret = 0; | ||
1927 | |||
1928 | if (bdev) { | ||
1929 | /* Notify udev that device has changed */ | ||
1930 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); | ||
1931 | |||
1932 | /* Update ctime/mtime for device path for libblkid */ | ||
1933 | update_dev_time(device_path); | ||
1934 | } | ||
1935 | |||
1936 | error_brelse: | ||
1937 | brelse(bh); | ||
1938 | if (bdev) | ||
1939 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); | ||
1940 | out: | 1935 | out: |
1936 | kfree(dev_name); | ||
1937 | |||
1941 | mutex_unlock(&uuid_mutex); | 1938 | mutex_unlock(&uuid_mutex); |
1942 | return ret; | 1939 | return ret; |
1940 | |||
1943 | error_undo: | 1941 | error_undo: |
1944 | if (device->writeable) { | 1942 | if (device->writeable) { |
1945 | lock_chunks(root); | 1943 | lock_chunks(root); |
@@ -1948,7 +1946,7 @@ error_undo: | |||
1948 | device->fs_devices->rw_devices++; | 1946 | device->fs_devices->rw_devices++; |
1949 | unlock_chunks(root); | 1947 | unlock_chunks(root); |
1950 | } | 1948 | } |
1951 | goto error_brelse; | 1949 | goto out; |
1952 | } | 1950 | } |
1953 | 1951 | ||
1954 | void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, | 1952 | void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, |
@@ -1972,11 +1970,8 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, | |||
1972 | if (srcdev->missing) | 1970 | if (srcdev->missing) |
1973 | fs_devices->missing_devices--; | 1971 | fs_devices->missing_devices--; |
1974 | 1972 | ||
1975 | if (srcdev->writeable) { | 1973 | if (srcdev->writeable) |
1976 | fs_devices->rw_devices--; | 1974 | fs_devices->rw_devices--; |
1977 | /* zero out the old super if it is writable */ | ||
1978 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); | ||
1979 | } | ||
1980 | 1975 | ||
1981 | if (srcdev->bdev) | 1976 | if (srcdev->bdev) |
1982 | fs_devices->open_devices--; | 1977 | fs_devices->open_devices--; |
@@ -1987,6 +1982,10 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | |||
1987 | { | 1982 | { |
1988 | struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; | 1983 | struct btrfs_fs_devices *fs_devices = srcdev->fs_devices; |
1989 | 1984 | ||
1985 | if (srcdev->writeable) { | ||
1986 | /* zero out the old super if it is writable */ | ||
1987 | btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); | ||
1988 | } | ||
1990 | call_rcu(&srcdev->rcu, free_device); | 1989 | call_rcu(&srcdev->rcu, free_device); |
1991 | 1990 | ||
1992 | /* | 1991 | /* |
@@ -2016,32 +2015,33 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | |||
2016 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | 2015 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, |
2017 | struct btrfs_device *tgtdev) | 2016 | struct btrfs_device *tgtdev) |
2018 | { | 2017 | { |
2019 | struct btrfs_device *next_device; | ||
2020 | |||
2021 | mutex_lock(&uuid_mutex); | 2018 | mutex_lock(&uuid_mutex); |
2022 | WARN_ON(!tgtdev); | 2019 | WARN_ON(!tgtdev); |
2023 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | 2020 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
2024 | 2021 | ||
2025 | btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); | 2022 | btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); |
2026 | 2023 | ||
2027 | if (tgtdev->bdev) { | 2024 | if (tgtdev->bdev) |
2028 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); | ||
2029 | fs_info->fs_devices->open_devices--; | 2025 | fs_info->fs_devices->open_devices--; |
2030 | } | 2026 | |
2031 | fs_info->fs_devices->num_devices--; | 2027 | fs_info->fs_devices->num_devices--; |
2032 | 2028 | ||
2033 | next_device = list_entry(fs_info->fs_devices->devices.next, | 2029 | btrfs_assign_next_active_device(fs_info, tgtdev, NULL); |
2034 | struct btrfs_device, dev_list); | ||
2035 | if (tgtdev->bdev == fs_info->sb->s_bdev) | ||
2036 | fs_info->sb->s_bdev = next_device->bdev; | ||
2037 | if (tgtdev->bdev == fs_info->fs_devices->latest_bdev) | ||
2038 | fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
2039 | list_del_rcu(&tgtdev->dev_list); | ||
2040 | 2030 | ||
2041 | call_rcu(&tgtdev->rcu, free_device); | 2031 | list_del_rcu(&tgtdev->dev_list); |
2042 | 2032 | ||
2043 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 2033 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
2044 | mutex_unlock(&uuid_mutex); | 2034 | mutex_unlock(&uuid_mutex); |
2035 | |||
2036 | /* | ||
2037 | * The update_dev_time() within btrfs_scratch_superblocks() | ||
2038 | * may lead to a call to btrfs_show_devname() which will try | ||
2039 | * to hold device_list_mutex. And here this device | ||
2040 | * is already out of device list, so we don't have to hold | ||
2041 | * the device_list_mutex lock. | ||
2042 | */ | ||
2043 | btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); | ||
2044 | call_rcu(&tgtdev->rcu, free_device); | ||
2045 | } | 2045 | } |
2046 | 2046 | ||
2047 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | 2047 | static int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, |
@@ -2102,6 +2102,31 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | |||
2102 | } | 2102 | } |
2103 | 2103 | ||
2104 | /* | 2104 | /* |
2105 | * Lookup a device given by device id, or the path if the id is 0. | ||
2106 | */ | ||
2107 | int btrfs_find_device_by_devspec(struct btrfs_root *root, u64 devid, | ||
2108 | char *devpath, | ||
2109 | struct btrfs_device **device) | ||
2110 | { | ||
2111 | int ret; | ||
2112 | |||
2113 | if (devid) { | ||
2114 | ret = 0; | ||
2115 | *device = btrfs_find_device(root->fs_info, devid, NULL, | ||
2116 | NULL); | ||
2117 | if (!*device) | ||
2118 | ret = -ENOENT; | ||
2119 | } else { | ||
2120 | if (!devpath || !devpath[0]) | ||
2121 | return -EINVAL; | ||
2122 | |||
2123 | ret = btrfs_find_device_missing_or_by_path(root, devpath, | ||
2124 | device); | ||
2125 | } | ||
2126 | return ret; | ||
2127 | } | ||
2128 | |||
2129 | /* | ||
2105 | * does all the dirty work required for changing file system's UUID. | 2130 | * does all the dirty work required for changing file system's UUID. |
2106 | */ | 2131 | */ |
2107 | static int btrfs_prepare_sprout(struct btrfs_root *root) | 2132 | static int btrfs_prepare_sprout(struct btrfs_root *root) |
@@ -2418,7 +2443,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
2418 | 2443 | ||
2419 | ret = btrfs_relocate_sys_chunks(root); | 2444 | ret = btrfs_relocate_sys_chunks(root); |
2420 | if (ret < 0) | 2445 | if (ret < 0) |
2421 | btrfs_std_error(root->fs_info, ret, | 2446 | btrfs_handle_fs_error(root->fs_info, ret, |
2422 | "Failed to relocate sys chunks after " | 2447 | "Failed to relocate sys chunks after " |
2423 | "device initialization. This can be fixed " | 2448 | "device initialization. This can be fixed " |
2424 | "using the \"btrfs balance\" command."); | 2449 | "using the \"btrfs balance\" command."); |
@@ -2663,7 +2688,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | |||
2663 | if (ret < 0) | 2688 | if (ret < 0) |
2664 | goto out; | 2689 | goto out; |
2665 | else if (ret > 0) { /* Logic error or corruption */ | 2690 | else if (ret > 0) { /* Logic error or corruption */ |
2666 | btrfs_std_error(root->fs_info, -ENOENT, | 2691 | btrfs_handle_fs_error(root->fs_info, -ENOENT, |
2667 | "Failed lookup while freeing chunk."); | 2692 | "Failed lookup while freeing chunk."); |
2668 | ret = -ENOENT; | 2693 | ret = -ENOENT; |
2669 | goto out; | 2694 | goto out; |
@@ -2671,7 +2696,7 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, | |||
2671 | 2696 | ||
2672 | ret = btrfs_del_item(trans, root, path); | 2697 | ret = btrfs_del_item(trans, root, path); |
2673 | if (ret < 0) | 2698 | if (ret < 0) |
2674 | btrfs_std_error(root->fs_info, ret, | 2699 | btrfs_handle_fs_error(root->fs_info, ret, |
2675 | "Failed to delete chunk item."); | 2700 | "Failed to delete chunk item."); |
2676 | out: | 2701 | out: |
2677 | btrfs_free_path(path); | 2702 | btrfs_free_path(path); |
@@ -2857,7 +2882,7 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) | |||
2857 | chunk_offset); | 2882 | chunk_offset); |
2858 | if (IS_ERR(trans)) { | 2883 | if (IS_ERR(trans)) { |
2859 | ret = PTR_ERR(trans); | 2884 | ret = PTR_ERR(trans); |
2860 | btrfs_std_error(root->fs_info, ret, NULL); | 2885 | btrfs_handle_fs_error(root->fs_info, ret, NULL); |
2861 | return ret; | 2886 | return ret; |
2862 | } | 2887 | } |
2863 | 2888 | ||
@@ -3402,6 +3427,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
3402 | u32 count_meta = 0; | 3427 | u32 count_meta = 0; |
3403 | u32 count_sys = 0; | 3428 | u32 count_sys = 0; |
3404 | int chunk_reserved = 0; | 3429 | int chunk_reserved = 0; |
3430 | u64 bytes_used = 0; | ||
3405 | 3431 | ||
3406 | /* step one make some room on all the devices */ | 3432 | /* step one make some room on all the devices */ |
3407 | devices = &fs_info->fs_devices->devices; | 3433 | devices = &fs_info->fs_devices->devices; |
@@ -3540,7 +3566,13 @@ again: | |||
3540 | goto loop; | 3566 | goto loop; |
3541 | } | 3567 | } |
3542 | 3568 | ||
3543 | if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && !chunk_reserved) { | 3569 | ASSERT(fs_info->data_sinfo); |
3570 | spin_lock(&fs_info->data_sinfo->lock); | ||
3571 | bytes_used = fs_info->data_sinfo->bytes_used; | ||
3572 | spin_unlock(&fs_info->data_sinfo->lock); | ||
3573 | |||
3574 | if ((chunk_type & BTRFS_BLOCK_GROUP_DATA) && | ||
3575 | !chunk_reserved && !bytes_used) { | ||
3544 | trans = btrfs_start_transaction(chunk_root, 0); | 3576 | trans = btrfs_start_transaction(chunk_root, 0); |
3545 | if (IS_ERR(trans)) { | 3577 | if (IS_ERR(trans)) { |
3546 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); | 3578 | mutex_unlock(&fs_info->delete_unused_bgs_mutex); |
@@ -3632,7 +3664,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) | |||
3632 | unset_balance_control(fs_info); | 3664 | unset_balance_control(fs_info); |
3633 | ret = del_balance_item(fs_info->tree_root); | 3665 | ret = del_balance_item(fs_info->tree_root); |
3634 | if (ret) | 3666 | if (ret) |
3635 | btrfs_std_error(fs_info, ret, NULL); | 3667 | btrfs_handle_fs_error(fs_info, ret, NULL); |
3636 | 3668 | ||
3637 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | 3669 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); |
3638 | } | 3670 | } |
@@ -3693,10 +3725,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
3693 | num_devices--; | 3725 | num_devices--; |
3694 | } | 3726 | } |
3695 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); | 3727 | btrfs_dev_replace_unlock(&fs_info->dev_replace, 0); |
3696 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | 3728 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP; |
3697 | if (num_devices == 1) | 3729 | if (num_devices > 1) |
3698 | allowed |= BTRFS_BLOCK_GROUP_DUP; | ||
3699 | else if (num_devices > 1) | ||
3700 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); | 3730 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); |
3701 | if (num_devices > 2) | 3731 | if (num_devices > 2) |
3702 | allowed |= BTRFS_BLOCK_GROUP_RAID5; | 3732 | allowed |= BTRFS_BLOCK_GROUP_RAID5; |
@@ -5278,7 +5308,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5278 | stripe_nr = div64_u64(stripe_nr, stripe_len); | 5308 | stripe_nr = div64_u64(stripe_nr, stripe_len); |
5279 | 5309 | ||
5280 | stripe_offset = stripe_nr * stripe_len; | 5310 | stripe_offset = stripe_nr * stripe_len; |
5281 | BUG_ON(offset < stripe_offset); | 5311 | if (offset < stripe_offset) { |
5312 | btrfs_crit(fs_info, "stripe math has gone wrong, " | ||
5313 | "stripe_offset=%llu, offset=%llu, start=%llu, " | ||
5314 | "logical=%llu, stripe_len=%llu", | ||
5315 | stripe_offset, offset, em->start, logical, | ||
5316 | stripe_len); | ||
5317 | free_extent_map(em); | ||
5318 | return -EINVAL; | ||
5319 | } | ||
5282 | 5320 | ||
5283 | /* stripe_offset is the offset of this block in its stripe*/ | 5321 | /* stripe_offset is the offset of this block in its stripe*/ |
5284 | stripe_offset = offset - stripe_offset; | 5322 | stripe_offset = offset - stripe_offset; |
@@ -5519,7 +5557,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, | |||
5519 | &stripe_index); | 5557 | &stripe_index); |
5520 | mirror_num = stripe_index + 1; | 5558 | mirror_num = stripe_index + 1; |
5521 | } | 5559 | } |
5522 | BUG_ON(stripe_index >= map->num_stripes); | 5560 | if (stripe_index >= map->num_stripes) { |
5561 | btrfs_crit(fs_info, "stripe index math went horribly wrong, " | ||
5562 | "got stripe_index=%u, num_stripes=%u", | ||
5563 | stripe_index, map->num_stripes); | ||
5564 | ret = -EINVAL; | ||
5565 | goto out; | ||
5566 | } | ||
5523 | 5567 | ||
5524 | num_alloc_stripes = num_stripes; | 5568 | num_alloc_stripes = num_stripes; |
5525 | if (dev_replace_is_ongoing) { | 5569 | if (dev_replace_is_ongoing) { |
@@ -6242,7 +6286,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
6242 | "invalid chunk length %llu", length); | 6286 | "invalid chunk length %llu", length); |
6243 | return -EIO; | 6287 | return -EIO; |
6244 | } | 6288 | } |
6245 | if (!is_power_of_2(stripe_len)) { | 6289 | if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) { |
6246 | btrfs_err(root->fs_info, "invalid chunk stripe length: %llu", | 6290 | btrfs_err(root->fs_info, "invalid chunk stripe length: %llu", |
6247 | stripe_len); | 6291 | stripe_len); |
6248 | return -EIO; | 6292 | return -EIO; |