diff options
Diffstat (limited to 'fs/btrfs')
43 files changed, 5600 insertions, 1855 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d7fcdba141a2..7df3e0f0ee51 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
| @@ -8,7 +8,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
| 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 8 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
| 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ | 9 | export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
| 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ | 10 | compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ |
| 11 | reada.o backref.o ulist.o qgroup.o send.o | 11 | reada.o backref.o ulist.o qgroup.o send.o dev-replace.o |
| 12 | 12 | ||
| 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o | 13 | btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o |
| 14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o | 14 | btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o |
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 0c16e3dbfd56..e15d2b0d8d3b 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -121,6 +121,8 @@ static int btrfs_set_acl(struct btrfs_trans_handle *trans, | |||
| 121 | ret = posix_acl_equiv_mode(acl, &inode->i_mode); | 121 | ret = posix_acl_equiv_mode(acl, &inode->i_mode); |
| 122 | if (ret < 0) | 122 | if (ret < 0) |
| 123 | return ret; | 123 | return ret; |
| 124 | if (ret == 0) | ||
| 125 | acl = NULL; | ||
| 124 | } | 126 | } |
| 125 | ret = 0; | 127 | ret = 0; |
| 126 | break; | 128 | break; |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 208d8aa5b07e..04edf69be875 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
| @@ -461,6 +461,7 @@ static int __merge_refs(struct list_head *head, int mode) | |||
| 461 | pos2 = n2, n2 = pos2->next) { | 461 | pos2 = n2, n2 = pos2->next) { |
| 462 | struct __prelim_ref *ref2; | 462 | struct __prelim_ref *ref2; |
| 463 | struct __prelim_ref *xchg; | 463 | struct __prelim_ref *xchg; |
| 464 | struct extent_inode_elem *eie; | ||
| 464 | 465 | ||
| 465 | ref2 = list_entry(pos2, struct __prelim_ref, list); | 466 | ref2 = list_entry(pos2, struct __prelim_ref, list); |
| 466 | 467 | ||
| @@ -472,12 +473,20 @@ static int __merge_refs(struct list_head *head, int mode) | |||
| 472 | ref1 = ref2; | 473 | ref1 = ref2; |
| 473 | ref2 = xchg; | 474 | ref2 = xchg; |
| 474 | } | 475 | } |
| 475 | ref1->count += ref2->count; | ||
| 476 | } else { | 476 | } else { |
| 477 | if (ref1->parent != ref2->parent) | 477 | if (ref1->parent != ref2->parent) |
| 478 | continue; | 478 | continue; |
| 479 | ref1->count += ref2->count; | ||
| 480 | } | 479 | } |
| 480 | |||
| 481 | eie = ref1->inode_list; | ||
| 482 | while (eie && eie->next) | ||
| 483 | eie = eie->next; | ||
| 484 | if (eie) | ||
| 485 | eie->next = ref2->inode_list; | ||
| 486 | else | ||
| 487 | ref1->inode_list = ref2->inode_list; | ||
| 488 | ref1->count += ref2->count; | ||
| 489 | |||
| 481 | list_del(&ref2->list); | 490 | list_del(&ref2->list); |
| 482 | kfree(ref2); | 491 | kfree(ref2); |
| 483 | } | 492 | } |
| @@ -890,8 +899,7 @@ again: | |||
| 890 | while (!list_empty(&prefs)) { | 899 | while (!list_empty(&prefs)) { |
| 891 | ref = list_first_entry(&prefs, struct __prelim_ref, list); | 900 | ref = list_first_entry(&prefs, struct __prelim_ref, list); |
| 892 | list_del(&ref->list); | 901 | list_del(&ref->list); |
| 893 | if (ref->count < 0) | 902 | WARN_ON(ref->count < 0); |
| 894 | WARN_ON(1); | ||
| 895 | if (ref->count && ref->root_id && ref->parent == 0) { | 903 | if (ref->count && ref->root_id && ref->parent == 0) { |
| 896 | /* no parent == root of tree */ | 904 | /* no parent == root of tree */ |
| 897 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); | 905 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index ed8ca7ca5eff..2a8c242bc4f5 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 | 39 | #define BTRFS_INODE_HAS_ORPHAN_ITEM 5 |
| 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 | 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 |
| 41 | #define BTRFS_INODE_NEEDS_FULL_SYNC 7 | 41 | #define BTRFS_INODE_NEEDS_FULL_SYNC 7 |
| 42 | #define BTRFS_INODE_COPY_EVERYTHING 8 | ||
| 42 | 43 | ||
| 43 | /* in memory btrfs inode */ | 44 | /* in memory btrfs inode */ |
| 44 | struct btrfs_inode { | 45 | struct btrfs_inode { |
| @@ -90,6 +91,9 @@ struct btrfs_inode { | |||
| 90 | 91 | ||
| 91 | unsigned long runtime_flags; | 92 | unsigned long runtime_flags; |
| 92 | 93 | ||
| 94 | /* Keep track of who's O_SYNC/fsycing currently */ | ||
| 95 | atomic_t sync_writers; | ||
| 96 | |||
| 93 | /* full 64 bit generation number, struct vfs_inode doesn't have a big | 97 | /* full 64 bit generation number, struct vfs_inode doesn't have a big |
| 94 | * enough field for this. | 98 | * enough field for this. |
| 95 | */ | 99 | */ |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 5a3e45db642a..11d47bfb62b4 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
| @@ -137,7 +137,7 @@ struct btrfsic_block { | |||
| 137 | unsigned int never_written:1; /* block was added because it was | 137 | unsigned int never_written:1; /* block was added because it was |
| 138 | * referenced, not because it was | 138 | * referenced, not because it was |
| 139 | * written */ | 139 | * written */ |
| 140 | unsigned int mirror_num:2; /* large enough to hold | 140 | unsigned int mirror_num; /* large enough to hold |
| 141 | * BTRFS_SUPER_MIRROR_MAX */ | 141 | * BTRFS_SUPER_MIRROR_MAX */ |
| 142 | struct btrfsic_dev_state *dev_state; | 142 | struct btrfsic_dev_state *dev_state; |
| 143 | u64 dev_bytenr; /* key, physical byte num on disk */ | 143 | u64 dev_bytenr; /* key, physical byte num on disk */ |
| @@ -723,7 +723,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state, | |||
| 723 | } | 723 | } |
| 724 | 724 | ||
| 725 | num_copies = | 725 | num_copies = |
| 726 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 726 | btrfs_num_copies(state->root->fs_info, |
| 727 | next_bytenr, state->metablock_size); | 727 | next_bytenr, state->metablock_size); |
| 728 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 728 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 729 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 729 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -903,7 +903,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
| 903 | } | 903 | } |
| 904 | 904 | ||
| 905 | num_copies = | 905 | num_copies = |
| 906 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 906 | btrfs_num_copies(state->root->fs_info, |
| 907 | next_bytenr, state->metablock_size); | 907 | next_bytenr, state->metablock_size); |
| 908 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 908 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 909 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 909 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -1287,7 +1287,7 @@ static int btrfsic_create_link_to_next_block( | |||
| 1287 | *next_blockp = NULL; | 1287 | *next_blockp = NULL; |
| 1288 | if (0 == *num_copiesp) { | 1288 | if (0 == *num_copiesp) { |
| 1289 | *num_copiesp = | 1289 | *num_copiesp = |
| 1290 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1290 | btrfs_num_copies(state->root->fs_info, |
| 1291 | next_bytenr, state->metablock_size); | 1291 | next_bytenr, state->metablock_size); |
| 1292 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1292 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 1293 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1293 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -1489,7 +1489,7 @@ static int btrfsic_handle_extent_data( | |||
| 1489 | chunk_len = num_bytes; | 1489 | chunk_len = num_bytes; |
| 1490 | 1490 | ||
| 1491 | num_copies = | 1491 | num_copies = |
| 1492 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 1492 | btrfs_num_copies(state->root->fs_info, |
| 1493 | next_bytenr, state->datablock_size); | 1493 | next_bytenr, state->datablock_size); |
| 1494 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 1494 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 1495 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 1495 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -1582,9 +1582,21 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | |||
| 1582 | struct btrfs_device *device; | 1582 | struct btrfs_device *device; |
| 1583 | 1583 | ||
| 1584 | length = len; | 1584 | length = len; |
| 1585 | ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ, | 1585 | ret = btrfs_map_block(state->root->fs_info, READ, |
| 1586 | bytenr, &length, &multi, mirror_num); | 1586 | bytenr, &length, &multi, mirror_num); |
| 1587 | 1587 | ||
| 1588 | if (ret) { | ||
| 1589 | block_ctx_out->start = 0; | ||
| 1590 | block_ctx_out->dev_bytenr = 0; | ||
| 1591 | block_ctx_out->len = 0; | ||
| 1592 | block_ctx_out->dev = NULL; | ||
| 1593 | block_ctx_out->datav = NULL; | ||
| 1594 | block_ctx_out->pagev = NULL; | ||
| 1595 | block_ctx_out->mem_to_free = NULL; | ||
| 1596 | |||
| 1597 | return ret; | ||
| 1598 | } | ||
| 1599 | |||
| 1588 | device = multi->stripes[0].dev; | 1600 | device = multi->stripes[0].dev; |
| 1589 | block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); | 1601 | block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev); |
| 1590 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; | 1602 | block_ctx_out->dev_bytenr = multi->stripes[0].physical; |
| @@ -1594,8 +1606,7 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len, | |||
| 1594 | block_ctx_out->pagev = NULL; | 1606 | block_ctx_out->pagev = NULL; |
| 1595 | block_ctx_out->mem_to_free = NULL; | 1607 | block_ctx_out->mem_to_free = NULL; |
| 1596 | 1608 | ||
| 1597 | if (0 == ret) | 1609 | kfree(multi); |
| 1598 | kfree(multi); | ||
| 1599 | if (NULL == block_ctx_out->dev) { | 1610 | if (NULL == block_ctx_out->dev) { |
| 1600 | ret = -ENXIO; | 1611 | ret = -ENXIO; |
| 1601 | printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); | 1612 | printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n"); |
| @@ -2463,7 +2474,7 @@ static int btrfsic_process_written_superblock( | |||
| 2463 | } | 2474 | } |
| 2464 | 2475 | ||
| 2465 | num_copies = | 2476 | num_copies = |
| 2466 | btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2477 | btrfs_num_copies(state->root->fs_info, |
| 2467 | next_bytenr, BTRFS_SUPER_INFO_SIZE); | 2478 | next_bytenr, BTRFS_SUPER_INFO_SIZE); |
| 2468 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) | 2479 | if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES) |
| 2469 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", | 2480 | printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n", |
| @@ -2960,7 +2971,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state, | |||
| 2960 | struct btrfsic_block_data_ctx block_ctx; | 2971 | struct btrfsic_block_data_ctx block_ctx; |
| 2961 | int match = 0; | 2972 | int match = 0; |
| 2962 | 2973 | ||
| 2963 | num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree, | 2974 | num_copies = btrfs_num_copies(state->root->fs_info, |
| 2964 | bytenr, state->metablock_size); | 2975 | bytenr, state->metablock_size); |
| 2965 | 2976 | ||
| 2966 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { | 2977 | for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) { |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index c6467aa88bee..94ab2f80e7e3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -687,7 +687,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 687 | 687 | ||
| 688 | ret = btrfs_map_bio(root, READ, comp_bio, | 688 | ret = btrfs_map_bio(root, READ, comp_bio, |
| 689 | mirror_num, 0); | 689 | mirror_num, 0); |
| 690 | BUG_ON(ret); /* -ENOMEM */ | 690 | if (ret) |
| 691 | bio_endio(comp_bio, ret); | ||
| 691 | 692 | ||
| 692 | bio_put(comp_bio); | 693 | bio_put(comp_bio); |
| 693 | 694 | ||
| @@ -712,7 +713,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | |||
| 712 | } | 713 | } |
| 713 | 714 | ||
| 714 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); | 715 | ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); |
| 715 | BUG_ON(ret); /* -ENOMEM */ | 716 | if (ret) |
| 717 | bio_endio(comp_bio, ret); | ||
| 716 | 718 | ||
| 717 | bio_put(comp_bio); | 719 | bio_put(comp_bio); |
| 718 | return 0; | 720 | return 0; |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cdfb4c49a806..eea5da7a2b9a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -38,8 +38,7 @@ static int balance_node_right(struct btrfs_trans_handle *trans, | |||
| 38 | struct extent_buffer *dst_buf, | 38 | struct extent_buffer *dst_buf, |
| 39 | struct extent_buffer *src_buf); | 39 | struct extent_buffer *src_buf); |
| 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 40 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 41 | struct btrfs_path *path, int level, int slot, | 41 | struct btrfs_path *path, int level, int slot); |
| 42 | int tree_mod_log); | ||
| 43 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, | 42 | static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, |
| 44 | struct extent_buffer *eb); | 43 | struct extent_buffer *eb); |
| 45 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, | 44 | struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr, |
| @@ -776,8 +775,7 @@ tree_mod_log_eb_move(struct btrfs_fs_info *fs_info, struct extent_buffer *dst, | |||
| 776 | 775 | ||
| 777 | static noinline void | 776 | static noinline void |
| 778 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, | 777 | tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info, |
| 779 | struct extent_buffer *eb, | 778 | struct extent_buffer *eb, int slot, int atomic) |
| 780 | struct btrfs_disk_key *disk_key, int slot, int atomic) | ||
| 781 | { | 779 | { |
| 782 | int ret; | 780 | int ret; |
| 783 | 781 | ||
| @@ -1361,19 +1359,16 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 1361 | u64 search_start; | 1359 | u64 search_start; |
| 1362 | int ret; | 1360 | int ret; |
| 1363 | 1361 | ||
| 1364 | if (trans->transaction != root->fs_info->running_transaction) { | 1362 | if (trans->transaction != root->fs_info->running_transaction) |
| 1365 | printk(KERN_CRIT "trans %llu running %llu\n", | 1363 | WARN(1, KERN_CRIT "trans %llu running %llu\n", |
| 1366 | (unsigned long long)trans->transid, | 1364 | (unsigned long long)trans->transid, |
| 1367 | (unsigned long long) | 1365 | (unsigned long long) |
| 1368 | root->fs_info->running_transaction->transid); | 1366 | root->fs_info->running_transaction->transid); |
| 1369 | WARN_ON(1); | 1367 | |
| 1370 | } | 1368 | if (trans->transid != root->fs_info->generation) |
| 1371 | if (trans->transid != root->fs_info->generation) { | 1369 | WARN(1, KERN_CRIT "trans %llu running %llu\n", |
| 1372 | printk(KERN_CRIT "trans %llu running %llu\n", | ||
| 1373 | (unsigned long long)trans->transid, | 1370 | (unsigned long long)trans->transid, |
| 1374 | (unsigned long long)root->fs_info->generation); | 1371 | (unsigned long long)root->fs_info->generation); |
| 1375 | WARN_ON(1); | ||
| 1376 | } | ||
| 1377 | 1372 | ||
| 1378 | if (!should_cow_block(trans, root, buf)) { | 1373 | if (!should_cow_block(trans, root, buf)) { |
| 1379 | *cow_ret = buf; | 1374 | *cow_ret = buf; |
| @@ -1469,10 +1464,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
| 1469 | if (cache_only && parent_level != 1) | 1464 | if (cache_only && parent_level != 1) |
| 1470 | return 0; | 1465 | return 0; |
| 1471 | 1466 | ||
| 1472 | if (trans->transaction != root->fs_info->running_transaction) | 1467 | WARN_ON(trans->transaction != root->fs_info->running_transaction); |
| 1473 | WARN_ON(1); | 1468 | WARN_ON(trans->transid != root->fs_info->generation); |
| 1474 | if (trans->transid != root->fs_info->generation) | ||
| 1475 | WARN_ON(1); | ||
| 1476 | 1469 | ||
| 1477 | parent_nritems = btrfs_header_nritems(parent); | 1470 | parent_nritems = btrfs_header_nritems(parent); |
| 1478 | blocksize = btrfs_level_size(root, parent_level - 1); | 1471 | blocksize = btrfs_level_size(root, parent_level - 1); |
| @@ -1827,7 +1820,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1827 | if (btrfs_header_nritems(right) == 0) { | 1820 | if (btrfs_header_nritems(right) == 0) { |
| 1828 | clean_tree_block(trans, root, right); | 1821 | clean_tree_block(trans, root, right); |
| 1829 | btrfs_tree_unlock(right); | 1822 | btrfs_tree_unlock(right); |
| 1830 | del_ptr(trans, root, path, level + 1, pslot + 1, 1); | 1823 | del_ptr(trans, root, path, level + 1, pslot + 1); |
| 1831 | root_sub_used(root, right->len); | 1824 | root_sub_used(root, right->len); |
| 1832 | btrfs_free_tree_block(trans, root, right, 0, 1); | 1825 | btrfs_free_tree_block(trans, root, right, 0, 1); |
| 1833 | free_extent_buffer_stale(right); | 1826 | free_extent_buffer_stale(right); |
| @@ -1836,7 +1829,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1836 | struct btrfs_disk_key right_key; | 1829 | struct btrfs_disk_key right_key; |
| 1837 | btrfs_node_key(right, &right_key, 0); | 1830 | btrfs_node_key(right, &right_key, 0); |
| 1838 | tree_mod_log_set_node_key(root->fs_info, parent, | 1831 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 1839 | &right_key, pslot + 1, 0); | 1832 | pslot + 1, 0); |
| 1840 | btrfs_set_node_key(parent, &right_key, pslot + 1); | 1833 | btrfs_set_node_key(parent, &right_key, pslot + 1); |
| 1841 | btrfs_mark_buffer_dirty(parent); | 1834 | btrfs_mark_buffer_dirty(parent); |
| 1842 | } | 1835 | } |
| @@ -1871,7 +1864,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1871 | if (btrfs_header_nritems(mid) == 0) { | 1864 | if (btrfs_header_nritems(mid) == 0) { |
| 1872 | clean_tree_block(trans, root, mid); | 1865 | clean_tree_block(trans, root, mid); |
| 1873 | btrfs_tree_unlock(mid); | 1866 | btrfs_tree_unlock(mid); |
| 1874 | del_ptr(trans, root, path, level + 1, pslot, 1); | 1867 | del_ptr(trans, root, path, level + 1, pslot); |
| 1875 | root_sub_used(root, mid->len); | 1868 | root_sub_used(root, mid->len); |
| 1876 | btrfs_free_tree_block(trans, root, mid, 0, 1); | 1869 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
| 1877 | free_extent_buffer_stale(mid); | 1870 | free_extent_buffer_stale(mid); |
| @@ -1880,7 +1873,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1880 | /* update the parent key to reflect our changes */ | 1873 | /* update the parent key to reflect our changes */ |
| 1881 | struct btrfs_disk_key mid_key; | 1874 | struct btrfs_disk_key mid_key; |
| 1882 | btrfs_node_key(mid, &mid_key, 0); | 1875 | btrfs_node_key(mid, &mid_key, 0); |
| 1883 | tree_mod_log_set_node_key(root->fs_info, parent, &mid_key, | 1876 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 1884 | pslot, 0); | 1877 | pslot, 0); |
| 1885 | btrfs_set_node_key(parent, &mid_key, pslot); | 1878 | btrfs_set_node_key(parent, &mid_key, pslot); |
| 1886 | btrfs_mark_buffer_dirty(parent); | 1879 | btrfs_mark_buffer_dirty(parent); |
| @@ -1980,7 +1973,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 1980 | orig_slot += left_nr; | 1973 | orig_slot += left_nr; |
| 1981 | btrfs_node_key(mid, &disk_key, 0); | 1974 | btrfs_node_key(mid, &disk_key, 0); |
| 1982 | tree_mod_log_set_node_key(root->fs_info, parent, | 1975 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 1983 | &disk_key, pslot, 0); | 1976 | pslot, 0); |
| 1984 | btrfs_set_node_key(parent, &disk_key, pslot); | 1977 | btrfs_set_node_key(parent, &disk_key, pslot); |
| 1985 | btrfs_mark_buffer_dirty(parent); | 1978 | btrfs_mark_buffer_dirty(parent); |
| 1986 | if (btrfs_header_nritems(left) > orig_slot) { | 1979 | if (btrfs_header_nritems(left) > orig_slot) { |
| @@ -2033,7 +2026,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
| 2033 | 2026 | ||
| 2034 | btrfs_node_key(right, &disk_key, 0); | 2027 | btrfs_node_key(right, &disk_key, 0); |
| 2035 | tree_mod_log_set_node_key(root->fs_info, parent, | 2028 | tree_mod_log_set_node_key(root->fs_info, parent, |
| 2036 | &disk_key, pslot + 1, 0); | 2029 | pslot + 1, 0); |
| 2037 | btrfs_set_node_key(parent, &disk_key, pslot + 1); | 2030 | btrfs_set_node_key(parent, &disk_key, pslot + 1); |
| 2038 | btrfs_mark_buffer_dirty(parent); | 2031 | btrfs_mark_buffer_dirty(parent); |
| 2039 | 2032 | ||
| @@ -2219,6 +2212,9 @@ static noinline void unlock_up(struct btrfs_path *path, int level, | |||
| 2219 | int no_skips = 0; | 2212 | int no_skips = 0; |
| 2220 | struct extent_buffer *t; | 2213 | struct extent_buffer *t; |
| 2221 | 2214 | ||
| 2215 | if (path->really_keep_locks) | ||
| 2216 | return; | ||
| 2217 | |||
| 2222 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | 2218 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { |
| 2223 | if (!path->nodes[i]) | 2219 | if (!path->nodes[i]) |
| 2224 | break; | 2220 | break; |
| @@ -2266,7 +2262,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) | |||
| 2266 | { | 2262 | { |
| 2267 | int i; | 2263 | int i; |
| 2268 | 2264 | ||
| 2269 | if (path->keep_locks) | 2265 | if (path->keep_locks || path->really_keep_locks) |
| 2270 | return; | 2266 | return; |
| 2271 | 2267 | ||
| 2272 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { | 2268 | for (i = level; i < BTRFS_MAX_LEVEL; i++) { |
| @@ -2499,7 +2495,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2499 | if (!cow) | 2495 | if (!cow) |
| 2500 | write_lock_level = -1; | 2496 | write_lock_level = -1; |
| 2501 | 2497 | ||
| 2502 | if (cow && (p->keep_locks || p->lowest_level)) | 2498 | if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level)) |
| 2503 | write_lock_level = BTRFS_MAX_LEVEL; | 2499 | write_lock_level = BTRFS_MAX_LEVEL; |
| 2504 | 2500 | ||
| 2505 | min_write_lock_level = write_lock_level; | 2501 | min_write_lock_level = write_lock_level; |
| @@ -2568,7 +2564,10 @@ again: | |||
| 2568 | * must have write locks on this node and the | 2564 | * must have write locks on this node and the |
| 2569 | * parent | 2565 | * parent |
| 2570 | */ | 2566 | */ |
| 2571 | if (level + 1 > write_lock_level) { | 2567 | if (level > write_lock_level || |
| 2568 | (level + 1 > write_lock_level && | ||
| 2569 | level + 1 < BTRFS_MAX_LEVEL && | ||
| 2570 | p->nodes[level + 1])) { | ||
| 2572 | write_lock_level = level + 1; | 2571 | write_lock_level = level + 1; |
| 2573 | btrfs_release_path(p); | 2572 | btrfs_release_path(p); |
| 2574 | goto again; | 2573 | goto again; |
| @@ -2917,7 +2916,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans, | |||
| 2917 | if (!path->nodes[i]) | 2916 | if (!path->nodes[i]) |
| 2918 | break; | 2917 | break; |
| 2919 | t = path->nodes[i]; | 2918 | t = path->nodes[i]; |
| 2920 | tree_mod_log_set_node_key(root->fs_info, t, key, tslot, 1); | 2919 | tree_mod_log_set_node_key(root->fs_info, t, tslot, 1); |
| 2921 | btrfs_set_node_key(t, key, tslot); | 2920 | btrfs_set_node_key(t, key, tslot); |
| 2922 | btrfs_mark_buffer_dirty(path->nodes[i]); | 2921 | btrfs_mark_buffer_dirty(path->nodes[i]); |
| 2923 | if (tslot != 0) | 2922 | if (tslot != 0) |
| @@ -3302,14 +3301,21 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 3302 | */ | 3301 | */ |
| 3303 | static int leaf_space_used(struct extent_buffer *l, int start, int nr) | 3302 | static int leaf_space_used(struct extent_buffer *l, int start, int nr) |
| 3304 | { | 3303 | { |
| 3304 | struct btrfs_item *start_item; | ||
| 3305 | struct btrfs_item *end_item; | ||
| 3306 | struct btrfs_map_token token; | ||
| 3305 | int data_len; | 3307 | int data_len; |
| 3306 | int nritems = btrfs_header_nritems(l); | 3308 | int nritems = btrfs_header_nritems(l); |
| 3307 | int end = min(nritems, start + nr) - 1; | 3309 | int end = min(nritems, start + nr) - 1; |
| 3308 | 3310 | ||
| 3309 | if (!nr) | 3311 | if (!nr) |
| 3310 | return 0; | 3312 | return 0; |
| 3311 | data_len = btrfs_item_end_nr(l, start); | 3313 | btrfs_init_map_token(&token); |
| 3312 | data_len = data_len - btrfs_item_offset_nr(l, end); | 3314 | start_item = btrfs_item_nr(l, start); |
| 3315 | end_item = btrfs_item_nr(l, end); | ||
| 3316 | data_len = btrfs_token_item_offset(l, start_item, &token) + | ||
| 3317 | btrfs_token_item_size(l, start_item, &token); | ||
| 3318 | data_len = data_len - btrfs_token_item_offset(l, end_item, &token); | ||
| 3313 | data_len += sizeof(struct btrfs_item) * nr; | 3319 | data_len += sizeof(struct btrfs_item) * nr; |
| 3314 | WARN_ON(data_len < 0); | 3320 | WARN_ON(data_len < 0); |
| 3315 | return data_len; | 3321 | return data_len; |
| @@ -3403,8 +3409,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
| 3403 | if (push_items == 0) | 3409 | if (push_items == 0) |
| 3404 | goto out_unlock; | 3410 | goto out_unlock; |
| 3405 | 3411 | ||
| 3406 | if (!empty && push_items == left_nritems) | 3412 | WARN_ON(!empty && push_items == left_nritems); |
| 3407 | WARN_ON(1); | ||
| 3408 | 3413 | ||
| 3409 | /* push left to right */ | 3414 | /* push left to right */ |
| 3410 | right_nritems = btrfs_header_nritems(right); | 3415 | right_nritems = btrfs_header_nritems(right); |
| @@ -3642,11 +3647,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
| 3642 | btrfs_set_header_nritems(left, old_left_nritems + push_items); | 3647 | btrfs_set_header_nritems(left, old_left_nritems + push_items); |
| 3643 | 3648 | ||
| 3644 | /* fixup right node */ | 3649 | /* fixup right node */ |
| 3645 | if (push_items > right_nritems) { | 3650 | if (push_items > right_nritems) |
| 3646 | printk(KERN_CRIT "push items %d nr %u\n", push_items, | 3651 | WARN(1, KERN_CRIT "push items %d nr %u\n", push_items, |
| 3647 | right_nritems); | 3652 | right_nritems); |
| 3648 | WARN_ON(1); | ||
| 3649 | } | ||
| 3650 | 3653 | ||
| 3651 | if (push_items < right_nritems) { | 3654 | if (push_items < right_nritems) { |
| 3652 | push_space = btrfs_item_offset_nr(right, push_items - 1) - | 3655 | push_space = btrfs_item_offset_nr(right, push_items - 1) - |
| @@ -4602,8 +4605,7 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 4602 | * empty a node. | 4605 | * empty a node. |
| 4603 | */ | 4606 | */ |
| 4604 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 4607 | static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
| 4605 | struct btrfs_path *path, int level, int slot, | 4608 | struct btrfs_path *path, int level, int slot) |
| 4606 | int tree_mod_log) | ||
| 4607 | { | 4609 | { |
| 4608 | struct extent_buffer *parent = path->nodes[level]; | 4610 | struct extent_buffer *parent = path->nodes[level]; |
| 4609 | u32 nritems; | 4611 | u32 nritems; |
| @@ -4611,7 +4613,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 4611 | 4613 | ||
| 4612 | nritems = btrfs_header_nritems(parent); | 4614 | nritems = btrfs_header_nritems(parent); |
| 4613 | if (slot != nritems - 1) { | 4615 | if (slot != nritems - 1) { |
| 4614 | if (tree_mod_log && level) | 4616 | if (level) |
| 4615 | tree_mod_log_eb_move(root->fs_info, parent, slot, | 4617 | tree_mod_log_eb_move(root->fs_info, parent, slot, |
| 4616 | slot + 1, nritems - slot - 1); | 4618 | slot + 1, nritems - slot - 1); |
| 4617 | memmove_extent_buffer(parent, | 4619 | memmove_extent_buffer(parent, |
| @@ -4619,7 +4621,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 4619 | btrfs_node_key_ptr_offset(slot + 1), | 4621 | btrfs_node_key_ptr_offset(slot + 1), |
| 4620 | sizeof(struct btrfs_key_ptr) * | 4622 | sizeof(struct btrfs_key_ptr) * |
| 4621 | (nritems - slot - 1)); | 4623 | (nritems - slot - 1)); |
| 4622 | } else if (tree_mod_log && level) { | 4624 | } else if (level) { |
| 4623 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, | 4625 | ret = tree_mod_log_insert_key(root->fs_info, parent, slot, |
| 4624 | MOD_LOG_KEY_REMOVE); | 4626 | MOD_LOG_KEY_REMOVE); |
| 4625 | BUG_ON(ret < 0); | 4627 | BUG_ON(ret < 0); |
| @@ -4656,7 +4658,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
| 4656 | struct extent_buffer *leaf) | 4658 | struct extent_buffer *leaf) |
| 4657 | { | 4659 | { |
| 4658 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); | 4660 | WARN_ON(btrfs_header_generation(leaf) != trans->transid); |
| 4659 | del_ptr(trans, root, path, 1, path->slots[1], 1); | 4661 | del_ptr(trans, root, path, 1, path->slots[1]); |
| 4660 | 4662 | ||
| 4661 | /* | 4663 | /* |
| 4662 | * btrfs_free_extent is expensive, we want to make sure we | 4664 | * btrfs_free_extent is expensive, we want to make sure we |
| @@ -5123,13 +5125,13 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5123 | right_path->search_commit_root = 1; | 5125 | right_path->search_commit_root = 1; |
| 5124 | right_path->skip_locking = 1; | 5126 | right_path->skip_locking = 1; |
| 5125 | 5127 | ||
| 5126 | spin_lock(&left_root->root_times_lock); | 5128 | spin_lock(&left_root->root_item_lock); |
| 5127 | left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); | 5129 | left_start_ctransid = btrfs_root_ctransid(&left_root->root_item); |
| 5128 | spin_unlock(&left_root->root_times_lock); | 5130 | spin_unlock(&left_root->root_item_lock); |
| 5129 | 5131 | ||
| 5130 | spin_lock(&right_root->root_times_lock); | 5132 | spin_lock(&right_root->root_item_lock); |
| 5131 | right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); | 5133 | right_start_ctransid = btrfs_root_ctransid(&right_root->root_item); |
| 5132 | spin_unlock(&right_root->root_times_lock); | 5134 | spin_unlock(&right_root->root_item_lock); |
| 5133 | 5135 | ||
| 5134 | trans = btrfs_join_transaction(left_root); | 5136 | trans = btrfs_join_transaction(left_root); |
| 5135 | if (IS_ERR(trans)) { | 5137 | if (IS_ERR(trans)) { |
| @@ -5224,15 +5226,15 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
| 5224 | goto out; | 5226 | goto out; |
| 5225 | } | 5227 | } |
| 5226 | 5228 | ||
| 5227 | spin_lock(&left_root->root_times_lock); | 5229 | spin_lock(&left_root->root_item_lock); |
| 5228 | ctransid = btrfs_root_ctransid(&left_root->root_item); | 5230 | ctransid = btrfs_root_ctransid(&left_root->root_item); |
| 5229 | spin_unlock(&left_root->root_times_lock); | 5231 | spin_unlock(&left_root->root_item_lock); |
| 5230 | if (ctransid != left_start_ctransid) | 5232 | if (ctransid != left_start_ctransid) |
| 5231 | left_start_ctransid = 0; | 5233 | left_start_ctransid = 0; |
| 5232 | 5234 | ||
| 5233 | spin_lock(&right_root->root_times_lock); | 5235 | spin_lock(&right_root->root_item_lock); |
| 5234 | ctransid = btrfs_root_ctransid(&right_root->root_item); | 5236 | ctransid = btrfs_root_ctransid(&right_root->root_item); |
| 5235 | spin_unlock(&right_root->root_times_lock); | 5237 | spin_unlock(&right_root->root_item_lock); |
| 5236 | if (ctransid != right_start_ctransid) | 5238 | if (ctransid != right_start_ctransid) |
| 5237 | right_start_ctransid = 0; | 5239 | right_start_ctransid = 0; |
| 5238 | 5240 | ||
| @@ -5496,6 +5498,139 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
| 5496 | return btrfs_next_old_leaf(root, path, 0); | 5498 | return btrfs_next_old_leaf(root, path, 0); |
| 5497 | } | 5499 | } |
| 5498 | 5500 | ||
| 5501 | /* Release the path up to but not including the given level */ | ||
| 5502 | static void btrfs_release_level(struct btrfs_path *path, int level) | ||
| 5503 | { | ||
| 5504 | int i; | ||
| 5505 | |||
| 5506 | for (i = 0; i < level; i++) { | ||
| 5507 | path->slots[i] = 0; | ||
| 5508 | if (!path->nodes[i]) | ||
| 5509 | continue; | ||
| 5510 | if (path->locks[i]) { | ||
| 5511 | btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); | ||
| 5512 | path->locks[i] = 0; | ||
| 5513 | } | ||
| 5514 | free_extent_buffer(path->nodes[i]); | ||
| 5515 | path->nodes[i] = NULL; | ||
| 5516 | } | ||
| 5517 | } | ||
| 5518 | |||
| 5519 | /* | ||
| 5520 | * This function assumes 2 things | ||
| 5521 | * | ||
| 5522 | * 1) You are using path->keep_locks | ||
| 5523 | * 2) You are not inserting items. | ||
| 5524 | * | ||
| 5525 | * If either of these are not true do not use this function. If you need a next | ||
| 5526 | * leaf with either of these not being true then this function can be easily | ||
| 5527 | * adapted to do that, but at the moment these are the limitations. | ||
| 5528 | */ | ||
| 5529 | int btrfs_next_leaf_write(struct btrfs_trans_handle *trans, | ||
| 5530 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 5531 | int del) | ||
| 5532 | { | ||
| 5533 | struct extent_buffer *b; | ||
| 5534 | struct btrfs_key key; | ||
| 5535 | u32 nritems; | ||
| 5536 | int level = 1; | ||
| 5537 | int slot; | ||
| 5538 | int ret = 1; | ||
| 5539 | int write_lock_level = BTRFS_MAX_LEVEL; | ||
| 5540 | int ins_len = del ? -1 : 0; | ||
| 5541 | |||
| 5542 | WARN_ON(!(path->keep_locks || path->really_keep_locks)); | ||
| 5543 | |||
| 5544 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 5545 | btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); | ||
| 5546 | |||
| 5547 | while (path->nodes[level]) { | ||
| 5548 | nritems = btrfs_header_nritems(path->nodes[level]); | ||
| 5549 | if (!(path->locks[level] & BTRFS_WRITE_LOCK)) { | ||
| 5550 | search: | ||
| 5551 | btrfs_release_path(path); | ||
| 5552 | ret = btrfs_search_slot(trans, root, &key, path, | ||
| 5553 | ins_len, 1); | ||
| 5554 | if (ret < 0) | ||
| 5555 | goto out; | ||
| 5556 | level = 1; | ||
| 5557 | continue; | ||
| 5558 | } | ||
| 5559 | |||
| 5560 | if (path->slots[level] >= nritems - 1) { | ||
| 5561 | level++; | ||
| 5562 | continue; | ||
| 5563 | } | ||
| 5564 | |||
| 5565 | btrfs_release_level(path, level); | ||
| 5566 | break; | ||
| 5567 | } | ||
| 5568 | |||
| 5569 | if (!path->nodes[level]) { | ||
| 5570 | ret = 1; | ||
| 5571 | goto out; | ||
| 5572 | } | ||
| 5573 | |||
| 5574 | path->slots[level]++; | ||
| 5575 | b = path->nodes[level]; | ||
| 5576 | |||
| 5577 | while (b) { | ||
| 5578 | level = btrfs_header_level(b); | ||
| 5579 | |||
| 5580 | if (!should_cow_block(trans, root, b)) | ||
| 5581 | goto cow_done; | ||
| 5582 | |||
| 5583 | btrfs_set_path_blocking(path); | ||
| 5584 | ret = btrfs_cow_block(trans, root, b, | ||
| 5585 | path->nodes[level + 1], | ||
| 5586 | path->slots[level + 1], &b); | ||
| 5587 | if (ret) | ||
| 5588 | goto out; | ||
| 5589 | cow_done: | ||
| 5590 | path->nodes[level] = b; | ||
| 5591 | btrfs_clear_path_blocking(path, NULL, 0); | ||
| 5592 | if (level != 0) { | ||
| 5593 | ret = setup_nodes_for_search(trans, root, path, b, | ||
| 5594 | level, ins_len, | ||
| 5595 | &write_lock_level); | ||
| 5596 | if (ret == -EAGAIN) | ||
| 5597 | goto search; | ||
| 5598 | if (ret) | ||
| 5599 | goto out; | ||
| 5600 | |||
| 5601 | b = path->nodes[level]; | ||
| 5602 | slot = path->slots[level]; | ||
| 5603 | |||
| 5604 | ret = read_block_for_search(trans, root, path, | ||
| 5605 | &b, level, slot, &key, 0); | ||
| 5606 | if (ret == -EAGAIN) | ||
| 5607 | goto search; | ||
| 5608 | if (ret) | ||
| 5609 | goto out; | ||
| 5610 | level = btrfs_header_level(b); | ||
| 5611 | if (!btrfs_try_tree_write_lock(b)) { | ||
| 5612 | btrfs_set_path_blocking(path); | ||
| 5613 | btrfs_tree_lock(b); | ||
| 5614 | btrfs_clear_path_blocking(path, b, | ||
| 5615 | BTRFS_WRITE_LOCK); | ||
| 5616 | } | ||
| 5617 | path->locks[level] = BTRFS_WRITE_LOCK; | ||
| 5618 | path->nodes[level] = b; | ||
| 5619 | path->slots[level] = 0; | ||
| 5620 | } else { | ||
| 5621 | path->slots[level] = 0; | ||
| 5622 | ret = 0; | ||
| 5623 | break; | ||
| 5624 | } | ||
| 5625 | } | ||
| 5626 | |||
| 5627 | out: | ||
| 5628 | if (ret) | ||
| 5629 | btrfs_release_path(path); | ||
| 5630 | |||
| 5631 | return ret; | ||
| 5632 | } | ||
| 5633 | |||
| 5499 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, | 5634 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, |
| 5500 | u64 time_seq) | 5635 | u64 time_seq) |
| 5501 | { | 5636 | { |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c72ead869507..547b7b05727f 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -48,7 +48,7 @@ struct btrfs_ordered_sum; | |||
| 48 | 48 | ||
| 49 | #define BTRFS_MAGIC "_BHRfS_M" | 49 | #define BTRFS_MAGIC "_BHRfS_M" |
| 50 | 50 | ||
| 51 | #define BTRFS_MAX_MIRRORS 2 | 51 | #define BTRFS_MAX_MIRRORS 3 |
| 52 | 52 | ||
| 53 | #define BTRFS_MAX_LEVEL 8 | 53 | #define BTRFS_MAX_LEVEL 8 |
| 54 | 54 | ||
| @@ -142,6 +142,8 @@ struct btrfs_ordered_sum; | |||
| 142 | 142 | ||
| 143 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 | 143 | #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2 |
| 144 | 144 | ||
| 145 | #define BTRFS_DEV_REPLACE_DEVID 0 | ||
| 146 | |||
| 145 | /* | 147 | /* |
| 146 | * the max metadata block size. This limit is somewhat artificial, | 148 | * the max metadata block size. This limit is somewhat artificial, |
| 147 | * but the memmove costs go through the roof for larger blocks. | 149 | * but the memmove costs go through the roof for larger blocks. |
| @@ -172,6 +174,9 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
| 172 | /* four bytes for CRC32 */ | 174 | /* four bytes for CRC32 */ |
| 173 | #define BTRFS_EMPTY_DIR_SIZE 0 | 175 | #define BTRFS_EMPTY_DIR_SIZE 0 |
| 174 | 176 | ||
| 177 | /* specific to btrfs_map_block(), therefore not in include/linux/blk_types.h */ | ||
| 178 | #define REQ_GET_READ_MIRRORS (1 << 30) | ||
| 179 | |||
| 175 | #define BTRFS_FT_UNKNOWN 0 | 180 | #define BTRFS_FT_UNKNOWN 0 |
| 176 | #define BTRFS_FT_REG_FILE 1 | 181 | #define BTRFS_FT_REG_FILE 1 |
| 177 | #define BTRFS_FT_DIR 2 | 182 | #define BTRFS_FT_DIR 2 |
| @@ -413,7 +418,7 @@ struct btrfs_root_backup { | |||
| 413 | __le64 bytes_used; | 418 | __le64 bytes_used; |
| 414 | __le64 num_devices; | 419 | __le64 num_devices; |
| 415 | /* future */ | 420 | /* future */ |
| 416 | __le64 unsed_64[4]; | 421 | __le64 unused_64[4]; |
| 417 | 422 | ||
| 418 | u8 tree_root_level; | 423 | u8 tree_root_level; |
| 419 | u8 chunk_root_level; | 424 | u8 chunk_root_level; |
| @@ -571,6 +576,7 @@ struct btrfs_path { | |||
| 571 | unsigned int skip_locking:1; | 576 | unsigned int skip_locking:1; |
| 572 | unsigned int leave_spinning:1; | 577 | unsigned int leave_spinning:1; |
| 573 | unsigned int search_commit_root:1; | 578 | unsigned int search_commit_root:1; |
| 579 | unsigned int really_keep_locks:1; | ||
| 574 | }; | 580 | }; |
| 575 | 581 | ||
| 576 | /* | 582 | /* |
| @@ -885,6 +891,59 @@ struct btrfs_dev_stats_item { | |||
| 885 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; | 891 | __le64 values[BTRFS_DEV_STAT_VALUES_MAX]; |
| 886 | } __attribute__ ((__packed__)); | 892 | } __attribute__ ((__packed__)); |
| 887 | 893 | ||
| 894 | #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 | ||
| 895 | #define BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 | ||
| 896 | #define BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED 0 | ||
| 897 | #define BTRFS_DEV_REPLACE_ITEM_STATE_STARTED 1 | ||
| 898 | #define BTRFS_DEV_REPLACE_ITEM_STATE_SUSPENDED 2 | ||
| 899 | #define BTRFS_DEV_REPLACE_ITEM_STATE_FINISHED 3 | ||
| 900 | #define BTRFS_DEV_REPLACE_ITEM_STATE_CANCELED 4 | ||
| 901 | |||
| 902 | struct btrfs_dev_replace { | ||
| 903 | u64 replace_state; /* see #define above */ | ||
| 904 | u64 time_started; /* seconds since 1-Jan-1970 */ | ||
| 905 | u64 time_stopped; /* seconds since 1-Jan-1970 */ | ||
| 906 | atomic64_t num_write_errors; | ||
| 907 | atomic64_t num_uncorrectable_read_errors; | ||
| 908 | |||
| 909 | u64 cursor_left; | ||
| 910 | u64 committed_cursor_left; | ||
| 911 | u64 cursor_left_last_write_of_item; | ||
| 912 | u64 cursor_right; | ||
| 913 | |||
| 914 | u64 cont_reading_from_srcdev_mode; /* see #define above */ | ||
| 915 | |||
| 916 | int is_valid; | ||
| 917 | int item_needs_writeback; | ||
| 918 | struct btrfs_device *srcdev; | ||
| 919 | struct btrfs_device *tgtdev; | ||
| 920 | |||
| 921 | pid_t lock_owner; | ||
| 922 | atomic_t nesting_level; | ||
| 923 | struct mutex lock_finishing_cancel_unmount; | ||
| 924 | struct mutex lock_management_lock; | ||
| 925 | struct mutex lock; | ||
| 926 | |||
| 927 | struct btrfs_scrub_progress scrub_progress; | ||
| 928 | }; | ||
| 929 | |||
| 930 | struct btrfs_dev_replace_item { | ||
| 931 | /* | ||
| 932 | * grow this item struct at the end for future enhancements and keep | ||
| 933 | * the existing values unchanged | ||
| 934 | */ | ||
| 935 | __le64 src_devid; | ||
| 936 | __le64 cursor_left; | ||
| 937 | __le64 cursor_right; | ||
| 938 | __le64 cont_reading_from_srcdev_mode; | ||
| 939 | |||
| 940 | __le64 replace_state; | ||
| 941 | __le64 time_started; | ||
| 942 | __le64 time_stopped; | ||
| 943 | __le64 num_write_errors; | ||
| 944 | __le64 num_uncorrectable_read_errors; | ||
| 945 | } __attribute__ ((__packed__)); | ||
| 946 | |||
| 888 | /* different types of block groups (and chunks) */ | 947 | /* different types of block groups (and chunks) */ |
| 889 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) | 948 | #define BTRFS_BLOCK_GROUP_DATA (1ULL << 0) |
| 890 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) | 949 | #define BTRFS_BLOCK_GROUP_SYSTEM (1ULL << 1) |
| @@ -1333,6 +1392,7 @@ struct btrfs_fs_info { | |||
| 1333 | struct btrfs_workers generic_worker; | 1392 | struct btrfs_workers generic_worker; |
| 1334 | struct btrfs_workers workers; | 1393 | struct btrfs_workers workers; |
| 1335 | struct btrfs_workers delalloc_workers; | 1394 | struct btrfs_workers delalloc_workers; |
| 1395 | struct btrfs_workers flush_workers; | ||
| 1336 | struct btrfs_workers endio_workers; | 1396 | struct btrfs_workers endio_workers; |
| 1337 | struct btrfs_workers endio_meta_workers; | 1397 | struct btrfs_workers endio_meta_workers; |
| 1338 | struct btrfs_workers endio_meta_write_workers; | 1398 | struct btrfs_workers endio_meta_write_workers; |
| @@ -1429,6 +1489,8 @@ struct btrfs_fs_info { | |||
| 1429 | struct rw_semaphore scrub_super_lock; | 1489 | struct rw_semaphore scrub_super_lock; |
| 1430 | int scrub_workers_refcnt; | 1490 | int scrub_workers_refcnt; |
| 1431 | struct btrfs_workers scrub_workers; | 1491 | struct btrfs_workers scrub_workers; |
| 1492 | struct btrfs_workers scrub_wr_completion_workers; | ||
| 1493 | struct btrfs_workers scrub_nocow_workers; | ||
| 1432 | 1494 | ||
| 1433 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1495 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 1434 | u32 check_integrity_print_mask; | 1496 | u32 check_integrity_print_mask; |
| @@ -1470,6 +1532,11 @@ struct btrfs_fs_info { | |||
| 1470 | int backup_root_index; | 1532 | int backup_root_index; |
| 1471 | 1533 | ||
| 1472 | int num_tolerated_disk_barrier_failures; | 1534 | int num_tolerated_disk_barrier_failures; |
| 1535 | |||
| 1536 | /* device replace state */ | ||
| 1537 | struct btrfs_dev_replace dev_replace; | ||
| 1538 | |||
| 1539 | atomic_t mutually_exclusive_operation_running; | ||
| 1473 | }; | 1540 | }; |
| 1474 | 1541 | ||
| 1475 | /* | 1542 | /* |
| @@ -1579,7 +1646,7 @@ struct btrfs_root { | |||
| 1579 | 1646 | ||
| 1580 | int force_cow; | 1647 | int force_cow; |
| 1581 | 1648 | ||
| 1582 | spinlock_t root_times_lock; | 1649 | spinlock_t root_item_lock; |
| 1583 | }; | 1650 | }; |
| 1584 | 1651 | ||
| 1585 | struct btrfs_ioctl_defrag_range_args { | 1652 | struct btrfs_ioctl_defrag_range_args { |
| @@ -1723,6 +1790,12 @@ struct btrfs_ioctl_defrag_range_args { | |||
| 1723 | #define BTRFS_DEV_STATS_KEY 249 | 1790 | #define BTRFS_DEV_STATS_KEY 249 |
| 1724 | 1791 | ||
| 1725 | /* | 1792 | /* |
| 1793 | * Persistently stores the device replace state in the device tree. | ||
| 1794 | * The key is built like this: (0, BTRFS_DEV_REPLACE_KEY, 0). | ||
| 1795 | */ | ||
| 1796 | #define BTRFS_DEV_REPLACE_KEY 250 | ||
| 1797 | |||
| 1798 | /* | ||
| 1726 | * string items are for debugging. They just store a short string of | 1799 | * string items are for debugging. They just store a short string of |
| 1727 | * data in the FS | 1800 | * data in the FS |
| 1728 | */ | 1801 | */ |
| @@ -1787,7 +1860,7 @@ struct btrfs_map_token { | |||
| 1787 | 1860 | ||
| 1788 | static inline void btrfs_init_map_token (struct btrfs_map_token *token) | 1861 | static inline void btrfs_init_map_token (struct btrfs_map_token *token) |
| 1789 | { | 1862 | { |
| 1790 | memset(token, 0, sizeof(*token)); | 1863 | token->kaddr = NULL; |
| 1791 | } | 1864 | } |
| 1792 | 1865 | ||
| 1793 | /* some macros to generate set/get funcs for the struct fields. This | 1866 | /* some macros to generate set/get funcs for the struct fields. This |
| @@ -2755,6 +2828,49 @@ BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, | |||
| 2755 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, | 2828 | BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, |
| 2756 | rsv_excl, 64); | 2829 | rsv_excl, 64); |
| 2757 | 2830 | ||
| 2831 | /* btrfs_dev_replace_item */ | ||
| 2832 | BTRFS_SETGET_FUNCS(dev_replace_src_devid, | ||
| 2833 | struct btrfs_dev_replace_item, src_devid, 64); | ||
| 2834 | BTRFS_SETGET_FUNCS(dev_replace_cont_reading_from_srcdev_mode, | ||
| 2835 | struct btrfs_dev_replace_item, cont_reading_from_srcdev_mode, | ||
| 2836 | 64); | ||
| 2837 | BTRFS_SETGET_FUNCS(dev_replace_replace_state, struct btrfs_dev_replace_item, | ||
| 2838 | replace_state, 64); | ||
| 2839 | BTRFS_SETGET_FUNCS(dev_replace_time_started, struct btrfs_dev_replace_item, | ||
| 2840 | time_started, 64); | ||
| 2841 | BTRFS_SETGET_FUNCS(dev_replace_time_stopped, struct btrfs_dev_replace_item, | ||
| 2842 | time_stopped, 64); | ||
| 2843 | BTRFS_SETGET_FUNCS(dev_replace_num_write_errors, struct btrfs_dev_replace_item, | ||
| 2844 | num_write_errors, 64); | ||
| 2845 | BTRFS_SETGET_FUNCS(dev_replace_num_uncorrectable_read_errors, | ||
| 2846 | struct btrfs_dev_replace_item, num_uncorrectable_read_errors, | ||
| 2847 | 64); | ||
| 2848 | BTRFS_SETGET_FUNCS(dev_replace_cursor_left, struct btrfs_dev_replace_item, | ||
| 2849 | cursor_left, 64); | ||
| 2850 | BTRFS_SETGET_FUNCS(dev_replace_cursor_right, struct btrfs_dev_replace_item, | ||
| 2851 | cursor_right, 64); | ||
| 2852 | |||
| 2853 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_src_devid, | ||
| 2854 | struct btrfs_dev_replace_item, src_devid, 64); | ||
| 2855 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cont_reading_from_srcdev_mode, | ||
| 2856 | struct btrfs_dev_replace_item, | ||
| 2857 | cont_reading_from_srcdev_mode, 64); | ||
| 2858 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_replace_state, | ||
| 2859 | struct btrfs_dev_replace_item, replace_state, 64); | ||
| 2860 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_started, | ||
| 2861 | struct btrfs_dev_replace_item, time_started, 64); | ||
| 2862 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_stopped, | ||
| 2863 | struct btrfs_dev_replace_item, time_stopped, 64); | ||
| 2864 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_write_errors, | ||
| 2865 | struct btrfs_dev_replace_item, num_write_errors, 64); | ||
| 2866 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_uncorrectable_read_errors, | ||
| 2867 | struct btrfs_dev_replace_item, | ||
| 2868 | num_uncorrectable_read_errors, 64); | ||
| 2869 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_left, | ||
| 2870 | struct btrfs_dev_replace_item, cursor_left, 64); | ||
| 2871 | BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, | ||
| 2872 | struct btrfs_dev_replace_item, cursor_right, 64); | ||
| 2873 | |||
| 2758 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 2874 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
| 2759 | { | 2875 | { |
| 2760 | return sb->s_fs_info; | 2876 | return sb->s_fs_info; |
| @@ -2900,6 +3016,18 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans, | |||
| 2900 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 3016 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
| 2901 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); | 3017 | u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data); |
| 2902 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 3018 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 3019 | |||
| 3020 | enum btrfs_reserve_flush_enum { | ||
| 3021 | /* If we are in the transaction, we can't flush anything.*/ | ||
| 3022 | BTRFS_RESERVE_NO_FLUSH, | ||
| 3023 | /* | ||
| 3024 | * Flushing delalloc may cause deadlock somewhere, in this | ||
| 3025 | * case, use FLUSH LIMIT | ||
| 3026 | */ | ||
| 3027 | BTRFS_RESERVE_FLUSH_LIMIT, | ||
| 3028 | BTRFS_RESERVE_FLUSH_ALL, | ||
| 3029 | }; | ||
| 3030 | |||
| 2903 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | 3031 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); |
| 2904 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); | 3032 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
| 2905 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 3033 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
| @@ -2919,19 +3047,13 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root, | |||
| 2919 | void btrfs_free_block_rsv(struct btrfs_root *root, | 3047 | void btrfs_free_block_rsv(struct btrfs_root *root, |
| 2920 | struct btrfs_block_rsv *rsv); | 3048 | struct btrfs_block_rsv *rsv); |
| 2921 | int btrfs_block_rsv_add(struct btrfs_root *root, | 3049 | int btrfs_block_rsv_add(struct btrfs_root *root, |
| 2922 | struct btrfs_block_rsv *block_rsv, | 3050 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, |
| 2923 | u64 num_bytes); | 3051 | enum btrfs_reserve_flush_enum flush); |
| 2924 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, | ||
| 2925 | struct btrfs_block_rsv *block_rsv, | ||
| 2926 | u64 num_bytes); | ||
| 2927 | int btrfs_block_rsv_check(struct btrfs_root *root, | 3052 | int btrfs_block_rsv_check(struct btrfs_root *root, |
| 2928 | struct btrfs_block_rsv *block_rsv, int min_factor); | 3053 | struct btrfs_block_rsv *block_rsv, int min_factor); |
| 2929 | int btrfs_block_rsv_refill(struct btrfs_root *root, | 3054 | int btrfs_block_rsv_refill(struct btrfs_root *root, |
| 2930 | struct btrfs_block_rsv *block_rsv, | 3055 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, |
| 2931 | u64 min_reserved); | 3056 | enum btrfs_reserve_flush_enum flush); |
| 2932 | int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, | ||
| 2933 | struct btrfs_block_rsv *block_rsv, | ||
| 2934 | u64 min_reserved); | ||
| 2935 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 3057 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
| 2936 | struct btrfs_block_rsv *dst_rsv, | 3058 | struct btrfs_block_rsv *dst_rsv, |
| 2937 | u64 num_bytes); | 3059 | u64 num_bytes); |
| @@ -2955,6 +3077,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range); | |||
| 2955 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | 3077 | int btrfs_init_space_info(struct btrfs_fs_info *fs_info); |
| 2956 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3078 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
| 2957 | struct btrfs_fs_info *fs_info); | 3079 | struct btrfs_fs_info *fs_info); |
| 3080 | int __get_raid_index(u64 flags); | ||
| 2958 | /* ctree.c */ | 3081 | /* ctree.c */ |
| 2959 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3082 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 2960 | int level, int *slot); | 3083 | int level, int *slot); |
| @@ -3065,6 +3188,9 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
| 3065 | } | 3188 | } |
| 3066 | 3189 | ||
| 3067 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 3190 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
| 3191 | int btrfs_next_leaf_write(struct btrfs_trans_handle *trans, | ||
| 3192 | struct btrfs_root *root, struct btrfs_path *path, | ||
| 3193 | int del); | ||
| 3068 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, | 3194 | int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path, |
| 3069 | u64 time_seq); | 3195 | u64 time_seq); |
| 3070 | static inline int btrfs_next_old_item(struct btrfs_root *root, | 3196 | static inline int btrfs_next_old_item(struct btrfs_root *root, |
| @@ -3157,6 +3283,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, | |||
| 3157 | struct btrfs_root *root); | 3283 | struct btrfs_root *root); |
| 3158 | 3284 | ||
| 3159 | /* dir-item.c */ | 3285 | /* dir-item.c */ |
| 3286 | int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | ||
| 3287 | const char *name, int name_len); | ||
| 3160 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, | 3288 | int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, |
| 3161 | struct btrfs_root *root, const char *name, | 3289 | struct btrfs_root *root, const char *name, |
| 3162 | int name_len, struct inode *dir, | 3290 | int name_len, struct inode *dir, |
| @@ -3256,6 +3384,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
| 3256 | struct btrfs_root *root, | 3384 | struct btrfs_root *root, |
| 3257 | struct btrfs_path *path, u64 objectid, | 3385 | struct btrfs_path *path, u64 objectid, |
| 3258 | u64 bytenr, int mod); | 3386 | u64 bytenr, int mod); |
| 3387 | u64 btrfs_file_extent_length(struct btrfs_path *path); | ||
| 3259 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | 3388 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
| 3260 | struct btrfs_root *root, | 3389 | struct btrfs_root *root, |
| 3261 | struct btrfs_ordered_sum *sums); | 3390 | struct btrfs_ordered_sum *sums); |
| @@ -3271,6 +3400,19 @@ int btrfs_csum_truncate(struct btrfs_trans_handle *trans, | |||
| 3271 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 3400 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
| 3272 | struct list_head *list, int search_commit); | 3401 | struct list_head *list, int search_commit); |
| 3273 | /* inode.c */ | 3402 | /* inode.c */ |
| 3403 | struct btrfs_delalloc_work { | ||
| 3404 | struct inode *inode; | ||
| 3405 | int wait; | ||
| 3406 | int delay_iput; | ||
| 3407 | struct completion completion; | ||
| 3408 | struct list_head list; | ||
| 3409 | struct btrfs_work work; | ||
| 3410 | }; | ||
| 3411 | |||
| 3412 | struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | ||
| 3413 | int wait, int delay_iput); | ||
| 3414 | void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work); | ||
| 3415 | |||
| 3274 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, | 3416 | struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page, |
| 3275 | size_t pg_offset, u64 start, u64 len, | 3417 | size_t pg_offset, u64 start, u64 len, |
| 3276 | int create); | 3418 | int create); |
| @@ -3370,9 +3512,12 @@ void btrfs_get_block_group_info(struct list_head *groups_list, | |||
| 3370 | struct btrfs_ioctl_space_info *space); | 3512 | struct btrfs_ioctl_space_info *space); |
| 3371 | 3513 | ||
| 3372 | /* file.c */ | 3514 | /* file.c */ |
| 3515 | int btrfs_auto_defrag_init(void); | ||
| 3516 | void btrfs_auto_defrag_exit(void); | ||
| 3373 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | 3517 | int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, |
| 3374 | struct inode *inode); | 3518 | struct inode *inode); |
| 3375 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); | 3519 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); |
| 3520 | void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info); | ||
| 3376 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); | 3521 | int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); |
| 3377 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 3522 | void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 3378 | int skip_pinned); | 3523 | int skip_pinned); |
| @@ -3519,15 +3664,16 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | |||
| 3519 | struct btrfs_pending_snapshot *pending); | 3664 | struct btrfs_pending_snapshot *pending); |
| 3520 | 3665 | ||
| 3521 | /* scrub.c */ | 3666 | /* scrub.c */ |
| 3522 | int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | 3667 | int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, |
| 3523 | struct btrfs_scrub_progress *progress, int readonly); | 3668 | u64 end, struct btrfs_scrub_progress *progress, |
| 3669 | int readonly, int is_dev_replace); | ||
| 3524 | void btrfs_scrub_pause(struct btrfs_root *root); | 3670 | void btrfs_scrub_pause(struct btrfs_root *root); |
| 3525 | void btrfs_scrub_pause_super(struct btrfs_root *root); | 3671 | void btrfs_scrub_pause_super(struct btrfs_root *root); |
| 3526 | void btrfs_scrub_continue(struct btrfs_root *root); | 3672 | void btrfs_scrub_continue(struct btrfs_root *root); |
| 3527 | void btrfs_scrub_continue_super(struct btrfs_root *root); | 3673 | void btrfs_scrub_continue_super(struct btrfs_root *root); |
| 3528 | int __btrfs_scrub_cancel(struct btrfs_fs_info *info); | 3674 | int btrfs_scrub_cancel(struct btrfs_fs_info *info); |
| 3529 | int btrfs_scrub_cancel(struct btrfs_root *root); | 3675 | int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, |
| 3530 | int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev); | 3676 | struct btrfs_device *dev); |
| 3531 | int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); | 3677 | int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid); |
| 3532 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 3678 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
| 3533 | struct btrfs_scrub_progress *progress); | 3679 | struct btrfs_scrub_progress *progress); |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 478f66bdc57b..34836036f01b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -651,7 +651,8 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 651 | */ | 651 | */ |
| 652 | if (!src_rsv || (!trans->bytes_reserved && | 652 | if (!src_rsv || (!trans->bytes_reserved && |
| 653 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { | 653 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { |
| 654 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); | 654 | ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, |
| 655 | BTRFS_RESERVE_NO_FLUSH); | ||
| 655 | /* | 656 | /* |
| 656 | * Since we're under a transaction reserve_metadata_bytes could | 657 | * Since we're under a transaction reserve_metadata_bytes could |
| 657 | * try to commit the transaction which will make it return | 658 | * try to commit the transaction which will make it return |
| @@ -686,7 +687,8 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
| 686 | * reserve something strictly for us. If not be a pain and try | 687 | * reserve something strictly for us. If not be a pain and try |
| 687 | * to steal from the delalloc block rsv. | 688 | * to steal from the delalloc block rsv. |
| 688 | */ | 689 | */ |
| 689 | ret = btrfs_block_rsv_add_noflush(root, dst_rsv, num_bytes); | 690 | ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, |
| 691 | BTRFS_RESERVE_NO_FLUSH); | ||
| 690 | if (!ret) | 692 | if (!ret) |
| 691 | goto out; | 693 | goto out; |
| 692 | 694 | ||
| @@ -1255,7 +1257,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
| 1255 | struct btrfs_delayed_node *delayed_node = NULL; | 1257 | struct btrfs_delayed_node *delayed_node = NULL; |
| 1256 | struct btrfs_root *root; | 1258 | struct btrfs_root *root; |
| 1257 | struct btrfs_block_rsv *block_rsv; | 1259 | struct btrfs_block_rsv *block_rsv; |
| 1258 | unsigned long nr = 0; | ||
| 1259 | int need_requeue = 0; | 1260 | int need_requeue = 0; |
| 1260 | int ret; | 1261 | int ret; |
| 1261 | 1262 | ||
| @@ -1316,11 +1317,9 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
| 1316 | delayed_node); | 1317 | delayed_node); |
| 1317 | mutex_unlock(&delayed_node->mutex); | 1318 | mutex_unlock(&delayed_node->mutex); |
| 1318 | 1319 | ||
| 1319 | nr = trans->blocks_used; | ||
| 1320 | |||
| 1321 | trans->block_rsv = block_rsv; | 1320 | trans->block_rsv = block_rsv; |
| 1322 | btrfs_end_transaction_dmeta(trans, root); | 1321 | btrfs_end_transaction_dmeta(trans, root); |
| 1323 | __btrfs_btree_balance_dirty(root, nr); | 1322 | btrfs_btree_balance_dirty_nodelay(root); |
| 1324 | free_path: | 1323 | free_path: |
| 1325 | btrfs_free_path(path); | 1324 | btrfs_free_path(path); |
| 1326 | out: | 1325 | out: |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c new file mode 100644 index 000000000000..66dbc8dbddf7 --- /dev/null +++ b/fs/btrfs/dev-replace.c | |||
| @@ -0,0 +1,856 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) STRATO AG 2012. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | #include <linux/sched.h> | ||
| 19 | #include <linux/bio.h> | ||
| 20 | #include <linux/slab.h> | ||
| 21 | #include <linux/buffer_head.h> | ||
| 22 | #include <linux/blkdev.h> | ||
| 23 | #include <linux/random.h> | ||
| 24 | #include <linux/iocontext.h> | ||
| 25 | #include <linux/capability.h> | ||
| 26 | #include <linux/kthread.h> | ||
| 27 | #include <linux/math64.h> | ||
| 28 | #include <asm/div64.h> | ||
| 29 | #include "compat.h" | ||
| 30 | #include "ctree.h" | ||
| 31 | #include "extent_map.h" | ||
| 32 | #include "disk-io.h" | ||
| 33 | #include "transaction.h" | ||
| 34 | #include "print-tree.h" | ||
| 35 | #include "volumes.h" | ||
| 36 | #include "async-thread.h" | ||
| 37 | #include "check-integrity.h" | ||
| 38 | #include "rcu-string.h" | ||
| 39 | #include "dev-replace.h" | ||
| 40 | |||
| 41 | static u64 btrfs_get_seconds_since_1970(void); | ||
| 42 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | ||
| 43 | int scrub_ret); | ||
| 44 | static void btrfs_dev_replace_update_device_in_mapping_tree( | ||
| 45 | struct btrfs_fs_info *fs_info, | ||
| 46 | struct btrfs_device *srcdev, | ||
| 47 | struct btrfs_device *tgtdev); | ||
| 48 | static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid, | ||
| 49 | char *srcdev_name, | ||
| 50 | struct btrfs_device **device); | ||
| 51 | static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info); | ||
| 52 | static int btrfs_dev_replace_kthread(void *data); | ||
| 53 | static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info); | ||
| 54 | |||
| 55 | |||
| 56 | int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) | ||
| 57 | { | ||
| 58 | struct btrfs_key key; | ||
| 59 | struct btrfs_root *dev_root = fs_info->dev_root; | ||
| 60 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 61 | struct extent_buffer *eb; | ||
| 62 | int slot; | ||
| 63 | int ret = 0; | ||
| 64 | struct btrfs_path *path = NULL; | ||
| 65 | int item_size; | ||
| 66 | struct btrfs_dev_replace_item *ptr; | ||
| 67 | u64 src_devid; | ||
| 68 | |||
| 69 | path = btrfs_alloc_path(); | ||
| 70 | if (!path) { | ||
| 71 | ret = -ENOMEM; | ||
| 72 | goto out; | ||
| 73 | } | ||
| 74 | |||
| 75 | key.objectid = 0; | ||
| 76 | key.type = BTRFS_DEV_REPLACE_KEY; | ||
| 77 | key.offset = 0; | ||
| 78 | ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0); | ||
| 79 | if (ret) { | ||
| 80 | no_valid_dev_replace_entry_found: | ||
| 81 | ret = 0; | ||
| 82 | dev_replace->replace_state = | ||
| 83 | BTRFS_DEV_REPLACE_ITEM_STATE_NEVER_STARTED; | ||
| 84 | dev_replace->cont_reading_from_srcdev_mode = | ||
| 85 | BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_ALWAYS; | ||
| 86 | dev_replace->replace_state = 0; | ||
| 87 | dev_replace->time_started = 0; | ||
| 88 | dev_replace->time_stopped = 0; | ||
| 89 | atomic64_set(&dev_replace->num_write_errors, 0); | ||
| 90 | atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0); | ||
| 91 | dev_replace->cursor_left = 0; | ||
| 92 | dev_replace->committed_cursor_left = 0; | ||
| 93 | dev_replace->cursor_left_last_write_of_item = 0; | ||
| 94 | dev_replace->cursor_right = 0; | ||
| 95 | dev_replace->srcdev = NULL; | ||
| 96 | dev_replace->tgtdev = NULL; | ||
| 97 | dev_replace->is_valid = 0; | ||
| 98 | dev_replace->item_needs_writeback = 0; | ||
| 99 | goto out; | ||
| 100 | } | ||
| 101 | slot = path->slots[0]; | ||
| 102 | eb = path->nodes[0]; | ||
| 103 | item_size = btrfs_item_size_nr(eb, slot); | ||
| 104 | ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item); | ||
| 105 | |||
| 106 | if (item_size != sizeof(struct btrfs_dev_replace_item)) { | ||
| 107 | pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n"); | ||
| 108 | goto no_valid_dev_replace_entry_found; | ||
| 109 | } | ||
| 110 | |||
| 111 | src_devid = btrfs_dev_replace_src_devid(eb, ptr); | ||
| 112 | dev_replace->cont_reading_from_srcdev_mode = | ||
| 113 | btrfs_dev_replace_cont_reading_from_srcdev_mode(eb, ptr); | ||
| 114 | dev_replace->replace_state = btrfs_dev_replace_replace_state(eb, ptr); | ||
| 115 | dev_replace->time_started = btrfs_dev_replace_time_started(eb, ptr); | ||
| 116 | dev_replace->time_stopped = | ||
| 117 | btrfs_dev_replace_time_stopped(eb, ptr); | ||
| 118 | atomic64_set(&dev_replace->num_write_errors, | ||
| 119 | btrfs_dev_replace_num_write_errors(eb, ptr)); | ||
| 120 | atomic64_set(&dev_replace->num_uncorrectable_read_errors, | ||
| 121 | btrfs_dev_replace_num_uncorrectable_read_errors(eb, ptr)); | ||
| 122 | dev_replace->cursor_left = btrfs_dev_replace_cursor_left(eb, ptr); | ||
| 123 | dev_replace->committed_cursor_left = dev_replace->cursor_left; | ||
| 124 | dev_replace->cursor_left_last_write_of_item = dev_replace->cursor_left; | ||
| 125 | dev_replace->cursor_right = btrfs_dev_replace_cursor_right(eb, ptr); | ||
| 126 | dev_replace->is_valid = 1; | ||
| 127 | |||
| 128 | dev_replace->item_needs_writeback = 0; | ||
| 129 | switch (dev_replace->replace_state) { | ||
| 130 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 131 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 132 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 133 | dev_replace->srcdev = NULL; | ||
| 134 | dev_replace->tgtdev = NULL; | ||
| 135 | break; | ||
| 136 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 137 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 138 | dev_replace->srcdev = btrfs_find_device(fs_info, src_devid, | ||
| 139 | NULL, NULL); | ||
| 140 | dev_replace->tgtdev = btrfs_find_device(fs_info, | ||
| 141 | BTRFS_DEV_REPLACE_DEVID, | ||
| 142 | NULL, NULL); | ||
| 143 | /* | ||
| 144 | * allow 'btrfs dev replace_cancel' if src/tgt device is | ||
| 145 | * missing | ||
| 146 | */ | ||
| 147 | if (!dev_replace->srcdev && | ||
| 148 | !btrfs_test_opt(dev_root, DEGRADED)) { | ||
| 149 | ret = -EIO; | ||
| 150 | pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", | ||
| 151 | (unsigned long long)src_devid); | ||
| 152 | } | ||
| 153 | if (!dev_replace->tgtdev && | ||
| 154 | !btrfs_test_opt(dev_root, DEGRADED)) { | ||
| 155 | ret = -EIO; | ||
| 156 | pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", | ||
| 157 | (unsigned long long)BTRFS_DEV_REPLACE_DEVID); | ||
| 158 | } | ||
| 159 | if (dev_replace->tgtdev) { | ||
| 160 | if (dev_replace->srcdev) { | ||
| 161 | dev_replace->tgtdev->total_bytes = | ||
| 162 | dev_replace->srcdev->total_bytes; | ||
| 163 | dev_replace->tgtdev->disk_total_bytes = | ||
| 164 | dev_replace->srcdev->disk_total_bytes; | ||
| 165 | dev_replace->tgtdev->bytes_used = | ||
| 166 | dev_replace->srcdev->bytes_used; | ||
| 167 | } | ||
| 168 | dev_replace->tgtdev->is_tgtdev_for_dev_replace = 1; | ||
| 169 | btrfs_init_dev_replace_tgtdev_for_resume(fs_info, | ||
| 170 | dev_replace->tgtdev); | ||
| 171 | } | ||
| 172 | break; | ||
| 173 | } | ||
| 174 | |||
| 175 | out: | ||
| 176 | if (path) | ||
| 177 | btrfs_free_path(path); | ||
| 178 | return ret; | ||
| 179 | } | ||
| 180 | |||
/*
 * called from commit_transaction. Writes changed device replace state to
 * disk.
 *
 * Persists the in-memory dev_replace state into the DEV_REPLACE item of
 * the device tree.  An existing item of the wrong (smaller) size is
 * deleted and re-inserted.  Returns 0 on success or a negative errno.
 */
int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
			  struct btrfs_fs_info *fs_info)
{
	int ret;
	struct btrfs_root *dev_root = fs_info->dev_root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_dev_replace_item *ptr;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	/* quick exit when there is nothing to write back */
	btrfs_dev_replace_lock(dev_replace);
	if (!dev_replace->is_valid ||
	    !dev_replace->item_needs_writeback) {
		btrfs_dev_replace_unlock(dev_replace);
		return 0;
	}
	btrfs_dev_replace_unlock(dev_replace);

	/* the DEV_REPLACE item always lives at (0, DEV_REPLACE_KEY, 0) */
	key.objectid = 0;
	key.type = BTRFS_DEV_REPLACE_KEY;
	key.offset = 0;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
	if (ret < 0) {
		pr_warn("btrfs: error %d while searching for dev_replace item!\n",
			ret);
		goto out;
	}

	if (ret == 0 &&
	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
		/*
		 * need to delete old one and insert a new one.
		 * Since no attempt is made to recover any old state, if the
		 * dev_replace state is 'running', the data on the target
		 * drive is lost.
		 * It would be possible to recover the state: just make sure
		 * that the beginning of the item is never changed and always
		 * contains all the essential information. Then read this
		 * minimal set of information and use it as a base for the
		 * new state.
		 */
		ret = btrfs_del_item(trans, dev_root, path);
		if (ret != 0) {
			pr_warn("btrfs: delete too small dev_replace item failed %d!\n",
				ret);
			goto out;
		}
		/* fall through to the insert path below */
		ret = 1;
	}

	if (ret == 1) {
		/* need to insert a new item */
		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, dev_root, path,
					      &key, sizeof(*ptr));
		if (ret < 0) {
			pr_warn("btrfs: insert dev_replace item failed %d!\n",
				ret);
			goto out;
		}
	}

	eb = path->nodes[0];
	ptr = btrfs_item_ptr(eb, path->slots[0],
			     struct btrfs_dev_replace_item);

	/* copy the in-memory state into the item under the dev_replace lock */
	btrfs_dev_replace_lock(dev_replace);
	if (dev_replace->srcdev)
		btrfs_set_dev_replace_src_devid(eb, ptr,
			dev_replace->srcdev->devid);
	else
		/* (u64)-1 marks "no source device" */
		btrfs_set_dev_replace_src_devid(eb, ptr, (u64)-1);
	btrfs_set_dev_replace_cont_reading_from_srcdev_mode(eb, ptr,
		dev_replace->cont_reading_from_srcdev_mode);
	btrfs_set_dev_replace_replace_state(eb, ptr,
		dev_replace->replace_state);
	btrfs_set_dev_replace_time_started(eb, ptr, dev_replace->time_started);
	btrfs_set_dev_replace_time_stopped(eb, ptr, dev_replace->time_stopped);
	btrfs_set_dev_replace_num_write_errors(eb, ptr,
		atomic64_read(&dev_replace->num_write_errors));
	btrfs_set_dev_replace_num_uncorrectable_read_errors(eb, ptr,
		atomic64_read(&dev_replace->num_uncorrectable_read_errors));
	/* remember which cursor value this item write corresponds to */
	dev_replace->cursor_left_last_write_of_item =
		dev_replace->cursor_left;
	btrfs_set_dev_replace_cursor_left(eb, ptr,
		dev_replace->cursor_left_last_write_of_item);
	btrfs_set_dev_replace_cursor_right(eb, ptr,
		dev_replace->cursor_right);
	dev_replace->item_needs_writeback = 0;
	btrfs_dev_replace_unlock(dev_replace);

	btrfs_mark_buffer_dirty(eb);

out:
	/* btrfs_free_path() tolerates a NULL path */
	btrfs_free_path(path);

	return ret;
}
| 290 | |||
| 291 | void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) | ||
| 292 | { | ||
| 293 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 294 | |||
| 295 | dev_replace->committed_cursor_left = | ||
| 296 | dev_replace->cursor_left_last_write_of_item; | ||
| 297 | } | ||
| 298 | |||
| 299 | static u64 btrfs_get_seconds_since_1970(void) | ||
| 300 | { | ||
| 301 | struct timespec t = CURRENT_TIME_SEC; | ||
| 302 | |||
| 303 | return t.tv_sec; | ||
| 304 | } | ||
| 305 | |||
/*
 * Start a device replace operation: validate the ioctl arguments, open the
 * target device, resolve the source device, switch the in-memory state to
 * STARTED (which makes btrfs_map_block() duplicate writes to the target),
 * commit the state to disk and then run the scrub-based copy.
 *
 * Returns 0 on success (including when the copy itself reports an error,
 * which is handled by btrfs_dev_replace_finishing()), or a negative errno
 * for invalid arguments / setup failures.  args->result carries the
 * ioctl-level outcome.
 */
int btrfs_dev_replace_start(struct btrfs_root *root,
			    struct btrfs_ioctl_dev_replace_args *args)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	int ret;
	struct btrfs_device *tgt_device = NULL;
	struct btrfs_device *src_device = NULL;

	/* only the two defined read-routing modes are accepted */
	switch (args->start.cont_reading_from_srcdev_mode) {
	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS:
	case BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID:
		break;
	default:
		return -EINVAL;
	}

	/* the source must be named by devid or path; the target by path */
	if ((args->start.srcdevid == 0 && args->start.srcdev_name[0] == '\0') ||
	    args->start.tgtdev_name[0] == '\0')
		return -EINVAL;

	mutex_lock(&fs_info->volume_mutex);
	ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name,
					    &tgt_device);
	if (ret) {
		pr_err("btrfs: target device %s is invalid!\n",
		       args->start.tgtdev_name);
		mutex_unlock(&fs_info->volume_mutex);
		return -EINVAL;
	}

	ret = btrfs_dev_replace_find_srcdev(root, args->start.srcdevid,
					    args->start.srcdev_name,
					    &src_device);
	mutex_unlock(&fs_info->volume_mutex);
	if (ret) {
		ret = -EINVAL;
		goto leave_no_lock;
	}

	/* every byte of the source must fit on the target */
	if (tgt_device->total_bytes < src_device->total_bytes) {
		pr_err("btrfs: target device is smaller than source device!\n");
		ret = -EINVAL;
		goto leave_no_lock;
	}

	btrfs_dev_replace_lock(dev_replace);
	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		/* only one replace operation may run at a time */
		args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED;
		goto leave;
	}

	dev_replace->cont_reading_from_srcdev_mode =
		args->start.cont_reading_from_srcdev_mode;
	WARN_ON(!src_device);
	dev_replace->srcdev = src_device;
	WARN_ON(!tgt_device);
	dev_replace->tgtdev = tgt_device;

	printk_in_rcu(KERN_INFO
		      "btrfs: dev_replace from %s (devid %llu) to %s) started\n",
		      src_device->missing ? "<missing disk>" :
			rcu_str_deref(src_device->name),
		      src_device->devid,
		      rcu_str_deref(tgt_device->name));

	/* mirror the source geometry onto the target device */
	tgt_device->total_bytes = src_device->total_bytes;
	tgt_device->disk_total_bytes = src_device->disk_total_bytes;
	tgt_device->bytes_used = src_device->bytes_used;

	/*
	 * from now on, the writes to the srcdev are all duplicated to
	 * go to the tgtdev as well (refer to btrfs_map_block()).
	 */
	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
	dev_replace->time_started = btrfs_get_seconds_since_1970();
	dev_replace->cursor_left = 0;
	dev_replace->committed_cursor_left = 0;
	dev_replace->cursor_left_last_write_of_item = 0;
	dev_replace->cursor_right = 0;
	dev_replace->is_valid = 1;
	dev_replace->item_needs_writeback = 1;
	args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
	btrfs_dev_replace_unlock(dev_replace);

	btrfs_wait_ordered_extents(root, 0);

	/* force writing the updated state information to disk */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		/* "leave" expects the dev_replace lock to be held */
		btrfs_dev_replace_lock(dev_replace);
		goto leave;
	}

	ret = btrfs_commit_transaction(trans, root);
	WARN_ON(ret);

	/* the disk copy procedure reuses the scrub code */
	ret = btrfs_scrub_dev(fs_info, src_device->devid, 0,
			      src_device->total_bytes,
			      &dev_replace->scrub_progress, 0, 1);

	/* a scrub error is reported and handled there, not propagated here */
	ret = btrfs_dev_replace_finishing(root->fs_info, ret);
	WARN_ON(ret);

	return 0;

leave:
	dev_replace->srcdev = NULL;
	dev_replace->tgtdev = NULL;
	btrfs_dev_replace_unlock(dev_replace);
leave_no_lock:
	if (tgt_device)
		btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
	return ret;
}
| 430 | |||
/*
 * Conclude a replace operation after the scrub-based copy returned.
 *
 * On copy success (@scrub_ret == 0) the target device takes over the
 * source device's identity (devid, uuid, position in the mapping tree),
 * the source is removed and its first superblock scratched.  On copy
 * failure the operation is marked CANCELED and the target is destroyed.
 * Returns 0, or a negative errno when the final transaction cannot be
 * started.
 */
static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
				       int scrub_ret)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	struct btrfs_device *tgt_device;
	struct btrfs_device *src_device;
	struct btrfs_root *root = fs_info->tree_root;
	u8 uuid_tmp[BTRFS_UUID_SIZE];
	struct btrfs_trans_handle *trans;
	int ret = 0;

	/* don't allow cancel or unmount to disturb the finishing procedure */
	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);

	btrfs_dev_replace_lock(dev_replace);
	/* was the operation canceled, or is it finished? */
	if (dev_replace->replace_state !=
	    BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
		btrfs_dev_replace_unlock(dev_replace);
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
		return 0;
	}

	tgt_device = dev_replace->tgtdev;
	src_device = dev_replace->srcdev;
	btrfs_dev_replace_unlock(dev_replace);

	/* replace old device with new one in mapping tree */
	if (!scrub_ret)
		btrfs_dev_replace_update_device_in_mapping_tree(fs_info,
								src_device,
								tgt_device);

	/*
	 * flush all outstanding I/O and inode extent mappings before the
	 * copy operation is declared as being finished
	 */
	btrfs_start_delalloc_inodes(root, 0);
	btrfs_wait_ordered_extents(root, 0);

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans, root);
	WARN_ON(ret);

	/* keep away write_all_supers() during the finishing procedure */
	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
	btrfs_dev_replace_lock(dev_replace);
	dev_replace->replace_state =
		scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
			  : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
	dev_replace->tgtdev = NULL;
	dev_replace->srcdev = NULL;
	dev_replace->time_stopped = btrfs_get_seconds_since_1970();
	dev_replace->item_needs_writeback = 1;

	if (scrub_ret) {
		/* the copy failed: tear down the target, keep the source */
		printk_in_rcu(KERN_ERR
			      "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n",
			      src_device->missing ? "<missing disk>" :
				rcu_str_deref(src_device->name),
			      src_device->devid,
			      rcu_str_deref(tgt_device->name), scrub_ret);
		btrfs_dev_replace_unlock(dev_replace);
		mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
		if (tgt_device)
			btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);

		return 0;
	}

	printk_in_rcu(KERN_INFO
		      "btrfs: dev_replace from %s (devid %llu) to %s) finished\n",
		      src_device->missing ? "<missing disk>" :
			rcu_str_deref(src_device->name),
		      src_device->devid,
		      rcu_str_deref(tgt_device->name));
	/* the target takes over the source's identity (devid and uuid) */
	tgt_device->is_tgtdev_for_dev_replace = 0;
	tgt_device->devid = src_device->devid;
	src_device->devid = BTRFS_DEV_REPLACE_DEVID;
	tgt_device->bytes_used = src_device->bytes_used;
	memcpy(uuid_tmp, tgt_device->uuid, sizeof(uuid_tmp));
	memcpy(tgt_device->uuid, src_device->uuid, sizeof(tgt_device->uuid));
	memcpy(src_device->uuid, uuid_tmp, sizeof(src_device->uuid));
	tgt_device->total_bytes = src_device->total_bytes;
	tgt_device->disk_total_bytes = src_device->disk_total_bytes;
	tgt_device->bytes_used = src_device->bytes_used;
	if (fs_info->sb->s_bdev == src_device->bdev)
		fs_info->sb->s_bdev = tgt_device->bdev;
	if (fs_info->fs_devices->latest_bdev == src_device->bdev)
		fs_info->fs_devices->latest_bdev = tgt_device->bdev;
	list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);

	btrfs_rm_dev_replace_srcdev(fs_info, src_device);
	if (src_device->bdev) {
		/* zero out the old super */
		btrfs_scratch_superblock(src_device);
	}
	/*
	 * this is again a consistent state where no dev_replace procedure
	 * is running, the target device is part of the filesystem, the
	 * source device is not part of the filesystem anymore and its 1st
	 * superblock is scratched out so that it is no longer marked to
	 * belong to this filesystem.
	 */
	btrfs_dev_replace_unlock(dev_replace);
	mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);

	/* write back the superblocks */
	trans = btrfs_start_transaction(root, 0);
	if (!IS_ERR(trans))
		btrfs_commit_transaction(trans, root);

	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);

	return 0;
}
| 552 | |||
| 553 | static void btrfs_dev_replace_update_device_in_mapping_tree( | ||
| 554 | struct btrfs_fs_info *fs_info, | ||
| 555 | struct btrfs_device *srcdev, | ||
| 556 | struct btrfs_device *tgtdev) | ||
| 557 | { | ||
| 558 | struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; | ||
| 559 | struct extent_map *em; | ||
| 560 | struct map_lookup *map; | ||
| 561 | u64 start = 0; | ||
| 562 | int i; | ||
| 563 | |||
| 564 | write_lock(&em_tree->lock); | ||
| 565 | do { | ||
| 566 | em = lookup_extent_mapping(em_tree, start, (u64)-1); | ||
| 567 | if (!em) | ||
| 568 | break; | ||
| 569 | map = (struct map_lookup *)em->bdev; | ||
| 570 | for (i = 0; i < map->num_stripes; i++) | ||
| 571 | if (srcdev == map->stripes[i].dev) | ||
| 572 | map->stripes[i].dev = tgtdev; | ||
| 573 | start = em->start + em->len; | ||
| 574 | free_extent_map(em); | ||
| 575 | } while (start); | ||
| 576 | write_unlock(&em_tree->lock); | ||
| 577 | } | ||
| 578 | |||
| 579 | static int btrfs_dev_replace_find_srcdev(struct btrfs_root *root, u64 srcdevid, | ||
| 580 | char *srcdev_name, | ||
| 581 | struct btrfs_device **device) | ||
| 582 | { | ||
| 583 | int ret; | ||
| 584 | |||
| 585 | if (srcdevid) { | ||
| 586 | ret = 0; | ||
| 587 | *device = btrfs_find_device(root->fs_info, srcdevid, NULL, | ||
| 588 | NULL); | ||
| 589 | if (!*device) | ||
| 590 | ret = -ENOENT; | ||
| 591 | } else { | ||
| 592 | ret = btrfs_find_device_missing_or_by_path(root, srcdev_name, | ||
| 593 | device); | ||
| 594 | } | ||
| 595 | return ret; | ||
| 596 | } | ||
| 597 | |||
| 598 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | ||
| 599 | struct btrfs_ioctl_dev_replace_args *args) | ||
| 600 | { | ||
| 601 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 602 | |||
| 603 | btrfs_dev_replace_lock(dev_replace); | ||
| 604 | /* even if !dev_replace_is_valid, the values are good enough for | ||
| 605 | * the replace_status ioctl */ | ||
| 606 | args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; | ||
| 607 | args->status.replace_state = dev_replace->replace_state; | ||
| 608 | args->status.time_started = dev_replace->time_started; | ||
| 609 | args->status.time_stopped = dev_replace->time_stopped; | ||
| 610 | args->status.num_write_errors = | ||
| 611 | atomic64_read(&dev_replace->num_write_errors); | ||
| 612 | args->status.num_uncorrectable_read_errors = | ||
| 613 | atomic64_read(&dev_replace->num_uncorrectable_read_errors); | ||
| 614 | switch (dev_replace->replace_state) { | ||
| 615 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 616 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 617 | args->status.progress_1000 = 0; | ||
| 618 | break; | ||
| 619 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 620 | args->status.progress_1000 = 1000; | ||
| 621 | break; | ||
| 622 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 623 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 624 | args->status.progress_1000 = div64_u64(dev_replace->cursor_left, | ||
| 625 | div64_u64(dev_replace->srcdev->total_bytes, 1000)); | ||
| 626 | break; | ||
| 627 | } | ||
| 628 | btrfs_dev_replace_unlock(dev_replace); | ||
| 629 | } | ||
| 630 | |||
/*
 * Ioctl entry point for canceling a running replace operation.  The
 * ioctl-level outcome is stored in args->result; the syscall itself
 * always succeeds.
 */
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
			     struct btrfs_ioctl_dev_replace_args *args)
{
	args->result = __btrfs_dev_replace_cancel(fs_info);
	return 0;
}
| 637 | |||
/*
 * Cancel a running or suspended replace operation: detach source and
 * target from the dev_replace state, stop the scrub-based copy, persist
 * the CANCELED state via a transaction commit and destroy the target
 * device.  Returns a BTRFS_IOCTL_DEV_REPLACE_RESULT_* value.
 *
 * NOTE(review): the transaction-start failure path returns PTR_ERR(trans)
 * (a negative errno) through the u64 result channel instead of a RESULT_*
 * constant — callers see a huge unsigned value; verify this is intended.
 */
static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	struct btrfs_device *tgt_device = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root = fs_info->tree_root;
	u64 result;
	int ret;

	/* serialize against finishing and unmount */
	mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
	btrfs_dev_replace_lock(dev_replace);
	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		/* nothing is running, so there is nothing to cancel */
		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
		btrfs_dev_replace_unlock(dev_replace);
		goto leave;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
		tgt_device = dev_replace->tgtdev;
		dev_replace->tgtdev = NULL;
		dev_replace->srcdev = NULL;
		break;
	}
	dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
	dev_replace->time_stopped = btrfs_get_seconds_since_1970();
	dev_replace->item_needs_writeback = 1;
	btrfs_dev_replace_unlock(dev_replace);
	/* stop the copy that reuses the scrub machinery */
	btrfs_scrub_cancel(fs_info);

	/* commit so the CANCELED state reaches the dev_replace item */
	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
		return PTR_ERR(trans);
	}
	ret = btrfs_commit_transaction(trans, root);
	WARN_ON(ret);
	if (tgt_device)
		btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);

leave:
	mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
	return result;
}
| 684 | |||
| 685 | void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) | ||
| 686 | { | ||
| 687 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 688 | |||
| 689 | mutex_lock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 690 | btrfs_dev_replace_lock(dev_replace); | ||
| 691 | switch (dev_replace->replace_state) { | ||
| 692 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 693 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 694 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 695 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 696 | break; | ||
| 697 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 698 | dev_replace->replace_state = | ||
| 699 | BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; | ||
| 700 | dev_replace->time_stopped = btrfs_get_seconds_since_1970(); | ||
| 701 | dev_replace->item_needs_writeback = 1; | ||
| 702 | pr_info("btrfs: suspending dev_replace for unmount\n"); | ||
| 703 | break; | ||
| 704 | } | ||
| 705 | |||
| 706 | btrfs_dev_replace_unlock(dev_replace); | ||
| 707 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
| 708 | } | ||
| 709 | |||
/* resume dev_replace procedure that was interrupted by unmount */
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
{
	struct task_struct *task;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;

	btrfs_dev_replace_lock(dev_replace);
	switch (dev_replace->replace_state) {
	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
	case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
		/* no interrupted operation, nothing to resume */
		btrfs_dev_replace_unlock(dev_replace);
		return 0;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
		break;
	case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
		/* flip back to STARTED before handing off to the kthread */
		dev_replace->replace_state =
			BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED;
		break;
	}
	if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) {
		/* cannot copy without a target; the user must cancel */
		pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n"
			"btrfs: you may cancel the operation after 'mount -o degraded'\n");
		btrfs_dev_replace_unlock(dev_replace);
		return 0;
	}
	btrfs_dev_replace_unlock(dev_replace);

	/* claim the single slot for mutually exclusive device operations */
	WARN_ON(atomic_xchg(
		&fs_info->mutually_exclusive_operation_running, 1));
	/* continue the copy in the background; returns 0 or -errno */
	task = kthread_run(btrfs_dev_replace_kthread, fs_info, "btrfs-devrepl");
	return PTR_RET(task);
}
| 743 | |||
/*
 * Kernel thread that resumes an interrupted replace operation after
 * mount: log the current progress (best effort, skipped if the status
 * buffer cannot be allocated), continue the copy and release the
 * mutually-exclusive-operation slot.
 */
static int btrfs_dev_replace_kthread(void *data)
{
	struct btrfs_fs_info *fs_info = data;
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	struct btrfs_ioctl_dev_replace_args *status_args;
	u64 progress;

	status_args = kzalloc(sizeof(*status_args), GFP_NOFS);
	if (status_args) {
		btrfs_dev_replace_status(fs_info, status_args);
		progress = status_args->status.progress_1000;
		kfree(status_args);
		/* convert thousandths to whole percent */
		do_div(progress, 10);
		printk_in_rcu(KERN_INFO
			      "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n",
			      dev_replace->srcdev->missing ? "<missing disk>" :
				rcu_str_deref(dev_replace->srcdev->name),
			      dev_replace->srcdev->devid,
			      dev_replace->tgtdev ?
			       rcu_str_deref(dev_replace->tgtdev->name) :
			       "<missing target disk>",
			      (unsigned int)progress);
	}
	btrfs_dev_replace_continue_on_mount(fs_info);
	atomic_set(&fs_info->mutually_exclusive_operation_running, 0);

	return 0;
}
| 772 | |||
/*
 * Continue the scrub-based copy from the last committed cursor position
 * and run the regular finishing procedure.  Always returns 0; a copy
 * error is handled inside btrfs_dev_replace_finishing().
 */
static int btrfs_dev_replace_continue_on_mount(struct btrfs_fs_info *fs_info)
{
	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
	int ret;

	ret = btrfs_scrub_dev(fs_info, dev_replace->srcdev->devid,
			      dev_replace->committed_cursor_left,
			      dev_replace->srcdev->total_bytes,
			      &dev_replace->scrub_progress, 0, 1);
	ret = btrfs_dev_replace_finishing(fs_info, ret);
	WARN_ON(ret);
	return 0;
}
| 786 | |||
| 787 | int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace) | ||
| 788 | { | ||
| 789 | if (!dev_replace->is_valid) | ||
| 790 | return 0; | ||
| 791 | |||
| 792 | switch (dev_replace->replace_state) { | ||
| 793 | case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED: | ||
| 794 | case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED: | ||
| 795 | case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED: | ||
| 796 | return 0; | ||
| 797 | case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: | ||
| 798 | case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED: | ||
| 799 | /* | ||
| 800 | * return true even if tgtdev is missing (this is | ||
| 801 | * something that can happen if the dev_replace | ||
| 802 | * procedure is suspended by an umount and then | ||
| 803 | * the tgtdev is missing (or "btrfs dev scan") was | ||
| 804 | * not called and the filesystem is remounted | ||
| 805 | * in degraded state. This does not stop the | ||
| 806 | * dev_replace procedure. It needs to be canceled | ||
| 807 | * manually if the cancellation is wanted. | ||
| 808 | */ | ||
| 809 | break; | ||
| 810 | } | ||
| 811 | return 1; | ||
| 812 | } | ||
| 813 | |||
| 814 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace) | ||
| 815 | { | ||
| 816 | /* the beginning is just an optimization for the typical case */ | ||
| 817 | if (atomic_read(&dev_replace->nesting_level) == 0) { | ||
| 818 | acquire_lock: | ||
| 819 | /* this is not a nested case where the same thread | ||
| 820 | * is trying to acquire the same lock twice */ | ||
| 821 | mutex_lock(&dev_replace->lock); | ||
| 822 | mutex_lock(&dev_replace->lock_management_lock); | ||
| 823 | dev_replace->lock_owner = current->pid; | ||
| 824 | atomic_inc(&dev_replace->nesting_level); | ||
| 825 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 826 | return; | ||
| 827 | } | ||
| 828 | |||
| 829 | mutex_lock(&dev_replace->lock_management_lock); | ||
| 830 | if (atomic_read(&dev_replace->nesting_level) > 0 && | ||
| 831 | dev_replace->lock_owner == current->pid) { | ||
| 832 | WARN_ON(!mutex_is_locked(&dev_replace->lock)); | ||
| 833 | atomic_inc(&dev_replace->nesting_level); | ||
| 834 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 835 | return; | ||
| 836 | } | ||
| 837 | |||
| 838 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 839 | goto acquire_lock; | ||
| 840 | } | ||
| 841 | |||
| 842 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | ||
| 843 | { | ||
| 844 | WARN_ON(!mutex_is_locked(&dev_replace->lock)); | ||
| 845 | mutex_lock(&dev_replace->lock_management_lock); | ||
| 846 | WARN_ON(atomic_read(&dev_replace->nesting_level) < 1); | ||
| 847 | WARN_ON(dev_replace->lock_owner != current->pid); | ||
| 848 | atomic_dec(&dev_replace->nesting_level); | ||
| 849 | if (atomic_read(&dev_replace->nesting_level) == 0) { | ||
| 850 | dev_replace->lock_owner = 0; | ||
| 851 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 852 | mutex_unlock(&dev_replace->lock); | ||
| 853 | } else { | ||
| 854 | mutex_unlock(&dev_replace->lock_management_lock); | ||
| 855 | } | ||
| 856 | } | ||
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h new file mode 100644 index 000000000000..20035cbbf021 --- /dev/null +++ b/fs/btrfs/dev-replace.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | /* | ||
| 2 | * Copyright (C) STRATO AG 2012. All rights reserved. | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or | ||
| 5 | * modify it under the terms of the GNU General Public | ||
| 6 | * License v2 as published by the Free Software Foundation. | ||
| 7 | * | ||
| 8 | * This program is distributed in the hope that it will be useful, | ||
| 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 11 | * General Public License for more details. | ||
| 12 | * | ||
| 13 | * You should have received a copy of the GNU General Public | ||
| 14 | * License along with this program; if not, write to the | ||
| 15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 16 | * Boston, MA 021110-1307, USA. | ||
| 17 | */ | ||
| 18 | |||
| 19 | #if !defined(__BTRFS_DEV_REPLACE__) | ||
| 20 | #define __BTRFS_DEV_REPLACE__ | ||
| 21 | |||
| 22 | struct btrfs_ioctl_dev_replace_args; | ||
| 23 | |||
| 24 | int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info); | ||
| 25 | int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, | ||
| 26 | struct btrfs_fs_info *fs_info); | ||
| 27 | void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info); | ||
| 28 | int btrfs_dev_replace_start(struct btrfs_root *root, | ||
| 29 | struct btrfs_ioctl_dev_replace_args *args); | ||
| 30 | void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info, | ||
| 31 | struct btrfs_ioctl_dev_replace_args *args); | ||
| 32 | int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info, | ||
| 33 | struct btrfs_ioctl_dev_replace_args *args); | ||
| 34 | void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info); | ||
| 35 | int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info); | ||
| 36 | int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace); | ||
| 37 | void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace); | ||
| 38 | void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace); | ||
| 39 | |||
| 40 | static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value) | ||
| 41 | { | ||
| 42 | atomic64_inc(stat_value); | ||
| 43 | } | ||
| 44 | #endif | ||
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index c1a074d0696f..502c2158167c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
| @@ -213,6 +213,65 @@ struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans, | |||
| 213 | return btrfs_match_dir_item_name(root, path, name, name_len); | 213 | return btrfs_match_dir_item_name(root, path, name, name_len); |
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir, | ||
| 217 | const char *name, int name_len) | ||
| 218 | { | ||
| 219 | int ret; | ||
| 220 | struct btrfs_key key; | ||
| 221 | struct btrfs_dir_item *di; | ||
| 222 | int data_size; | ||
| 223 | struct extent_buffer *leaf; | ||
| 224 | int slot; | ||
| 225 | struct btrfs_path *path; | ||
| 226 | |||
| 227 | |||
| 228 | path = btrfs_alloc_path(); | ||
| 229 | if (!path) | ||
| 230 | return -ENOMEM; | ||
| 231 | |||
| 232 | key.objectid = dir; | ||
| 233 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | ||
| 234 | key.offset = btrfs_name_hash(name, name_len); | ||
| 235 | |||
| 236 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
| 237 | |||
| 238 | /* return back any errors */ | ||
| 239 | if (ret < 0) | ||
| 240 | goto out; | ||
| 241 | |||
| 242 | /* nothing found, we're safe */ | ||
| 243 | if (ret > 0) { | ||
| 244 | ret = 0; | ||
| 245 | goto out; | ||
| 246 | } | ||
| 247 | |||
| 248 | /* we found an item, look for our name in the item */ | ||
| 249 | di = btrfs_match_dir_item_name(root, path, name, name_len); | ||
| 250 | if (di) { | ||
| 251 | /* our exact name was found */ | ||
| 252 | ret = -EEXIST; | ||
| 253 | goto out; | ||
| 254 | } | ||
| 255 | |||
| 256 | /* | ||
| 257 | * see if there is room in the item to insert this | ||
| 258 | * name | ||
| 259 | */ | ||
| 260 | data_size = sizeof(*di) + name_len + sizeof(struct btrfs_item); | ||
| 261 | leaf = path->nodes[0]; | ||
| 262 | slot = path->slots[0]; | ||
| 263 | if (data_size + btrfs_item_size_nr(leaf, slot) + | ||
| 264 | sizeof(struct btrfs_item) > BTRFS_LEAF_DATA_SIZE(root)) { | ||
| 265 | ret = -EOVERFLOW; | ||
| 266 | } else { | ||
| 267 | /* plenty of insertion room */ | ||
| 268 | ret = 0; | ||
| 269 | } | ||
| 270 | out: | ||
| 271 | btrfs_free_path(path); | ||
| 272 | return ret; | ||
| 273 | } | ||
| 274 | |||
| 216 | /* | 275 | /* |
| 217 | * lookup a directory item based on index. 'dir' is the objectid | 276 | * lookup a directory item based on index. 'dir' is the objectid |
| 218 | * we're searching in, and 'mod' tells us if you plan on deleting the | 277 | * we're searching in, and 'mod' tells us if you plan on deleting the |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7cda51995c1e..a8f652dc940b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -45,6 +45,7 @@ | |||
| 45 | #include "inode-map.h" | 45 | #include "inode-map.h" |
| 46 | #include "check-integrity.h" | 46 | #include "check-integrity.h" |
| 47 | #include "rcu-string.h" | 47 | #include "rcu-string.h" |
| 48 | #include "dev-replace.h" | ||
| 48 | 49 | ||
| 49 | #ifdef CONFIG_X86 | 50 | #ifdef CONFIG_X86 |
| 50 | #include <asm/cpufeature.h> | 51 | #include <asm/cpufeature.h> |
| @@ -387,7 +388,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
| 387 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | 388 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) |
| 388 | break; | 389 | break; |
| 389 | 390 | ||
| 390 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 391 | num_copies = btrfs_num_copies(root->fs_info, |
| 391 | eb->start, eb->len); | 392 | eb->start, eb->len); |
| 392 | if (num_copies == 1) | 393 | if (num_copies == 1) |
| 393 | break; | 394 | break; |
| @@ -852,11 +853,16 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 852 | int mirror_num, unsigned long bio_flags, | 853 | int mirror_num, unsigned long bio_flags, |
| 853 | u64 bio_offset) | 854 | u64 bio_offset) |
| 854 | { | 855 | { |
| 856 | int ret; | ||
| 857 | |||
| 855 | /* | 858 | /* |
| 856 | * when we're called for a write, we're already in the async | 859 | * when we're called for a write, we're already in the async |
| 857 | * submission context. Just jump into btrfs_map_bio | 860 | * submission context. Just jump into btrfs_map_bio |
| 858 | */ | 861 | */ |
| 859 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); | 862 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, mirror_num, 1); |
| 863 | if (ret) | ||
| 864 | bio_endio(bio, ret); | ||
| 865 | return ret; | ||
| 860 | } | 866 | } |
| 861 | 867 | ||
| 862 | static int check_async_write(struct inode *inode, unsigned long bio_flags) | 868 | static int check_async_write(struct inode *inode, unsigned long bio_flags) |
| @@ -878,7 +884,6 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 878 | int ret; | 884 | int ret; |
| 879 | 885 | ||
| 880 | if (!(rw & REQ_WRITE)) { | 886 | if (!(rw & REQ_WRITE)) { |
| 881 | |||
| 882 | /* | 887 | /* |
| 883 | * called for a read, do the setup so that checksum validation | 888 | * called for a read, do the setup so that checksum validation |
| 884 | * can happen in the async kernel threads | 889 | * can happen in the async kernel threads |
| @@ -886,26 +891,32 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 886 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | 891 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, |
| 887 | bio, 1); | 892 | bio, 1); |
| 888 | if (ret) | 893 | if (ret) |
| 889 | return ret; | 894 | goto out_w_error; |
| 890 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 895 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
| 891 | mirror_num, 0); | 896 | mirror_num, 0); |
| 892 | } else if (!async) { | 897 | } else if (!async) { |
| 893 | ret = btree_csum_one_bio(bio); | 898 | ret = btree_csum_one_bio(bio); |
| 894 | if (ret) | 899 | if (ret) |
| 895 | return ret; | 900 | goto out_w_error; |
| 896 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 901 | ret = btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
| 897 | mirror_num, 0); | 902 | mirror_num, 0); |
| 903 | } else { | ||
| 904 | /* | ||
| 905 | * kthread helpers are used to submit writes so that | ||
| 906 | * checksumming can happen in parallel across all CPUs | ||
| 907 | */ | ||
| 908 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 909 | inode, rw, bio, mirror_num, 0, | ||
| 910 | bio_offset, | ||
| 911 | __btree_submit_bio_start, | ||
| 912 | __btree_submit_bio_done); | ||
| 898 | } | 913 | } |
| 899 | 914 | ||
| 900 | /* | 915 | if (ret) { |
| 901 | * kthread helpers are used to submit writes so that checksumming | 916 | out_w_error: |
| 902 | * can happen in parallel across all CPUs | 917 | bio_endio(bio, ret); |
| 903 | */ | 918 | } |
| 904 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 919 | return ret; |
| 905 | inode, rw, bio, mirror_num, 0, | ||
| 906 | bio_offset, | ||
| 907 | __btree_submit_bio_start, | ||
| 908 | __btree_submit_bio_done); | ||
| 909 | } | 920 | } |
| 910 | 921 | ||
| 911 | #ifdef CONFIG_MIGRATION | 922 | #ifdef CONFIG_MIGRATION |
| @@ -990,6 +1001,7 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
| 990 | 1001 | ||
| 991 | static int btree_set_page_dirty(struct page *page) | 1002 | static int btree_set_page_dirty(struct page *page) |
| 992 | { | 1003 | { |
| 1004 | #ifdef DEBUG | ||
| 993 | struct extent_buffer *eb; | 1005 | struct extent_buffer *eb; |
| 994 | 1006 | ||
| 995 | BUG_ON(!PagePrivate(page)); | 1007 | BUG_ON(!PagePrivate(page)); |
| @@ -998,6 +1010,7 @@ static int btree_set_page_dirty(struct page *page) | |||
| 998 | BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); | 1010 | BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); |
| 999 | BUG_ON(!atomic_read(&eb->refs)); | 1011 | BUG_ON(!atomic_read(&eb->refs)); |
| 1000 | btrfs_assert_tree_locked(eb); | 1012 | btrfs_assert_tree_locked(eb); |
| 1013 | #endif | ||
| 1001 | return __set_page_dirty_nobuffers(page); | 1014 | return __set_page_dirty_nobuffers(page); |
| 1002 | } | 1015 | } |
| 1003 | 1016 | ||
| @@ -1129,11 +1142,11 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 1129 | root->fs_info->dirty_metadata_bytes); | 1142 | root->fs_info->dirty_metadata_bytes); |
| 1130 | } | 1143 | } |
| 1131 | spin_unlock(&root->fs_info->delalloc_lock); | 1144 | spin_unlock(&root->fs_info->delalloc_lock); |
| 1132 | } | ||
| 1133 | 1145 | ||
| 1134 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | 1146 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
| 1135 | btrfs_set_lock_blocking(buf); | 1147 | btrfs_set_lock_blocking(buf); |
| 1136 | clear_extent_buffer_dirty(buf); | 1148 | clear_extent_buffer_dirty(buf); |
| 1149 | } | ||
| 1137 | } | 1150 | } |
| 1138 | } | 1151 | } |
| 1139 | 1152 | ||
| @@ -1193,7 +1206,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 1193 | root->root_key.objectid = objectid; | 1206 | root->root_key.objectid = objectid; |
| 1194 | root->anon_dev = 0; | 1207 | root->anon_dev = 0; |
| 1195 | 1208 | ||
| 1196 | spin_lock_init(&root->root_times_lock); | 1209 | spin_lock_init(&root->root_item_lock); |
| 1197 | } | 1210 | } |
| 1198 | 1211 | ||
| 1199 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, | 1212 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, |
| @@ -2131,6 +2144,11 @@ int open_ctree(struct super_block *sb, | |||
| 2131 | init_rwsem(&fs_info->extent_commit_sem); | 2144 | init_rwsem(&fs_info->extent_commit_sem); |
| 2132 | init_rwsem(&fs_info->cleanup_work_sem); | 2145 | init_rwsem(&fs_info->cleanup_work_sem); |
| 2133 | init_rwsem(&fs_info->subvol_sem); | 2146 | init_rwsem(&fs_info->subvol_sem); |
| 2147 | fs_info->dev_replace.lock_owner = 0; | ||
| 2148 | atomic_set(&fs_info->dev_replace.nesting_level, 0); | ||
| 2149 | mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount); | ||
| 2150 | mutex_init(&fs_info->dev_replace.lock_management_lock); | ||
| 2151 | mutex_init(&fs_info->dev_replace.lock); | ||
| 2134 | 2152 | ||
| 2135 | spin_lock_init(&fs_info->qgroup_lock); | 2153 | spin_lock_init(&fs_info->qgroup_lock); |
| 2136 | fs_info->qgroup_tree = RB_ROOT; | 2154 | fs_info->qgroup_tree = RB_ROOT; |
| @@ -2279,6 +2297,10 @@ int open_ctree(struct super_block *sb, | |||
| 2279 | fs_info->thread_pool_size, | 2297 | fs_info->thread_pool_size, |
| 2280 | &fs_info->generic_worker); | 2298 | &fs_info->generic_worker); |
| 2281 | 2299 | ||
| 2300 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | ||
| 2301 | fs_info->thread_pool_size, | ||
| 2302 | &fs_info->generic_worker); | ||
| 2303 | |||
| 2282 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2304 | btrfs_init_workers(&fs_info->submit_workers, "submit", |
| 2283 | min_t(u64, fs_devices->num_devices, | 2305 | min_t(u64, fs_devices->num_devices, |
| 2284 | fs_info->thread_pool_size), | 2306 | fs_info->thread_pool_size), |
| @@ -2350,6 +2372,7 @@ int open_ctree(struct super_block *sb, | |||
| 2350 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2372 | ret |= btrfs_start_workers(&fs_info->delayed_workers); |
| 2351 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2373 | ret |= btrfs_start_workers(&fs_info->caching_workers); |
| 2352 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2374 | ret |= btrfs_start_workers(&fs_info->readahead_workers); |
| 2375 | ret |= btrfs_start_workers(&fs_info->flush_workers); | ||
| 2353 | if (ret) { | 2376 | if (ret) { |
| 2354 | err = -ENOMEM; | 2377 | err = -ENOMEM; |
| 2355 | goto fail_sb_buffer; | 2378 | goto fail_sb_buffer; |
| @@ -2418,7 +2441,11 @@ int open_ctree(struct super_block *sb, | |||
| 2418 | goto fail_tree_roots; | 2441 | goto fail_tree_roots; |
| 2419 | } | 2442 | } |
| 2420 | 2443 | ||
| 2421 | btrfs_close_extra_devices(fs_devices); | 2444 | /* |
| 2445 | * keep the device that is marked to be the target device for the | ||
| 2446 | * dev_replace procedure | ||
| 2447 | */ | ||
| 2448 | btrfs_close_extra_devices(fs_info, fs_devices, 0); | ||
| 2422 | 2449 | ||
| 2423 | if (!fs_devices->latest_bdev) { | 2450 | if (!fs_devices->latest_bdev) { |
| 2424 | printk(KERN_CRIT "btrfs: failed to read devices on %s\n", | 2451 | printk(KERN_CRIT "btrfs: failed to read devices on %s\n", |
| @@ -2490,6 +2517,14 @@ retry_root_backup: | |||
| 2490 | goto fail_block_groups; | 2517 | goto fail_block_groups; |
| 2491 | } | 2518 | } |
| 2492 | 2519 | ||
| 2520 | ret = btrfs_init_dev_replace(fs_info); | ||
| 2521 | if (ret) { | ||
| 2522 | pr_err("btrfs: failed to init dev_replace: %d\n", ret); | ||
| 2523 | goto fail_block_groups; | ||
| 2524 | } | ||
| 2525 | |||
| 2526 | btrfs_close_extra_devices(fs_info, fs_devices, 1); | ||
| 2527 | |||
| 2493 | ret = btrfs_init_space_info(fs_info); | 2528 | ret = btrfs_init_space_info(fs_info); |
| 2494 | if (ret) { | 2529 | if (ret) { |
| 2495 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); | 2530 | printk(KERN_ERR "Failed to initial space info: %d\n", ret); |
| @@ -2503,6 +2538,13 @@ retry_root_backup: | |||
| 2503 | } | 2538 | } |
| 2504 | fs_info->num_tolerated_disk_barrier_failures = | 2539 | fs_info->num_tolerated_disk_barrier_failures = |
| 2505 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | 2540 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); |
| 2541 | if (fs_info->fs_devices->missing_devices > | ||
| 2542 | fs_info->num_tolerated_disk_barrier_failures && | ||
| 2543 | !(sb->s_flags & MS_RDONLY)) { | ||
| 2544 | printk(KERN_WARNING | ||
| 2545 | "Btrfs: too many missing devices, writeable mount is not allowed\n"); | ||
| 2546 | goto fail_block_groups; | ||
| 2547 | } | ||
| 2506 | 2548 | ||
| 2507 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 2549 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
| 2508 | "btrfs-cleaner"); | 2550 | "btrfs-cleaner"); |
| @@ -2631,6 +2673,13 @@ retry_root_backup: | |||
| 2631 | return ret; | 2673 | return ret; |
| 2632 | } | 2674 | } |
| 2633 | 2675 | ||
| 2676 | ret = btrfs_resume_dev_replace_async(fs_info); | ||
| 2677 | if (ret) { | ||
| 2678 | pr_warn("btrfs: failed to resume dev_replace\n"); | ||
| 2679 | close_ctree(tree_root); | ||
| 2680 | return ret; | ||
| 2681 | } | ||
| 2682 | |||
| 2634 | return 0; | 2683 | return 0; |
| 2635 | 2684 | ||
| 2636 | fail_qgroup: | 2685 | fail_qgroup: |
| @@ -2667,6 +2716,7 @@ fail_sb_buffer: | |||
| 2667 | btrfs_stop_workers(&fs_info->submit_workers); | 2716 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2668 | btrfs_stop_workers(&fs_info->delayed_workers); | 2717 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 2669 | btrfs_stop_workers(&fs_info->caching_workers); | 2718 | btrfs_stop_workers(&fs_info->caching_workers); |
| 2719 | btrfs_stop_workers(&fs_info->flush_workers); | ||
| 2670 | fail_alloc: | 2720 | fail_alloc: |
| 2671 | fail_iput: | 2721 | fail_iput: |
| 2672 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2722 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
| @@ -3270,16 +3320,18 @@ int close_ctree(struct btrfs_root *root) | |||
| 3270 | smp_mb(); | 3320 | smp_mb(); |
| 3271 | 3321 | ||
| 3272 | /* pause restriper - we want to resume on mount */ | 3322 | /* pause restriper - we want to resume on mount */ |
| 3273 | btrfs_pause_balance(root->fs_info); | 3323 | btrfs_pause_balance(fs_info); |
| 3324 | |||
| 3325 | btrfs_dev_replace_suspend_for_unmount(fs_info); | ||
| 3274 | 3326 | ||
| 3275 | btrfs_scrub_cancel(root); | 3327 | btrfs_scrub_cancel(fs_info); |
| 3276 | 3328 | ||
| 3277 | /* wait for any defraggers to finish */ | 3329 | /* wait for any defraggers to finish */ |
| 3278 | wait_event(fs_info->transaction_wait, | 3330 | wait_event(fs_info->transaction_wait, |
| 3279 | (atomic_read(&fs_info->defrag_running) == 0)); | 3331 | (atomic_read(&fs_info->defrag_running) == 0)); |
| 3280 | 3332 | ||
| 3281 | /* clear out the rbtree of defraggable inodes */ | 3333 | /* clear out the rbtree of defraggable inodes */ |
| 3282 | btrfs_run_defrag_inodes(fs_info); | 3334 | btrfs_cleanup_defrag_inodes(fs_info); |
| 3283 | 3335 | ||
| 3284 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 3336 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
| 3285 | ret = btrfs_commit_super(root); | 3337 | ret = btrfs_commit_super(root); |
| @@ -3339,6 +3391,7 @@ int close_ctree(struct btrfs_root *root) | |||
| 3339 | btrfs_stop_workers(&fs_info->delayed_workers); | 3391 | btrfs_stop_workers(&fs_info->delayed_workers); |
| 3340 | btrfs_stop_workers(&fs_info->caching_workers); | 3392 | btrfs_stop_workers(&fs_info->caching_workers); |
| 3341 | btrfs_stop_workers(&fs_info->readahead_workers); | 3393 | btrfs_stop_workers(&fs_info->readahead_workers); |
| 3394 | btrfs_stop_workers(&fs_info->flush_workers); | ||
| 3342 | 3395 | ||
| 3343 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 3396 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
| 3344 | if (btrfs_test_opt(root, CHECK_INTEGRITY)) | 3397 | if (btrfs_test_opt(root, CHECK_INTEGRITY)) |
| @@ -3383,14 +3436,12 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
| 3383 | int was_dirty; | 3436 | int was_dirty; |
| 3384 | 3437 | ||
| 3385 | btrfs_assert_tree_locked(buf); | 3438 | btrfs_assert_tree_locked(buf); |
| 3386 | if (transid != root->fs_info->generation) { | 3439 | if (transid != root->fs_info->generation) |
| 3387 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | 3440 | WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, " |
| 3388 | "found %llu running %llu\n", | 3441 | "found %llu running %llu\n", |
| 3389 | (unsigned long long)buf->start, | 3442 | (unsigned long long)buf->start, |
| 3390 | (unsigned long long)transid, | 3443 | (unsigned long long)transid, |
| 3391 | (unsigned long long)root->fs_info->generation); | 3444 | (unsigned long long)root->fs_info->generation); |
| 3392 | WARN_ON(1); | ||
| 3393 | } | ||
| 3394 | was_dirty = set_extent_buffer_dirty(buf); | 3445 | was_dirty = set_extent_buffer_dirty(buf); |
| 3395 | if (!was_dirty) { | 3446 | if (!was_dirty) { |
| 3396 | spin_lock(&root->fs_info->delalloc_lock); | 3447 | spin_lock(&root->fs_info->delalloc_lock); |
| @@ -3399,7 +3450,8 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
| 3399 | } | 3450 | } |
| 3400 | } | 3451 | } |
| 3401 | 3452 | ||
| 3402 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 3453 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, |
| 3454 | int flush_delayed) | ||
| 3403 | { | 3455 | { |
| 3404 | /* | 3456 | /* |
| 3405 | * looks as though older kernels can get into trouble with | 3457 | * looks as though older kernels can get into trouble with |
| @@ -3411,36 +3463,26 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
| 3411 | if (current->flags & PF_MEMALLOC) | 3463 | if (current->flags & PF_MEMALLOC) |
| 3412 | return; | 3464 | return; |
| 3413 | 3465 | ||
| 3414 | btrfs_balance_delayed_items(root); | 3466 | if (flush_delayed) |
| 3467 | btrfs_balance_delayed_items(root); | ||
| 3415 | 3468 | ||
| 3416 | num_dirty = root->fs_info->dirty_metadata_bytes; | 3469 | num_dirty = root->fs_info->dirty_metadata_bytes; |
| 3417 | 3470 | ||
| 3418 | if (num_dirty > thresh) { | 3471 | if (num_dirty > thresh) { |
| 3419 | balance_dirty_pages_ratelimited_nr( | 3472 | balance_dirty_pages_ratelimited( |
| 3420 | root->fs_info->btree_inode->i_mapping, 1); | 3473 | root->fs_info->btree_inode->i_mapping); |
| 3421 | } | 3474 | } |
| 3422 | return; | 3475 | return; |
| 3423 | } | 3476 | } |
| 3424 | 3477 | ||
| 3425 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 3478 | void btrfs_btree_balance_dirty(struct btrfs_root *root) |
| 3426 | { | 3479 | { |
| 3427 | /* | 3480 | __btrfs_btree_balance_dirty(root, 1); |
| 3428 | * looks as though older kernels can get into trouble with | 3481 | } |
| 3429 | * this code, they end up stuck in balance_dirty_pages forever | ||
| 3430 | */ | ||
| 3431 | u64 num_dirty; | ||
| 3432 | unsigned long thresh = 32 * 1024 * 1024; | ||
| 3433 | |||
| 3434 | if (current->flags & PF_MEMALLOC) | ||
| 3435 | return; | ||
| 3436 | |||
| 3437 | num_dirty = root->fs_info->dirty_metadata_bytes; | ||
| 3438 | 3482 | ||
| 3439 | if (num_dirty > thresh) { | 3483 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root) |
| 3440 | balance_dirty_pages_ratelimited_nr( | 3484 | { |
| 3441 | root->fs_info->btree_inode->i_mapping, 1); | 3485 | __btrfs_btree_balance_dirty(root, 0); |
| 3442 | } | ||
| 3443 | return; | ||
| 3444 | } | 3486 | } |
| 3445 | 3487 | ||
| 3446 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | 3488 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 2025a9132c16..305c33efb0e3 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -62,8 +62,8 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
| 62 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, | 62 | struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, |
| 63 | struct btrfs_key *location); | 63 | struct btrfs_key *location); |
| 64 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); | 64 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info); |
| 65 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 65 | void btrfs_btree_balance_dirty(struct btrfs_root *root); |
| 66 | void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 66 | void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root); |
| 67 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 67 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); |
| 68 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 68 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
| 69 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, | 69 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 3d3e2c17d8d1..5a3327b8f90d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -33,6 +33,7 @@ | |||
| 33 | #include "volumes.h" | 33 | #include "volumes.h" |
| 34 | #include "locking.h" | 34 | #include "locking.h" |
| 35 | #include "free-space-cache.h" | 35 | #include "free-space-cache.h" |
| 36 | #include "math.h" | ||
| 36 | 37 | ||
| 37 | #undef SCRAMBLE_DELAYED_REFS | 38 | #undef SCRAMBLE_DELAYED_REFS |
| 38 | 39 | ||
| @@ -649,24 +650,6 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info) | |||
| 649 | rcu_read_unlock(); | 650 | rcu_read_unlock(); |
| 650 | } | 651 | } |
| 651 | 652 | ||
| 652 | static u64 div_factor(u64 num, int factor) | ||
| 653 | { | ||
| 654 | if (factor == 10) | ||
| 655 | return num; | ||
| 656 | num *= factor; | ||
| 657 | do_div(num, 10); | ||
| 658 | return num; | ||
| 659 | } | ||
| 660 | |||
| 661 | static u64 div_factor_fine(u64 num, int factor) | ||
| 662 | { | ||
| 663 | if (factor == 100) | ||
| 664 | return num; | ||
| 665 | num *= factor; | ||
| 666 | do_div(num, 100); | ||
| 667 | return num; | ||
| 668 | } | ||
| 669 | |||
| 670 | u64 btrfs_find_block_group(struct btrfs_root *root, | 653 | u64 btrfs_find_block_group(struct btrfs_root *root, |
| 671 | u64 search_start, u64 search_hint, int owner) | 654 | u64 search_start, u64 search_hint, int owner) |
| 672 | { | 655 | { |
| @@ -1835,7 +1818,7 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
| 1835 | 1818 | ||
| 1836 | 1819 | ||
| 1837 | /* Tell the block device(s) that the sectors can be discarded */ | 1820 | /* Tell the block device(s) that the sectors can be discarded */ |
| 1838 | ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, | 1821 | ret = btrfs_map_block(root->fs_info, REQ_DISCARD, |
| 1839 | bytenr, &num_bytes, &bbio, 0); | 1822 | bytenr, &num_bytes, &bbio, 0); |
| 1840 | /* Error condition is -ENOMEM */ | 1823 | /* Error condition is -ENOMEM */ |
| 1841 | if (!ret) { | 1824 | if (!ret) { |
| @@ -2314,6 +2297,9 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2314 | kfree(extent_op); | 2297 | kfree(extent_op); |
| 2315 | 2298 | ||
| 2316 | if (ret) { | 2299 | if (ret) { |
| 2300 | list_del_init(&locked_ref->cluster); | ||
| 2301 | mutex_unlock(&locked_ref->mutex); | ||
| 2302 | |||
| 2317 | printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); | 2303 | printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); |
| 2318 | spin_lock(&delayed_refs->lock); | 2304 | spin_lock(&delayed_refs->lock); |
| 2319 | return ret; | 2305 | return ret; |
| @@ -2356,6 +2342,10 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
| 2356 | count++; | 2342 | count++; |
| 2357 | 2343 | ||
| 2358 | if (ret) { | 2344 | if (ret) { |
| 2345 | if (locked_ref) { | ||
| 2346 | list_del_init(&locked_ref->cluster); | ||
| 2347 | mutex_unlock(&locked_ref->mutex); | ||
| 2348 | } | ||
| 2359 | printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); | 2349 | printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); |
| 2360 | spin_lock(&delayed_refs->lock); | 2350 | spin_lock(&delayed_refs->lock); |
| 2361 | return ret; | 2351 | return ret; |
| @@ -3661,7 +3651,7 @@ out: | |||
| 3661 | 3651 | ||
| 3662 | static int can_overcommit(struct btrfs_root *root, | 3652 | static int can_overcommit(struct btrfs_root *root, |
| 3663 | struct btrfs_space_info *space_info, u64 bytes, | 3653 | struct btrfs_space_info *space_info, u64 bytes, |
| 3664 | int flush) | 3654 | enum btrfs_reserve_flush_enum flush) |
| 3665 | { | 3655 | { |
| 3666 | u64 profile = btrfs_get_alloc_profile(root, 0); | 3656 | u64 profile = btrfs_get_alloc_profile(root, 0); |
| 3667 | u64 avail; | 3657 | u64 avail; |
| @@ -3685,11 +3675,11 @@ static int can_overcommit(struct btrfs_root *root, | |||
| 3685 | avail >>= 1; | 3675 | avail >>= 1; |
| 3686 | 3676 | ||
| 3687 | /* | 3677 | /* |
| 3688 | * If we aren't flushing don't let us overcommit too much, say | 3678 | * If we aren't flushing all things, let us overcommit up to |
| 3689 | * 1/8th of the space. If we can flush, let it overcommit up to | 3679 | * 1/2th of the space. If we can flush, don't let us overcommit |
| 3690 | * 1/2 of the space. | 3680 | * too much, let it overcommit up to 1/8 of the space. |
| 3691 | */ | 3681 | */ |
| 3692 | if (flush) | 3682 | if (flush == BTRFS_RESERVE_FLUSH_ALL) |
| 3693 | avail >>= 3; | 3683 | avail >>= 3; |
| 3694 | else | 3684 | else |
| 3695 | avail >>= 1; | 3685 | avail >>= 1; |
| @@ -3699,6 +3689,20 @@ static int can_overcommit(struct btrfs_root *root, | |||
| 3699 | return 0; | 3689 | return 0; |
| 3700 | } | 3690 | } |
| 3701 | 3691 | ||
| 3692 | static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, | ||
| 3693 | unsigned long nr_pages, | ||
| 3694 | enum wb_reason reason) | ||
| 3695 | { | ||
| 3696 | if (!writeback_in_progress(sb->s_bdi) && | ||
| 3697 | down_read_trylock(&sb->s_umount)) { | ||
| 3698 | writeback_inodes_sb_nr(sb, nr_pages, reason); | ||
| 3699 | up_read(&sb->s_umount); | ||
| 3700 | return 1; | ||
| 3701 | } | ||
| 3702 | |||
| 3703 | return 0; | ||
| 3704 | } | ||
| 3705 | |||
| 3702 | /* | 3706 | /* |
| 3703 | * shrink metadata reservation for delalloc | 3707 | * shrink metadata reservation for delalloc |
| 3704 | */ | 3708 | */ |
| @@ -3713,6 +3717,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3713 | long time_left; | 3717 | long time_left; |
| 3714 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; | 3718 | unsigned long nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; |
| 3715 | int loops = 0; | 3719 | int loops = 0; |
| 3720 | enum btrfs_reserve_flush_enum flush; | ||
| 3716 | 3721 | ||
| 3717 | trans = (struct btrfs_trans_handle *)current->journal_info; | 3722 | trans = (struct btrfs_trans_handle *)current->journal_info; |
| 3718 | block_rsv = &root->fs_info->delalloc_block_rsv; | 3723 | block_rsv = &root->fs_info->delalloc_block_rsv; |
| @@ -3730,8 +3735,9 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3730 | while (delalloc_bytes && loops < 3) { | 3735 | while (delalloc_bytes && loops < 3) { |
| 3731 | max_reclaim = min(delalloc_bytes, to_reclaim); | 3736 | max_reclaim = min(delalloc_bytes, to_reclaim); |
| 3732 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 3737 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
| 3733 | writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages, | 3738 | writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, |
| 3734 | WB_REASON_FS_FREE_SPACE); | 3739 | nr_pages, |
| 3740 | WB_REASON_FS_FREE_SPACE); | ||
| 3735 | 3741 | ||
| 3736 | /* | 3742 | /* |
| 3737 | * We need to wait for the async pages to actually start before | 3743 | * We need to wait for the async pages to actually start before |
| @@ -3740,8 +3746,12 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
| 3740 | wait_event(root->fs_info->async_submit_wait, | 3746 | wait_event(root->fs_info->async_submit_wait, |
| 3741 | !atomic_read(&root->fs_info->async_delalloc_pages)); | 3747 | !atomic_read(&root->fs_info->async_delalloc_pages)); |
| 3742 | 3748 | ||
| 3749 | if (!trans) | ||
| 3750 | flush = BTRFS_RESERVE_FLUSH_ALL; | ||
| 3751 | else | ||
| 3752 | flush = BTRFS_RESERVE_NO_FLUSH; | ||
| 3743 | spin_lock(&space_info->lock); | 3753 | spin_lock(&space_info->lock); |
| 3744 | if (can_overcommit(root, space_info, orig, !trans)) { | 3754 | if (can_overcommit(root, space_info, orig, flush)) { |
| 3745 | spin_unlock(&space_info->lock); | 3755 | spin_unlock(&space_info->lock); |
| 3746 | break; | 3756 | break; |
| 3747 | } | 3757 | } |
| @@ -3888,7 +3898,7 @@ static int flush_space(struct btrfs_root *root, | |||
| 3888 | * @root - the root we're allocating for | 3898 | * @root - the root we're allocating for |
| 3889 | * @block_rsv - the block_rsv we're allocating for | 3899 | * @block_rsv - the block_rsv we're allocating for |
| 3890 | * @orig_bytes - the number of bytes we want | 3900 | * @orig_bytes - the number of bytes we want |
| 3891 | * @flush - wether or not we can flush to make our reservation | 3901 | * @flush - whether or not we can flush to make our reservation |
| 3892 | * | 3902 | * |
| 3893 | * This will reserve orgi_bytes number of bytes from the space info associated | 3903 | * This will reserve orgi_bytes number of bytes from the space info associated |
| 3894 | * with the block_rsv. If there is not enough space it will make an attempt to | 3904 | * with the block_rsv. If there is not enough space it will make an attempt to |
| @@ -3899,7 +3909,8 @@ static int flush_space(struct btrfs_root *root, | |||
| 3899 | */ | 3909 | */ |
| 3900 | static int reserve_metadata_bytes(struct btrfs_root *root, | 3910 | static int reserve_metadata_bytes(struct btrfs_root *root, |
| 3901 | struct btrfs_block_rsv *block_rsv, | 3911 | struct btrfs_block_rsv *block_rsv, |
| 3902 | u64 orig_bytes, int flush) | 3912 | u64 orig_bytes, |
| 3913 | enum btrfs_reserve_flush_enum flush) | ||
| 3903 | { | 3914 | { |
| 3904 | struct btrfs_space_info *space_info = block_rsv->space_info; | 3915 | struct btrfs_space_info *space_info = block_rsv->space_info; |
| 3905 | u64 used; | 3916 | u64 used; |
| @@ -3912,10 +3923,11 @@ again: | |||
| 3912 | ret = 0; | 3923 | ret = 0; |
| 3913 | spin_lock(&space_info->lock); | 3924 | spin_lock(&space_info->lock); |
| 3914 | /* | 3925 | /* |
| 3915 | * We only want to wait if somebody other than us is flushing and we are | 3926 | * We only want to wait if somebody other than us is flushing and we |
| 3916 | * actually alloed to flush. | 3927 | * are actually allowed to flush all things. |
| 3917 | */ | 3928 | */ |
| 3918 | while (flush && !flushing && space_info->flush) { | 3929 | while (flush == BTRFS_RESERVE_FLUSH_ALL && !flushing && |
| 3930 | space_info->flush) { | ||
| 3919 | spin_unlock(&space_info->lock); | 3931 | spin_unlock(&space_info->lock); |
| 3920 | /* | 3932 | /* |
| 3921 | * If we have a trans handle we can't wait because the flusher | 3933 | * If we have a trans handle we can't wait because the flusher |
| @@ -3981,23 +3993,40 @@ again: | |||
| 3981 | * Couldn't make our reservation, save our place so while we're trying | 3993 | * Couldn't make our reservation, save our place so while we're trying |
| 3982 | * to reclaim space we can actually use it instead of somebody else | 3994 | * to reclaim space we can actually use it instead of somebody else |
| 3983 | * stealing it from us. | 3995 | * stealing it from us. |
| 3996 | * | ||
| 3997 | * We make the other tasks wait for the flush only when we can flush | ||
| 3998 | * all things. | ||
| 3984 | */ | 3999 | */ |
| 3985 | if (ret && flush) { | 4000 | if (ret && flush != BTRFS_RESERVE_NO_FLUSH) { |
| 3986 | flushing = true; | 4001 | flushing = true; |
| 3987 | space_info->flush = 1; | 4002 | space_info->flush = 1; |
| 3988 | } | 4003 | } |
| 3989 | 4004 | ||
| 3990 | spin_unlock(&space_info->lock); | 4005 | spin_unlock(&space_info->lock); |
| 3991 | 4006 | ||
| 3992 | if (!ret || !flush) | 4007 | if (!ret || flush == BTRFS_RESERVE_NO_FLUSH) |
| 3993 | goto out; | 4008 | goto out; |
| 3994 | 4009 | ||
| 3995 | ret = flush_space(root, space_info, num_bytes, orig_bytes, | 4010 | ret = flush_space(root, space_info, num_bytes, orig_bytes, |
| 3996 | flush_state); | 4011 | flush_state); |
| 3997 | flush_state++; | 4012 | flush_state++; |
| 4013 | |||
| 4014 | /* | ||
| 4015 | * If we are FLUSH_LIMIT, we can not flush delalloc, or the deadlock | ||
| 4016 | * would happen. So skip delalloc flush. | ||
| 4017 | */ | ||
| 4018 | if (flush == BTRFS_RESERVE_FLUSH_LIMIT && | ||
| 4019 | (flush_state == FLUSH_DELALLOC || | ||
| 4020 | flush_state == FLUSH_DELALLOC_WAIT)) | ||
| 4021 | flush_state = ALLOC_CHUNK; | ||
| 4022 | |||
| 3998 | if (!ret) | 4023 | if (!ret) |
| 3999 | goto again; | 4024 | goto again; |
| 4000 | else if (flush_state <= COMMIT_TRANS) | 4025 | else if (flush == BTRFS_RESERVE_FLUSH_LIMIT && |
| 4026 | flush_state < COMMIT_TRANS) | ||
| 4027 | goto again; | ||
| 4028 | else if (flush == BTRFS_RESERVE_FLUSH_ALL && | ||
| 4029 | flush_state <= COMMIT_TRANS) | ||
| 4001 | goto again; | 4030 | goto again; |
| 4002 | 4031 | ||
| 4003 | out: | 4032 | out: |
| @@ -4148,9 +4177,9 @@ void btrfs_free_block_rsv(struct btrfs_root *root, | |||
| 4148 | kfree(rsv); | 4177 | kfree(rsv); |
| 4149 | } | 4178 | } |
| 4150 | 4179 | ||
| 4151 | static inline int __block_rsv_add(struct btrfs_root *root, | 4180 | int btrfs_block_rsv_add(struct btrfs_root *root, |
| 4152 | struct btrfs_block_rsv *block_rsv, | 4181 | struct btrfs_block_rsv *block_rsv, u64 num_bytes, |
| 4153 | u64 num_bytes, int flush) | 4182 | enum btrfs_reserve_flush_enum flush) |
| 4154 | { | 4183 | { |
| 4155 | int ret; | 4184 | int ret; |
| 4156 | 4185 | ||
| @@ -4166,20 +4195,6 @@ static inline int __block_rsv_add(struct btrfs_root *root, | |||
| 4166 | return ret; | 4195 | return ret; |
| 4167 | } | 4196 | } |
| 4168 | 4197 | ||
| 4169 | int btrfs_block_rsv_add(struct btrfs_root *root, | ||
| 4170 | struct btrfs_block_rsv *block_rsv, | ||
| 4171 | u64 num_bytes) | ||
| 4172 | { | ||
| 4173 | return __block_rsv_add(root, block_rsv, num_bytes, 1); | ||
| 4174 | } | ||
| 4175 | |||
| 4176 | int btrfs_block_rsv_add_noflush(struct btrfs_root *root, | ||
| 4177 | struct btrfs_block_rsv *block_rsv, | ||
| 4178 | u64 num_bytes) | ||
| 4179 | { | ||
| 4180 | return __block_rsv_add(root, block_rsv, num_bytes, 0); | ||
| 4181 | } | ||
| 4182 | |||
| 4183 | int btrfs_block_rsv_check(struct btrfs_root *root, | 4198 | int btrfs_block_rsv_check(struct btrfs_root *root, |
| 4184 | struct btrfs_block_rsv *block_rsv, int min_factor) | 4199 | struct btrfs_block_rsv *block_rsv, int min_factor) |
| 4185 | { | 4200 | { |
| @@ -4198,9 +4213,9 @@ int btrfs_block_rsv_check(struct btrfs_root *root, | |||
| 4198 | return ret; | 4213 | return ret; |
| 4199 | } | 4214 | } |
| 4200 | 4215 | ||
| 4201 | static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, | 4216 | int btrfs_block_rsv_refill(struct btrfs_root *root, |
| 4202 | struct btrfs_block_rsv *block_rsv, | 4217 | struct btrfs_block_rsv *block_rsv, u64 min_reserved, |
| 4203 | u64 min_reserved, int flush) | 4218 | enum btrfs_reserve_flush_enum flush) |
| 4204 | { | 4219 | { |
| 4205 | u64 num_bytes = 0; | 4220 | u64 num_bytes = 0; |
| 4206 | int ret = -ENOSPC; | 4221 | int ret = -ENOSPC; |
| @@ -4228,20 +4243,6 @@ static inline int __btrfs_block_rsv_refill(struct btrfs_root *root, | |||
| 4228 | return ret; | 4243 | return ret; |
| 4229 | } | 4244 | } |
| 4230 | 4245 | ||
| 4231 | int btrfs_block_rsv_refill(struct btrfs_root *root, | ||
| 4232 | struct btrfs_block_rsv *block_rsv, | ||
| 4233 | u64 min_reserved) | ||
| 4234 | { | ||
| 4235 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 1); | ||
| 4236 | } | ||
| 4237 | |||
| 4238 | int btrfs_block_rsv_refill_noflush(struct btrfs_root *root, | ||
| 4239 | struct btrfs_block_rsv *block_rsv, | ||
| 4240 | u64 min_reserved) | ||
| 4241 | { | ||
| 4242 | return __btrfs_block_rsv_refill(root, block_rsv, min_reserved, 0); | ||
| 4243 | } | ||
| 4244 | |||
| 4245 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | 4246 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, |
| 4246 | struct btrfs_block_rsv *dst_rsv, | 4247 | struct btrfs_block_rsv *dst_rsv, |
| 4247 | u64 num_bytes) | 4248 | u64 num_bytes) |
| @@ -4532,17 +4533,27 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4532 | u64 csum_bytes; | 4533 | u64 csum_bytes; |
| 4533 | unsigned nr_extents = 0; | 4534 | unsigned nr_extents = 0; |
| 4534 | int extra_reserve = 0; | 4535 | int extra_reserve = 0; |
| 4535 | int flush = 1; | 4536 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; |
| 4536 | int ret; | 4537 | int ret = 0; |
| 4538 | bool delalloc_lock = true; | ||
| 4537 | 4539 | ||
| 4538 | /* Need to be holding the i_mutex here if we aren't free space cache */ | 4540 | /* If we are a free space inode we need to not flush since we will be in |
| 4539 | if (btrfs_is_free_space_inode(inode)) | 4541 | * the middle of a transaction commit. We also don't need the delalloc |
| 4540 | flush = 0; | 4542 | * mutex since we won't race with anybody. We need this mostly to make |
| 4543 | * lockdep shut its filthy mouth. | ||
| 4544 | */ | ||
| 4545 | if (btrfs_is_free_space_inode(inode)) { | ||
| 4546 | flush = BTRFS_RESERVE_NO_FLUSH; | ||
| 4547 | delalloc_lock = false; | ||
| 4548 | } | ||
| 4541 | 4549 | ||
| 4542 | if (flush && btrfs_transaction_in_commit(root->fs_info)) | 4550 | if (flush != BTRFS_RESERVE_NO_FLUSH && |
| 4551 | btrfs_transaction_in_commit(root->fs_info)) | ||
| 4543 | schedule_timeout(1); | 4552 | schedule_timeout(1); |
| 4544 | 4553 | ||
| 4545 | mutex_lock(&BTRFS_I(inode)->delalloc_mutex); | 4554 | if (delalloc_lock) |
| 4555 | mutex_lock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4556 | |||
| 4546 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4557 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
| 4547 | 4558 | ||
| 4548 | spin_lock(&BTRFS_I(inode)->lock); | 4559 | spin_lock(&BTRFS_I(inode)->lock); |
| @@ -4568,16 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4568 | csum_bytes = BTRFS_I(inode)->csum_bytes; | 4579 | csum_bytes = BTRFS_I(inode)->csum_bytes; |
| 4569 | spin_unlock(&BTRFS_I(inode)->lock); | 4580 | spin_unlock(&BTRFS_I(inode)->lock); |
| 4570 | 4581 | ||
| 4571 | if (root->fs_info->quota_enabled) { | 4582 | if (root->fs_info->quota_enabled) |
| 4572 | ret = btrfs_qgroup_reserve(root, num_bytes + | 4583 | ret = btrfs_qgroup_reserve(root, num_bytes + |
| 4573 | nr_extents * root->leafsize); | 4584 | nr_extents * root->leafsize); |
| 4574 | if (ret) { | ||
| 4575 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4576 | return ret; | ||
| 4577 | } | ||
| 4578 | } | ||
| 4579 | 4585 | ||
| 4580 | ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); | 4586 | /* |
| 4587 | * ret != 0 here means the qgroup reservation failed, we go straight to | ||
| 4588 | * the shared error handling then. | ||
| 4589 | */ | ||
| 4590 | if (ret == 0) | ||
| 4591 | ret = reserve_metadata_bytes(root, block_rsv, | ||
| 4592 | to_reserve, flush); | ||
| 4593 | |||
| 4581 | if (ret) { | 4594 | if (ret) { |
| 4582 | u64 to_free = 0; | 4595 | u64 to_free = 0; |
| 4583 | unsigned dropped; | 4596 | unsigned dropped; |
| @@ -4607,7 +4620,12 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4607 | btrfs_ino(inode), | 4620 | btrfs_ino(inode), |
| 4608 | to_free, 0); | 4621 | to_free, 0); |
| 4609 | } | 4622 | } |
| 4610 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | 4623 | if (root->fs_info->quota_enabled) { |
| 4624 | btrfs_qgroup_free(root, num_bytes + | ||
| 4625 | nr_extents * root->leafsize); | ||
| 4626 | } | ||
| 4627 | if (delalloc_lock) | ||
| 4628 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4611 | return ret; | 4629 | return ret; |
| 4612 | } | 4630 | } |
| 4613 | 4631 | ||
| @@ -4619,7 +4637,9 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
| 4619 | } | 4637 | } |
| 4620 | BTRFS_I(inode)->reserved_extents += nr_extents; | 4638 | BTRFS_I(inode)->reserved_extents += nr_extents; |
| 4621 | spin_unlock(&BTRFS_I(inode)->lock); | 4639 | spin_unlock(&BTRFS_I(inode)->lock); |
| 4622 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | 4640 | |
| 4641 | if (delalloc_lock) | ||
| 4642 | mutex_unlock(&BTRFS_I(inode)->delalloc_mutex); | ||
| 4623 | 4643 | ||
| 4624 | if (to_reserve) | 4644 | if (to_reserve) |
| 4625 | trace_btrfs_space_reservation(root->fs_info,"delalloc", | 4645 | trace_btrfs_space_reservation(root->fs_info,"delalloc", |
| @@ -4969,9 +4989,13 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 4969 | { | 4989 | { |
| 4970 | struct btrfs_fs_info *fs_info = root->fs_info; | 4990 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 4971 | struct btrfs_block_group_cache *cache = NULL; | 4991 | struct btrfs_block_group_cache *cache = NULL; |
| 4992 | struct btrfs_space_info *space_info; | ||
| 4993 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
| 4972 | u64 len; | 4994 | u64 len; |
| 4995 | bool readonly; | ||
| 4973 | 4996 | ||
| 4974 | while (start <= end) { | 4997 | while (start <= end) { |
| 4998 | readonly = false; | ||
| 4975 | if (!cache || | 4999 | if (!cache || |
| 4976 | start >= cache->key.objectid + cache->key.offset) { | 5000 | start >= cache->key.objectid + cache->key.offset) { |
| 4977 | if (cache) | 5001 | if (cache) |
| @@ -4989,15 +5013,30 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 4989 | } | 5013 | } |
| 4990 | 5014 | ||
| 4991 | start += len; | 5015 | start += len; |
| 5016 | space_info = cache->space_info; | ||
| 4992 | 5017 | ||
| 4993 | spin_lock(&cache->space_info->lock); | 5018 | spin_lock(&space_info->lock); |
| 4994 | spin_lock(&cache->lock); | 5019 | spin_lock(&cache->lock); |
| 4995 | cache->pinned -= len; | 5020 | cache->pinned -= len; |
| 4996 | cache->space_info->bytes_pinned -= len; | 5021 | space_info->bytes_pinned -= len; |
| 4997 | if (cache->ro) | 5022 | if (cache->ro) { |
| 4998 | cache->space_info->bytes_readonly += len; | 5023 | space_info->bytes_readonly += len; |
| 5024 | readonly = true; | ||
| 5025 | } | ||
| 4999 | spin_unlock(&cache->lock); | 5026 | spin_unlock(&cache->lock); |
| 5000 | spin_unlock(&cache->space_info->lock); | 5027 | if (!readonly && global_rsv->space_info == space_info) { |
| 5028 | spin_lock(&global_rsv->lock); | ||
| 5029 | if (!global_rsv->full) { | ||
| 5030 | len = min(len, global_rsv->size - | ||
| 5031 | global_rsv->reserved); | ||
| 5032 | global_rsv->reserved += len; | ||
| 5033 | space_info->bytes_may_use += len; | ||
| 5034 | if (global_rsv->reserved >= global_rsv->size) | ||
| 5035 | global_rsv->full = 1; | ||
| 5036 | } | ||
| 5037 | spin_unlock(&global_rsv->lock); | ||
| 5038 | } | ||
| 5039 | spin_unlock(&space_info->lock); | ||
| 5001 | } | 5040 | } |
| 5002 | 5041 | ||
| 5003 | if (cache) | 5042 | if (cache) |
| @@ -5466,7 +5505,7 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
| 5466 | return 0; | 5505 | return 0; |
| 5467 | } | 5506 | } |
| 5468 | 5507 | ||
| 5469 | static int __get_block_group_index(u64 flags) | 5508 | int __get_raid_index(u64 flags) |
| 5470 | { | 5509 | { |
| 5471 | int index; | 5510 | int index; |
| 5472 | 5511 | ||
| @@ -5486,7 +5525,7 @@ static int __get_block_group_index(u64 flags) | |||
| 5486 | 5525 | ||
| 5487 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | 5526 | static int get_block_group_index(struct btrfs_block_group_cache *cache) |
| 5488 | { | 5527 | { |
| 5489 | return __get_block_group_index(cache->flags); | 5528 | return __get_raid_index(cache->flags); |
| 5490 | } | 5529 | } |
| 5491 | 5530 | ||
| 5492 | enum btrfs_loop_type { | 5531 | enum btrfs_loop_type { |
| @@ -5519,7 +5558,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 5519 | int empty_cluster = 2 * 1024 * 1024; | 5558 | int empty_cluster = 2 * 1024 * 1024; |
| 5520 | struct btrfs_space_info *space_info; | 5559 | struct btrfs_space_info *space_info; |
| 5521 | int loop = 0; | 5560 | int loop = 0; |
| 5522 | int index = 0; | 5561 | int index = __get_raid_index(data); |
| 5523 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? | 5562 | int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? |
| 5524 | RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; | 5563 | RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; |
| 5525 | bool found_uncached_bg = false; | 5564 | bool found_uncached_bg = false; |
| @@ -6269,7 +6308,8 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 6269 | block_rsv = get_block_rsv(trans, root); | 6308 | block_rsv = get_block_rsv(trans, root); |
| 6270 | 6309 | ||
| 6271 | if (block_rsv->size == 0) { | 6310 | if (block_rsv->size == 0) { |
| 6272 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | 6311 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
| 6312 | BTRFS_RESERVE_NO_FLUSH); | ||
| 6273 | /* | 6313 | /* |
| 6274 | * If we couldn't reserve metadata bytes try and use some from | 6314 | * If we couldn't reserve metadata bytes try and use some from |
| 6275 | * the global reserve. | 6315 | * the global reserve. |
| @@ -6292,11 +6332,11 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
| 6292 | static DEFINE_RATELIMIT_STATE(_rs, | 6332 | static DEFINE_RATELIMIT_STATE(_rs, |
| 6293 | DEFAULT_RATELIMIT_INTERVAL, | 6333 | DEFAULT_RATELIMIT_INTERVAL, |
| 6294 | /*DEFAULT_RATELIMIT_BURST*/ 2); | 6334 | /*DEFAULT_RATELIMIT_BURST*/ 2); |
| 6295 | if (__ratelimit(&_rs)) { | 6335 | if (__ratelimit(&_rs)) |
| 6296 | printk(KERN_DEBUG "btrfs: block rsv returned %d\n", ret); | 6336 | WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n", |
| 6297 | WARN_ON(1); | 6337 | ret); |
| 6298 | } | 6338 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
| 6299 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, 0); | 6339 | BTRFS_RESERVE_NO_FLUSH); |
| 6300 | if (!ret) { | 6340 | if (!ret) { |
| 6301 | return block_rsv; | 6341 | return block_rsv; |
| 6302 | } else if (ret && block_rsv != global_rsv) { | 6342 | } else if (ret && block_rsv != global_rsv) { |
| @@ -6746,11 +6786,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 6746 | &wc->flags[level]); | 6786 | &wc->flags[level]); |
| 6747 | if (ret < 0) { | 6787 | if (ret < 0) { |
| 6748 | btrfs_tree_unlock_rw(eb, path->locks[level]); | 6788 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
| 6789 | path->locks[level] = 0; | ||
| 6749 | return ret; | 6790 | return ret; |
| 6750 | } | 6791 | } |
| 6751 | BUG_ON(wc->refs[level] == 0); | 6792 | BUG_ON(wc->refs[level] == 0); |
| 6752 | if (wc->refs[level] == 1) { | 6793 | if (wc->refs[level] == 1) { |
| 6753 | btrfs_tree_unlock_rw(eb, path->locks[level]); | 6794 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
| 6795 | path->locks[level] = 0; | ||
| 6754 | return 1; | 6796 | return 1; |
| 6755 | } | 6797 | } |
| 6756 | } | 6798 | } |
| @@ -7427,7 +7469,7 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7427 | */ | 7469 | */ |
| 7428 | target = get_restripe_target(root->fs_info, block_group->flags); | 7470 | target = get_restripe_target(root->fs_info, block_group->flags); |
| 7429 | if (target) { | 7471 | if (target) { |
| 7430 | index = __get_block_group_index(extended_to_chunk(target)); | 7472 | index = __get_raid_index(extended_to_chunk(target)); |
| 7431 | } else { | 7473 | } else { |
| 7432 | /* | 7474 | /* |
| 7433 | * this is just a balance, so if we were marked as full | 7475 | * this is just a balance, so if we were marked as full |
| @@ -7461,7 +7503,8 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
| 7461 | * check to make sure we can actually find a chunk with enough | 7503 | * check to make sure we can actually find a chunk with enough |
| 7462 | * space to fit our block group in. | 7504 | * space to fit our block group in. |
| 7463 | */ | 7505 | */ |
| 7464 | if (device->total_bytes > device->bytes_used + min_free) { | 7506 | if (device->total_bytes > device->bytes_used + min_free && |
| 7507 | !device->is_tgtdev_for_dev_replace) { | ||
| 7465 | ret = find_free_dev_extent(device, min_free, | 7508 | ret = find_free_dev_extent(device, min_free, |
| 7466 | &dev_offset, NULL); | 7509 | &dev_offset, NULL); |
| 7467 | if (!ret) | 7510 | if (!ret) |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 472873a94d96..1b319df29eee 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -341,12 +341,10 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 341 | { | 341 | { |
| 342 | struct rb_node *node; | 342 | struct rb_node *node; |
| 343 | 343 | ||
| 344 | if (end < start) { | 344 | if (end < start) |
| 345 | printk(KERN_ERR "btrfs end < start %llu %llu\n", | 345 | WARN(1, KERN_ERR "btrfs end < start %llu %llu\n", |
| 346 | (unsigned long long)end, | 346 | (unsigned long long)end, |
| 347 | (unsigned long long)start); | 347 | (unsigned long long)start); |
| 348 | WARN_ON(1); | ||
| 349 | } | ||
| 350 | state->start = start; | 348 | state->start = start; |
| 351 | state->end = end; | 349 | state->end = end; |
| 352 | 350 | ||
| @@ -1919,12 +1917,12 @@ static void repair_io_failure_callback(struct bio *bio, int err) | |||
| 1919 | * the standard behavior is to write all copies in a raid setup. here we only | 1917 | * the standard behavior is to write all copies in a raid setup. here we only |
| 1920 | * want to write the one bad copy. so we do the mapping for ourselves and issue | 1918 | * want to write the one bad copy. so we do the mapping for ourselves and issue |
| 1921 | * submit_bio directly. | 1919 | * submit_bio directly. |
| 1922 | * to avoid any synchonization issues, wait for the data after writing, which | 1920 | * to avoid any synchronization issues, wait for the data after writing, which |
| 1923 | * actually prevents the read that triggered the error from finishing. | 1921 | * actually prevents the read that triggered the error from finishing. |
| 1924 | * currently, there can be no more than two copies of every data bit. thus, | 1922 | * currently, there can be no more than two copies of every data bit. thus, |
| 1925 | * exactly one rewrite is required. | 1923 | * exactly one rewrite is required. |
| 1926 | */ | 1924 | */ |
| 1927 | int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | 1925 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, |
| 1928 | u64 length, u64 logical, struct page *page, | 1926 | u64 length, u64 logical, struct page *page, |
| 1929 | int mirror_num) | 1927 | int mirror_num) |
| 1930 | { | 1928 | { |
| @@ -1946,7 +1944,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
| 1946 | bio->bi_size = 0; | 1944 | bio->bi_size = 0; |
| 1947 | map_length = length; | 1945 | map_length = length; |
| 1948 | 1946 | ||
| 1949 | ret = btrfs_map_block(map_tree, WRITE, logical, | 1947 | ret = btrfs_map_block(fs_info, WRITE, logical, |
| 1950 | &map_length, &bbio, mirror_num); | 1948 | &map_length, &bbio, mirror_num); |
| 1951 | if (ret) { | 1949 | if (ret) { |
| 1952 | bio_put(bio); | 1950 | bio_put(bio); |
| @@ -1984,14 +1982,13 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | |||
| 1984 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, | 1982 | int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, |
| 1985 | int mirror_num) | 1983 | int mirror_num) |
| 1986 | { | 1984 | { |
| 1987 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
| 1988 | u64 start = eb->start; | 1985 | u64 start = eb->start; |
| 1989 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); | 1986 | unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); |
| 1990 | int ret = 0; | 1987 | int ret = 0; |
| 1991 | 1988 | ||
| 1992 | for (i = 0; i < num_pages; i++) { | 1989 | for (i = 0; i < num_pages; i++) { |
| 1993 | struct page *p = extent_buffer_page(eb, i); | 1990 | struct page *p = extent_buffer_page(eb, i); |
| 1994 | ret = repair_io_failure(map_tree, start, PAGE_CACHE_SIZE, | 1991 | ret = repair_io_failure(root->fs_info, start, PAGE_CACHE_SIZE, |
| 1995 | start, p, mirror_num); | 1992 | start, p, mirror_num); |
| 1996 | if (ret) | 1993 | if (ret) |
| 1997 | break; | 1994 | break; |
| @@ -2010,7 +2007,7 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2010 | u64 private; | 2007 | u64 private; |
| 2011 | u64 private_failure; | 2008 | u64 private_failure; |
| 2012 | struct io_failure_record *failrec; | 2009 | struct io_failure_record *failrec; |
| 2013 | struct btrfs_mapping_tree *map_tree; | 2010 | struct btrfs_fs_info *fs_info; |
| 2014 | struct extent_state *state; | 2011 | struct extent_state *state; |
| 2015 | int num_copies; | 2012 | int num_copies; |
| 2016 | int did_repair = 0; | 2013 | int did_repair = 0; |
| @@ -2046,11 +2043,11 @@ static int clean_io_failure(u64 start, struct page *page) | |||
| 2046 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); | 2043 | spin_unlock(&BTRFS_I(inode)->io_tree.lock); |
| 2047 | 2044 | ||
| 2048 | if (state && state->start == failrec->start) { | 2045 | if (state && state->start == failrec->start) { |
| 2049 | map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; | 2046 | fs_info = BTRFS_I(inode)->root->fs_info; |
| 2050 | num_copies = btrfs_num_copies(map_tree, failrec->logical, | 2047 | num_copies = btrfs_num_copies(fs_info, failrec->logical, |
| 2051 | failrec->len); | 2048 | failrec->len); |
| 2052 | if (num_copies > 1) { | 2049 | if (num_copies > 1) { |
| 2053 | ret = repair_io_failure(map_tree, start, failrec->len, | 2050 | ret = repair_io_failure(fs_info, start, failrec->len, |
| 2054 | failrec->logical, page, | 2051 | failrec->logical, page, |
| 2055 | failrec->failed_mirror); | 2052 | failrec->failed_mirror); |
| 2056 | did_repair = !ret; | 2053 | did_repair = !ret; |
| @@ -2159,9 +2156,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, | |||
| 2159 | * clean_io_failure() clean all those errors at once. | 2156 | * clean_io_failure() clean all those errors at once. |
| 2160 | */ | 2157 | */ |
| 2161 | } | 2158 | } |
| 2162 | num_copies = btrfs_num_copies( | 2159 | num_copies = btrfs_num_copies(BTRFS_I(inode)->root->fs_info, |
| 2163 | &BTRFS_I(inode)->root->fs_info->mapping_tree, | 2160 | failrec->logical, failrec->len); |
| 2164 | failrec->logical, failrec->len); | ||
| 2165 | if (num_copies == 1) { | 2161 | if (num_copies == 1) { |
| 2166 | /* | 2162 | /* |
| 2167 | * we only have a single copy of the data, so don't bother with | 2163 | * we only have a single copy of the data, so don't bother with |
| @@ -2466,10 +2462,6 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | |||
| 2466 | return bio; | 2462 | return bio; |
| 2467 | } | 2463 | } |
| 2468 | 2464 | ||
| 2469 | /* | ||
| 2470 | * Since writes are async, they will only return -ENOMEM. | ||
| 2471 | * Reads can return the full range of I/O error conditions. | ||
| 2472 | */ | ||
| 2473 | static int __must_check submit_one_bio(int rw, struct bio *bio, | 2465 | static int __must_check submit_one_bio(int rw, struct bio *bio, |
| 2474 | int mirror_num, unsigned long bio_flags) | 2466 | int mirror_num, unsigned long bio_flags) |
| 2475 | { | 2467 | { |
| @@ -4721,10 +4713,9 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
| 4721 | } | 4713 | } |
| 4722 | 4714 | ||
| 4723 | if (start + min_len > eb->len) { | 4715 | if (start + min_len > eb->len) { |
| 4724 | printk(KERN_ERR "btrfs bad mapping eb start %llu len %lu, " | 4716 | WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, " |
| 4725 | "wanted %lu %lu\n", (unsigned long long)eb->start, | 4717 | "wanted %lu %lu\n", (unsigned long long)eb->start, |
| 4726 | eb->len, start, min_len); | 4718 | eb->len, start, min_len); |
| 4727 | WARN_ON(1); | ||
| 4728 | return -EINVAL; | 4719 | return -EINVAL; |
| 4729 | } | 4720 | } |
| 4730 | 4721 | ||
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 711d12b80028..2eacfabd3263 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -337,9 +337,9 @@ struct bio * | |||
| 337 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, | 337 | btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs, |
| 338 | gfp_t gfp_flags); | 338 | gfp_t gfp_flags); |
| 339 | 339 | ||
| 340 | struct btrfs_mapping_tree; | 340 | struct btrfs_fs_info; |
| 341 | 341 | ||
| 342 | int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, | 342 | int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, |
| 343 | u64 length, u64 logical, struct page *page, | 343 | u64 length, u64 logical, struct page *page, |
| 344 | int mirror_num); | 344 | int mirror_num); |
| 345 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); | 345 | int end_extent_writepage(struct page *page, int err, u64 start, u64 end); |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index b8cbc8d5c7f7..fdb7a8db3b57 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -49,7 +49,7 @@ void extent_map_tree_init(struct extent_map_tree *tree) | |||
| 49 | struct extent_map *alloc_extent_map(void) | 49 | struct extent_map *alloc_extent_map(void) |
| 50 | { | 50 | { |
| 51 | struct extent_map *em; | 51 | struct extent_map *em; |
| 52 | em = kmem_cache_alloc(extent_map_cache, GFP_NOFS); | 52 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); |
| 53 | if (!em) | 53 | if (!em) |
| 54 | return NULL; | 54 | return NULL; |
| 55 | em->in_tree = 0; | 55 | em->in_tree = 0; |
| @@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next) | |||
| 171 | if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) | 171 | if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) |
| 172 | return 0; | 172 | return 0; |
| 173 | 173 | ||
| 174 | if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) || | ||
| 175 | test_bit(EXTENT_FLAG_LOGGING, &next->flags)) | ||
| 176 | return 0; | ||
| 177 | |||
| 174 | if (extent_map_end(prev) == next->start && | 178 | if (extent_map_end(prev) == next->start && |
| 175 | prev->flags == next->flags && | 179 | prev->flags == next->flags && |
| 176 | prev->bdev == next->bdev && | 180 | prev->bdev == next->bdev && |
| @@ -198,16 +202,15 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 198 | merge = rb_entry(rb, struct extent_map, rb_node); | 202 | merge = rb_entry(rb, struct extent_map, rb_node); |
| 199 | if (rb && mergable_maps(merge, em)) { | 203 | if (rb && mergable_maps(merge, em)) { |
| 200 | em->start = merge->start; | 204 | em->start = merge->start; |
| 205 | em->orig_start = merge->orig_start; | ||
| 201 | em->len += merge->len; | 206 | em->len += merge->len; |
| 202 | em->block_len += merge->block_len; | 207 | em->block_len += merge->block_len; |
| 203 | em->block_start = merge->block_start; | 208 | em->block_start = merge->block_start; |
| 204 | merge->in_tree = 0; | 209 | merge->in_tree = 0; |
| 205 | if (merge->generation > em->generation) { | 210 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; |
| 206 | em->mod_start = em->start; | 211 | em->mod_start = merge->mod_start; |
| 207 | em->mod_len = em->len; | 212 | em->generation = max(em->generation, merge->generation); |
| 208 | em->generation = merge->generation; | 213 | list_move(&em->list, &tree->modified_extents); |
| 209 | list_move(&em->list, &tree->modified_extents); | ||
| 210 | } | ||
| 211 | 214 | ||
| 212 | list_del_init(&merge->list); | 215 | list_del_init(&merge->list); |
| 213 | rb_erase(&merge->rb_node, &tree->map); | 216 | rb_erase(&merge->rb_node, &tree->map); |
| @@ -223,23 +226,19 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
| 223 | em->block_len += merge->len; | 226 | em->block_len += merge->len; |
| 224 | rb_erase(&merge->rb_node, &tree->map); | 227 | rb_erase(&merge->rb_node, &tree->map); |
| 225 | merge->in_tree = 0; | 228 | merge->in_tree = 0; |
| 226 | if (merge->generation > em->generation) { | 229 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; |
| 227 | em->mod_len = em->len; | 230 | em->generation = max(em->generation, merge->generation); |
| 228 | em->generation = merge->generation; | ||
| 229 | list_move(&em->list, &tree->modified_extents); | ||
| 230 | } | ||
| 231 | list_del_init(&merge->list); | 231 | list_del_init(&merge->list); |
| 232 | free_extent_map(merge); | 232 | free_extent_map(merge); |
| 233 | } | 233 | } |
| 234 | } | 234 | } |
| 235 | 235 | ||
| 236 | /** | 236 | /** |
| 237 | * unpint_extent_cache - unpin an extent from the cache | 237 | * unpin_extent_cache - unpin an extent from the cache |
| 238 | * @tree: tree to unpin the extent in | 238 | * @tree: tree to unpin the extent in |
| 239 | * @start: logical offset in the file | 239 | * @start: logical offset in the file |
| 240 | * @len: length of the extent | 240 | * @len: length of the extent |
| 241 | * @gen: generation that this extent has been modified in | 241 | * @gen: generation that this extent has been modified in |
| 242 | * @prealloc: if this is set we need to clear the prealloc flag | ||
| 243 | * | 242 | * |
| 244 | * Called after an extent has been written to disk properly. Set the generation | 243 | * Called after an extent has been written to disk properly. Set the generation |
| 245 | * to the generation that actually added the file item to the inode so we know | 244 | * to the generation that actually added the file item to the inode so we know |
| @@ -260,15 +259,16 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, | |||
| 260 | if (!em) | 259 | if (!em) |
| 261 | goto out; | 260 | goto out; |
| 262 | 261 | ||
| 263 | list_move(&em->list, &tree->modified_extents); | 262 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) |
| 263 | list_move(&em->list, &tree->modified_extents); | ||
| 264 | em->generation = gen; | 264 | em->generation = gen; |
| 265 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 265 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 266 | em->mod_start = em->start; | 266 | em->mod_start = em->start; |
| 267 | em->mod_len = em->len; | 267 | em->mod_len = em->len; |
| 268 | 268 | ||
| 269 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | 269 | if (test_bit(EXTENT_FLAG_FILLING, &em->flags)) { |
| 270 | prealloc = true; | 270 | prealloc = true; |
| 271 | clear_bit(EXTENT_FLAG_PREALLOC, &em->flags); | 271 | clear_bit(EXTENT_FLAG_FILLING, &em->flags); |
| 272 | } | 272 | } |
| 273 | 273 | ||
| 274 | try_merge_map(tree, em); | 274 | try_merge_map(tree, em); |
| @@ -285,6 +285,13 @@ out: | |||
| 285 | 285 | ||
| 286 | } | 286 | } |
| 287 | 287 | ||
| 288 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) | ||
| 289 | { | ||
| 290 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 291 | if (em->in_tree) | ||
| 292 | try_merge_map(tree, em); | ||
| 293 | } | ||
| 294 | |||
| 288 | /** | 295 | /** |
| 289 | * add_extent_mapping - add new extent map to the extent tree | 296 | * add_extent_mapping - add new extent map to the extent tree |
| 290 | * @tree: tree to insert new map in | 297 | * @tree: tree to insert new map in |
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 679225555f7b..c6598c89cff8 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ | 14 | #define EXTENT_FLAG_VACANCY 2 /* no file extent item found */ |
| 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ | 15 | #define EXTENT_FLAG_PREALLOC 3 /* pre-allocated extent */ |
| 16 | #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ | 16 | #define EXTENT_FLAG_LOGGING 4 /* Logging this extent */ |
| 17 | #define EXTENT_FLAG_FILLING 5 /* Filling in a preallocated extent */ | ||
| 17 | 18 | ||
| 18 | struct extent_map { | 19 | struct extent_map { |
| 19 | struct rb_node rb_node; | 20 | struct rb_node rb_node; |
| @@ -24,6 +25,7 @@ struct extent_map { | |||
| 24 | u64 mod_start; | 25 | u64 mod_start; |
| 25 | u64 mod_len; | 26 | u64 mod_len; |
| 26 | u64 orig_start; | 27 | u64 orig_start; |
| 28 | u64 orig_block_len; | ||
| 27 | u64 block_start; | 29 | u64 block_start; |
| 28 | u64 block_len; | 30 | u64 block_len; |
| 29 | u64 generation; | 31 | u64 generation; |
| @@ -67,6 +69,7 @@ void free_extent_map(struct extent_map *em); | |||
| 67 | int __init extent_map_init(void); | 69 | int __init extent_map_init(void); |
| 68 | void extent_map_exit(void); | 70 | void extent_map_exit(void); |
| 69 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); | 71 | int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); |
| 72 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em); | ||
| 70 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, | 73 | struct extent_map *search_extent_mapping(struct extent_map_tree *tree, |
| 71 | u64 start, u64 len); | 74 | u64 start, u64 len); |
| 72 | #endif | 75 | #endif |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1ad08e4e4a15..94aa53b38721 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -133,7 +133,6 @@ fail: | |||
| 133 | return ERR_PTR(ret); | 133 | return ERR_PTR(ret); |
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | |||
| 137 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | 136 | int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, |
| 138 | struct btrfs_root *root, | 137 | struct btrfs_root *root, |
| 139 | struct btrfs_path *path, u64 objectid, | 138 | struct btrfs_path *path, u64 objectid, |
| @@ -151,6 +150,26 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
| 151 | return ret; | 150 | return ret; |
| 152 | } | 151 | } |
| 153 | 152 | ||
| 153 | u64 btrfs_file_extent_length(struct btrfs_path *path) | ||
| 154 | { | ||
| 155 | int extent_type; | ||
| 156 | struct btrfs_file_extent_item *fi; | ||
| 157 | u64 len; | ||
| 158 | |||
| 159 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 160 | struct btrfs_file_extent_item); | ||
| 161 | extent_type = btrfs_file_extent_type(path->nodes[0], fi); | ||
| 162 | |||
| 163 | if (extent_type == BTRFS_FILE_EXTENT_REG || | ||
| 164 | extent_type == BTRFS_FILE_EXTENT_PREALLOC) | ||
| 165 | len = btrfs_file_extent_num_bytes(path->nodes[0], fi); | ||
| 166 | else if (extent_type == BTRFS_FILE_EXTENT_INLINE) | ||
| 167 | len = btrfs_file_extent_inline_len(path->nodes[0], fi); | ||
| 168 | else | ||
| 169 | BUG(); | ||
| 170 | |||
| 171 | return len; | ||
| 172 | } | ||
| 154 | 173 | ||
| 155 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | 174 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
| 156 | struct inode *inode, struct bio *bio, | 175 | struct inode *inode, struct bio *bio, |
| @@ -441,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, | |||
| 441 | if (!contig) | 460 | if (!contig) |
| 442 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 461 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; |
| 443 | 462 | ||
| 444 | if (!contig && (offset >= ordered->file_offset + ordered->len || | 463 | if (offset >= ordered->file_offset + ordered->len || |
| 445 | offset < ordered->file_offset)) { | 464 | offset < ordered->file_offset) { |
| 446 | unsigned long bytes_left; | 465 | unsigned long bytes_left; |
| 447 | sums->len = this_sum_bytes; | 466 | sums->len = this_sum_bytes; |
| 448 | this_sum_bytes = 0; | 467 | this_sum_bytes = 0; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 9ab1bed88116..aeb84469d2c4 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include "compat.h" | 41 | #include "compat.h" |
| 42 | #include "volumes.h" | 42 | #include "volumes.h" |
| 43 | 43 | ||
| 44 | static struct kmem_cache *btrfs_inode_defrag_cachep; | ||
| 44 | /* | 45 | /* |
| 45 | * when auto defrag is enabled we | 46 | * when auto defrag is enabled we |
| 46 | * queue up these defrag structs to remember which | 47 | * queue up these defrag structs to remember which |
| @@ -90,7 +91,7 @@ static int __compare_inode_defrag(struct inode_defrag *defrag1, | |||
| 90 | * If an existing record is found the defrag item you | 91 | * If an existing record is found the defrag item you |
| 91 | * pass in is freed | 92 | * pass in is freed |
| 92 | */ | 93 | */ |
| 93 | static void __btrfs_add_inode_defrag(struct inode *inode, | 94 | static int __btrfs_add_inode_defrag(struct inode *inode, |
| 94 | struct inode_defrag *defrag) | 95 | struct inode_defrag *defrag) |
| 95 | { | 96 | { |
| 96 | struct btrfs_root *root = BTRFS_I(inode)->root; | 97 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| @@ -118,18 +119,24 @@ static void __btrfs_add_inode_defrag(struct inode *inode, | |||
| 118 | entry->transid = defrag->transid; | 119 | entry->transid = defrag->transid; |
| 119 | if (defrag->last_offset > entry->last_offset) | 120 | if (defrag->last_offset > entry->last_offset) |
| 120 | entry->last_offset = defrag->last_offset; | 121 | entry->last_offset = defrag->last_offset; |
| 121 | goto exists; | 122 | return -EEXIST; |
| 122 | } | 123 | } |
| 123 | } | 124 | } |
| 124 | set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | 125 | set_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); |
| 125 | rb_link_node(&defrag->rb_node, parent, p); | 126 | rb_link_node(&defrag->rb_node, parent, p); |
| 126 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); | 127 | rb_insert_color(&defrag->rb_node, &root->fs_info->defrag_inodes); |
| 127 | return; | 128 | return 0; |
| 129 | } | ||
| 128 | 130 | ||
| 129 | exists: | 131 | static inline int __need_auto_defrag(struct btrfs_root *root) |
| 130 | kfree(defrag); | 132 | { |
| 131 | return; | 133 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) |
| 134 | return 0; | ||
| 132 | 135 | ||
| 136 | if (btrfs_fs_closing(root->fs_info)) | ||
| 137 | return 0; | ||
| 138 | |||
| 139 | return 1; | ||
| 133 | } | 140 | } |
| 134 | 141 | ||
| 135 | /* | 142 | /* |
| @@ -142,11 +149,9 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 142 | struct btrfs_root *root = BTRFS_I(inode)->root; | 149 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 143 | struct inode_defrag *defrag; | 150 | struct inode_defrag *defrag; |
| 144 | u64 transid; | 151 | u64 transid; |
| 152 | int ret; | ||
| 145 | 153 | ||
| 146 | if (!btrfs_test_opt(root, AUTO_DEFRAG)) | 154 | if (!__need_auto_defrag(root)) |
| 147 | return 0; | ||
| 148 | |||
| 149 | if (btrfs_fs_closing(root->fs_info)) | ||
| 150 | return 0; | 155 | return 0; |
| 151 | 156 | ||
| 152 | if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) | 157 | if (test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) |
| @@ -157,7 +162,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 157 | else | 162 | else |
| 158 | transid = BTRFS_I(inode)->root->last_trans; | 163 | transid = BTRFS_I(inode)->root->last_trans; |
| 159 | 164 | ||
| 160 | defrag = kzalloc(sizeof(*defrag), GFP_NOFS); | 165 | defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS); |
| 161 | if (!defrag) | 166 | if (!defrag) |
| 162 | return -ENOMEM; | 167 | return -ENOMEM; |
| 163 | 168 | ||
| @@ -166,20 +171,56 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, | |||
| 166 | defrag->root = root->root_key.objectid; | 171 | defrag->root = root->root_key.objectid; |
| 167 | 172 | ||
| 168 | spin_lock(&root->fs_info->defrag_inodes_lock); | 173 | spin_lock(&root->fs_info->defrag_inodes_lock); |
| 169 | if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) | 174 | if (!test_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags)) { |
| 170 | __btrfs_add_inode_defrag(inode, defrag); | 175 | /* |
| 171 | else | 176 | * If we set IN_DEFRAG flag and evict the inode from memory, |
| 172 | kfree(defrag); | 177 | * and then re-read this inode, this new inode doesn't have |
| 178 | * IN_DEFRAG flag. At the case, we may find the existed defrag. | ||
| 179 | */ | ||
| 180 | ret = __btrfs_add_inode_defrag(inode, defrag); | ||
| 181 | if (ret) | ||
| 182 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 183 | } else { | ||
| 184 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 185 | } | ||
| 173 | spin_unlock(&root->fs_info->defrag_inodes_lock); | 186 | spin_unlock(&root->fs_info->defrag_inodes_lock); |
| 174 | return 0; | 187 | return 0; |
| 175 | } | 188 | } |
| 176 | 189 | ||
| 177 | /* | 190 | /* |
| 178 | * must be called with the defrag_inodes lock held | 191 | * Requeue the defrag object. If there is a defrag object that points to |
| 192 | * the same inode in the tree, we will merge them together (by | ||
| 193 | * __btrfs_add_inode_defrag()) and free the one that we want to requeue. | ||
| 179 | */ | 194 | */ |
| 180 | struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, | 195 | void btrfs_requeue_inode_defrag(struct inode *inode, |
| 181 | u64 root, u64 ino, | 196 | struct inode_defrag *defrag) |
| 182 | struct rb_node **next) | 197 | { |
| 198 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 199 | int ret; | ||
| 200 | |||
| 201 | if (!__need_auto_defrag(root)) | ||
| 202 | goto out; | ||
| 203 | |||
| 204 | /* | ||
| 205 | * Here we don't check the IN_DEFRAG flag, because we need merge | ||
| 206 | * them together. | ||
| 207 | */ | ||
| 208 | spin_lock(&root->fs_info->defrag_inodes_lock); | ||
| 209 | ret = __btrfs_add_inode_defrag(inode, defrag); | ||
| 210 | spin_unlock(&root->fs_info->defrag_inodes_lock); | ||
| 211 | if (ret) | ||
| 212 | goto out; | ||
| 213 | return; | ||
| 214 | out: | ||
| 215 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 216 | } | ||
| 217 | |||
| 218 | /* | ||
| 219 | * pick the defragable inode that we want, if it doesn't exist, we will get | ||
| 220 | * the next one. | ||
| 221 | */ | ||
| 222 | static struct inode_defrag * | ||
| 223 | btrfs_pick_defrag_inode(struct btrfs_fs_info *fs_info, u64 root, u64 ino) | ||
| 183 | { | 224 | { |
| 184 | struct inode_defrag *entry = NULL; | 225 | struct inode_defrag *entry = NULL; |
| 185 | struct inode_defrag tmp; | 226 | struct inode_defrag tmp; |
| @@ -190,7 +231,8 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, | |||
| 190 | tmp.ino = ino; | 231 | tmp.ino = ino; |
| 191 | tmp.root = root; | 232 | tmp.root = root; |
| 192 | 233 | ||
| 193 | p = info->defrag_inodes.rb_node; | 234 | spin_lock(&fs_info->defrag_inodes_lock); |
| 235 | p = fs_info->defrag_inodes.rb_node; | ||
| 194 | while (p) { | 236 | while (p) { |
| 195 | parent = p; | 237 | parent = p; |
| 196 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 238 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
| @@ -201,52 +243,145 @@ struct inode_defrag *btrfs_find_defrag_inode(struct btrfs_fs_info *info, | |||
| 201 | else if (ret > 0) | 243 | else if (ret > 0) |
| 202 | p = parent->rb_right; | 244 | p = parent->rb_right; |
| 203 | else | 245 | else |
| 204 | return entry; | 246 | goto out; |
| 205 | } | 247 | } |
| 206 | 248 | ||
| 207 | if (next) { | 249 | if (parent && __compare_inode_defrag(&tmp, entry) > 0) { |
| 208 | while (parent && __compare_inode_defrag(&tmp, entry) > 0) { | 250 | parent = rb_next(parent); |
| 209 | parent = rb_next(parent); | 251 | if (parent) |
| 210 | entry = rb_entry(parent, struct inode_defrag, rb_node); | 252 | entry = rb_entry(parent, struct inode_defrag, rb_node); |
| 211 | } | 253 | else |
| 212 | *next = parent; | 254 | entry = NULL; |
| 213 | } | 255 | } |
| 214 | return NULL; | 256 | out: |
| 257 | if (entry) | ||
| 258 | rb_erase(parent, &fs_info->defrag_inodes); | ||
| 259 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 260 | return entry; | ||
| 215 | } | 261 | } |
| 216 | 262 | ||
| 217 | /* | 263 | void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info) |
| 218 | * run through the list of inodes in the FS that need | ||
| 219 | * defragging | ||
| 220 | */ | ||
| 221 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||
| 222 | { | 264 | { |
| 223 | struct inode_defrag *defrag; | 265 | struct inode_defrag *defrag; |
| 266 | struct rb_node *node; | ||
| 267 | |||
| 268 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 269 | node = rb_first(&fs_info->defrag_inodes); | ||
| 270 | while (node) { | ||
| 271 | rb_erase(node, &fs_info->defrag_inodes); | ||
| 272 | defrag = rb_entry(node, struct inode_defrag, rb_node); | ||
| 273 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 274 | |||
| 275 | if (need_resched()) { | ||
| 276 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 277 | cond_resched(); | ||
| 278 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 279 | } | ||
| 280 | |||
| 281 | node = rb_first(&fs_info->defrag_inodes); | ||
| 282 | } | ||
| 283 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 284 | } | ||
| 285 | |||
| 286 | #define BTRFS_DEFRAG_BATCH 1024 | ||
| 287 | |||
| 288 | static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info, | ||
| 289 | struct inode_defrag *defrag) | ||
| 290 | { | ||
| 224 | struct btrfs_root *inode_root; | 291 | struct btrfs_root *inode_root; |
| 225 | struct inode *inode; | 292 | struct inode *inode; |
| 226 | struct rb_node *n; | ||
| 227 | struct btrfs_key key; | 293 | struct btrfs_key key; |
| 228 | struct btrfs_ioctl_defrag_range_args range; | 294 | struct btrfs_ioctl_defrag_range_args range; |
| 229 | u64 first_ino = 0; | ||
| 230 | u64 root_objectid = 0; | ||
| 231 | int num_defrag; | 295 | int num_defrag; |
| 232 | int defrag_batch = 1024; | 296 | int index; |
| 297 | int ret; | ||
| 298 | |||
| 299 | /* get the inode */ | ||
| 300 | key.objectid = defrag->root; | ||
| 301 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 302 | key.offset = (u64)-1; | ||
| 303 | |||
| 304 | index = srcu_read_lock(&fs_info->subvol_srcu); | ||
| 305 | |||
| 306 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 307 | if (IS_ERR(inode_root)) { | ||
| 308 | ret = PTR_ERR(inode_root); | ||
| 309 | goto cleanup; | ||
| 310 | } | ||
| 311 | if (btrfs_root_refs(&inode_root->root_item) == 0) { | ||
| 312 | ret = -ENOENT; | ||
| 313 | goto cleanup; | ||
| 314 | } | ||
| 233 | 315 | ||
| 316 | key.objectid = defrag->ino; | ||
| 317 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 318 | key.offset = 0; | ||
| 319 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | ||
| 320 | if (IS_ERR(inode)) { | ||
| 321 | ret = PTR_ERR(inode); | ||
| 322 | goto cleanup; | ||
| 323 | } | ||
| 324 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 325 | |||
| 326 | /* do a chunk of defrag */ | ||
| 327 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | ||
| 234 | memset(&range, 0, sizeof(range)); | 328 | memset(&range, 0, sizeof(range)); |
| 235 | range.len = (u64)-1; | 329 | range.len = (u64)-1; |
| 330 | range.start = defrag->last_offset; | ||
| 331 | |||
| 332 | sb_start_write(fs_info->sb); | ||
| 333 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||
| 334 | BTRFS_DEFRAG_BATCH); | ||
| 335 | sb_end_write(fs_info->sb); | ||
| 336 | /* | ||
| 337 | * if we filled the whole defrag batch, there | ||
| 338 | * must be more work to do. Queue this defrag | ||
| 339 | * again | ||
| 340 | */ | ||
| 341 | if (num_defrag == BTRFS_DEFRAG_BATCH) { | ||
| 342 | defrag->last_offset = range.start; | ||
| 343 | btrfs_requeue_inode_defrag(inode, defrag); | ||
| 344 | } else if (defrag->last_offset && !defrag->cycled) { | ||
| 345 | /* | ||
| 346 | * we didn't fill our defrag batch, but | ||
| 347 | * we didn't start at zero. Make sure we loop | ||
| 348 | * around to the start of the file. | ||
| 349 | */ | ||
| 350 | defrag->last_offset = 0; | ||
| 351 | defrag->cycled = 1; | ||
| 352 | btrfs_requeue_inode_defrag(inode, defrag); | ||
| 353 | } else { | ||
| 354 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 355 | } | ||
| 356 | |||
| 357 | iput(inode); | ||
| 358 | return 0; | ||
| 359 | cleanup: | ||
| 360 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
| 361 | kmem_cache_free(btrfs_inode_defrag_cachep, defrag); | ||
| 362 | return ret; | ||
| 363 | } | ||
| 364 | |||
| 365 | /* | ||
| 366 | * run through the list of inodes in the FS that need | ||
| 367 | * defragging | ||
| 368 | */ | ||
| 369 | int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | ||
| 370 | { | ||
| 371 | struct inode_defrag *defrag; | ||
| 372 | u64 first_ino = 0; | ||
| 373 | u64 root_objectid = 0; | ||
| 236 | 374 | ||
| 237 | atomic_inc(&fs_info->defrag_running); | 375 | atomic_inc(&fs_info->defrag_running); |
| 238 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 239 | while(1) { | 376 | while(1) { |
| 240 | n = NULL; | 377 | if (!__need_auto_defrag(fs_info->tree_root)) |
| 378 | break; | ||
| 241 | 379 | ||
| 242 | /* find an inode to defrag */ | 380 | /* find an inode to defrag */ |
| 243 | defrag = btrfs_find_defrag_inode(fs_info, root_objectid, | 381 | defrag = btrfs_pick_defrag_inode(fs_info, root_objectid, |
| 244 | first_ino, &n); | 382 | first_ino); |
| 245 | if (!defrag) { | 383 | if (!defrag) { |
| 246 | if (n) { | 384 | if (root_objectid || first_ino) { |
| 247 | defrag = rb_entry(n, struct inode_defrag, | ||
| 248 | rb_node); | ||
| 249 | } else if (root_objectid || first_ino) { | ||
| 250 | root_objectid = 0; | 385 | root_objectid = 0; |
| 251 | first_ino = 0; | 386 | first_ino = 0; |
| 252 | continue; | 387 | continue; |
| @@ -255,70 +390,11 @@ int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info) | |||
| 255 | } | 390 | } |
| 256 | } | 391 | } |
| 257 | 392 | ||
| 258 | /* remove it from the rbtree */ | ||
| 259 | first_ino = defrag->ino + 1; | 393 | first_ino = defrag->ino + 1; |
| 260 | root_objectid = defrag->root; | 394 | root_objectid = defrag->root; |
| 261 | rb_erase(&defrag->rb_node, &fs_info->defrag_inodes); | ||
| 262 | |||
| 263 | if (btrfs_fs_closing(fs_info)) | ||
| 264 | goto next_free; | ||
| 265 | |||
| 266 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 267 | |||
| 268 | /* get the inode */ | ||
| 269 | key.objectid = defrag->root; | ||
| 270 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 271 | key.offset = (u64)-1; | ||
| 272 | inode_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 273 | if (IS_ERR(inode_root)) | ||
| 274 | goto next; | ||
| 275 | |||
| 276 | key.objectid = defrag->ino; | ||
| 277 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | ||
| 278 | key.offset = 0; | ||
| 279 | |||
| 280 | inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); | ||
| 281 | if (IS_ERR(inode)) | ||
| 282 | goto next; | ||
| 283 | |||
| 284 | /* do a chunk of defrag */ | ||
| 285 | clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); | ||
| 286 | range.start = defrag->last_offset; | ||
| 287 | num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid, | ||
| 288 | defrag_batch); | ||
| 289 | /* | ||
| 290 | * if we filled the whole defrag batch, there | ||
| 291 | * must be more work to do. Queue this defrag | ||
| 292 | * again | ||
| 293 | */ | ||
| 294 | if (num_defrag == defrag_batch) { | ||
| 295 | defrag->last_offset = range.start; | ||
| 296 | __btrfs_add_inode_defrag(inode, defrag); | ||
| 297 | /* | ||
| 298 | * we don't want to kfree defrag, we added it back to | ||
| 299 | * the rbtree | ||
| 300 | */ | ||
| 301 | defrag = NULL; | ||
| 302 | } else if (defrag->last_offset && !defrag->cycled) { | ||
| 303 | /* | ||
| 304 | * we didn't fill our defrag batch, but | ||
| 305 | * we didn't start at zero. Make sure we loop | ||
| 306 | * around to the start of the file. | ||
| 307 | */ | ||
| 308 | defrag->last_offset = 0; | ||
| 309 | defrag->cycled = 1; | ||
| 310 | __btrfs_add_inode_defrag(inode, defrag); | ||
| 311 | defrag = NULL; | ||
| 312 | } | ||
| 313 | 395 | ||
| 314 | iput(inode); | 396 | __btrfs_run_defrag_inode(fs_info, defrag); |
| 315 | next: | ||
| 316 | spin_lock(&fs_info->defrag_inodes_lock); | ||
| 317 | next_free: | ||
| 318 | kfree(defrag); | ||
| 319 | } | 397 | } |
| 320 | spin_unlock(&fs_info->defrag_inodes_lock); | ||
| 321 | |||
| 322 | atomic_dec(&fs_info->defrag_running); | 398 | atomic_dec(&fs_info->defrag_running); |
| 323 | 399 | ||
| 324 | /* | 400 | /* |
| @@ -526,6 +602,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 526 | split->block_len = em->block_len; | 602 | split->block_len = em->block_len; |
| 527 | else | 603 | else |
| 528 | split->block_len = split->len; | 604 | split->block_len = split->len; |
| 605 | split->orig_block_len = max(split->block_len, | ||
| 606 | em->orig_block_len); | ||
| 529 | split->generation = gen; | 607 | split->generation = gen; |
| 530 | split->bdev = em->bdev; | 608 | split->bdev = em->bdev; |
| 531 | split->flags = flags; | 609 | split->flags = flags; |
| @@ -547,6 +625,8 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 547 | split->flags = flags; | 625 | split->flags = flags; |
| 548 | split->compress_type = em->compress_type; | 626 | split->compress_type = em->compress_type; |
| 549 | split->generation = gen; | 627 | split->generation = gen; |
| 628 | split->orig_block_len = max(em->block_len, | ||
| 629 | em->orig_block_len); | ||
| 550 | 630 | ||
| 551 | if (compressed) { | 631 | if (compressed) { |
| 552 | split->block_len = em->block_len; | 632 | split->block_len = em->block_len; |
| @@ -555,7 +635,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
| 555 | } else { | 635 | } else { |
| 556 | split->block_len = split->len; | 636 | split->block_len = split->len; |
| 557 | split->block_start = em->block_start + diff; | 637 | split->block_start = em->block_start + diff; |
| 558 | split->orig_start = split->start; | 638 | split->orig_start = em->orig_start; |
| 559 | } | 639 | } |
| 560 | 640 | ||
| 561 | ret = add_extent_mapping(em_tree, split); | 641 | ret = add_extent_mapping(em_tree, split); |
| @@ -1346,10 +1426,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
| 1346 | 1426 | ||
| 1347 | cond_resched(); | 1427 | cond_resched(); |
| 1348 | 1428 | ||
| 1349 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | 1429 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| 1350 | dirty_pages); | ||
| 1351 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) | 1430 | if (dirty_pages < (root->leafsize >> PAGE_CACHE_SHIFT) + 1) |
| 1352 | btrfs_btree_balance_dirty(root, 1); | 1431 | btrfs_btree_balance_dirty(root); |
| 1353 | 1432 | ||
| 1354 | pos += copied; | 1433 | pos += copied; |
| 1355 | num_written += copied; | 1434 | num_written += copied; |
| @@ -1398,6 +1477,24 @@ out: | |||
| 1398 | return written ? written : err; | 1477 | return written ? written : err; |
| 1399 | } | 1478 | } |
| 1400 | 1479 | ||
| 1480 | static void update_time_for_write(struct inode *inode) | ||
| 1481 | { | ||
| 1482 | struct timespec now; | ||
| 1483 | |||
| 1484 | if (IS_NOCMTIME(inode)) | ||
| 1485 | return; | ||
| 1486 | |||
| 1487 | now = current_fs_time(inode->i_sb); | ||
| 1488 | if (!timespec_equal(&inode->i_mtime, &now)) | ||
| 1489 | inode->i_mtime = now; | ||
| 1490 | |||
| 1491 | if (!timespec_equal(&inode->i_ctime, &now)) | ||
| 1492 | inode->i_ctime = now; | ||
| 1493 | |||
| 1494 | if (IS_I_VERSION(inode)) | ||
| 1495 | inode_inc_iversion(inode); | ||
| 1496 | } | ||
| 1497 | |||
| 1401 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | 1498 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
| 1402 | const struct iovec *iov, | 1499 | const struct iovec *iov, |
| 1403 | unsigned long nr_segs, loff_t pos) | 1500 | unsigned long nr_segs, loff_t pos) |
| @@ -1410,6 +1507,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1410 | ssize_t num_written = 0; | 1507 | ssize_t num_written = 0; |
| 1411 | ssize_t err = 0; | 1508 | ssize_t err = 0; |
| 1412 | size_t count, ocount; | 1509 | size_t count, ocount; |
| 1510 | bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); | ||
| 1413 | 1511 | ||
| 1414 | sb_start_write(inode->i_sb); | 1512 | sb_start_write(inode->i_sb); |
| 1415 | 1513 | ||
| @@ -1452,11 +1550,13 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1452 | goto out; | 1550 | goto out; |
| 1453 | } | 1551 | } |
| 1454 | 1552 | ||
| 1455 | err = file_update_time(file); | 1553 | /* |
| 1456 | if (err) { | 1554 | * We reserve space for updating the inode when we reserve space for the |
| 1457 | mutex_unlock(&inode->i_mutex); | 1555 | * extent we are going to write, so we will enospc out there. We don't |
| 1458 | goto out; | 1556 | * need to start yet another transaction to update the inode as we will |
| 1459 | } | 1557 | * update the inode when we finish writing whatever data we write. |
| 1558 | */ | ||
| 1559 | update_time_for_write(inode); | ||
| 1460 | 1560 | ||
| 1461 | start_pos = round_down(pos, root->sectorsize); | 1561 | start_pos = round_down(pos, root->sectorsize); |
| 1462 | if (start_pos > i_size_read(inode)) { | 1562 | if (start_pos > i_size_read(inode)) { |
| @@ -1467,6 +1567,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1467 | } | 1567 | } |
| 1468 | } | 1568 | } |
| 1469 | 1569 | ||
| 1570 | if (sync) | ||
| 1571 | atomic_inc(&BTRFS_I(inode)->sync_writers); | ||
| 1572 | |||
| 1470 | if (unlikely(file->f_flags & O_DIRECT)) { | 1573 | if (unlikely(file->f_flags & O_DIRECT)) { |
| 1471 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, | 1574 | num_written = __btrfs_direct_write(iocb, iov, nr_segs, |
| 1472 | pos, ppos, count, ocount); | 1575 | pos, ppos, count, ocount); |
| @@ -1493,13 +1596,21 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
| 1493 | * this will either be one more than the running transaction | 1596 | * this will either be one more than the running transaction |
| 1494 | * or the generation used for the next transaction if there isn't | 1597 | * or the generation used for the next transaction if there isn't |
| 1495 | * one running right now. | 1598 | * one running right now. |
| 1599 | * | ||
| 1600 | * We also have to set last_sub_trans to the current log transid, | ||
| 1601 | * otherwise subsequent syncs to a file that's been synced in this | ||
| 1602 | * transaction will appear to have already occured. | ||
| 1496 | */ | 1603 | */ |
| 1497 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | 1604 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; |
| 1605 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | ||
| 1498 | if (num_written > 0 || num_written == -EIOCBQUEUED) { | 1606 | if (num_written > 0 || num_written == -EIOCBQUEUED) { |
| 1499 | err = generic_write_sync(file, pos, num_written); | 1607 | err = generic_write_sync(file, pos, num_written); |
| 1500 | if (err < 0 && num_written > 0) | 1608 | if (err < 0 && num_written > 0) |
| 1501 | num_written = err; | 1609 | num_written = err; |
| 1502 | } | 1610 | } |
| 1611 | |||
| 1612 | if (sync) | ||
| 1613 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
| 1503 | out: | 1614 | out: |
| 1504 | sb_end_write(inode->i_sb); | 1615 | sb_end_write(inode->i_sb); |
| 1505 | current->backing_dev_info = NULL; | 1616 | current->backing_dev_info = NULL; |
| @@ -1551,7 +1662,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1551 | * out of the ->i_mutex. If so, we can flush the dirty pages by | 1662 | * out of the ->i_mutex. If so, we can flush the dirty pages by |
| 1552 | * multi-task, and make the performance up. | 1663 | * multi-task, and make the performance up. |
| 1553 | */ | 1664 | */ |
| 1665 | atomic_inc(&BTRFS_I(inode)->sync_writers); | ||
| 1554 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 1666 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); |
| 1667 | atomic_dec(&BTRFS_I(inode)->sync_writers); | ||
| 1555 | if (ret) | 1668 | if (ret) |
| 1556 | return ret; | 1669 | return ret; |
| 1557 | 1670 | ||
| @@ -1562,7 +1675,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 1562 | * range being left. | 1675 | * range being left. |
| 1563 | */ | 1676 | */ |
| 1564 | atomic_inc(&root->log_batch); | 1677 | atomic_inc(&root->log_batch); |
| 1565 | btrfs_wait_ordered_range(inode, start, end); | 1678 | btrfs_wait_ordered_range(inode, start, end - start + 1); |
| 1566 | atomic_inc(&root->log_batch); | 1679 | atomic_inc(&root->log_batch); |
| 1567 | 1680 | ||
| 1568 | /* | 1681 | /* |
| @@ -1768,6 +1881,7 @@ out: | |||
| 1768 | 1881 | ||
| 1769 | hole_em->block_start = EXTENT_MAP_HOLE; | 1882 | hole_em->block_start = EXTENT_MAP_HOLE; |
| 1770 | hole_em->block_len = 0; | 1883 | hole_em->block_len = 0; |
| 1884 | hole_em->orig_block_len = 0; | ||
| 1771 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | 1885 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 1772 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | 1886 | hole_em->compress_type = BTRFS_COMPRESS_NONE; |
| 1773 | hole_em->generation = trans->transid; | 1887 | hole_em->generation = trans->transid; |
| @@ -1797,48 +1911,51 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 1797 | struct btrfs_path *path; | 1911 | struct btrfs_path *path; |
| 1798 | struct btrfs_block_rsv *rsv; | 1912 | struct btrfs_block_rsv *rsv; |
| 1799 | struct btrfs_trans_handle *trans; | 1913 | struct btrfs_trans_handle *trans; |
| 1800 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | 1914 | u64 lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize); |
| 1801 | u64 lockstart = (offset + mask) & ~mask; | 1915 | u64 lockend = round_down(offset + len, |
| 1802 | u64 lockend = ((offset + len) & ~mask) - 1; | 1916 | BTRFS_I(inode)->root->sectorsize) - 1; |
| 1803 | u64 cur_offset = lockstart; | 1917 | u64 cur_offset = lockstart; |
| 1804 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 1918 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 1805 | u64 drop_end; | 1919 | u64 drop_end; |
| 1806 | unsigned long nr; | ||
| 1807 | int ret = 0; | 1920 | int ret = 0; |
| 1808 | int err = 0; | 1921 | int err = 0; |
| 1809 | bool same_page = (offset >> PAGE_CACHE_SHIFT) == | 1922 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == |
| 1810 | ((offset + len) >> PAGE_CACHE_SHIFT); | 1923 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); |
| 1811 | 1924 | ||
| 1812 | btrfs_wait_ordered_range(inode, offset, len); | 1925 | btrfs_wait_ordered_range(inode, offset, len); |
| 1813 | 1926 | ||
| 1814 | mutex_lock(&inode->i_mutex); | 1927 | mutex_lock(&inode->i_mutex); |
| 1815 | if (offset >= inode->i_size) { | 1928 | /* |
| 1816 | mutex_unlock(&inode->i_mutex); | 1929 | * We needn't truncate any page which is beyond the end of the file |
| 1817 | return 0; | 1930 | * because we are sure there is no data there. |
| 1818 | } | 1931 | */ |
| 1819 | |||
| 1820 | /* | 1932 | /* |
| 1821 | * Only do this if we are in the same page and we aren't doing the | 1933 | * Only do this if we are in the same page and we aren't doing the |
| 1822 | * entire page. | 1934 | * entire page. |
| 1823 | */ | 1935 | */ |
| 1824 | if (same_page && len < PAGE_CACHE_SIZE) { | 1936 | if (same_page && len < PAGE_CACHE_SIZE) { |
| 1825 | ret = btrfs_truncate_page(inode, offset, len, 0); | 1937 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) |
| 1938 | ret = btrfs_truncate_page(inode, offset, len, 0); | ||
| 1826 | mutex_unlock(&inode->i_mutex); | 1939 | mutex_unlock(&inode->i_mutex); |
| 1827 | return ret; | 1940 | return ret; |
| 1828 | } | 1941 | } |
| 1829 | 1942 | ||
| 1830 | /* zero back part of the first page */ | 1943 | /* zero back part of the first page */ |
| 1831 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 1944 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) { |
| 1832 | if (ret) { | 1945 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
| 1833 | mutex_unlock(&inode->i_mutex); | 1946 | if (ret) { |
| 1834 | return ret; | 1947 | mutex_unlock(&inode->i_mutex); |
| 1948 | return ret; | ||
| 1949 | } | ||
| 1835 | } | 1950 | } |
| 1836 | 1951 | ||
| 1837 | /* zero the front end of the last page */ | 1952 | /* zero the front end of the last page */ |
| 1838 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 1953 | if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) { |
| 1839 | if (ret) { | 1954 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); |
| 1840 | mutex_unlock(&inode->i_mutex); | 1955 | if (ret) { |
| 1841 | return ret; | 1956 | mutex_unlock(&inode->i_mutex); |
| 1957 | return ret; | ||
| 1958 | } | ||
| 1842 | } | 1959 | } |
| 1843 | 1960 | ||
| 1844 | if (lockend < lockstart) { | 1961 | if (lockend < lockstart) { |
| @@ -1931,9 +2048,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
| 1931 | break; | 2048 | break; |
| 1932 | } | 2049 | } |
| 1933 | 2050 | ||
| 1934 | nr = trans->blocks_used; | ||
| 1935 | btrfs_end_transaction(trans, root); | 2051 | btrfs_end_transaction(trans, root); |
| 1936 | btrfs_btree_balance_dirty(root, nr); | 2052 | btrfs_btree_balance_dirty(root); |
| 1937 | 2053 | ||
| 1938 | trans = btrfs_start_transaction(root, 3); | 2054 | trans = btrfs_start_transaction(root, 3); |
| 1939 | if (IS_ERR(trans)) { | 2055 | if (IS_ERR(trans)) { |
| @@ -1964,11 +2080,13 @@ out_trans: | |||
| 1964 | if (!trans) | 2080 | if (!trans) |
| 1965 | goto out_free; | 2081 | goto out_free; |
| 1966 | 2082 | ||
| 2083 | inode_inc_iversion(inode); | ||
| 2084 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 2085 | |||
| 1967 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2086 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
| 1968 | ret = btrfs_update_inode(trans, root, inode); | 2087 | ret = btrfs_update_inode(trans, root, inode); |
| 1969 | nr = trans->blocks_used; | ||
| 1970 | btrfs_end_transaction(trans, root); | 2088 | btrfs_end_transaction(trans, root); |
| 1971 | btrfs_btree_balance_dirty(root, nr); | 2089 | btrfs_btree_balance_dirty(root); |
| 1972 | out_free: | 2090 | out_free: |
| 1973 | btrfs_free_path(path); | 2091 | btrfs_free_path(path); |
| 1974 | btrfs_free_block_rsv(root, rsv); | 2092 | btrfs_free_block_rsv(root, rsv); |
| @@ -1992,12 +2110,12 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 1992 | u64 alloc_end; | 2110 | u64 alloc_end; |
| 1993 | u64 alloc_hint = 0; | 2111 | u64 alloc_hint = 0; |
| 1994 | u64 locked_end; | 2112 | u64 locked_end; |
| 1995 | u64 mask = BTRFS_I(inode)->root->sectorsize - 1; | ||
| 1996 | struct extent_map *em; | 2113 | struct extent_map *em; |
| 2114 | int blocksize = BTRFS_I(inode)->root->sectorsize; | ||
| 1997 | int ret; | 2115 | int ret; |
| 1998 | 2116 | ||
| 1999 | alloc_start = offset & ~mask; | 2117 | alloc_start = round_down(offset, blocksize); |
| 2000 | alloc_end = (offset + len + mask) & ~mask; | 2118 | alloc_end = round_up(offset + len, blocksize); |
| 2001 | 2119 | ||
| 2002 | /* Make sure we aren't being give some crap mode */ | 2120 | /* Make sure we aren't being give some crap mode */ |
| 2003 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 2121 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
| @@ -2010,7 +2128,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2010 | * Make sure we have enough space before we do the | 2128 | * Make sure we have enough space before we do the |
| 2011 | * allocation. | 2129 | * allocation. |
| 2012 | */ | 2130 | */ |
| 2013 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start + 1); | 2131 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
| 2014 | if (ret) | 2132 | if (ret) |
| 2015 | return ret; | 2133 | return ret; |
| 2016 | 2134 | ||
| @@ -2078,7 +2196,7 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2078 | } | 2196 | } |
| 2079 | last_byte = min(extent_map_end(em), alloc_end); | 2197 | last_byte = min(extent_map_end(em), alloc_end); |
| 2080 | actual_end = min_t(u64, extent_map_end(em), offset + len); | 2198 | actual_end = min_t(u64, extent_map_end(em), offset + len); |
| 2081 | last_byte = (last_byte + mask) & ~mask; | 2199 | last_byte = ALIGN(last_byte, blocksize); |
| 2082 | 2200 | ||
| 2083 | if (em->block_start == EXTENT_MAP_HOLE || | 2201 | if (em->block_start == EXTENT_MAP_HOLE || |
| 2084 | (cur_offset >= inode->i_size && | 2202 | (cur_offset >= inode->i_size && |
| @@ -2117,11 +2235,11 @@ static long btrfs_fallocate(struct file *file, int mode, | |||
| 2117 | out: | 2235 | out: |
| 2118 | mutex_unlock(&inode->i_mutex); | 2236 | mutex_unlock(&inode->i_mutex); |
| 2119 | /* Let go of our reservation. */ | 2237 | /* Let go of our reservation. */ |
| 2120 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start + 1); | 2238 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
| 2121 | return ret; | 2239 | return ret; |
| 2122 | } | 2240 | } |
| 2123 | 2241 | ||
| 2124 | static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | 2242 | static int find_desired_extent(struct inode *inode, loff_t *offset, int whence) |
| 2125 | { | 2243 | { |
| 2126 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2244 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2127 | struct extent_map *em; | 2245 | struct extent_map *em; |
| @@ -2138,6 +2256,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | |||
| 2138 | if (lockend <= lockstart) | 2256 | if (lockend <= lockstart) |
| 2139 | lockend = lockstart + root->sectorsize; | 2257 | lockend = lockstart + root->sectorsize; |
| 2140 | 2258 | ||
| 2259 | lockend--; | ||
| 2141 | len = lockend - lockstart + 1; | 2260 | len = lockend - lockstart + 1; |
| 2142 | 2261 | ||
| 2143 | len = max_t(u64, len, root->sectorsize); | 2262 | len = max_t(u64, len, root->sectorsize); |
| @@ -2155,7 +2274,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | |||
| 2155 | * before the position we want in case there is outstanding delalloc | 2274 | * before the position we want in case there is outstanding delalloc |
| 2156 | * going on here. | 2275 | * going on here. |
| 2157 | */ | 2276 | */ |
| 2158 | if (origin == SEEK_HOLE && start != 0) { | 2277 | if (whence == SEEK_HOLE && start != 0) { |
| 2159 | if (start <= root->sectorsize) | 2278 | if (start <= root->sectorsize) |
| 2160 | em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, | 2279 | em = btrfs_get_extent_fiemap(inode, NULL, 0, 0, |
| 2161 | root->sectorsize, 0); | 2280 | root->sectorsize, 0); |
| @@ -2189,13 +2308,13 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | |||
| 2189 | } | 2308 | } |
| 2190 | } | 2309 | } |
| 2191 | 2310 | ||
| 2192 | if (origin == SEEK_HOLE) { | 2311 | if (whence == SEEK_HOLE) { |
| 2193 | *offset = start; | 2312 | *offset = start; |
| 2194 | free_extent_map(em); | 2313 | free_extent_map(em); |
| 2195 | break; | 2314 | break; |
| 2196 | } | 2315 | } |
| 2197 | } else { | 2316 | } else { |
| 2198 | if (origin == SEEK_DATA) { | 2317 | if (whence == SEEK_DATA) { |
| 2199 | if (em->block_start == EXTENT_MAP_DELALLOC) { | 2318 | if (em->block_start == EXTENT_MAP_DELALLOC) { |
| 2200 | if (start >= inode->i_size) { | 2319 | if (start >= inode->i_size) { |
| 2201 | free_extent_map(em); | 2320 | free_extent_map(em); |
| @@ -2204,9 +2323,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int origin) | |||
| 2204 | } | 2323 | } |
| 2205 | } | 2324 | } |
| 2206 | 2325 | ||
| 2207 | *offset = start; | 2326 | if (!test_bit(EXTENT_FLAG_PREALLOC, |
| 2208 | free_extent_map(em); | 2327 | &em->flags)) { |
| 2209 | break; | 2328 | *offset = start; |
| 2329 | free_extent_map(em); | ||
| 2330 | break; | ||
| 2331 | } | ||
| 2210 | } | 2332 | } |
| 2211 | } | 2333 | } |
| 2212 | 2334 | ||
| @@ -2232,16 +2354,16 @@ out: | |||
| 2232 | return ret; | 2354 | return ret; |
| 2233 | } | 2355 | } |
| 2234 | 2356 | ||
| 2235 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | 2357 | static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int whence) |
| 2236 | { | 2358 | { |
| 2237 | struct inode *inode = file->f_mapping->host; | 2359 | struct inode *inode = file->f_mapping->host; |
| 2238 | int ret; | 2360 | int ret; |
| 2239 | 2361 | ||
| 2240 | mutex_lock(&inode->i_mutex); | 2362 | mutex_lock(&inode->i_mutex); |
| 2241 | switch (origin) { | 2363 | switch (whence) { |
| 2242 | case SEEK_END: | 2364 | case SEEK_END: |
| 2243 | case SEEK_CUR: | 2365 | case SEEK_CUR: |
| 2244 | offset = generic_file_llseek(file, offset, origin); | 2366 | offset = generic_file_llseek(file, offset, whence); |
| 2245 | goto out; | 2367 | goto out; |
| 2246 | case SEEK_DATA: | 2368 | case SEEK_DATA: |
| 2247 | case SEEK_HOLE: | 2369 | case SEEK_HOLE: |
| @@ -2250,7 +2372,7 @@ static loff_t btrfs_file_llseek(struct file *file, loff_t offset, int origin) | |||
| 2250 | return -ENXIO; | 2372 | return -ENXIO; |
| 2251 | } | 2373 | } |
| 2252 | 2374 | ||
| 2253 | ret = find_desired_extent(inode, &offset, origin); | 2375 | ret = find_desired_extent(inode, &offset, whence); |
| 2254 | if (ret) { | 2376 | if (ret) { |
| 2255 | mutex_unlock(&inode->i_mutex); | 2377 | mutex_unlock(&inode->i_mutex); |
| 2256 | return ret; | 2378 | return ret; |
| @@ -2293,3 +2415,21 @@ const struct file_operations btrfs_file_operations = { | |||
| 2293 | .compat_ioctl = btrfs_ioctl, | 2415 | .compat_ioctl = btrfs_ioctl, |
| 2294 | #endif | 2416 | #endif |
| 2295 | }; | 2417 | }; |
| 2418 | |||
| 2419 | void btrfs_auto_defrag_exit(void) | ||
| 2420 | { | ||
| 2421 | if (btrfs_inode_defrag_cachep) | ||
| 2422 | kmem_cache_destroy(btrfs_inode_defrag_cachep); | ||
| 2423 | } | ||
| 2424 | |||
| 2425 | int btrfs_auto_defrag_init(void) | ||
| 2426 | { | ||
| 2427 | btrfs_inode_defrag_cachep = kmem_cache_create("btrfs_inode_defrag", | ||
| 2428 | sizeof(struct inode_defrag), 0, | ||
| 2429 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
| 2430 | NULL); | ||
| 2431 | if (!btrfs_inode_defrag_cachep) | ||
| 2432 | return -ENOMEM; | ||
| 2433 | |||
| 2434 | return 0; | ||
| 2435 | } | ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 1027b854b90c..0be7a8742a43 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -307,7 +307,6 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl) | |||
| 307 | 307 | ||
| 308 | static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) | 308 | static void io_ctl_map_page(struct io_ctl *io_ctl, int clear) |
| 309 | { | 309 | { |
| 310 | WARN_ON(io_ctl->cur); | ||
| 311 | BUG_ON(io_ctl->index >= io_ctl->num_pages); | 310 | BUG_ON(io_ctl->index >= io_ctl->num_pages); |
| 312 | io_ctl->page = io_ctl->pages[io_ctl->index++]; | 311 | io_ctl->page = io_ctl->pages[io_ctl->index++]; |
| 313 | io_ctl->cur = kmap(io_ctl->page); | 312 | io_ctl->cur = kmap(io_ctl->page); |
| @@ -1250,18 +1249,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl, | |||
| 1250 | * if previous extent entry covers the offset, | 1249 | * if previous extent entry covers the offset, |
| 1251 | * we should return it instead of the bitmap entry | 1250 | * we should return it instead of the bitmap entry |
| 1252 | */ | 1251 | */ |
| 1253 | n = &entry->offset_index; | 1252 | n = rb_prev(&entry->offset_index); |
| 1254 | while (1) { | 1253 | if (n) { |
| 1255 | n = rb_prev(n); | ||
| 1256 | if (!n) | ||
| 1257 | break; | ||
| 1258 | prev = rb_entry(n, struct btrfs_free_space, | 1254 | prev = rb_entry(n, struct btrfs_free_space, |
| 1259 | offset_index); | 1255 | offset_index); |
| 1260 | if (!prev->bitmap) { | 1256 | if (!prev->bitmap && |
| 1261 | if (prev->offset + prev->bytes > offset) | 1257 | prev->offset + prev->bytes > offset) |
| 1262 | entry = prev; | 1258 | entry = prev; |
| 1263 | break; | ||
| 1264 | } | ||
| 1265 | } | 1259 | } |
| 1266 | } | 1260 | } |
| 1267 | return entry; | 1261 | return entry; |
| @@ -1287,18 +1281,13 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl, | |||
| 1287 | } | 1281 | } |
| 1288 | 1282 | ||
| 1289 | if (entry->bitmap) { | 1283 | if (entry->bitmap) { |
| 1290 | n = &entry->offset_index; | 1284 | n = rb_prev(&entry->offset_index); |
| 1291 | while (1) { | 1285 | if (n) { |
| 1292 | n = rb_prev(n); | ||
| 1293 | if (!n) | ||
| 1294 | break; | ||
| 1295 | prev = rb_entry(n, struct btrfs_free_space, | 1286 | prev = rb_entry(n, struct btrfs_free_space, |
| 1296 | offset_index); | 1287 | offset_index); |
| 1297 | if (!prev->bitmap) { | 1288 | if (!prev->bitmap && |
| 1298 | if (prev->offset + prev->bytes > offset) | 1289 | prev->offset + prev->bytes > offset) |
| 1299 | return prev; | 1290 | return prev; |
| 1300 | break; | ||
| 1301 | } | ||
| 1302 | } | 1291 | } |
| 1303 | if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) | 1292 | if (entry->offset + BITS_PER_BITMAP * ctl->unit > offset) |
| 1304 | return entry; | 1293 | return entry; |
| @@ -1364,7 +1353,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
| 1364 | u64 bitmap_bytes; | 1353 | u64 bitmap_bytes; |
| 1365 | u64 extent_bytes; | 1354 | u64 extent_bytes; |
| 1366 | u64 size = block_group->key.offset; | 1355 | u64 size = block_group->key.offset; |
| 1367 | u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; | 1356 | u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; |
| 1368 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); | 1357 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); |
| 1369 | 1358 | ||
| 1370 | BUG_ON(ctl->total_bitmaps > max_bitmaps); | 1359 | BUG_ON(ctl->total_bitmaps > max_bitmaps); |
| @@ -1650,8 +1639,7 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | |||
| 1650 | * some block groups are so tiny they can't be enveloped by a bitmap, so | 1639 | * some block groups are so tiny they can't be enveloped by a bitmap, so |
| 1651 | * don't even bother to create a bitmap for this | 1640 | * don't even bother to create a bitmap for this |
| 1652 | */ | 1641 | */ |
| 1653 | if (BITS_PER_BITMAP * block_group->sectorsize > | 1642 | if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset) |
| 1654 | block_group->key.offset) | ||
| 1655 | return false; | 1643 | return false; |
| 1656 | 1644 | ||
| 1657 | return true; | 1645 | return true; |
| @@ -1874,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, | |||
| 1874 | { | 1862 | { |
| 1875 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; | 1863 | struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; |
| 1876 | struct btrfs_free_space *info; | 1864 | struct btrfs_free_space *info; |
| 1877 | int ret = 0; | 1865 | int ret; |
| 1866 | bool re_search = false; | ||
| 1878 | 1867 | ||
| 1879 | spin_lock(&ctl->tree_lock); | 1868 | spin_lock(&ctl->tree_lock); |
| 1880 | 1869 | ||
| 1881 | again: | 1870 | again: |
| 1871 | ret = 0; | ||
| 1882 | if (!bytes) | 1872 | if (!bytes) |
| 1883 | goto out_lock; | 1873 | goto out_lock; |
| 1884 | 1874 | ||
| @@ -1891,17 +1881,17 @@ again: | |||
| 1891 | info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), | 1881 | info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), |
| 1892 | 1, 0); | 1882 | 1, 0); |
| 1893 | if (!info) { | 1883 | if (!info) { |
| 1894 | /* the tree logging code might be calling us before we | 1884 | /* |
| 1895 | * have fully loaded the free space rbtree for this | 1885 | * If we found a partial bit of our free space in a |
| 1896 | * block group. So it is possible the entry won't | 1886 | * bitmap but then couldn't find the other part this may |
| 1897 | * be in the rbtree yet at all. The caching code | 1887 | * be a problem, so WARN about it. |
| 1898 | * will make sure not to put it in the rbtree if | ||
| 1899 | * the logging code has pinned it. | ||
| 1900 | */ | 1888 | */ |
| 1889 | WARN_ON(re_search); | ||
| 1901 | goto out_lock; | 1890 | goto out_lock; |
| 1902 | } | 1891 | } |
| 1903 | } | 1892 | } |
| 1904 | 1893 | ||
| 1894 | re_search = false; | ||
| 1905 | if (!info->bitmap) { | 1895 | if (!info->bitmap) { |
| 1906 | unlink_free_space(ctl, info); | 1896 | unlink_free_space(ctl, info); |
| 1907 | if (offset == info->offset) { | 1897 | if (offset == info->offset) { |
| @@ -1947,8 +1937,10 @@ again: | |||
| 1947 | } | 1937 | } |
| 1948 | 1938 | ||
| 1949 | ret = remove_from_bitmap(ctl, info, &offset, &bytes); | 1939 | ret = remove_from_bitmap(ctl, info, &offset, &bytes); |
| 1950 | if (ret == -EAGAIN) | 1940 | if (ret == -EAGAIN) { |
| 1941 | re_search = true; | ||
| 1951 | goto again; | 1942 | goto again; |
| 1943 | } | ||
| 1952 | BUG_ON(ret); /* logic error */ | 1944 | BUG_ON(ret); /* logic error */ |
| 1953 | out_lock: | 1945 | out_lock: |
| 1954 | spin_unlock(&ctl->tree_lock); | 1946 | spin_unlock(&ctl->tree_lock); |
| @@ -2298,10 +2290,10 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, | |||
| 2298 | unsigned long total_found = 0; | 2290 | unsigned long total_found = 0; |
| 2299 | int ret; | 2291 | int ret; |
| 2300 | 2292 | ||
| 2301 | i = offset_to_bit(entry->offset, block_group->sectorsize, | 2293 | i = offset_to_bit(entry->offset, ctl->unit, |
| 2302 | max_t(u64, offset, entry->offset)); | 2294 | max_t(u64, offset, entry->offset)); |
| 2303 | want_bits = bytes_to_bits(bytes, block_group->sectorsize); | 2295 | want_bits = bytes_to_bits(bytes, ctl->unit); |
| 2304 | min_bits = bytes_to_bits(min_bytes, block_group->sectorsize); | 2296 | min_bits = bytes_to_bits(min_bytes, ctl->unit); |
| 2305 | 2297 | ||
| 2306 | again: | 2298 | again: |
| 2307 | found_bits = 0; | 2299 | found_bits = 0; |
| @@ -2325,23 +2317,22 @@ again: | |||
| 2325 | 2317 | ||
| 2326 | total_found += found_bits; | 2318 | total_found += found_bits; |
| 2327 | 2319 | ||
| 2328 | if (cluster->max_size < found_bits * block_group->sectorsize) | 2320 | if (cluster->max_size < found_bits * ctl->unit) |
| 2329 | cluster->max_size = found_bits * block_group->sectorsize; | 2321 | cluster->max_size = found_bits * ctl->unit; |
| 2330 | 2322 | ||
| 2331 | if (total_found < want_bits || cluster->max_size < cont1_bytes) { | 2323 | if (total_found < want_bits || cluster->max_size < cont1_bytes) { |
| 2332 | i = next_zero + 1; | 2324 | i = next_zero + 1; |
| 2333 | goto again; | 2325 | goto again; |
| 2334 | } | 2326 | } |
| 2335 | 2327 | ||
| 2336 | cluster->window_start = start * block_group->sectorsize + | 2328 | cluster->window_start = start * ctl->unit + entry->offset; |
| 2337 | entry->offset; | ||
| 2338 | rb_erase(&entry->offset_index, &ctl->free_space_offset); | 2329 | rb_erase(&entry->offset_index, &ctl->free_space_offset); |
| 2339 | ret = tree_insert_offset(&cluster->root, entry->offset, | 2330 | ret = tree_insert_offset(&cluster->root, entry->offset, |
| 2340 | &entry->offset_index, 1); | 2331 | &entry->offset_index, 1); |
| 2341 | BUG_ON(ret); /* -EEXIST; Logic error */ | 2332 | BUG_ON(ret); /* -EEXIST; Logic error */ |
| 2342 | 2333 | ||
| 2343 | trace_btrfs_setup_cluster(block_group, cluster, | 2334 | trace_btrfs_setup_cluster(block_group, cluster, |
| 2344 | total_found * block_group->sectorsize, 1); | 2335 | total_found * ctl->unit, 1); |
| 2345 | return 0; | 2336 | return 0; |
| 2346 | } | 2337 | } |
| 2347 | 2338 | ||
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index b1a1c929ba80..d26f67a59e36 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
| @@ -434,8 +434,9 @@ int btrfs_save_ino_cache(struct btrfs_root *root, | |||
| 434 | * 3 items for pre-allocation | 434 | * 3 items for pre-allocation |
| 435 | */ | 435 | */ |
| 436 | trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); | 436 | trans->bytes_reserved = btrfs_calc_trans_metadata_size(root, 8); |
| 437 | ret = btrfs_block_rsv_add_noflush(root, trans->block_rsv, | 437 | ret = btrfs_block_rsv_add(root, trans->block_rsv, |
| 438 | trans->bytes_reserved); | 438 | trans->bytes_reserved, |
| 439 | BTRFS_RESERVE_NO_FLUSH); | ||
| 439 | if (ret) | 440 | if (ret) |
| 440 | goto out; | 441 | goto out; |
| 441 | trace_btrfs_space_reservation(root->fs_info, "ino_cache", | 442 | trace_btrfs_space_reservation(root->fs_info, "ino_cache", |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 95542a1b3dfc..cc93b23ca352 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -71,6 +71,7 @@ static const struct file_operations btrfs_dir_file_operations; | |||
| 71 | static struct extent_io_ops btrfs_extent_io_ops; | 71 | static struct extent_io_ops btrfs_extent_io_ops; |
| 72 | 72 | ||
| 73 | static struct kmem_cache *btrfs_inode_cachep; | 73 | static struct kmem_cache *btrfs_inode_cachep; |
| 74 | static struct kmem_cache *btrfs_delalloc_work_cachep; | ||
| 74 | struct kmem_cache *btrfs_trans_handle_cachep; | 75 | struct kmem_cache *btrfs_trans_handle_cachep; |
| 75 | struct kmem_cache *btrfs_transaction_cachep; | 76 | struct kmem_cache *btrfs_transaction_cachep; |
| 76 | struct kmem_cache *btrfs_path_cachep; | 77 | struct kmem_cache *btrfs_path_cachep; |
| @@ -87,13 +88,17 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = { | |||
| 87 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, | 88 | [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, |
| 88 | }; | 89 | }; |
| 89 | 90 | ||
| 90 | static int btrfs_setsize(struct inode *inode, loff_t newsize); | 91 | static int btrfs_setsize(struct inode *inode, struct iattr *attr); |
| 91 | static int btrfs_truncate(struct inode *inode); | 92 | static int btrfs_truncate(struct inode *inode); |
| 92 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); | 93 | static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); |
| 93 | static noinline int cow_file_range(struct inode *inode, | 94 | static noinline int cow_file_range(struct inode *inode, |
| 94 | struct page *locked_page, | 95 | struct page *locked_page, |
| 95 | u64 start, u64 end, int *page_started, | 96 | u64 start, u64 end, int *page_started, |
| 96 | unsigned long *nr_written, int unlock); | 97 | unsigned long *nr_written, int unlock); |
| 98 | static struct extent_map *create_pinned_em(struct inode *inode, u64 start, | ||
| 99 | u64 len, u64 orig_start, | ||
| 100 | u64 block_start, u64 block_len, | ||
| 101 | u64 orig_block_len, int type); | ||
| 97 | 102 | ||
| 98 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, | 103 | static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, |
| 99 | struct inode *inode, struct inode *dir, | 104 | struct inode *inode, struct inode *dir, |
| @@ -698,14 +703,19 @@ retry: | |||
| 698 | 703 | ||
| 699 | em->block_start = ins.objectid; | 704 | em->block_start = ins.objectid; |
| 700 | em->block_len = ins.offset; | 705 | em->block_len = ins.offset; |
| 706 | em->orig_block_len = ins.offset; | ||
| 701 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 707 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 702 | em->compress_type = async_extent->compress_type; | 708 | em->compress_type = async_extent->compress_type; |
| 703 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 709 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 704 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 710 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 711 | em->generation = -1; | ||
| 705 | 712 | ||
| 706 | while (1) { | 713 | while (1) { |
| 707 | write_lock(&em_tree->lock); | 714 | write_lock(&em_tree->lock); |
| 708 | ret = add_extent_mapping(em_tree, em); | 715 | ret = add_extent_mapping(em_tree, em); |
| 716 | if (!ret) | ||
| 717 | list_move(&em->list, | ||
| 718 | &em_tree->modified_extents); | ||
| 709 | write_unlock(&em_tree->lock); | 719 | write_unlock(&em_tree->lock); |
| 710 | if (ret != -EEXIST) { | 720 | if (ret != -EEXIST) { |
| 711 | free_extent_map(em); | 721 | free_extent_map(em); |
| @@ -803,14 +813,14 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | |||
| 803 | * required to start IO on it. It may be clean and already done with | 813 | * required to start IO on it. It may be clean and already done with |
| 804 | * IO when we return. | 814 | * IO when we return. |
| 805 | */ | 815 | */ |
| 806 | static noinline int cow_file_range(struct inode *inode, | 816 | static noinline int __cow_file_range(struct btrfs_trans_handle *trans, |
| 807 | struct page *locked_page, | 817 | struct inode *inode, |
| 808 | u64 start, u64 end, int *page_started, | 818 | struct btrfs_root *root, |
| 809 | unsigned long *nr_written, | 819 | struct page *locked_page, |
| 810 | int unlock) | 820 | u64 start, u64 end, int *page_started, |
| 821 | unsigned long *nr_written, | ||
| 822 | int unlock) | ||
| 811 | { | 823 | { |
| 812 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 813 | struct btrfs_trans_handle *trans; | ||
| 814 | u64 alloc_hint = 0; | 824 | u64 alloc_hint = 0; |
| 815 | u64 num_bytes; | 825 | u64 num_bytes; |
| 816 | unsigned long ram_size; | 826 | unsigned long ram_size; |
| @@ -823,25 +833,10 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 823 | int ret = 0; | 833 | int ret = 0; |
| 824 | 834 | ||
| 825 | BUG_ON(btrfs_is_free_space_inode(inode)); | 835 | BUG_ON(btrfs_is_free_space_inode(inode)); |
| 826 | trans = btrfs_join_transaction(root); | ||
| 827 | if (IS_ERR(trans)) { | ||
| 828 | extent_clear_unlock_delalloc(inode, | ||
| 829 | &BTRFS_I(inode)->io_tree, | ||
| 830 | start, end, locked_page, | ||
| 831 | EXTENT_CLEAR_UNLOCK_PAGE | | ||
| 832 | EXTENT_CLEAR_UNLOCK | | ||
| 833 | EXTENT_CLEAR_DELALLOC | | ||
| 834 | EXTENT_CLEAR_DIRTY | | ||
| 835 | EXTENT_SET_WRITEBACK | | ||
| 836 | EXTENT_END_WRITEBACK); | ||
| 837 | return PTR_ERR(trans); | ||
| 838 | } | ||
| 839 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 840 | 836 | ||
| 841 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); | 837 | num_bytes = (end - start + blocksize) & ~(blocksize - 1); |
| 842 | num_bytes = max(blocksize, num_bytes); | 838 | num_bytes = max(blocksize, num_bytes); |
| 843 | disk_num_bytes = num_bytes; | 839 | disk_num_bytes = num_bytes; |
| 844 | ret = 0; | ||
| 845 | 840 | ||
| 846 | /* if this is a small write inside eof, kick off defrag */ | 841 | /* if this is a small write inside eof, kick off defrag */ |
| 847 | if (num_bytes < 64 * 1024 && | 842 | if (num_bytes < 64 * 1024 && |
| @@ -900,12 +895,17 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 900 | 895 | ||
| 901 | em->block_start = ins.objectid; | 896 | em->block_start = ins.objectid; |
| 902 | em->block_len = ins.offset; | 897 | em->block_len = ins.offset; |
| 898 | em->orig_block_len = ins.offset; | ||
| 903 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 899 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 904 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 900 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 901 | em->generation = -1; | ||
| 905 | 902 | ||
| 906 | while (1) { | 903 | while (1) { |
| 907 | write_lock(&em_tree->lock); | 904 | write_lock(&em_tree->lock); |
| 908 | ret = add_extent_mapping(em_tree, em); | 905 | ret = add_extent_mapping(em_tree, em); |
| 906 | if (!ret) | ||
| 907 | list_move(&em->list, | ||
| 908 | &em_tree->modified_extents); | ||
| 909 | write_unlock(&em_tree->lock); | 909 | write_unlock(&em_tree->lock); |
| 910 | if (ret != -EEXIST) { | 910 | if (ret != -EEXIST) { |
| 911 | free_extent_map(em); | 911 | free_extent_map(em); |
| @@ -952,11 +952,9 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 952 | alloc_hint = ins.objectid + ins.offset; | 952 | alloc_hint = ins.objectid + ins.offset; |
| 953 | start += cur_alloc_size; | 953 | start += cur_alloc_size; |
| 954 | } | 954 | } |
| 955 | ret = 0; | ||
| 956 | out: | 955 | out: |
| 957 | btrfs_end_transaction(trans, root); | ||
| 958 | |||
| 959 | return ret; | 956 | return ret; |
| 957 | |||
| 960 | out_unlock: | 958 | out_unlock: |
| 961 | extent_clear_unlock_delalloc(inode, | 959 | extent_clear_unlock_delalloc(inode, |
| 962 | &BTRFS_I(inode)->io_tree, | 960 | &BTRFS_I(inode)->io_tree, |
| @@ -971,6 +969,39 @@ out_unlock: | |||
| 971 | goto out; | 969 | goto out; |
| 972 | } | 970 | } |
| 973 | 971 | ||
| 972 | static noinline int cow_file_range(struct inode *inode, | ||
| 973 | struct page *locked_page, | ||
| 974 | u64 start, u64 end, int *page_started, | ||
| 975 | unsigned long *nr_written, | ||
| 976 | int unlock) | ||
| 977 | { | ||
| 978 | struct btrfs_trans_handle *trans; | ||
| 979 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 980 | int ret; | ||
| 981 | |||
| 982 | trans = btrfs_join_transaction(root); | ||
| 983 | if (IS_ERR(trans)) { | ||
| 984 | extent_clear_unlock_delalloc(inode, | ||
| 985 | &BTRFS_I(inode)->io_tree, | ||
| 986 | start, end, locked_page, | ||
| 987 | EXTENT_CLEAR_UNLOCK_PAGE | | ||
| 988 | EXTENT_CLEAR_UNLOCK | | ||
| 989 | EXTENT_CLEAR_DELALLOC | | ||
| 990 | EXTENT_CLEAR_DIRTY | | ||
| 991 | EXTENT_SET_WRITEBACK | | ||
| 992 | EXTENT_END_WRITEBACK); | ||
| 993 | return PTR_ERR(trans); | ||
| 994 | } | ||
| 995 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 996 | |||
| 997 | ret = __cow_file_range(trans, inode, root, locked_page, start, end, | ||
| 998 | page_started, nr_written, unlock); | ||
| 999 | |||
| 1000 | btrfs_end_transaction(trans, root); | ||
| 1001 | |||
| 1002 | return ret; | ||
| 1003 | } | ||
| 1004 | |||
| 974 | /* | 1005 | /* |
| 975 | * work queue call back to started compression on a file and pages | 1006 | * work queue call back to started compression on a file and pages |
| 976 | */ | 1007 | */ |
| @@ -1126,6 +1157,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
| 1126 | u64 extent_offset; | 1157 | u64 extent_offset; |
| 1127 | u64 disk_bytenr; | 1158 | u64 disk_bytenr; |
| 1128 | u64 num_bytes; | 1159 | u64 num_bytes; |
| 1160 | u64 disk_num_bytes; | ||
| 1129 | int extent_type; | 1161 | int extent_type; |
| 1130 | int ret, err; | 1162 | int ret, err; |
| 1131 | int type; | 1163 | int type; |
| @@ -1228,6 +1260,8 @@ next_slot: | |||
| 1228 | extent_offset = btrfs_file_extent_offset(leaf, fi); | 1260 | extent_offset = btrfs_file_extent_offset(leaf, fi); |
| 1229 | extent_end = found_key.offset + | 1261 | extent_end = found_key.offset + |
| 1230 | btrfs_file_extent_num_bytes(leaf, fi); | 1262 | btrfs_file_extent_num_bytes(leaf, fi); |
| 1263 | disk_num_bytes = | ||
| 1264 | btrfs_file_extent_disk_num_bytes(leaf, fi); | ||
| 1231 | if (extent_end <= start) { | 1265 | if (extent_end <= start) { |
| 1232 | path->slots[0]++; | 1266 | path->slots[0]++; |
| 1233 | goto next_slot; | 1267 | goto next_slot; |
| @@ -1281,9 +1315,9 @@ out_check: | |||
| 1281 | 1315 | ||
| 1282 | btrfs_release_path(path); | 1316 | btrfs_release_path(path); |
| 1283 | if (cow_start != (u64)-1) { | 1317 | if (cow_start != (u64)-1) { |
| 1284 | ret = cow_file_range(inode, locked_page, cow_start, | 1318 | ret = __cow_file_range(trans, inode, root, locked_page, |
| 1285 | found_key.offset - 1, page_started, | 1319 | cow_start, found_key.offset - 1, |
| 1286 | nr_written, 1); | 1320 | page_started, nr_written, 1); |
| 1287 | if (ret) { | 1321 | if (ret) { |
| 1288 | btrfs_abort_transaction(trans, root, ret); | 1322 | btrfs_abort_transaction(trans, root, ret); |
| 1289 | goto error; | 1323 | goto error; |
| @@ -1298,16 +1332,21 @@ out_check: | |||
| 1298 | em = alloc_extent_map(); | 1332 | em = alloc_extent_map(); |
| 1299 | BUG_ON(!em); /* -ENOMEM */ | 1333 | BUG_ON(!em); /* -ENOMEM */ |
| 1300 | em->start = cur_offset; | 1334 | em->start = cur_offset; |
| 1301 | em->orig_start = em->start; | 1335 | em->orig_start = found_key.offset - extent_offset; |
| 1302 | em->len = num_bytes; | 1336 | em->len = num_bytes; |
| 1303 | em->block_len = num_bytes; | 1337 | em->block_len = num_bytes; |
| 1304 | em->block_start = disk_bytenr; | 1338 | em->block_start = disk_bytenr; |
| 1339 | em->orig_block_len = disk_num_bytes; | ||
| 1305 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1340 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 1306 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1341 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 1307 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | 1342 | set_bit(EXTENT_FLAG_FILLING, &em->flags); |
| 1343 | em->generation = -1; | ||
| 1308 | while (1) { | 1344 | while (1) { |
| 1309 | write_lock(&em_tree->lock); | 1345 | write_lock(&em_tree->lock); |
| 1310 | ret = add_extent_mapping(em_tree, em); | 1346 | ret = add_extent_mapping(em_tree, em); |
| 1347 | if (!ret) | ||
| 1348 | list_move(&em->list, | ||
| 1349 | &em_tree->modified_extents); | ||
| 1311 | write_unlock(&em_tree->lock); | 1350 | write_unlock(&em_tree->lock); |
| 1312 | if (ret != -EEXIST) { | 1351 | if (ret != -EEXIST) { |
| 1313 | free_extent_map(em); | 1352 | free_extent_map(em); |
| @@ -1352,8 +1391,9 @@ out_check: | |||
| 1352 | } | 1391 | } |
| 1353 | 1392 | ||
| 1354 | if (cow_start != (u64)-1) { | 1393 | if (cow_start != (u64)-1) { |
| 1355 | ret = cow_file_range(inode, locked_page, cow_start, end, | 1394 | ret = __cow_file_range(trans, inode, root, locked_page, |
| 1356 | page_started, nr_written, 1); | 1395 | cow_start, end, |
| 1396 | page_started, nr_written, 1); | ||
| 1357 | if (ret) { | 1397 | if (ret) { |
| 1358 | btrfs_abort_transaction(trans, root, ret); | 1398 | btrfs_abort_transaction(trans, root, ret); |
| 1359 | goto error; | 1399 | goto error; |
| @@ -1531,7 +1571,6 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
| 1531 | unsigned long bio_flags) | 1571 | unsigned long bio_flags) |
| 1532 | { | 1572 | { |
| 1533 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 1573 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
| 1534 | struct btrfs_mapping_tree *map_tree; | ||
| 1535 | u64 logical = (u64)bio->bi_sector << 9; | 1574 | u64 logical = (u64)bio->bi_sector << 9; |
| 1536 | u64 length = 0; | 1575 | u64 length = 0; |
| 1537 | u64 map_length; | 1576 | u64 map_length; |
| @@ -1541,11 +1580,10 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
| 1541 | return 0; | 1580 | return 0; |
| 1542 | 1581 | ||
| 1543 | length = bio->bi_size; | 1582 | length = bio->bi_size; |
| 1544 | map_tree = &root->fs_info->mapping_tree; | ||
| 1545 | map_length = length; | 1583 | map_length = length; |
| 1546 | ret = btrfs_map_block(map_tree, READ, logical, | 1584 | ret = btrfs_map_block(root->fs_info, READ, logical, |
| 1547 | &map_length, NULL, 0); | 1585 | &map_length, NULL, 0); |
| 1548 | /* Will always return 0 or 1 with map_multi == NULL */ | 1586 | /* Will always return 0 with map_multi == NULL */ |
| 1549 | BUG_ON(ret < 0); | 1587 | BUG_ON(ret < 0); |
| 1550 | if (map_length < length + size) | 1588 | if (map_length < length + size) |
| 1551 | return 1; | 1589 | return 1; |
| @@ -1586,7 +1624,12 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 1586 | u64 bio_offset) | 1624 | u64 bio_offset) |
| 1587 | { | 1625 | { |
| 1588 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1626 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1589 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1627 | int ret; |
| 1628 | |||
| 1629 | ret = btrfs_map_bio(root, rw, bio, mirror_num, 1); | ||
| 1630 | if (ret) | ||
| 1631 | bio_endio(bio, ret); | ||
| 1632 | return ret; | ||
| 1590 | } | 1633 | } |
| 1591 | 1634 | ||
| 1592 | /* | 1635 | /* |
| @@ -1601,6 +1644,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 1601 | int ret = 0; | 1644 | int ret = 0; |
| 1602 | int skip_sum; | 1645 | int skip_sum; |
| 1603 | int metadata = 0; | 1646 | int metadata = 0; |
| 1647 | int async = !atomic_read(&BTRFS_I(inode)->sync_writers); | ||
| 1604 | 1648 | ||
| 1605 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1649 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
| 1606 | 1650 | ||
| @@ -1610,31 +1654,43 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 1610 | if (!(rw & REQ_WRITE)) { | 1654 | if (!(rw & REQ_WRITE)) { |
| 1611 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); | 1655 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, metadata); |
| 1612 | if (ret) | 1656 | if (ret) |
| 1613 | return ret; | 1657 | goto out; |
| 1614 | 1658 | ||
| 1615 | if (bio_flags & EXTENT_BIO_COMPRESSED) { | 1659 | if (bio_flags & EXTENT_BIO_COMPRESSED) { |
| 1616 | return btrfs_submit_compressed_read(inode, bio, | 1660 | ret = btrfs_submit_compressed_read(inode, bio, |
| 1617 | mirror_num, bio_flags); | 1661 | mirror_num, |
| 1662 | bio_flags); | ||
| 1663 | goto out; | ||
| 1618 | } else if (!skip_sum) { | 1664 | } else if (!skip_sum) { |
| 1619 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); | 1665 | ret = btrfs_lookup_bio_sums(root, inode, bio, NULL); |
| 1620 | if (ret) | 1666 | if (ret) |
| 1621 | return ret; | 1667 | goto out; |
| 1622 | } | 1668 | } |
| 1623 | goto mapit; | 1669 | goto mapit; |
| 1624 | } else if (!skip_sum) { | 1670 | } else if (async && !skip_sum) { |
| 1625 | /* csum items have already been cloned */ | 1671 | /* csum items have already been cloned */ |
| 1626 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | 1672 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) |
| 1627 | goto mapit; | 1673 | goto mapit; |
| 1628 | /* we're doing a write, do the async checksumming */ | 1674 | /* we're doing a write, do the async checksumming */ |
| 1629 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1675 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 1630 | inode, rw, bio, mirror_num, | 1676 | inode, rw, bio, mirror_num, |
| 1631 | bio_flags, bio_offset, | 1677 | bio_flags, bio_offset, |
| 1632 | __btrfs_submit_bio_start, | 1678 | __btrfs_submit_bio_start, |
| 1633 | __btrfs_submit_bio_done); | 1679 | __btrfs_submit_bio_done); |
| 1680 | goto out; | ||
| 1681 | } else if (!skip_sum) { | ||
| 1682 | ret = btrfs_csum_one_bio(root, inode, bio, 0, 0); | ||
| 1683 | if (ret) | ||
| 1684 | goto out; | ||
| 1634 | } | 1685 | } |
| 1635 | 1686 | ||
| 1636 | mapit: | 1687 | mapit: |
| 1637 | return btrfs_map_bio(root, rw, bio, mirror_num, 0); | 1688 | ret = btrfs_map_bio(root, rw, bio, mirror_num, 0); |
| 1689 | |||
| 1690 | out: | ||
| 1691 | if (ret < 0) | ||
| 1692 | bio_endio(bio, ret); | ||
| 1693 | return ret; | ||
| 1638 | } | 1694 | } |
| 1639 | 1695 | ||
| 1640 | /* | 1696 | /* |
| @@ -1657,8 +1713,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans, | |||
| 1657 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 1713 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 1658 | struct extent_state **cached_state) | 1714 | struct extent_state **cached_state) |
| 1659 | { | 1715 | { |
| 1660 | if ((end & (PAGE_CACHE_SIZE - 1)) == 0) | 1716 | WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0); |
| 1661 | WARN_ON(1); | ||
| 1662 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, | 1717 | return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, |
| 1663 | cached_state, GFP_NOFS); | 1718 | cached_state, GFP_NOFS); |
| 1664 | } | 1719 | } |
| @@ -1867,22 +1922,20 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 1867 | 1922 | ||
| 1868 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1923 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
| 1869 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ | 1924 | BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */ |
| 1870 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1925 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1871 | if (!ret) { | 1926 | if (nolock) |
| 1872 | if (nolock) | 1927 | trans = btrfs_join_transaction_nolock(root); |
| 1873 | trans = btrfs_join_transaction_nolock(root); | 1928 | else |
| 1874 | else | 1929 | trans = btrfs_join_transaction(root); |
| 1875 | trans = btrfs_join_transaction(root); | 1930 | if (IS_ERR(trans)) { |
| 1876 | if (IS_ERR(trans)) { | 1931 | ret = PTR_ERR(trans); |
| 1877 | ret = PTR_ERR(trans); | 1932 | trans = NULL; |
| 1878 | trans = NULL; | 1933 | goto out; |
| 1879 | goto out; | ||
| 1880 | } | ||
| 1881 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1882 | ret = btrfs_update_inode_fallback(trans, root, inode); | ||
| 1883 | if (ret) /* -ENOMEM or corruption */ | ||
| 1884 | btrfs_abort_transaction(trans, root, ret); | ||
| 1885 | } | 1934 | } |
| 1935 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1936 | ret = btrfs_update_inode_fallback(trans, root, inode); | ||
| 1937 | if (ret) /* -ENOMEM or corruption */ | ||
| 1938 | btrfs_abort_transaction(trans, root, ret); | ||
| 1886 | goto out; | 1939 | goto out; |
| 1887 | } | 1940 | } |
| 1888 | 1941 | ||
| @@ -1931,15 +1984,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
| 1931 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1984 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
| 1932 | &ordered_extent->list); | 1985 | &ordered_extent->list); |
| 1933 | 1986 | ||
| 1934 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1987 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1935 | if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { | 1988 | ret = btrfs_update_inode_fallback(trans, root, inode); |
| 1936 | ret = btrfs_update_inode_fallback(trans, root, inode); | 1989 | if (ret) { /* -ENOMEM or corruption */ |
| 1937 | if (ret) { /* -ENOMEM or corruption */ | 1990 | btrfs_abort_transaction(trans, root, ret); |
| 1938 | btrfs_abort_transaction(trans, root, ret); | 1991 | goto out_unlock; |
| 1939 | goto out_unlock; | ||
| 1940 | } | ||
| 1941 | } else { | ||
| 1942 | btrfs_set_inode_last_trans(trans, inode); | ||
| 1943 | } | 1992 | } |
| 1944 | ret = 0; | 1993 | ret = 0; |
| 1945 | out_unlock: | 1994 | out_unlock: |
| @@ -2429,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2429 | continue; | 2478 | continue; |
| 2430 | } | 2479 | } |
| 2431 | nr_truncate++; | 2480 | nr_truncate++; |
| 2481 | |||
| 2482 | /* 1 for the orphan item deletion. */ | ||
| 2483 | trans = btrfs_start_transaction(root, 1); | ||
| 2484 | if (IS_ERR(trans)) { | ||
| 2485 | ret = PTR_ERR(trans); | ||
| 2486 | goto out; | ||
| 2487 | } | ||
| 2488 | ret = btrfs_orphan_add(trans, inode); | ||
| 2489 | btrfs_end_transaction(trans, root); | ||
| 2490 | if (ret) | ||
| 2491 | goto out; | ||
| 2492 | |||
| 2432 | ret = btrfs_truncate(inode); | 2493 | ret = btrfs_truncate(inode); |
| 2433 | } else { | 2494 | } else { |
| 2434 | nr_unlink++; | 2495 | nr_unlink++; |
| @@ -3074,7 +3135,6 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 3074 | struct btrfs_trans_handle *trans; | 3135 | struct btrfs_trans_handle *trans; |
| 3075 | struct inode *inode = dentry->d_inode; | 3136 | struct inode *inode = dentry->d_inode; |
| 3076 | int ret; | 3137 | int ret; |
| 3077 | unsigned long nr = 0; | ||
| 3078 | 3138 | ||
| 3079 | trans = __unlink_start_trans(dir, dentry); | 3139 | trans = __unlink_start_trans(dir, dentry); |
| 3080 | if (IS_ERR(trans)) | 3140 | if (IS_ERR(trans)) |
| @@ -3094,9 +3154,8 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 3094 | } | 3154 | } |
| 3095 | 3155 | ||
| 3096 | out: | 3156 | out: |
| 3097 | nr = trans->blocks_used; | ||
| 3098 | __unlink_end_trans(trans, root); | 3157 | __unlink_end_trans(trans, root); |
| 3099 | btrfs_btree_balance_dirty(root, nr); | 3158 | btrfs_btree_balance_dirty(root); |
| 3100 | return ret; | 3159 | return ret; |
| 3101 | } | 3160 | } |
| 3102 | 3161 | ||
| @@ -3186,7 +3245,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 3186 | int err = 0; | 3245 | int err = 0; |
| 3187 | struct btrfs_root *root = BTRFS_I(dir)->root; | 3246 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 3188 | struct btrfs_trans_handle *trans; | 3247 | struct btrfs_trans_handle *trans; |
| 3189 | unsigned long nr = 0; | ||
| 3190 | 3248 | ||
| 3191 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 3249 | if (inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 3192 | return -ENOTEMPTY; | 3250 | return -ENOTEMPTY; |
| @@ -3215,9 +3273,8 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 3215 | if (!err) | 3273 | if (!err) |
| 3216 | btrfs_i_size_write(inode, 0); | 3274 | btrfs_i_size_write(inode, 0); |
| 3217 | out: | 3275 | out: |
| 3218 | nr = trans->blocks_used; | ||
| 3219 | __unlink_end_trans(trans, root); | 3276 | __unlink_end_trans(trans, root); |
| 3220 | btrfs_btree_balance_dirty(root, nr); | 3277 | btrfs_btree_balance_dirty(root); |
| 3221 | 3278 | ||
| 3222 | return err; | 3279 | return err; |
| 3223 | } | 3280 | } |
| @@ -3497,11 +3554,11 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len, | |||
| 3497 | if (ret) | 3554 | if (ret) |
| 3498 | goto out; | 3555 | goto out; |
| 3499 | 3556 | ||
| 3500 | ret = -ENOMEM; | ||
| 3501 | again: | 3557 | again: |
| 3502 | page = find_or_create_page(mapping, index, mask); | 3558 | page = find_or_create_page(mapping, index, mask); |
| 3503 | if (!page) { | 3559 | if (!page) { |
| 3504 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3560 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 3561 | ret = -ENOMEM; | ||
| 3505 | goto out; | 3562 | goto out; |
| 3506 | } | 3563 | } |
| 3507 | 3564 | ||
| @@ -3550,7 +3607,6 @@ again: | |||
| 3550 | goto out_unlock; | 3607 | goto out_unlock; |
| 3551 | } | 3608 | } |
| 3552 | 3609 | ||
| 3553 | ret = 0; | ||
| 3554 | if (offset != PAGE_CACHE_SIZE) { | 3610 | if (offset != PAGE_CACHE_SIZE) { |
| 3555 | if (!len) | 3611 | if (!len) |
| 3556 | len = PAGE_CACHE_SIZE - offset; | 3612 | len = PAGE_CACHE_SIZE - offset; |
| @@ -3621,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3621 | block_end - cur_offset, 0); | 3677 | block_end - cur_offset, 0); |
| 3622 | if (IS_ERR(em)) { | 3678 | if (IS_ERR(em)) { |
| 3623 | err = PTR_ERR(em); | 3679 | err = PTR_ERR(em); |
| 3680 | em = NULL; | ||
| 3624 | break; | 3681 | break; |
| 3625 | } | 3682 | } |
| 3626 | last_byte = min(extent_map_end(em), block_end); | 3683 | last_byte = min(extent_map_end(em), block_end); |
| @@ -3668,6 +3725,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) | |||
| 3668 | 3725 | ||
| 3669 | hole_em->block_start = EXTENT_MAP_HOLE; | 3726 | hole_em->block_start = EXTENT_MAP_HOLE; |
| 3670 | hole_em->block_len = 0; | 3727 | hole_em->block_len = 0; |
| 3728 | hole_em->orig_block_len = 0; | ||
| 3671 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; | 3729 | hole_em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 3672 | hole_em->compress_type = BTRFS_COMPRESS_NONE; | 3730 | hole_em->compress_type = BTRFS_COMPRESS_NONE; |
| 3673 | hole_em->generation = trans->transid; | 3731 | hole_em->generation = trans->transid; |
| @@ -3703,16 +3761,27 @@ next: | |||
| 3703 | return err; | 3761 | return err; |
| 3704 | } | 3762 | } |
| 3705 | 3763 | ||
| 3706 | static int btrfs_setsize(struct inode *inode, loff_t newsize) | 3764 | static int btrfs_setsize(struct inode *inode, struct iattr *attr) |
| 3707 | { | 3765 | { |
| 3708 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3766 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3709 | struct btrfs_trans_handle *trans; | 3767 | struct btrfs_trans_handle *trans; |
| 3710 | loff_t oldsize = i_size_read(inode); | 3768 | loff_t oldsize = i_size_read(inode); |
| 3769 | loff_t newsize = attr->ia_size; | ||
| 3770 | int mask = attr->ia_valid; | ||
| 3711 | int ret; | 3771 | int ret; |
| 3712 | 3772 | ||
| 3713 | if (newsize == oldsize) | 3773 | if (newsize == oldsize) |
| 3714 | return 0; | 3774 | return 0; |
| 3715 | 3775 | ||
| 3776 | /* | ||
| 3777 | * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a | ||
| 3778 | * special case where we need to update the times despite not having | ||
| 3779 | * these flags set. For all other operations the VFS set these flags | ||
| 3780 | * explicitly if it wants a timestamp update. | ||
| 3781 | */ | ||
| 3782 | if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) | ||
| 3783 | inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); | ||
| 3784 | |||
| 3716 | if (newsize > oldsize) { | 3785 | if (newsize > oldsize) { |
| 3717 | truncate_pagecache(inode, oldsize, newsize); | 3786 | truncate_pagecache(inode, oldsize, newsize); |
| 3718 | ret = btrfs_cont_expand(inode, oldsize, newsize); | 3787 | ret = btrfs_cont_expand(inode, oldsize, newsize); |
| @@ -3738,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize) | |||
| 3738 | set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, | 3807 | set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
| 3739 | &BTRFS_I(inode)->runtime_flags); | 3808 | &BTRFS_I(inode)->runtime_flags); |
| 3740 | 3809 | ||
| 3810 | /* | ||
| 3811 | * 1 for the orphan item we're going to add | ||
| 3812 | * 1 for the orphan item deletion. | ||
| 3813 | */ | ||
| 3814 | trans = btrfs_start_transaction(root, 2); | ||
| 3815 | if (IS_ERR(trans)) | ||
| 3816 | return PTR_ERR(trans); | ||
| 3817 | |||
| 3818 | /* | ||
| 3819 | * We need to do this in case we fail at _any_ point during the | ||
| 3820 | * actual truncate. Once we do the truncate_setsize we could | ||
| 3821 | * invalidate pages which forces any outstanding ordered io to | ||
| 3822 | * be instantly completed which will give us extents that need | ||
| 3823 | * to be truncated. If we fail to get an orphan inode down we | ||
| 3824 | * could have left over extents that were never meant to live, | ||
| 3825 | * so we need to garuntee from this point on that everything | ||
| 3826 | * will be consistent. | ||
| 3827 | */ | ||
| 3828 | ret = btrfs_orphan_add(trans, inode); | ||
| 3829 | btrfs_end_transaction(trans, root); | ||
| 3830 | if (ret) | ||
| 3831 | return ret; | ||
| 3832 | |||
| 3741 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | 3833 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
| 3742 | truncate_setsize(inode, newsize); | 3834 | truncate_setsize(inode, newsize); |
| 3743 | ret = btrfs_truncate(inode); | 3835 | ret = btrfs_truncate(inode); |
| 3836 | if (ret && inode->i_nlink) | ||
| 3837 | btrfs_orphan_del(NULL, inode); | ||
| 3744 | } | 3838 | } |
| 3745 | 3839 | ||
| 3746 | return ret; | 3840 | return ret; |
| @@ -3760,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 3760 | return err; | 3854 | return err; |
| 3761 | 3855 | ||
| 3762 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { | 3856 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
| 3763 | err = btrfs_setsize(inode, attr->ia_size); | 3857 | err = btrfs_setsize(inode, attr); |
| 3764 | if (err) | 3858 | if (err) |
| 3765 | return err; | 3859 | return err; |
| 3766 | } | 3860 | } |
| @@ -3783,7 +3877,6 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3783 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3877 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3784 | struct btrfs_block_rsv *rsv, *global_rsv; | 3878 | struct btrfs_block_rsv *rsv, *global_rsv; |
| 3785 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 3879 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 3786 | unsigned long nr; | ||
| 3787 | int ret; | 3880 | int ret; |
| 3788 | 3881 | ||
| 3789 | trace_btrfs_inode_evict(inode); | 3882 | trace_btrfs_inode_evict(inode); |
| @@ -3829,7 +3922,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3829 | * inode item when doing the truncate. | 3922 | * inode item when doing the truncate. |
| 3830 | */ | 3923 | */ |
| 3831 | while (1) { | 3924 | while (1) { |
| 3832 | ret = btrfs_block_rsv_refill_noflush(root, rsv, min_size); | 3925 | ret = btrfs_block_rsv_refill(root, rsv, min_size, |
| 3926 | BTRFS_RESERVE_FLUSH_LIMIT); | ||
| 3833 | 3927 | ||
| 3834 | /* | 3928 | /* |
| 3835 | * Try and steal from the global reserve since we will | 3929 | * Try and steal from the global reserve since we will |
| @@ -3847,7 +3941,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3847 | goto no_delete; | 3941 | goto no_delete; |
| 3848 | } | 3942 | } |
| 3849 | 3943 | ||
| 3850 | trans = btrfs_start_transaction_noflush(root, 1); | 3944 | trans = btrfs_start_transaction_lflush(root, 1); |
| 3851 | if (IS_ERR(trans)) { | 3945 | if (IS_ERR(trans)) { |
| 3852 | btrfs_orphan_del(NULL, inode); | 3946 | btrfs_orphan_del(NULL, inode); |
| 3853 | btrfs_free_block_rsv(root, rsv); | 3947 | btrfs_free_block_rsv(root, rsv); |
| @@ -3864,10 +3958,9 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3864 | ret = btrfs_update_inode(trans, root, inode); | 3958 | ret = btrfs_update_inode(trans, root, inode); |
| 3865 | BUG_ON(ret); | 3959 | BUG_ON(ret); |
| 3866 | 3960 | ||
| 3867 | nr = trans->blocks_used; | ||
| 3868 | btrfs_end_transaction(trans, root); | 3961 | btrfs_end_transaction(trans, root); |
| 3869 | trans = NULL; | 3962 | trans = NULL; |
| 3870 | btrfs_btree_balance_dirty(root, nr); | 3963 | btrfs_btree_balance_dirty(root); |
| 3871 | } | 3964 | } |
| 3872 | 3965 | ||
| 3873 | btrfs_free_block_rsv(root, rsv); | 3966 | btrfs_free_block_rsv(root, rsv); |
| @@ -3883,9 +3976,8 @@ void btrfs_evict_inode(struct inode *inode) | |||
| 3883 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) | 3976 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)) |
| 3884 | btrfs_return_ino(root, btrfs_ino(inode)); | 3977 | btrfs_return_ino(root, btrfs_ino(inode)); |
| 3885 | 3978 | ||
| 3886 | nr = trans->blocks_used; | ||
| 3887 | btrfs_end_transaction(trans, root); | 3979 | btrfs_end_transaction(trans, root); |
| 3888 | btrfs_btree_balance_dirty(root, nr); | 3980 | btrfs_btree_balance_dirty(root); |
| 3889 | no_delete: | 3981 | no_delete: |
| 3890 | clear_inode(inode); | 3982 | clear_inode(inode); |
| 3891 | return; | 3983 | return; |
| @@ -4219,16 +4311,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) | |||
| 4219 | if (dentry->d_name.len > BTRFS_NAME_LEN) | 4311 | if (dentry->d_name.len > BTRFS_NAME_LEN) |
| 4220 | return ERR_PTR(-ENAMETOOLONG); | 4312 | return ERR_PTR(-ENAMETOOLONG); |
| 4221 | 4313 | ||
| 4222 | if (unlikely(d_need_lookup(dentry))) { | 4314 | ret = btrfs_inode_by_name(dir, dentry, &location); |
| 4223 | memcpy(&location, dentry->d_fsdata, sizeof(struct btrfs_key)); | ||
| 4224 | kfree(dentry->d_fsdata); | ||
| 4225 | dentry->d_fsdata = NULL; | ||
| 4226 | /* This thing is hashed, drop it for now */ | ||
| 4227 | d_drop(dentry); | ||
| 4228 | } else { | ||
| 4229 | ret = btrfs_inode_by_name(dir, dentry, &location); | ||
| 4230 | } | ||
| 4231 | |||
| 4232 | if (ret < 0) | 4315 | if (ret < 0) |
| 4233 | return ERR_PTR(ret); | 4316 | return ERR_PTR(ret); |
| 4234 | 4317 | ||
| @@ -4298,11 +4381,6 @@ static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry, | |||
| 4298 | struct dentry *ret; | 4381 | struct dentry *ret; |
| 4299 | 4382 | ||
| 4300 | ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); | 4383 | ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); |
| 4301 | if (unlikely(d_need_lookup(dentry))) { | ||
| 4302 | spin_lock(&dentry->d_lock); | ||
| 4303 | dentry->d_flags &= ~DCACHE_NEED_LOOKUP; | ||
| 4304 | spin_unlock(&dentry->d_lock); | ||
| 4305 | } | ||
| 4306 | return ret; | 4384 | return ret; |
| 4307 | } | 4385 | } |
| 4308 | 4386 | ||
| @@ -4775,8 +4853,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4775 | if (S_ISREG(mode)) { | 4853 | if (S_ISREG(mode)) { |
| 4776 | if (btrfs_test_opt(root, NODATASUM)) | 4854 | if (btrfs_test_opt(root, NODATASUM)) |
| 4777 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | 4855 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; |
| 4778 | if (btrfs_test_opt(root, NODATACOW) || | 4856 | if (btrfs_test_opt(root, NODATACOW)) |
| 4779 | (BTRFS_I(dir)->flags & BTRFS_INODE_NODATACOW)) | ||
| 4780 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 4857 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
| 4781 | } | 4858 | } |
| 4782 | 4859 | ||
| @@ -4842,7 +4919,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, | |||
| 4842 | ret = btrfs_insert_dir_item(trans, root, name, name_len, | 4919 | ret = btrfs_insert_dir_item(trans, root, name, name_len, |
| 4843 | parent_inode, &key, | 4920 | parent_inode, &key, |
| 4844 | btrfs_inode_type(inode), index); | 4921 | btrfs_inode_type(inode), index); |
| 4845 | if (ret == -EEXIST) | 4922 | if (ret == -EEXIST || ret == -EOVERFLOW) |
| 4846 | goto fail_dir_item; | 4923 | goto fail_dir_item; |
| 4847 | else if (ret) { | 4924 | else if (ret) { |
| 4848 | btrfs_abort_transaction(trans, root, ret); | 4925 | btrfs_abort_transaction(trans, root, ret); |
| @@ -4897,7 +4974,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4897 | int err; | 4974 | int err; |
| 4898 | int drop_inode = 0; | 4975 | int drop_inode = 0; |
| 4899 | u64 objectid; | 4976 | u64 objectid; |
| 4900 | unsigned long nr = 0; | ||
| 4901 | u64 index = 0; | 4977 | u64 index = 0; |
| 4902 | 4978 | ||
| 4903 | if (!new_valid_dev(rdev)) | 4979 | if (!new_valid_dev(rdev)) |
| @@ -4930,6 +5006,12 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4930 | goto out_unlock; | 5006 | goto out_unlock; |
| 4931 | } | 5007 | } |
| 4932 | 5008 | ||
| 5009 | err = btrfs_update_inode(trans, root, inode); | ||
| 5010 | if (err) { | ||
| 5011 | drop_inode = 1; | ||
| 5012 | goto out_unlock; | ||
| 5013 | } | ||
| 5014 | |||
| 4933 | /* | 5015 | /* |
| 4934 | * If the active LSM wants to access the inode during | 5016 | * If the active LSM wants to access the inode during |
| 4935 | * d_instantiate it needs these. Smack checks to see | 5017 | * d_instantiate it needs these. Smack checks to see |
| @@ -4947,9 +5029,8 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4947 | d_instantiate(dentry, inode); | 5029 | d_instantiate(dentry, inode); |
| 4948 | } | 5030 | } |
| 4949 | out_unlock: | 5031 | out_unlock: |
| 4950 | nr = trans->blocks_used; | ||
| 4951 | btrfs_end_transaction(trans, root); | 5032 | btrfs_end_transaction(trans, root); |
| 4952 | btrfs_btree_balance_dirty(root, nr); | 5033 | btrfs_btree_balance_dirty(root); |
| 4953 | if (drop_inode) { | 5034 | if (drop_inode) { |
| 4954 | inode_dec_link_count(inode); | 5035 | inode_dec_link_count(inode); |
| 4955 | iput(inode); | 5036 | iput(inode); |
| @@ -4963,9 +5044,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4963 | struct btrfs_trans_handle *trans; | 5044 | struct btrfs_trans_handle *trans; |
| 4964 | struct btrfs_root *root = BTRFS_I(dir)->root; | 5045 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 4965 | struct inode *inode = NULL; | 5046 | struct inode *inode = NULL; |
| 4966 | int drop_inode = 0; | 5047 | int drop_inode_on_err = 0; |
| 4967 | int err; | 5048 | int err; |
| 4968 | unsigned long nr = 0; | ||
| 4969 | u64 objectid; | 5049 | u64 objectid; |
| 4970 | u64 index = 0; | 5050 | u64 index = 0; |
| 4971 | 5051 | ||
| @@ -4989,12 +5069,15 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4989 | err = PTR_ERR(inode); | 5069 | err = PTR_ERR(inode); |
| 4990 | goto out_unlock; | 5070 | goto out_unlock; |
| 4991 | } | 5071 | } |
| 5072 | drop_inode_on_err = 1; | ||
| 4992 | 5073 | ||
| 4993 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); | 5074 | err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name); |
| 4994 | if (err) { | 5075 | if (err) |
| 4995 | drop_inode = 1; | 5076 | goto out_unlock; |
| 5077 | |||
| 5078 | err = btrfs_update_inode(trans, root, inode); | ||
| 5079 | if (err) | ||
| 4996 | goto out_unlock; | 5080 | goto out_unlock; |
| 4997 | } | ||
| 4998 | 5081 | ||
| 4999 | /* | 5082 | /* |
| 5000 | * If the active LSM wants to access the inode during | 5083 | * If the active LSM wants to access the inode during |
| @@ -5007,21 +5090,20 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 5007 | 5090 | ||
| 5008 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); | 5091 | err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index); |
| 5009 | if (err) | 5092 | if (err) |
| 5010 | drop_inode = 1; | 5093 | goto out_unlock; |
| 5011 | else { | 5094 | |
| 5012 | inode->i_mapping->a_ops = &btrfs_aops; | 5095 | inode->i_mapping->a_ops = &btrfs_aops; |
| 5013 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; | 5096 | inode->i_mapping->backing_dev_info = &root->fs_info->bdi; |
| 5014 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; | 5097 | BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; |
| 5015 | d_instantiate(dentry, inode); | 5098 | d_instantiate(dentry, inode); |
| 5016 | } | 5099 | |
| 5017 | out_unlock: | 5100 | out_unlock: |
| 5018 | nr = trans->blocks_used; | ||
| 5019 | btrfs_end_transaction(trans, root); | 5101 | btrfs_end_transaction(trans, root); |
| 5020 | if (drop_inode) { | 5102 | if (err && drop_inode_on_err) { |
| 5021 | inode_dec_link_count(inode); | 5103 | inode_dec_link_count(inode); |
| 5022 | iput(inode); | 5104 | iput(inode); |
| 5023 | } | 5105 | } |
| 5024 | btrfs_btree_balance_dirty(root, nr); | 5106 | btrfs_btree_balance_dirty(root); |
| 5025 | return err; | 5107 | return err; |
| 5026 | } | 5108 | } |
| 5027 | 5109 | ||
| @@ -5032,7 +5114,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5032 | struct btrfs_root *root = BTRFS_I(dir)->root; | 5114 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 5033 | struct inode *inode = old_dentry->d_inode; | 5115 | struct inode *inode = old_dentry->d_inode; |
| 5034 | u64 index; | 5116 | u64 index; |
| 5035 | unsigned long nr = 0; | ||
| 5036 | int err; | 5117 | int err; |
| 5037 | int drop_inode = 0; | 5118 | int drop_inode = 0; |
| 5038 | 5119 | ||
| @@ -5062,6 +5143,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5062 | inode_inc_iversion(inode); | 5143 | inode_inc_iversion(inode); |
| 5063 | inode->i_ctime = CURRENT_TIME; | 5144 | inode->i_ctime = CURRENT_TIME; |
| 5064 | ihold(inode); | 5145 | ihold(inode); |
| 5146 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); | ||
| 5065 | 5147 | ||
| 5066 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); | 5148 | err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index); |
| 5067 | 5149 | ||
| @@ -5076,14 +5158,13 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 5076 | btrfs_log_new_name(trans, inode, NULL, parent); | 5158 | btrfs_log_new_name(trans, inode, NULL, parent); |
| 5077 | } | 5159 | } |
| 5078 | 5160 | ||
| 5079 | nr = trans->blocks_used; | ||
| 5080 | btrfs_end_transaction(trans, root); | 5161 | btrfs_end_transaction(trans, root); |
| 5081 | fail: | 5162 | fail: |
| 5082 | if (drop_inode) { | 5163 | if (drop_inode) { |
| 5083 | inode_dec_link_count(inode); | 5164 | inode_dec_link_count(inode); |
| 5084 | iput(inode); | 5165 | iput(inode); |
| 5085 | } | 5166 | } |
| 5086 | btrfs_btree_balance_dirty(root, nr); | 5167 | btrfs_btree_balance_dirty(root); |
| 5087 | return err; | 5168 | return err; |
| 5088 | } | 5169 | } |
| 5089 | 5170 | ||
| @@ -5096,7 +5177,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 5096 | int drop_on_err = 0; | 5177 | int drop_on_err = 0; |
| 5097 | u64 objectid = 0; | 5178 | u64 objectid = 0; |
| 5098 | u64 index = 0; | 5179 | u64 index = 0; |
| 5099 | unsigned long nr = 1; | ||
| 5100 | 5180 | ||
| 5101 | /* | 5181 | /* |
| 5102 | * 2 items for inode and ref | 5182 | * 2 items for inode and ref |
| @@ -5142,11 +5222,10 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 5142 | drop_on_err = 0; | 5222 | drop_on_err = 0; |
| 5143 | 5223 | ||
| 5144 | out_fail: | 5224 | out_fail: |
| 5145 | nr = trans->blocks_used; | ||
| 5146 | btrfs_end_transaction(trans, root); | 5225 | btrfs_end_transaction(trans, root); |
| 5147 | if (drop_on_err) | 5226 | if (drop_on_err) |
| 5148 | iput(inode); | 5227 | iput(inode); |
| 5149 | btrfs_btree_balance_dirty(root, nr); | 5228 | btrfs_btree_balance_dirty(root); |
| 5150 | return err; | 5229 | return err; |
| 5151 | } | 5230 | } |
| 5152 | 5231 | ||
| @@ -5340,6 +5419,7 @@ again: | |||
| 5340 | if (start + len <= found_key.offset) | 5419 | if (start + len <= found_key.offset) |
| 5341 | goto not_found; | 5420 | goto not_found; |
| 5342 | em->start = start; | 5421 | em->start = start; |
| 5422 | em->orig_start = start; | ||
| 5343 | em->len = found_key.offset - start; | 5423 | em->len = found_key.offset - start; |
| 5344 | goto not_found_em; | 5424 | goto not_found_em; |
| 5345 | } | 5425 | } |
| @@ -5350,6 +5430,8 @@ again: | |||
| 5350 | em->len = extent_end - extent_start; | 5430 | em->len = extent_end - extent_start; |
| 5351 | em->orig_start = extent_start - | 5431 | em->orig_start = extent_start - |
| 5352 | btrfs_file_extent_offset(leaf, item); | 5432 | btrfs_file_extent_offset(leaf, item); |
| 5433 | em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, | ||
| 5434 | item); | ||
| 5353 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); | 5435 | bytenr = btrfs_file_extent_disk_bytenr(leaf, item); |
| 5354 | if (bytenr == 0) { | 5436 | if (bytenr == 0) { |
| 5355 | em->block_start = EXTENT_MAP_HOLE; | 5437 | em->block_start = EXTENT_MAP_HOLE; |
| @@ -5359,8 +5441,7 @@ again: | |||
| 5359 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5441 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 5360 | em->compress_type = compress_type; | 5442 | em->compress_type = compress_type; |
| 5361 | em->block_start = bytenr; | 5443 | em->block_start = bytenr; |
| 5362 | em->block_len = btrfs_file_extent_disk_num_bytes(leaf, | 5444 | em->block_len = em->orig_block_len; |
| 5363 | item); | ||
| 5364 | } else { | 5445 | } else { |
| 5365 | bytenr += btrfs_file_extent_offset(leaf, item); | 5446 | bytenr += btrfs_file_extent_offset(leaf, item); |
| 5366 | em->block_start = bytenr; | 5447 | em->block_start = bytenr; |
| @@ -5390,7 +5471,8 @@ again: | |||
| 5390 | em->start = extent_start + extent_offset; | 5471 | em->start = extent_start + extent_offset; |
| 5391 | em->len = (copy_size + root->sectorsize - 1) & | 5472 | em->len = (copy_size + root->sectorsize - 1) & |
| 5392 | ~((u64)root->sectorsize - 1); | 5473 | ~((u64)root->sectorsize - 1); |
| 5393 | em->orig_start = EXTENT_MAP_INLINE; | 5474 | em->orig_block_len = em->len; |
| 5475 | em->orig_start = em->start; | ||
| 5394 | if (compress_type) { | 5476 | if (compress_type) { |
| 5395 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 5477 | set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
| 5396 | em->compress_type = compress_type; | 5478 | em->compress_type = compress_type; |
| @@ -5439,11 +5521,11 @@ again: | |||
| 5439 | extent_map_end(em) - 1, NULL, GFP_NOFS); | 5521 | extent_map_end(em) - 1, NULL, GFP_NOFS); |
| 5440 | goto insert; | 5522 | goto insert; |
| 5441 | } else { | 5523 | } else { |
| 5442 | printk(KERN_ERR "btrfs unknown found_type %d\n", found_type); | 5524 | WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type); |
| 5443 | WARN_ON(1); | ||
| 5444 | } | 5525 | } |
| 5445 | not_found: | 5526 | not_found: |
| 5446 | em->start = start; | 5527 | em->start = start; |
| 5528 | em->orig_start = start; | ||
| 5447 | em->len = len; | 5529 | em->len = len; |
| 5448 | not_found_em: | 5530 | not_found_em: |
| 5449 | em->block_start = EXTENT_MAP_HOLE; | 5531 | em->block_start = EXTENT_MAP_HOLE; |
| @@ -5539,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag | |||
| 5539 | return em; | 5621 | return em; |
| 5540 | if (em) { | 5622 | if (em) { |
| 5541 | /* | 5623 | /* |
| 5542 | * if our em maps to a hole, there might | 5624 | * if our em maps to |
| 5543 | * actually be delalloc bytes behind it | 5625 | * - a hole or |
| 5626 | * - a pre-alloc extent, | ||
| 5627 | * there might actually be delalloc bytes behind it. | ||
| 5544 | */ | 5628 | */ |
| 5545 | if (em->block_start != EXTENT_MAP_HOLE) | 5629 | if (em->block_start != EXTENT_MAP_HOLE && |
| 5630 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
| 5546 | return em; | 5631 | return em; |
| 5547 | else | 5632 | else |
| 5548 | hole_em = em; | 5633 | hole_em = em; |
| @@ -5624,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag | |||
| 5624 | */ | 5709 | */ |
| 5625 | em->block_start = hole_em->block_start; | 5710 | em->block_start = hole_em->block_start; |
| 5626 | em->block_len = hole_len; | 5711 | em->block_len = hole_len; |
| 5712 | if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags)) | ||
| 5713 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | ||
| 5627 | } else { | 5714 | } else { |
| 5628 | em->start = range_start; | 5715 | em->start = range_start; |
| 5629 | em->len = found; | 5716 | em->len = found; |
| @@ -5645,38 +5732,19 @@ out: | |||
| 5645 | } | 5732 | } |
| 5646 | 5733 | ||
| 5647 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | 5734 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, |
| 5648 | struct extent_map *em, | ||
| 5649 | u64 start, u64 len) | 5735 | u64 start, u64 len) |
| 5650 | { | 5736 | { |
| 5651 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5737 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 5652 | struct btrfs_trans_handle *trans; | 5738 | struct btrfs_trans_handle *trans; |
| 5653 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 5739 | struct extent_map *em; |
| 5654 | struct btrfs_key ins; | 5740 | struct btrfs_key ins; |
| 5655 | u64 alloc_hint; | 5741 | u64 alloc_hint; |
| 5656 | int ret; | 5742 | int ret; |
| 5657 | bool insert = false; | ||
| 5658 | |||
| 5659 | /* | ||
| 5660 | * Ok if the extent map we looked up is a hole and is for the exact | ||
| 5661 | * range we want, there is no reason to allocate a new one, however if | ||
| 5662 | * it is not right then we need to free this one and drop the cache for | ||
| 5663 | * our range. | ||
| 5664 | */ | ||
| 5665 | if (em->block_start != EXTENT_MAP_HOLE || em->start != start || | ||
| 5666 | em->len != len) { | ||
| 5667 | free_extent_map(em); | ||
| 5668 | em = NULL; | ||
| 5669 | insert = true; | ||
| 5670 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
| 5671 | } | ||
| 5672 | 5743 | ||
| 5673 | trans = btrfs_join_transaction(root); | 5744 | trans = btrfs_join_transaction(root); |
| 5674 | if (IS_ERR(trans)) | 5745 | if (IS_ERR(trans)) |
| 5675 | return ERR_CAST(trans); | 5746 | return ERR_CAST(trans); |
| 5676 | 5747 | ||
| 5677 | if (start <= BTRFS_I(inode)->disk_i_size && len < 64 * 1024) | ||
| 5678 | btrfs_add_inode_defrag(trans, inode); | ||
| 5679 | |||
| 5680 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 5748 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
| 5681 | 5749 | ||
| 5682 | alloc_hint = get_extent_allocation_hint(inode, start, len); | 5750 | alloc_hint = get_extent_allocation_hint(inode, start, len); |
| @@ -5687,37 +5755,10 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | |||
| 5687 | goto out; | 5755 | goto out; |
| 5688 | } | 5756 | } |
| 5689 | 5757 | ||
| 5690 | if (!em) { | 5758 | em = create_pinned_em(inode, start, ins.offset, start, ins.objectid, |
| 5691 | em = alloc_extent_map(); | 5759 | ins.offset, ins.offset, 0); |
| 5692 | if (!em) { | 5760 | if (IS_ERR(em)) |
| 5693 | em = ERR_PTR(-ENOMEM); | 5761 | goto out; |
| 5694 | goto out; | ||
| 5695 | } | ||
| 5696 | } | ||
| 5697 | |||
| 5698 | em->start = start; | ||
| 5699 | em->orig_start = em->start; | ||
| 5700 | em->len = ins.offset; | ||
| 5701 | |||
| 5702 | em->block_start = ins.objectid; | ||
| 5703 | em->block_len = ins.offset; | ||
| 5704 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 5705 | |||
| 5706 | /* | ||
| 5707 | * We need to do this because if we're using the original em we searched | ||
| 5708 | * for, we could have EXTENT_FLAG_VACANCY set, and we don't want that. | ||
| 5709 | */ | ||
| 5710 | em->flags = 0; | ||
| 5711 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 5712 | |||
| 5713 | while (insert) { | ||
| 5714 | write_lock(&em_tree->lock); | ||
| 5715 | ret = add_extent_mapping(em_tree, em); | ||
| 5716 | write_unlock(&em_tree->lock); | ||
| 5717 | if (ret != -EEXIST) | ||
| 5718 | break; | ||
| 5719 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
| 5720 | } | ||
| 5721 | 5762 | ||
| 5722 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | 5763 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, |
| 5723 | ins.offset, ins.offset, 0); | 5764 | ins.offset, ins.offset, 0); |
| @@ -5894,7 +5935,7 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, | |||
| 5894 | static struct extent_map *create_pinned_em(struct inode *inode, u64 start, | 5935 | static struct extent_map *create_pinned_em(struct inode *inode, u64 start, |
| 5895 | u64 len, u64 orig_start, | 5936 | u64 len, u64 orig_start, |
| 5896 | u64 block_start, u64 block_len, | 5937 | u64 block_start, u64 block_len, |
| 5897 | int type) | 5938 | u64 orig_block_len, int type) |
| 5898 | { | 5939 | { |
| 5899 | struct extent_map_tree *em_tree; | 5940 | struct extent_map_tree *em_tree; |
| 5900 | struct extent_map *em; | 5941 | struct extent_map *em; |
| @@ -5912,15 +5953,20 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, | |||
| 5912 | em->block_len = block_len; | 5953 | em->block_len = block_len; |
| 5913 | em->block_start = block_start; | 5954 | em->block_start = block_start; |
| 5914 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 5955 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 5956 | em->orig_block_len = orig_block_len; | ||
| 5957 | em->generation = -1; | ||
| 5915 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 5958 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
| 5916 | if (type == BTRFS_ORDERED_PREALLOC) | 5959 | if (type == BTRFS_ORDERED_PREALLOC) |
| 5917 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | 5960 | set_bit(EXTENT_FLAG_FILLING, &em->flags); |
| 5918 | 5961 | ||
| 5919 | do { | 5962 | do { |
| 5920 | btrfs_drop_extent_cache(inode, em->start, | 5963 | btrfs_drop_extent_cache(inode, em->start, |
| 5921 | em->start + em->len - 1, 0); | 5964 | em->start + em->len - 1, 0); |
| 5922 | write_lock(&em_tree->lock); | 5965 | write_lock(&em_tree->lock); |
| 5923 | ret = add_extent_mapping(em_tree, em); | 5966 | ret = add_extent_mapping(em_tree, em); |
| 5967 | if (!ret) | ||
| 5968 | list_move(&em->list, | ||
| 5969 | &em_tree->modified_extents); | ||
| 5924 | write_unlock(&em_tree->lock); | 5970 | write_unlock(&em_tree->lock); |
| 5925 | } while (ret == -EEXIST); | 5971 | } while (ret == -EEXIST); |
| 5926 | 5972 | ||
| @@ -6047,13 +6093,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
| 6047 | goto must_cow; | 6093 | goto must_cow; |
| 6048 | 6094 | ||
| 6049 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | 6095 | if (can_nocow_odirect(trans, inode, start, len) == 1) { |
| 6050 | u64 orig_start = em->start; | 6096 | u64 orig_start = em->orig_start; |
| 6097 | u64 orig_block_len = em->orig_block_len; | ||
| 6051 | 6098 | ||
| 6052 | if (type == BTRFS_ORDERED_PREALLOC) { | 6099 | if (type == BTRFS_ORDERED_PREALLOC) { |
| 6053 | free_extent_map(em); | 6100 | free_extent_map(em); |
| 6054 | em = create_pinned_em(inode, start, len, | 6101 | em = create_pinned_em(inode, start, len, |
| 6055 | orig_start, | 6102 | orig_start, |
| 6056 | block_start, len, type); | 6103 | block_start, len, |
| 6104 | orig_block_len, type); | ||
| 6057 | if (IS_ERR(em)) { | 6105 | if (IS_ERR(em)) { |
| 6058 | btrfs_end_transaction(trans, root); | 6106 | btrfs_end_transaction(trans, root); |
| 6059 | goto unlock_err; | 6107 | goto unlock_err; |
| @@ -6077,7 +6125,8 @@ must_cow: | |||
| 6077 | * it above | 6125 | * it above |
| 6078 | */ | 6126 | */ |
| 6079 | len = bh_result->b_size; | 6127 | len = bh_result->b_size; |
| 6080 | em = btrfs_new_extent_direct(inode, em, start, len); | 6128 | free_extent_map(em); |
| 6129 | em = btrfs_new_extent_direct(inode, start, len); | ||
| 6081 | if (IS_ERR(em)) { | 6130 | if (IS_ERR(em)) { |
| 6082 | ret = PTR_ERR(em); | 6131 | ret = PTR_ERR(em); |
| 6083 | goto unlock_err; | 6132 | goto unlock_err; |
| @@ -6318,6 +6367,9 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, | |||
| 6318 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6367 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 6319 | int ret; | 6368 | int ret; |
| 6320 | 6369 | ||
| 6370 | if (async_submit) | ||
| 6371 | async_submit = !atomic_read(&BTRFS_I(inode)->sync_writers); | ||
| 6372 | |||
| 6321 | bio_get(bio); | 6373 | bio_get(bio); |
| 6322 | 6374 | ||
| 6323 | if (!write) { | 6375 | if (!write) { |
| @@ -6362,7 +6414,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
| 6362 | { | 6414 | { |
| 6363 | struct inode *inode = dip->inode; | 6415 | struct inode *inode = dip->inode; |
| 6364 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6416 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 6365 | struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; | ||
| 6366 | struct bio *bio; | 6417 | struct bio *bio; |
| 6367 | struct bio *orig_bio = dip->orig_bio; | 6418 | struct bio *orig_bio = dip->orig_bio; |
| 6368 | struct bio_vec *bvec = orig_bio->bi_io_vec; | 6419 | struct bio_vec *bvec = orig_bio->bi_io_vec; |
| @@ -6375,7 +6426,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
| 6375 | int async_submit = 0; | 6426 | int async_submit = 0; |
| 6376 | 6427 | ||
| 6377 | map_length = orig_bio->bi_size; | 6428 | map_length = orig_bio->bi_size; |
| 6378 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | 6429 | ret = btrfs_map_block(root->fs_info, READ, start_sector << 9, |
| 6379 | &map_length, NULL, 0); | 6430 | &map_length, NULL, 0); |
| 6380 | if (ret) { | 6431 | if (ret) { |
| 6381 | bio_put(orig_bio); | 6432 | bio_put(orig_bio); |
| @@ -6429,7 +6480,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip, | |||
| 6429 | bio->bi_end_io = btrfs_end_dio_bio; | 6480 | bio->bi_end_io = btrfs_end_dio_bio; |
| 6430 | 6481 | ||
| 6431 | map_length = orig_bio->bi_size; | 6482 | map_length = orig_bio->bi_size; |
| 6432 | ret = btrfs_map_block(map_tree, READ, start_sector << 9, | 6483 | ret = btrfs_map_block(root->fs_info, READ, |
| 6484 | start_sector << 9, | ||
| 6433 | &map_length, NULL, 0); | 6485 | &map_length, NULL, 0); |
| 6434 | if (ret) { | 6486 | if (ret) { |
| 6435 | bio_put(bio); | 6487 | bio_put(bio); |
| @@ -6582,9 +6634,17 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
| 6582 | btrfs_submit_direct, 0); | 6634 | btrfs_submit_direct, 0); |
| 6583 | } | 6635 | } |
| 6584 | 6636 | ||
| 6637 | #define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | ||
| 6638 | |||
| 6585 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 6639 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| 6586 | __u64 start, __u64 len) | 6640 | __u64 start, __u64 len) |
| 6587 | { | 6641 | { |
| 6642 | int ret; | ||
| 6643 | |||
| 6644 | ret = fiemap_check_flags(fieinfo, BTRFS_FIEMAP_FLAGS); | ||
| 6645 | if (ret) | ||
| 6646 | return ret; | ||
| 6647 | |||
| 6588 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); | 6648 | return extent_fiemap(inode, fieinfo, start, len, btrfs_get_extent_fiemap); |
| 6589 | } | 6649 | } |
| 6590 | 6650 | ||
| @@ -6855,7 +6915,6 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6855 | int ret; | 6915 | int ret; |
| 6856 | int err = 0; | 6916 | int err = 0; |
| 6857 | struct btrfs_trans_handle *trans; | 6917 | struct btrfs_trans_handle *trans; |
| 6858 | unsigned long nr; | ||
| 6859 | u64 mask = root->sectorsize - 1; | 6918 | u64 mask = root->sectorsize - 1; |
| 6860 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); | 6919 | u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); |
| 6861 | 6920 | ||
| @@ -6910,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6910 | 6969 | ||
| 6911 | /* | 6970 | /* |
| 6912 | * 1 for the truncate slack space | 6971 | * 1 for the truncate slack space |
| 6913 | * 1 for the orphan item we're going to add | ||
| 6914 | * 1 for the orphan item deletion | ||
| 6915 | * 1 for updating the inode. | 6972 | * 1 for updating the inode. |
| 6916 | */ | 6973 | */ |
| 6917 | trans = btrfs_start_transaction(root, 4); | 6974 | trans = btrfs_start_transaction(root, 2); |
| 6918 | if (IS_ERR(trans)) { | 6975 | if (IS_ERR(trans)) { |
| 6919 | err = PTR_ERR(trans); | 6976 | err = PTR_ERR(trans); |
| 6920 | goto out; | 6977 | goto out; |
| @@ -6925,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6925 | min_size); | 6982 | min_size); |
| 6926 | BUG_ON(ret); | 6983 | BUG_ON(ret); |
| 6927 | 6984 | ||
| 6928 | ret = btrfs_orphan_add(trans, inode); | ||
| 6929 | if (ret) { | ||
| 6930 | btrfs_end_transaction(trans, root); | ||
| 6931 | goto out; | ||
| 6932 | } | ||
| 6933 | |||
| 6934 | /* | 6985 | /* |
| 6935 | * setattr is responsible for setting the ordered_data_close flag, | 6986 | * setattr is responsible for setting the ordered_data_close flag, |
| 6936 | * but that is only tested during the last file release. That | 6987 | * but that is only tested during the last file release. That |
| @@ -6978,9 +7029,8 @@ static int btrfs_truncate(struct inode *inode) | |||
| 6978 | break; | 7029 | break; |
| 6979 | } | 7030 | } |
| 6980 | 7031 | ||
| 6981 | nr = trans->blocks_used; | ||
| 6982 | btrfs_end_transaction(trans, root); | 7032 | btrfs_end_transaction(trans, root); |
| 6983 | btrfs_btree_balance_dirty(root, nr); | 7033 | btrfs_btree_balance_dirty(root); |
| 6984 | 7034 | ||
| 6985 | trans = btrfs_start_transaction(root, 2); | 7035 | trans = btrfs_start_transaction(root, 2); |
| 6986 | if (IS_ERR(trans)) { | 7036 | if (IS_ERR(trans)) { |
| @@ -7000,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode) | |||
| 7000 | ret = btrfs_orphan_del(trans, inode); | 7050 | ret = btrfs_orphan_del(trans, inode); |
| 7001 | if (ret) | 7051 | if (ret) |
| 7002 | err = ret; | 7052 | err = ret; |
| 7003 | } else if (ret && inode->i_nlink > 0) { | ||
| 7004 | /* | ||
| 7005 | * Failed to do the truncate, remove us from the in memory | ||
| 7006 | * orphan list. | ||
| 7007 | */ | ||
| 7008 | ret = btrfs_orphan_del(NULL, inode); | ||
| 7009 | } | 7053 | } |
| 7010 | 7054 | ||
| 7011 | if (trans) { | 7055 | if (trans) { |
| @@ -7014,9 +7058,8 @@ static int btrfs_truncate(struct inode *inode) | |||
| 7014 | if (ret && !err) | 7058 | if (ret && !err) |
| 7015 | err = ret; | 7059 | err = ret; |
| 7016 | 7060 | ||
| 7017 | nr = trans->blocks_used; | ||
| 7018 | ret = btrfs_end_transaction(trans, root); | 7061 | ret = btrfs_end_transaction(trans, root); |
| 7019 | btrfs_btree_balance_dirty(root, nr); | 7062 | btrfs_btree_balance_dirty(root); |
| 7020 | } | 7063 | } |
| 7021 | 7064 | ||
| 7022 | out: | 7065 | out: |
| @@ -7093,6 +7136,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
| 7093 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); | 7136 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data); |
| 7094 | ei->io_tree.track_uptodate = 1; | 7137 | ei->io_tree.track_uptodate = 1; |
| 7095 | ei->io_failure_tree.track_uptodate = 1; | 7138 | ei->io_failure_tree.track_uptodate = 1; |
| 7139 | atomic_set(&ei->sync_writers, 0); | ||
| 7096 | mutex_init(&ei->log_mutex); | 7140 | mutex_init(&ei->log_mutex); |
| 7097 | mutex_init(&ei->delalloc_mutex); | 7141 | mutex_init(&ei->delalloc_mutex); |
| 7098 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 7142 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| @@ -7203,6 +7247,8 @@ void btrfs_destroy_cachep(void) | |||
| 7203 | kmem_cache_destroy(btrfs_path_cachep); | 7247 | kmem_cache_destroy(btrfs_path_cachep); |
| 7204 | if (btrfs_free_space_cachep) | 7248 | if (btrfs_free_space_cachep) |
| 7205 | kmem_cache_destroy(btrfs_free_space_cachep); | 7249 | kmem_cache_destroy(btrfs_free_space_cachep); |
| 7250 | if (btrfs_delalloc_work_cachep) | ||
| 7251 | kmem_cache_destroy(btrfs_delalloc_work_cachep); | ||
| 7206 | } | 7252 | } |
| 7207 | 7253 | ||
| 7208 | int btrfs_init_cachep(void) | 7254 | int btrfs_init_cachep(void) |
| @@ -7237,6 +7283,13 @@ int btrfs_init_cachep(void) | |||
| 7237 | if (!btrfs_free_space_cachep) | 7283 | if (!btrfs_free_space_cachep) |
| 7238 | goto fail; | 7284 | goto fail; |
| 7239 | 7285 | ||
| 7286 | btrfs_delalloc_work_cachep = kmem_cache_create("btrfs_delalloc_work", | ||
| 7287 | sizeof(struct btrfs_delalloc_work), 0, | ||
| 7288 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, | ||
| 7289 | NULL); | ||
| 7290 | if (!btrfs_delalloc_work_cachep) | ||
| 7291 | goto fail; | ||
| 7292 | |||
| 7240 | return 0; | 7293 | return 0; |
| 7241 | fail: | 7294 | fail: |
| 7242 | btrfs_destroy_cachep(); | 7295 | btrfs_destroy_cachep(); |
| @@ -7308,6 +7361,28 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 7308 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 7361 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
| 7309 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 7362 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 7310 | return -ENOTEMPTY; | 7363 | return -ENOTEMPTY; |
| 7364 | |||
| 7365 | |||
| 7366 | /* check for collisions, even if the name isn't there */ | ||
| 7367 | ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, | ||
| 7368 | new_dentry->d_name.name, | ||
| 7369 | new_dentry->d_name.len); | ||
| 7370 | |||
| 7371 | if (ret) { | ||
| 7372 | if (ret == -EEXIST) { | ||
| 7373 | /* we shouldn't get | ||
| 7374 | * eexist without a new_inode */ | ||
| 7375 | if (!new_inode) { | ||
| 7376 | WARN_ON(1); | ||
| 7377 | return ret; | ||
| 7378 | } | ||
| 7379 | } else { | ||
| 7380 | /* maybe -EOVERFLOW */ | ||
| 7381 | return ret; | ||
| 7382 | } | ||
| 7383 | } | ||
| 7384 | ret = 0; | ||
| 7385 | |||
| 7311 | /* | 7386 | /* |
| 7312 | * we're using rename to replace one file with another. | 7387 | * we're using rename to replace one file with another. |
| 7313 | * and the replacement file is large. Start IO on it now so | 7388 | * and the replacement file is large. Start IO on it now so |
| @@ -7447,39 +7522,110 @@ out_notrans: | |||
| 7447 | return ret; | 7522 | return ret; |
| 7448 | } | 7523 | } |
| 7449 | 7524 | ||
| 7525 | static void btrfs_run_delalloc_work(struct btrfs_work *work) | ||
| 7526 | { | ||
| 7527 | struct btrfs_delalloc_work *delalloc_work; | ||
| 7528 | |||
| 7529 | delalloc_work = container_of(work, struct btrfs_delalloc_work, | ||
| 7530 | work); | ||
| 7531 | if (delalloc_work->wait) | ||
| 7532 | btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1); | ||
| 7533 | else | ||
| 7534 | filemap_flush(delalloc_work->inode->i_mapping); | ||
| 7535 | |||
| 7536 | if (delalloc_work->delay_iput) | ||
| 7537 | btrfs_add_delayed_iput(delalloc_work->inode); | ||
| 7538 | else | ||
| 7539 | iput(delalloc_work->inode); | ||
| 7540 | complete(&delalloc_work->completion); | ||
| 7541 | } | ||
| 7542 | |||
| 7543 | struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | ||
| 7544 | int wait, int delay_iput) | ||
| 7545 | { | ||
| 7546 | struct btrfs_delalloc_work *work; | ||
| 7547 | |||
| 7548 | work = kmem_cache_zalloc(btrfs_delalloc_work_cachep, GFP_NOFS); | ||
| 7549 | if (!work) | ||
| 7550 | return NULL; | ||
| 7551 | |||
| 7552 | init_completion(&work->completion); | ||
| 7553 | INIT_LIST_HEAD(&work->list); | ||
| 7554 | work->inode = inode; | ||
| 7555 | work->wait = wait; | ||
| 7556 | work->delay_iput = delay_iput; | ||
| 7557 | work->work.func = btrfs_run_delalloc_work; | ||
| 7558 | |||
| 7559 | return work; | ||
| 7560 | } | ||
| 7561 | |||
| 7562 | void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | ||
| 7563 | { | ||
| 7564 | wait_for_completion(&work->completion); | ||
| 7565 | kmem_cache_free(btrfs_delalloc_work_cachep, work); | ||
| 7566 | } | ||
| 7567 | |||
| 7450 | /* | 7568 | /* |
| 7451 | * some fairly slow code that needs optimization. This walks the list | 7569 | * some fairly slow code that needs optimization. This walks the list |
| 7452 | * of all the inodes with pending delalloc and forces them to disk. | 7570 | * of all the inodes with pending delalloc and forces them to disk. |
| 7453 | */ | 7571 | */ |
| 7454 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 7572 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) |
| 7455 | { | 7573 | { |
| 7456 | struct list_head *head = &root->fs_info->delalloc_inodes; | ||
| 7457 | struct btrfs_inode *binode; | 7574 | struct btrfs_inode *binode; |
| 7458 | struct inode *inode; | 7575 | struct inode *inode; |
| 7576 | struct btrfs_delalloc_work *work, *next; | ||
| 7577 | struct list_head works; | ||
| 7578 | struct list_head splice; | ||
| 7579 | int ret = 0; | ||
| 7459 | 7580 | ||
| 7460 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 7581 | if (root->fs_info->sb->s_flags & MS_RDONLY) |
| 7461 | return -EROFS; | 7582 | return -EROFS; |
| 7462 | 7583 | ||
| 7584 | INIT_LIST_HEAD(&works); | ||
| 7585 | INIT_LIST_HEAD(&splice); | ||
| 7586 | again: | ||
| 7463 | spin_lock(&root->fs_info->delalloc_lock); | 7587 | spin_lock(&root->fs_info->delalloc_lock); |
| 7464 | while (!list_empty(head)) { | 7588 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); |
| 7465 | binode = list_entry(head->next, struct btrfs_inode, | 7589 | while (!list_empty(&splice)) { |
| 7590 | binode = list_entry(splice.next, struct btrfs_inode, | ||
| 7466 | delalloc_inodes); | 7591 | delalloc_inodes); |
| 7592 | |||
| 7593 | list_del_init(&binode->delalloc_inodes); | ||
| 7594 | |||
| 7467 | inode = igrab(&binode->vfs_inode); | 7595 | inode = igrab(&binode->vfs_inode); |
| 7468 | if (!inode) | 7596 | if (!inode) |
| 7469 | list_del_init(&binode->delalloc_inodes); | 7597 | continue; |
| 7598 | |||
| 7599 | list_add_tail(&binode->delalloc_inodes, | ||
| 7600 | &root->fs_info->delalloc_inodes); | ||
| 7470 | spin_unlock(&root->fs_info->delalloc_lock); | 7601 | spin_unlock(&root->fs_info->delalloc_lock); |
| 7471 | if (inode) { | 7602 | |
| 7472 | filemap_flush(inode->i_mapping); | 7603 | work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); |
| 7473 | if (delay_iput) | 7604 | if (unlikely(!work)) { |
| 7474 | btrfs_add_delayed_iput(inode); | 7605 | ret = -ENOMEM; |
| 7475 | else | 7606 | goto out; |
| 7476 | iput(inode); | ||
| 7477 | } | 7607 | } |
| 7608 | list_add_tail(&work->list, &works); | ||
| 7609 | btrfs_queue_worker(&root->fs_info->flush_workers, | ||
| 7610 | &work->work); | ||
| 7611 | |||
| 7478 | cond_resched(); | 7612 | cond_resched(); |
| 7479 | spin_lock(&root->fs_info->delalloc_lock); | 7613 | spin_lock(&root->fs_info->delalloc_lock); |
| 7480 | } | 7614 | } |
| 7481 | spin_unlock(&root->fs_info->delalloc_lock); | 7615 | spin_unlock(&root->fs_info->delalloc_lock); |
| 7482 | 7616 | ||
| 7617 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 7618 | list_del_init(&work->list); | ||
| 7619 | btrfs_wait_and_free_delalloc_work(work); | ||
| 7620 | } | ||
| 7621 | |||
| 7622 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 7623 | if (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
| 7624 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 7625 | goto again; | ||
| 7626 | } | ||
| 7627 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 7628 | |||
| 7483 | /* the filemap_flush will queue IO into the worker threads, but | 7629 | /* the filemap_flush will queue IO into the worker threads, but |
| 7484 | * we have to make sure the IO is actually started and that | 7630 | * we have to make sure the IO is actually started and that |
| 7485 | * ordered extents get created before we return | 7631 | * ordered extents get created before we return |
| @@ -7493,6 +7639,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 7493 | } | 7639 | } |
| 7494 | atomic_dec(&root->fs_info->async_submit_draining); | 7640 | atomic_dec(&root->fs_info->async_submit_draining); |
| 7495 | return 0; | 7641 | return 0; |
| 7642 | out: | ||
| 7643 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 7644 | list_del_init(&work->list); | ||
| 7645 | btrfs_wait_and_free_delalloc_work(work); | ||
| 7646 | } | ||
| 7647 | |||
| 7648 | if (!list_empty_careful(&splice)) { | ||
| 7649 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 7650 | list_splice_tail(&splice, &root->fs_info->delalloc_inodes); | ||
| 7651 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 7652 | } | ||
| 7653 | return ret; | ||
| 7496 | } | 7654 | } |
| 7497 | 7655 | ||
| 7498 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 7656 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
| @@ -7512,7 +7670,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 7512 | unsigned long ptr; | 7670 | unsigned long ptr; |
| 7513 | struct btrfs_file_extent_item *ei; | 7671 | struct btrfs_file_extent_item *ei; |
| 7514 | struct extent_buffer *leaf; | 7672 | struct extent_buffer *leaf; |
| 7515 | unsigned long nr = 0; | ||
| 7516 | 7673 | ||
| 7517 | name_len = strlen(symname) + 1; | 7674 | name_len = strlen(symname) + 1; |
| 7518 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 7675 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| @@ -7610,13 +7767,12 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 7610 | out_unlock: | 7767 | out_unlock: |
| 7611 | if (!err) | 7768 | if (!err) |
| 7612 | d_instantiate(dentry, inode); | 7769 | d_instantiate(dentry, inode); |
| 7613 | nr = trans->blocks_used; | ||
| 7614 | btrfs_end_transaction(trans, root); | 7770 | btrfs_end_transaction(trans, root); |
| 7615 | if (drop_inode) { | 7771 | if (drop_inode) { |
| 7616 | inode_dec_link_count(inode); | 7772 | inode_dec_link_count(inode); |
| 7617 | iput(inode); | 7773 | iput(inode); |
| 7618 | } | 7774 | } |
| 7619 | btrfs_btree_balance_dirty(root, nr); | 7775 | btrfs_btree_balance_dirty(root); |
| 7620 | return err; | 7776 | return err; |
| 7621 | } | 7777 | } |
| 7622 | 7778 | ||
| @@ -7679,6 +7835,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode, | |||
| 7679 | em->len = ins.offset; | 7835 | em->len = ins.offset; |
| 7680 | em->block_start = ins.objectid; | 7836 | em->block_start = ins.objectid; |
| 7681 | em->block_len = ins.offset; | 7837 | em->block_len = ins.offset; |
| 7838 | em->orig_block_len = ins.offset; | ||
| 7682 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 7839 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
| 7683 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); | 7840 | set_bit(EXTENT_FLAG_PREALLOC, &em->flags); |
| 7684 | em->generation = trans->transid; | 7841 | em->generation = trans->transid; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 8fcf9a59c28d..338f2597bf7f 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | #include "backref.h" | 55 | #include "backref.h" |
| 56 | #include "rcu-string.h" | 56 | #include "rcu-string.h" |
| 57 | #include "send.h" | 57 | #include "send.h" |
| 58 | #include "dev-replace.h" | ||
| 58 | 59 | ||
| 59 | /* Mask out flags that are inappropriate for the given type of inode. */ | 60 | /* Mask out flags that are inappropriate for the given type of inode. */ |
| 60 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | 61 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) |
| @@ -140,8 +141,11 @@ void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) | |||
| 140 | BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; | 141 | BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; |
| 141 | } | 142 | } |
| 142 | 143 | ||
| 143 | if (flags & BTRFS_INODE_NODATACOW) | 144 | if (flags & BTRFS_INODE_NODATACOW) { |
| 144 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; | 145 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; |
| 146 | if (S_ISREG(inode->i_mode)) | ||
| 147 | BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; | ||
| 148 | } | ||
| 145 | 149 | ||
| 146 | btrfs_update_iflags(inode); | 150 | btrfs_update_iflags(inode); |
| 147 | } | 151 | } |
| @@ -511,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 511 | 515 | ||
| 512 | BUG_ON(ret); | 516 | BUG_ON(ret); |
| 513 | 517 | ||
| 514 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | ||
| 515 | fail: | 518 | fail: |
| 516 | if (async_transid) { | 519 | if (async_transid) { |
| 517 | *async_transid = trans->transid; | 520 | *async_transid = trans->transid; |
| @@ -521,6 +524,10 @@ fail: | |||
| 521 | } | 524 | } |
| 522 | if (err && !ret) | 525 | if (err && !ret) |
| 523 | ret = err; | 526 | ret = err; |
| 527 | |||
| 528 | if (!ret) | ||
| 529 | d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); | ||
| 530 | |||
| 524 | return ret; | 531 | return ret; |
| 525 | } | 532 | } |
| 526 | 533 | ||
| @@ -571,8 +578,12 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 571 | ret = btrfs_commit_transaction(trans, | 578 | ret = btrfs_commit_transaction(trans, |
| 572 | root->fs_info->extent_root); | 579 | root->fs_info->extent_root); |
| 573 | } | 580 | } |
| 574 | if (ret) | 581 | if (ret) { |
| 582 | /* cleanup_transaction has freed this for us */ | ||
| 583 | if (trans->aborted) | ||
| 584 | pending_snapshot = NULL; | ||
| 575 | goto fail; | 585 | goto fail; |
| 586 | } | ||
| 576 | 587 | ||
| 577 | ret = pending_snapshot->error; | 588 | ret = pending_snapshot->error; |
| 578 | if (ret) | 589 | if (ret) |
| @@ -705,6 +716,16 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
| 705 | if (error) | 716 | if (error) |
| 706 | goto out_dput; | 717 | goto out_dput; |
| 707 | 718 | ||
| 719 | /* | ||
| 720 | * even if this name doesn't exist, we may get hash collisions. | ||
| 721 | * check for them now when we can safely fail | ||
| 722 | */ | ||
| 723 | error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, | ||
| 724 | dir->i_ino, name, | ||
| 725 | namelen); | ||
| 726 | if (error) | ||
| 727 | goto out_dput; | ||
| 728 | |||
| 708 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); | 729 | down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); |
| 709 | 730 | ||
| 710 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) | 731 | if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) |
| @@ -1225,7 +1246,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
| 1225 | } | 1246 | } |
| 1226 | 1247 | ||
| 1227 | defrag_count += ret; | 1248 | defrag_count += ret; |
| 1228 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, ret); | 1249 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| 1229 | mutex_unlock(&inode->i_mutex); | 1250 | mutex_unlock(&inode->i_mutex); |
| 1230 | 1251 | ||
| 1231 | if (newer_than) { | 1252 | if (newer_than) { |
| @@ -1293,12 +1314,13 @@ out_ra: | |||
| 1293 | return ret; | 1314 | return ret; |
| 1294 | } | 1315 | } |
| 1295 | 1316 | ||
| 1296 | static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | 1317 | static noinline int btrfs_ioctl_resize(struct file *file, |
| 1297 | void __user *arg) | 1318 | void __user *arg) |
| 1298 | { | 1319 | { |
| 1299 | u64 new_size; | 1320 | u64 new_size; |
| 1300 | u64 old_size; | 1321 | u64 old_size; |
| 1301 | u64 devid = 1; | 1322 | u64 devid = 1; |
| 1323 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 1302 | struct btrfs_ioctl_vol_args *vol_args; | 1324 | struct btrfs_ioctl_vol_args *vol_args; |
| 1303 | struct btrfs_trans_handle *trans; | 1325 | struct btrfs_trans_handle *trans; |
| 1304 | struct btrfs_device *device = NULL; | 1326 | struct btrfs_device *device = NULL; |
| @@ -1313,13 +1335,18 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1313 | if (!capable(CAP_SYS_ADMIN)) | 1335 | if (!capable(CAP_SYS_ADMIN)) |
| 1314 | return -EPERM; | 1336 | return -EPERM; |
| 1315 | 1337 | ||
| 1316 | mutex_lock(&root->fs_info->volume_mutex); | 1338 | ret = mnt_want_write_file(file); |
| 1317 | if (root->fs_info->balance_ctl) { | 1339 | if (ret) |
| 1318 | printk(KERN_INFO "btrfs: balance in progress\n"); | 1340 | return ret; |
| 1319 | ret = -EINVAL; | 1341 | |
| 1320 | goto out; | 1342 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 1343 | 1)) { | ||
| 1344 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 1345 | mnt_drop_write_file(file); | ||
| 1346 | return -EINVAL; | ||
| 1321 | } | 1347 | } |
| 1322 | 1348 | ||
| 1349 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 1323 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 1350 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 1324 | if (IS_ERR(vol_args)) { | 1351 | if (IS_ERR(vol_args)) { |
| 1325 | ret = PTR_ERR(vol_args); | 1352 | ret = PTR_ERR(vol_args); |
| @@ -1339,16 +1366,18 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1339 | printk(KERN_INFO "btrfs: resizing devid %llu\n", | 1366 | printk(KERN_INFO "btrfs: resizing devid %llu\n", |
| 1340 | (unsigned long long)devid); | 1367 | (unsigned long long)devid); |
| 1341 | } | 1368 | } |
| 1342 | device = btrfs_find_device(root, devid, NULL, NULL); | 1369 | |
| 1370 | device = btrfs_find_device(root->fs_info, devid, NULL, NULL); | ||
| 1343 | if (!device) { | 1371 | if (!device) { |
| 1344 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", | 1372 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", |
| 1345 | (unsigned long long)devid); | 1373 | (unsigned long long)devid); |
| 1346 | ret = -EINVAL; | 1374 | ret = -EINVAL; |
| 1347 | goto out_free; | 1375 | goto out_free; |
| 1348 | } | 1376 | } |
| 1349 | if (device->fs_devices && device->fs_devices->seeding) { | 1377 | |
| 1378 | if (!device->writeable) { | ||
| 1350 | printk(KERN_INFO "btrfs: resizer unable to apply on " | 1379 | printk(KERN_INFO "btrfs: resizer unable to apply on " |
| 1351 | "seeding device %llu\n", | 1380 | "readonly device %llu\n", |
| 1352 | (unsigned long long)devid); | 1381 | (unsigned long long)devid); |
| 1353 | ret = -EINVAL; | 1382 | ret = -EINVAL; |
| 1354 | goto out_free; | 1383 | goto out_free; |
| @@ -1371,6 +1400,11 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1371 | } | 1400 | } |
| 1372 | } | 1401 | } |
| 1373 | 1402 | ||
| 1403 | if (device->is_tgtdev_for_dev_replace) { | ||
| 1404 | ret = -EINVAL; | ||
| 1405 | goto out_free; | ||
| 1406 | } | ||
| 1407 | |||
| 1374 | old_size = device->total_bytes; | 1408 | old_size = device->total_bytes; |
| 1375 | 1409 | ||
| 1376 | if (mod < 0) { | 1410 | if (mod < 0) { |
| @@ -1409,12 +1443,14 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 1409 | btrfs_commit_transaction(trans, root); | 1443 | btrfs_commit_transaction(trans, root); |
| 1410 | } else if (new_size < old_size) { | 1444 | } else if (new_size < old_size) { |
| 1411 | ret = btrfs_shrink_device(device, new_size); | 1445 | ret = btrfs_shrink_device(device, new_size); |
| 1412 | } | 1446 | } /* equal, nothing need to do */ |
| 1413 | 1447 | ||
| 1414 | out_free: | 1448 | out_free: |
| 1415 | kfree(vol_args); | 1449 | kfree(vol_args); |
| 1416 | out: | 1450 | out: |
| 1417 | mutex_unlock(&root->fs_info->volume_mutex); | 1451 | mutex_unlock(&root->fs_info->volume_mutex); |
| 1452 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 1453 | mnt_drop_write_file(file); | ||
| 1418 | return ret; | 1454 | return ret; |
| 1419 | } | 1455 | } |
| 1420 | 1456 | ||
| @@ -2065,13 +2101,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 2065 | err = inode_permission(inode, MAY_WRITE | MAY_EXEC); | 2101 | err = inode_permission(inode, MAY_WRITE | MAY_EXEC); |
| 2066 | if (err) | 2102 | if (err) |
| 2067 | goto out_dput; | 2103 | goto out_dput; |
| 2068 | |||
| 2069 | /* check if subvolume may be deleted by a non-root user */ | ||
| 2070 | err = btrfs_may_delete(dir, dentry, 1); | ||
| 2071 | if (err) | ||
| 2072 | goto out_dput; | ||
| 2073 | } | 2104 | } |
| 2074 | 2105 | ||
| 2106 | /* check if subvolume may be deleted by a user */ | ||
| 2107 | err = btrfs_may_delete(dir, dentry, 1); | ||
| 2108 | if (err) | ||
| 2109 | goto out_dput; | ||
| 2110 | |||
| 2075 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { | 2111 | if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { |
| 2076 | err = -EINVAL; | 2112 | err = -EINVAL; |
| 2077 | goto out_dput; | 2113 | goto out_dput; |
| @@ -2153,13 +2189,22 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 2153 | struct btrfs_ioctl_defrag_range_args *range; | 2189 | struct btrfs_ioctl_defrag_range_args *range; |
| 2154 | int ret; | 2190 | int ret; |
| 2155 | 2191 | ||
| 2156 | if (btrfs_root_readonly(root)) | ||
| 2157 | return -EROFS; | ||
| 2158 | |||
| 2159 | ret = mnt_want_write_file(file); | 2192 | ret = mnt_want_write_file(file); |
| 2160 | if (ret) | 2193 | if (ret) |
| 2161 | return ret; | 2194 | return ret; |
| 2162 | 2195 | ||
| 2196 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, | ||
| 2197 | 1)) { | ||
| 2198 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 2199 | mnt_drop_write_file(file); | ||
| 2200 | return -EINVAL; | ||
| 2201 | } | ||
| 2202 | |||
| 2203 | if (btrfs_root_readonly(root)) { | ||
| 2204 | ret = -EROFS; | ||
| 2205 | goto out; | ||
| 2206 | } | ||
| 2207 | |||
| 2163 | switch (inode->i_mode & S_IFMT) { | 2208 | switch (inode->i_mode & S_IFMT) { |
| 2164 | case S_IFDIR: | 2209 | case S_IFDIR: |
| 2165 | if (!capable(CAP_SYS_ADMIN)) { | 2210 | if (!capable(CAP_SYS_ADMIN)) { |
| @@ -2209,6 +2254,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 2209 | ret = -EINVAL; | 2254 | ret = -EINVAL; |
| 2210 | } | 2255 | } |
| 2211 | out: | 2256 | out: |
| 2257 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 2212 | mnt_drop_write_file(file); | 2258 | mnt_drop_write_file(file); |
| 2213 | return ret; | 2259 | return ret; |
| 2214 | } | 2260 | } |
| @@ -2221,13 +2267,13 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
| 2221 | if (!capable(CAP_SYS_ADMIN)) | 2267 | if (!capable(CAP_SYS_ADMIN)) |
| 2222 | return -EPERM; | 2268 | return -EPERM; |
| 2223 | 2269 | ||
| 2224 | mutex_lock(&root->fs_info->volume_mutex); | 2270 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 2225 | if (root->fs_info->balance_ctl) { | 2271 | 1)) { |
| 2226 | printk(KERN_INFO "btrfs: balance in progress\n"); | 2272 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); |
| 2227 | ret = -EINVAL; | 2273 | return -EINVAL; |
| 2228 | goto out; | ||
| 2229 | } | 2274 | } |
| 2230 | 2275 | ||
| 2276 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 2231 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2277 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 2232 | if (IS_ERR(vol_args)) { | 2278 | if (IS_ERR(vol_args)) { |
| 2233 | ret = PTR_ERR(vol_args); | 2279 | ret = PTR_ERR(vol_args); |
| @@ -2240,27 +2286,31 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) | |||
| 2240 | kfree(vol_args); | 2286 | kfree(vol_args); |
| 2241 | out: | 2287 | out: |
| 2242 | mutex_unlock(&root->fs_info->volume_mutex); | 2288 | mutex_unlock(&root->fs_info->volume_mutex); |
| 2289 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 2243 | return ret; | 2290 | return ret; |
| 2244 | } | 2291 | } |
| 2245 | 2292 | ||
| 2246 | static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | 2293 | static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) |
| 2247 | { | 2294 | { |
| 2295 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 2248 | struct btrfs_ioctl_vol_args *vol_args; | 2296 | struct btrfs_ioctl_vol_args *vol_args; |
| 2249 | int ret; | 2297 | int ret; |
| 2250 | 2298 | ||
| 2251 | if (!capable(CAP_SYS_ADMIN)) | 2299 | if (!capable(CAP_SYS_ADMIN)) |
| 2252 | return -EPERM; | 2300 | return -EPERM; |
| 2253 | 2301 | ||
| 2254 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 2302 | ret = mnt_want_write_file(file); |
| 2255 | return -EROFS; | 2303 | if (ret) |
| 2304 | return ret; | ||
| 2256 | 2305 | ||
| 2257 | mutex_lock(&root->fs_info->volume_mutex); | 2306 | if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, |
| 2258 | if (root->fs_info->balance_ctl) { | 2307 | 1)) { |
| 2259 | printk(KERN_INFO "btrfs: balance in progress\n"); | 2308 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); |
| 2260 | ret = -EINVAL; | 2309 | mnt_drop_write_file(file); |
| 2261 | goto out; | 2310 | return -EINVAL; |
| 2262 | } | 2311 | } |
| 2263 | 2312 | ||
| 2313 | mutex_lock(&root->fs_info->volume_mutex); | ||
| 2264 | vol_args = memdup_user(arg, sizeof(*vol_args)); | 2314 | vol_args = memdup_user(arg, sizeof(*vol_args)); |
| 2265 | if (IS_ERR(vol_args)) { | 2315 | if (IS_ERR(vol_args)) { |
| 2266 | ret = PTR_ERR(vol_args); | 2316 | ret = PTR_ERR(vol_args); |
| @@ -2273,6 +2323,8 @@ static long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg) | |||
| 2273 | kfree(vol_args); | 2323 | kfree(vol_args); |
| 2274 | out: | 2324 | out: |
| 2275 | mutex_unlock(&root->fs_info->volume_mutex); | 2325 | mutex_unlock(&root->fs_info->volume_mutex); |
| 2326 | atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); | ||
| 2327 | mnt_drop_write_file(file); | ||
| 2276 | return ret; | 2328 | return ret; |
| 2277 | } | 2329 | } |
| 2278 | 2330 | ||
| @@ -2328,7 +2380,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) | |||
| 2328 | s_uuid = di_args->uuid; | 2380 | s_uuid = di_args->uuid; |
| 2329 | 2381 | ||
| 2330 | mutex_lock(&fs_devices->device_list_mutex); | 2382 | mutex_lock(&fs_devices->device_list_mutex); |
| 2331 | dev = btrfs_find_device(root, di_args->devid, s_uuid, NULL); | 2383 | dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); |
| 2332 | mutex_unlock(&fs_devices->device_list_mutex); | 2384 | mutex_unlock(&fs_devices->device_list_mutex); |
| 2333 | 2385 | ||
| 2334 | if (!dev) { | 2386 | if (!dev) { |
| @@ -2821,12 +2873,19 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2821 | struct btrfs_disk_key disk_key; | 2873 | struct btrfs_disk_key disk_key; |
| 2822 | u64 objectid = 0; | 2874 | u64 objectid = 0; |
| 2823 | u64 dir_id; | 2875 | u64 dir_id; |
| 2876 | int ret; | ||
| 2824 | 2877 | ||
| 2825 | if (!capable(CAP_SYS_ADMIN)) | 2878 | if (!capable(CAP_SYS_ADMIN)) |
| 2826 | return -EPERM; | 2879 | return -EPERM; |
| 2827 | 2880 | ||
| 2828 | if (copy_from_user(&objectid, argp, sizeof(objectid))) | 2881 | ret = mnt_want_write_file(file); |
| 2829 | return -EFAULT; | 2882 | if (ret) |
| 2883 | return ret; | ||
| 2884 | |||
| 2885 | if (copy_from_user(&objectid, argp, sizeof(objectid))) { | ||
| 2886 | ret = -EFAULT; | ||
| 2887 | goto out; | ||
| 2888 | } | ||
| 2830 | 2889 | ||
| 2831 | if (!objectid) | 2890 | if (!objectid) |
| 2832 | objectid = root->root_key.objectid; | 2891 | objectid = root->root_key.objectid; |
| @@ -2836,21 +2895,28 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2836 | location.offset = (u64)-1; | 2895 | location.offset = (u64)-1; |
| 2837 | 2896 | ||
| 2838 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); | 2897 | new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); |
| 2839 | if (IS_ERR(new_root)) | 2898 | if (IS_ERR(new_root)) { |
| 2840 | return PTR_ERR(new_root); | 2899 | ret = PTR_ERR(new_root); |
| 2900 | goto out; | ||
| 2901 | } | ||
| 2841 | 2902 | ||
| 2842 | if (btrfs_root_refs(&new_root->root_item) == 0) | 2903 | if (btrfs_root_refs(&new_root->root_item) == 0) { |
| 2843 | return -ENOENT; | 2904 | ret = -ENOENT; |
| 2905 | goto out; | ||
| 2906 | } | ||
| 2844 | 2907 | ||
| 2845 | path = btrfs_alloc_path(); | 2908 | path = btrfs_alloc_path(); |
| 2846 | if (!path) | 2909 | if (!path) { |
| 2847 | return -ENOMEM; | 2910 | ret = -ENOMEM; |
| 2911 | goto out; | ||
| 2912 | } | ||
| 2848 | path->leave_spinning = 1; | 2913 | path->leave_spinning = 1; |
| 2849 | 2914 | ||
| 2850 | trans = btrfs_start_transaction(root, 1); | 2915 | trans = btrfs_start_transaction(root, 1); |
| 2851 | if (IS_ERR(trans)) { | 2916 | if (IS_ERR(trans)) { |
| 2852 | btrfs_free_path(path); | 2917 | btrfs_free_path(path); |
| 2853 | return PTR_ERR(trans); | 2918 | ret = PTR_ERR(trans); |
| 2919 | goto out; | ||
| 2854 | } | 2920 | } |
| 2855 | 2921 | ||
| 2856 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); | 2922 | dir_id = btrfs_super_root_dir(root->fs_info->super_copy); |
| @@ -2861,7 +2927,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2861 | btrfs_end_transaction(trans, root); | 2927 | btrfs_end_transaction(trans, root); |
| 2862 | printk(KERN_ERR "Umm, you don't have the default dir item, " | 2928 | printk(KERN_ERR "Umm, you don't have the default dir item, " |
| 2863 | "this isn't going to work\n"); | 2929 | "this isn't going to work\n"); |
| 2864 | return -ENOENT; | 2930 | ret = -ENOENT; |
| 2931 | goto out; | ||
| 2865 | } | 2932 | } |
| 2866 | 2933 | ||
| 2867 | btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); | 2934 | btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); |
| @@ -2871,8 +2938,9 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 2871 | 2938 | ||
| 2872 | btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); | 2939 | btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); |
| 2873 | btrfs_end_transaction(trans, root); | 2940 | btrfs_end_transaction(trans, root); |
| 2874 | 2941 | out: | |
| 2875 | return 0; | 2942 | mnt_drop_write_file(file); |
| 2943 | return ret; | ||
| 2876 | } | 2944 | } |
| 2877 | 2945 | ||
| 2878 | void btrfs_get_block_group_info(struct list_head *groups_list, | 2946 | void btrfs_get_block_group_info(struct list_head *groups_list, |
| @@ -3036,32 +3104,38 @@ long btrfs_ioctl_trans_end(struct file *file) | |||
| 3036 | return 0; | 3104 | return 0; |
| 3037 | } | 3105 | } |
| 3038 | 3106 | ||
| 3039 | static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp) | 3107 | static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, |
| 3108 | void __user *argp) | ||
| 3040 | { | 3109 | { |
| 3041 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
| 3042 | struct btrfs_trans_handle *trans; | 3110 | struct btrfs_trans_handle *trans; |
| 3043 | u64 transid; | 3111 | u64 transid; |
| 3044 | int ret; | 3112 | int ret; |
| 3045 | 3113 | ||
| 3046 | trans = btrfs_start_transaction(root, 0); | 3114 | trans = btrfs_attach_transaction(root); |
| 3047 | if (IS_ERR(trans)) | 3115 | if (IS_ERR(trans)) { |
| 3048 | return PTR_ERR(trans); | 3116 | if (PTR_ERR(trans) != -ENOENT) |
| 3117 | return PTR_ERR(trans); | ||
| 3118 | |||
| 3119 | /* No running transaction, don't bother */ | ||
| 3120 | transid = root->fs_info->last_trans_committed; | ||
| 3121 | goto out; | ||
| 3122 | } | ||
| 3049 | transid = trans->transid; | 3123 | transid = trans->transid; |
| 3050 | ret = btrfs_commit_transaction_async(trans, root, 0); | 3124 | ret = btrfs_commit_transaction_async(trans, root, 0); |
| 3051 | if (ret) { | 3125 | if (ret) { |
| 3052 | btrfs_end_transaction(trans, root); | 3126 | btrfs_end_transaction(trans, root); |
| 3053 | return ret; | 3127 | return ret; |
| 3054 | } | 3128 | } |
| 3055 | 3129 | out: | |
| 3056 | if (argp) | 3130 | if (argp) |
| 3057 | if (copy_to_user(argp, &transid, sizeof(transid))) | 3131 | if (copy_to_user(argp, &transid, sizeof(transid))) |
| 3058 | return -EFAULT; | 3132 | return -EFAULT; |
| 3059 | return 0; | 3133 | return 0; |
| 3060 | } | 3134 | } |
| 3061 | 3135 | ||
| 3062 | static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) | 3136 | static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, |
| 3137 | void __user *argp) | ||
| 3063 | { | 3138 | { |
| 3064 | struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root; | ||
| 3065 | u64 transid; | 3139 | u64 transid; |
| 3066 | 3140 | ||
| 3067 | if (argp) { | 3141 | if (argp) { |
| @@ -3073,10 +3147,11 @@ static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp) | |||
| 3073 | return btrfs_wait_for_commit(root, transid); | 3147 | return btrfs_wait_for_commit(root, transid); |
| 3074 | } | 3148 | } |
| 3075 | 3149 | ||
| 3076 | static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) | 3150 | static long btrfs_ioctl_scrub(struct file *file, void __user *arg) |
| 3077 | { | 3151 | { |
| 3078 | int ret; | 3152 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; |
| 3079 | struct btrfs_ioctl_scrub_args *sa; | 3153 | struct btrfs_ioctl_scrub_args *sa; |
| 3154 | int ret; | ||
| 3080 | 3155 | ||
| 3081 | if (!capable(CAP_SYS_ADMIN)) | 3156 | if (!capable(CAP_SYS_ADMIN)) |
| 3082 | return -EPERM; | 3157 | return -EPERM; |
| @@ -3085,12 +3160,22 @@ static long btrfs_ioctl_scrub(struct btrfs_root *root, void __user *arg) | |||
| 3085 | if (IS_ERR(sa)) | 3160 | if (IS_ERR(sa)) |
| 3086 | return PTR_ERR(sa); | 3161 | return PTR_ERR(sa); |
| 3087 | 3162 | ||
| 3088 | ret = btrfs_scrub_dev(root, sa->devid, sa->start, sa->end, | 3163 | if (!(sa->flags & BTRFS_SCRUB_READONLY)) { |
| 3089 | &sa->progress, sa->flags & BTRFS_SCRUB_READONLY); | 3164 | ret = mnt_want_write_file(file); |
| 3165 | if (ret) | ||
| 3166 | goto out; | ||
| 3167 | } | ||
| 3168 | |||
| 3169 | ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end, | ||
| 3170 | &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, | ||
| 3171 | 0); | ||
| 3090 | 3172 | ||
| 3091 | if (copy_to_user(arg, sa, sizeof(*sa))) | 3173 | if (copy_to_user(arg, sa, sizeof(*sa))) |
| 3092 | ret = -EFAULT; | 3174 | ret = -EFAULT; |
| 3093 | 3175 | ||
| 3176 | if (!(sa->flags & BTRFS_SCRUB_READONLY)) | ||
| 3177 | mnt_drop_write_file(file); | ||
| 3178 | out: | ||
| 3094 | kfree(sa); | 3179 | kfree(sa); |
| 3095 | return ret; | 3180 | return ret; |
| 3096 | } | 3181 | } |
| @@ -3100,7 +3185,7 @@ static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg) | |||
| 3100 | if (!capable(CAP_SYS_ADMIN)) | 3185 | if (!capable(CAP_SYS_ADMIN)) |
| 3101 | return -EPERM; | 3186 | return -EPERM; |
| 3102 | 3187 | ||
| 3103 | return btrfs_scrub_cancel(root); | 3188 | return btrfs_scrub_cancel(root->fs_info); |
| 3104 | } | 3189 | } |
| 3105 | 3190 | ||
| 3106 | static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, | 3191 | static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, |
| @@ -3149,6 +3234,51 @@ static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, | |||
| 3149 | return ret; | 3234 | return ret; |
| 3150 | } | 3235 | } |
| 3151 | 3236 | ||
| 3237 | static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) | ||
| 3238 | { | ||
| 3239 | struct btrfs_ioctl_dev_replace_args *p; | ||
| 3240 | int ret; | ||
| 3241 | |||
| 3242 | if (!capable(CAP_SYS_ADMIN)) | ||
| 3243 | return -EPERM; | ||
| 3244 | |||
| 3245 | p = memdup_user(arg, sizeof(*p)); | ||
| 3246 | if (IS_ERR(p)) | ||
| 3247 | return PTR_ERR(p); | ||
| 3248 | |||
| 3249 | switch (p->cmd) { | ||
| 3250 | case BTRFS_IOCTL_DEV_REPLACE_CMD_START: | ||
| 3251 | if (atomic_xchg( | ||
| 3252 | &root->fs_info->mutually_exclusive_operation_running, | ||
| 3253 | 1)) { | ||
| 3254 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 3255 | ret = -EINPROGRESS; | ||
| 3256 | } else { | ||
| 3257 | ret = btrfs_dev_replace_start(root, p); | ||
| 3258 | atomic_set( | ||
| 3259 | &root->fs_info->mutually_exclusive_operation_running, | ||
| 3260 | 0); | ||
| 3261 | } | ||
| 3262 | break; | ||
| 3263 | case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: | ||
| 3264 | btrfs_dev_replace_status(root->fs_info, p); | ||
| 3265 | ret = 0; | ||
| 3266 | break; | ||
| 3267 | case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: | ||
| 3268 | ret = btrfs_dev_replace_cancel(root->fs_info, p); | ||
| 3269 | break; | ||
| 3270 | default: | ||
| 3271 | ret = -EINVAL; | ||
| 3272 | break; | ||
| 3273 | } | ||
| 3274 | |||
| 3275 | if (copy_to_user(arg, p, sizeof(*p))) | ||
| 3276 | ret = -EFAULT; | ||
| 3277 | |||
| 3278 | kfree(p); | ||
| 3279 | return ret; | ||
| 3280 | } | ||
| 3281 | |||
| 3152 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | 3282 | static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) |
| 3153 | { | 3283 | { |
| 3154 | int ret = 0; | 3284 | int ret = 0; |
| @@ -3314,6 +3444,7 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
| 3314 | struct btrfs_fs_info *fs_info = root->fs_info; | 3444 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3315 | struct btrfs_ioctl_balance_args *bargs; | 3445 | struct btrfs_ioctl_balance_args *bargs; |
| 3316 | struct btrfs_balance_control *bctl; | 3446 | struct btrfs_balance_control *bctl; |
| 3447 | bool need_unlock; /* for mut. excl. ops lock */ | ||
| 3317 | int ret; | 3448 | int ret; |
| 3318 | 3449 | ||
| 3319 | if (!capable(CAP_SYS_ADMIN)) | 3450 | if (!capable(CAP_SYS_ADMIN)) |
| @@ -3323,14 +3454,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
| 3323 | if (ret) | 3454 | if (ret) |
| 3324 | return ret; | 3455 | return ret; |
| 3325 | 3456 | ||
| 3326 | mutex_lock(&fs_info->volume_mutex); | 3457 | again: |
| 3458 | if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { | ||
| 3459 | mutex_lock(&fs_info->volume_mutex); | ||
| 3460 | mutex_lock(&fs_info->balance_mutex); | ||
| 3461 | need_unlock = true; | ||
| 3462 | goto locked; | ||
| 3463 | } | ||
| 3464 | |||
| 3465 | /* | ||
| 3466 | * mut. excl. ops lock is locked. Three possibilities: | ||
| 3467 | * (1) some other op is running | ||
| 3468 | * (2) balance is running | ||
| 3469 | * (3) balance is paused -- special case (think resume) | ||
| 3470 | */ | ||
| 3327 | mutex_lock(&fs_info->balance_mutex); | 3471 | mutex_lock(&fs_info->balance_mutex); |
| 3472 | if (fs_info->balance_ctl) { | ||
| 3473 | /* this is either (2) or (3) */ | ||
| 3474 | if (!atomic_read(&fs_info->balance_running)) { | ||
| 3475 | mutex_unlock(&fs_info->balance_mutex); | ||
| 3476 | if (!mutex_trylock(&fs_info->volume_mutex)) | ||
| 3477 | goto again; | ||
| 3478 | mutex_lock(&fs_info->balance_mutex); | ||
| 3479 | |||
| 3480 | if (fs_info->balance_ctl && | ||
| 3481 | !atomic_read(&fs_info->balance_running)) { | ||
| 3482 | /* this is (3) */ | ||
| 3483 | need_unlock = false; | ||
| 3484 | goto locked; | ||
| 3485 | } | ||
| 3486 | |||
| 3487 | mutex_unlock(&fs_info->balance_mutex); | ||
| 3488 | mutex_unlock(&fs_info->volume_mutex); | ||
| 3489 | goto again; | ||
| 3490 | } else { | ||
| 3491 | /* this is (2) */ | ||
| 3492 | mutex_unlock(&fs_info->balance_mutex); | ||
| 3493 | ret = -EINPROGRESS; | ||
| 3494 | goto out; | ||
| 3495 | } | ||
| 3496 | } else { | ||
| 3497 | /* this is (1) */ | ||
| 3498 | mutex_unlock(&fs_info->balance_mutex); | ||
| 3499 | pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); | ||
| 3500 | ret = -EINVAL; | ||
| 3501 | goto out; | ||
| 3502 | } | ||
| 3503 | |||
| 3504 | locked: | ||
| 3505 | BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running)); | ||
| 3328 | 3506 | ||
| 3329 | if (arg) { | 3507 | if (arg) { |
| 3330 | bargs = memdup_user(arg, sizeof(*bargs)); | 3508 | bargs = memdup_user(arg, sizeof(*bargs)); |
| 3331 | if (IS_ERR(bargs)) { | 3509 | if (IS_ERR(bargs)) { |
| 3332 | ret = PTR_ERR(bargs); | 3510 | ret = PTR_ERR(bargs); |
| 3333 | goto out; | 3511 | goto out_unlock; |
| 3334 | } | 3512 | } |
| 3335 | 3513 | ||
| 3336 | if (bargs->flags & BTRFS_BALANCE_RESUME) { | 3514 | if (bargs->flags & BTRFS_BALANCE_RESUME) { |
| @@ -3374,11 +3552,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg) | |||
| 3374 | } | 3552 | } |
| 3375 | 3553 | ||
| 3376 | do_balance: | 3554 | do_balance: |
| 3377 | ret = btrfs_balance(bctl, bargs); | ||
| 3378 | /* | 3555 | /* |
| 3379 | * bctl is freed in __cancel_balance or in free_fs_info if | 3556 | * Ownership of bctl and mutually_exclusive_operation_running |
| 3380 | * restriper was paused all the way until unmount | 3557 | * goes to to btrfs_balance. bctl is freed in __cancel_balance, |
| 3558 | * or, if restriper was paused all the way until unmount, in | ||
| 3559 | * free_fs_info. mutually_exclusive_operation_running is | ||
| 3560 | * cleared in __cancel_balance. | ||
| 3381 | */ | 3561 | */ |
| 3562 | need_unlock = false; | ||
| 3563 | |||
| 3564 | ret = btrfs_balance(bctl, bargs); | ||
| 3565 | |||
| 3382 | if (arg) { | 3566 | if (arg) { |
| 3383 | if (copy_to_user(arg, bargs, sizeof(*bargs))) | 3567 | if (copy_to_user(arg, bargs, sizeof(*bargs))) |
| 3384 | ret = -EFAULT; | 3568 | ret = -EFAULT; |
| @@ -3386,9 +3570,12 @@ do_balance: | |||
| 3386 | 3570 | ||
| 3387 | out_bargs: | 3571 | out_bargs: |
| 3388 | kfree(bargs); | 3572 | kfree(bargs); |
| 3389 | out: | 3573 | out_unlock: |
| 3390 | mutex_unlock(&fs_info->balance_mutex); | 3574 | mutex_unlock(&fs_info->balance_mutex); |
| 3391 | mutex_unlock(&fs_info->volume_mutex); | 3575 | mutex_unlock(&fs_info->volume_mutex); |
| 3576 | if (need_unlock) | ||
| 3577 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
| 3578 | out: | ||
| 3392 | mnt_drop_write_file(file); | 3579 | mnt_drop_write_file(file); |
| 3393 | return ret; | 3580 | return ret; |
| 3394 | } | 3581 | } |
| @@ -3441,8 +3628,9 @@ out: | |||
| 3441 | return ret; | 3628 | return ret; |
| 3442 | } | 3629 | } |
| 3443 | 3630 | ||
| 3444 | static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | 3631 | static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) |
| 3445 | { | 3632 | { |
| 3633 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3446 | struct btrfs_ioctl_quota_ctl_args *sa; | 3634 | struct btrfs_ioctl_quota_ctl_args *sa; |
| 3447 | struct btrfs_trans_handle *trans = NULL; | 3635 | struct btrfs_trans_handle *trans = NULL; |
| 3448 | int ret; | 3636 | int ret; |
| @@ -3451,12 +3639,15 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | |||
| 3451 | if (!capable(CAP_SYS_ADMIN)) | 3639 | if (!capable(CAP_SYS_ADMIN)) |
| 3452 | return -EPERM; | 3640 | return -EPERM; |
| 3453 | 3641 | ||
| 3454 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3642 | ret = mnt_want_write_file(file); |
| 3455 | return -EROFS; | 3643 | if (ret) |
| 3644 | return ret; | ||
| 3456 | 3645 | ||
| 3457 | sa = memdup_user(arg, sizeof(*sa)); | 3646 | sa = memdup_user(arg, sizeof(*sa)); |
| 3458 | if (IS_ERR(sa)) | 3647 | if (IS_ERR(sa)) { |
| 3459 | return PTR_ERR(sa); | 3648 | ret = PTR_ERR(sa); |
| 3649 | goto drop_write; | ||
| 3650 | } | ||
| 3460 | 3651 | ||
| 3461 | if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { | 3652 | if (sa->cmd != BTRFS_QUOTA_CTL_RESCAN) { |
| 3462 | trans = btrfs_start_transaction(root, 2); | 3653 | trans = btrfs_start_transaction(root, 2); |
| @@ -3489,14 +3680,16 @@ static long btrfs_ioctl_quota_ctl(struct btrfs_root *root, void __user *arg) | |||
| 3489 | if (err && !ret) | 3680 | if (err && !ret) |
| 3490 | ret = err; | 3681 | ret = err; |
| 3491 | } | 3682 | } |
| 3492 | |||
| 3493 | out: | 3683 | out: |
| 3494 | kfree(sa); | 3684 | kfree(sa); |
| 3685 | drop_write: | ||
| 3686 | mnt_drop_write_file(file); | ||
| 3495 | return ret; | 3687 | return ret; |
| 3496 | } | 3688 | } |
| 3497 | 3689 | ||
| 3498 | static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | 3690 | static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) |
| 3499 | { | 3691 | { |
| 3692 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3500 | struct btrfs_ioctl_qgroup_assign_args *sa; | 3693 | struct btrfs_ioctl_qgroup_assign_args *sa; |
| 3501 | struct btrfs_trans_handle *trans; | 3694 | struct btrfs_trans_handle *trans; |
| 3502 | int ret; | 3695 | int ret; |
| @@ -3505,12 +3698,15 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | |||
| 3505 | if (!capable(CAP_SYS_ADMIN)) | 3698 | if (!capable(CAP_SYS_ADMIN)) |
| 3506 | return -EPERM; | 3699 | return -EPERM; |
| 3507 | 3700 | ||
| 3508 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3701 | ret = mnt_want_write_file(file); |
| 3509 | return -EROFS; | 3702 | if (ret) |
| 3703 | return ret; | ||
| 3510 | 3704 | ||
| 3511 | sa = memdup_user(arg, sizeof(*sa)); | 3705 | sa = memdup_user(arg, sizeof(*sa)); |
| 3512 | if (IS_ERR(sa)) | 3706 | if (IS_ERR(sa)) { |
| 3513 | return PTR_ERR(sa); | 3707 | ret = PTR_ERR(sa); |
| 3708 | goto drop_write; | ||
| 3709 | } | ||
| 3514 | 3710 | ||
| 3515 | trans = btrfs_join_transaction(root); | 3711 | trans = btrfs_join_transaction(root); |
| 3516 | if (IS_ERR(trans)) { | 3712 | if (IS_ERR(trans)) { |
| @@ -3533,11 +3729,14 @@ static long btrfs_ioctl_qgroup_assign(struct btrfs_root *root, void __user *arg) | |||
| 3533 | 3729 | ||
| 3534 | out: | 3730 | out: |
| 3535 | kfree(sa); | 3731 | kfree(sa); |
| 3732 | drop_write: | ||
| 3733 | mnt_drop_write_file(file); | ||
| 3536 | return ret; | 3734 | return ret; |
| 3537 | } | 3735 | } |
| 3538 | 3736 | ||
| 3539 | static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | 3737 | static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) |
| 3540 | { | 3738 | { |
| 3739 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3541 | struct btrfs_ioctl_qgroup_create_args *sa; | 3740 | struct btrfs_ioctl_qgroup_create_args *sa; |
| 3542 | struct btrfs_trans_handle *trans; | 3741 | struct btrfs_trans_handle *trans; |
| 3543 | int ret; | 3742 | int ret; |
| @@ -3546,12 +3745,20 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | |||
| 3546 | if (!capable(CAP_SYS_ADMIN)) | 3745 | if (!capable(CAP_SYS_ADMIN)) |
| 3547 | return -EPERM; | 3746 | return -EPERM; |
| 3548 | 3747 | ||
| 3549 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3748 | ret = mnt_want_write_file(file); |
| 3550 | return -EROFS; | 3749 | if (ret) |
| 3750 | return ret; | ||
| 3551 | 3751 | ||
| 3552 | sa = memdup_user(arg, sizeof(*sa)); | 3752 | sa = memdup_user(arg, sizeof(*sa)); |
| 3553 | if (IS_ERR(sa)) | 3753 | if (IS_ERR(sa)) { |
| 3554 | return PTR_ERR(sa); | 3754 | ret = PTR_ERR(sa); |
| 3755 | goto drop_write; | ||
| 3756 | } | ||
| 3757 | |||
| 3758 | if (!sa->qgroupid) { | ||
| 3759 | ret = -EINVAL; | ||
| 3760 | goto out; | ||
| 3761 | } | ||
| 3555 | 3762 | ||
| 3556 | trans = btrfs_join_transaction(root); | 3763 | trans = btrfs_join_transaction(root); |
| 3557 | if (IS_ERR(trans)) { | 3764 | if (IS_ERR(trans)) { |
| @@ -3573,11 +3780,14 @@ static long btrfs_ioctl_qgroup_create(struct btrfs_root *root, void __user *arg) | |||
| 3573 | 3780 | ||
| 3574 | out: | 3781 | out: |
| 3575 | kfree(sa); | 3782 | kfree(sa); |
| 3783 | drop_write: | ||
| 3784 | mnt_drop_write_file(file); | ||
| 3576 | return ret; | 3785 | return ret; |
| 3577 | } | 3786 | } |
| 3578 | 3787 | ||
| 3579 | static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | 3788 | static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) |
| 3580 | { | 3789 | { |
| 3790 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
| 3581 | struct btrfs_ioctl_qgroup_limit_args *sa; | 3791 | struct btrfs_ioctl_qgroup_limit_args *sa; |
| 3582 | struct btrfs_trans_handle *trans; | 3792 | struct btrfs_trans_handle *trans; |
| 3583 | int ret; | 3793 | int ret; |
| @@ -3587,12 +3797,15 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | |||
| 3587 | if (!capable(CAP_SYS_ADMIN)) | 3797 | if (!capable(CAP_SYS_ADMIN)) |
| 3588 | return -EPERM; | 3798 | return -EPERM; |
| 3589 | 3799 | ||
| 3590 | if (root->fs_info->sb->s_flags & MS_RDONLY) | 3800 | ret = mnt_want_write_file(file); |
| 3591 | return -EROFS; | 3801 | if (ret) |
| 3802 | return ret; | ||
| 3592 | 3803 | ||
| 3593 | sa = memdup_user(arg, sizeof(*sa)); | 3804 | sa = memdup_user(arg, sizeof(*sa)); |
| 3594 | if (IS_ERR(sa)) | 3805 | if (IS_ERR(sa)) { |
| 3595 | return PTR_ERR(sa); | 3806 | ret = PTR_ERR(sa); |
| 3807 | goto drop_write; | ||
| 3808 | } | ||
| 3596 | 3809 | ||
| 3597 | trans = btrfs_join_transaction(root); | 3810 | trans = btrfs_join_transaction(root); |
| 3598 | if (IS_ERR(trans)) { | 3811 | if (IS_ERR(trans)) { |
| @@ -3615,6 +3828,8 @@ static long btrfs_ioctl_qgroup_limit(struct btrfs_root *root, void __user *arg) | |||
| 3615 | 3828 | ||
| 3616 | out: | 3829 | out: |
| 3617 | kfree(sa); | 3830 | kfree(sa); |
| 3831 | drop_write: | ||
| 3832 | mnt_drop_write_file(file); | ||
| 3618 | return ret; | 3833 | return ret; |
| 3619 | } | 3834 | } |
| 3620 | 3835 | ||
| @@ -3735,11 +3950,11 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3735 | case BTRFS_IOC_DEFRAG_RANGE: | 3950 | case BTRFS_IOC_DEFRAG_RANGE: |
| 3736 | return btrfs_ioctl_defrag(file, argp); | 3951 | return btrfs_ioctl_defrag(file, argp); |
| 3737 | case BTRFS_IOC_RESIZE: | 3952 | case BTRFS_IOC_RESIZE: |
| 3738 | return btrfs_ioctl_resize(root, argp); | 3953 | return btrfs_ioctl_resize(file, argp); |
| 3739 | case BTRFS_IOC_ADD_DEV: | 3954 | case BTRFS_IOC_ADD_DEV: |
| 3740 | return btrfs_ioctl_add_dev(root, argp); | 3955 | return btrfs_ioctl_add_dev(root, argp); |
| 3741 | case BTRFS_IOC_RM_DEV: | 3956 | case BTRFS_IOC_RM_DEV: |
| 3742 | return btrfs_ioctl_rm_dev(root, argp); | 3957 | return btrfs_ioctl_rm_dev(file, argp); |
| 3743 | case BTRFS_IOC_FS_INFO: | 3958 | case BTRFS_IOC_FS_INFO: |
| 3744 | return btrfs_ioctl_fs_info(root, argp); | 3959 | return btrfs_ioctl_fs_info(root, argp); |
| 3745 | case BTRFS_IOC_DEV_INFO: | 3960 | case BTRFS_IOC_DEV_INFO: |
| @@ -3768,11 +3983,11 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3768 | btrfs_sync_fs(file->f_dentry->d_sb, 1); | 3983 | btrfs_sync_fs(file->f_dentry->d_sb, 1); |
| 3769 | return 0; | 3984 | return 0; |
| 3770 | case BTRFS_IOC_START_SYNC: | 3985 | case BTRFS_IOC_START_SYNC: |
| 3771 | return btrfs_ioctl_start_sync(file, argp); | 3986 | return btrfs_ioctl_start_sync(root, argp); |
| 3772 | case BTRFS_IOC_WAIT_SYNC: | 3987 | case BTRFS_IOC_WAIT_SYNC: |
| 3773 | return btrfs_ioctl_wait_sync(file, argp); | 3988 | return btrfs_ioctl_wait_sync(root, argp); |
| 3774 | case BTRFS_IOC_SCRUB: | 3989 | case BTRFS_IOC_SCRUB: |
| 3775 | return btrfs_ioctl_scrub(root, argp); | 3990 | return btrfs_ioctl_scrub(file, argp); |
| 3776 | case BTRFS_IOC_SCRUB_CANCEL: | 3991 | case BTRFS_IOC_SCRUB_CANCEL: |
| 3777 | return btrfs_ioctl_scrub_cancel(root, argp); | 3992 | return btrfs_ioctl_scrub_cancel(root, argp); |
| 3778 | case BTRFS_IOC_SCRUB_PROGRESS: | 3993 | case BTRFS_IOC_SCRUB_PROGRESS: |
| @@ -3790,13 +4005,15 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
| 3790 | case BTRFS_IOC_GET_DEV_STATS: | 4005 | case BTRFS_IOC_GET_DEV_STATS: |
| 3791 | return btrfs_ioctl_get_dev_stats(root, argp); | 4006 | return btrfs_ioctl_get_dev_stats(root, argp); |
| 3792 | case BTRFS_IOC_QUOTA_CTL: | 4007 | case BTRFS_IOC_QUOTA_CTL: |
| 3793 | return btrfs_ioctl_quota_ctl(root, argp); | 4008 | return btrfs_ioctl_quota_ctl(file, argp); |
| 3794 | case BTRFS_IOC_QGROUP_ASSIGN: | 4009 | case BTRFS_IOC_QGROUP_ASSIGN: |
| 3795 | return btrfs_ioctl_qgroup_assign(root, argp); | 4010 | return btrfs_ioctl_qgroup_assign(file, argp); |
| 3796 | case BTRFS_IOC_QGROUP_CREATE: | 4011 | case BTRFS_IOC_QGROUP_CREATE: |
| 3797 | return btrfs_ioctl_qgroup_create(root, argp); | 4012 | return btrfs_ioctl_qgroup_create(file, argp); |
| 3798 | case BTRFS_IOC_QGROUP_LIMIT: | 4013 | case BTRFS_IOC_QGROUP_LIMIT: |
| 3799 | return btrfs_ioctl_qgroup_limit(root, argp); | 4014 | return btrfs_ioctl_qgroup_limit(file, argp); |
| 4015 | case BTRFS_IOC_DEV_REPLACE: | ||
| 4016 | return btrfs_ioctl_dev_replace(root, argp); | ||
| 3800 | } | 4017 | } |
| 3801 | 4018 | ||
| 3802 | return -ENOTTY; | 4019 | return -ENOTTY; |
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 731e2875ab93..dabca9cc8c2e 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h | |||
| @@ -30,6 +30,8 @@ struct btrfs_ioctl_vol_args { | |||
| 30 | char name[BTRFS_PATH_NAME_MAX + 1]; | 30 | char name[BTRFS_PATH_NAME_MAX + 1]; |
| 31 | }; | 31 | }; |
| 32 | 32 | ||
| 33 | #define BTRFS_DEVICE_PATH_NAME_MAX 1024 | ||
| 34 | |||
| 33 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) | 35 | #define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0) |
| 34 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) | 36 | #define BTRFS_SUBVOL_RDONLY (1ULL << 1) |
| 35 | #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) | 37 | #define BTRFS_SUBVOL_QGROUP_INHERIT (1ULL << 2) |
| @@ -123,7 +125,48 @@ struct btrfs_ioctl_scrub_args { | |||
| 123 | __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; | 125 | __u64 unused[(1024-32-sizeof(struct btrfs_scrub_progress))/8]; |
| 124 | }; | 126 | }; |
| 125 | 127 | ||
| 126 | #define BTRFS_DEVICE_PATH_NAME_MAX 1024 | 128 | #define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_ALWAYS 0 |
| 129 | #define BTRFS_IOCTL_DEV_REPLACE_CONT_READING_FROM_SRCDEV_MODE_AVOID 1 | ||
| 130 | struct btrfs_ioctl_dev_replace_start_params { | ||
| 131 | __u64 srcdevid; /* in, if 0, use srcdev_name instead */ | ||
| 132 | __u64 cont_reading_from_srcdev_mode; /* in, see #define | ||
| 133 | * above */ | ||
| 134 | __u8 srcdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ | ||
| 135 | __u8 tgtdev_name[BTRFS_DEVICE_PATH_NAME_MAX + 1]; /* in */ | ||
| 136 | }; | ||
| 137 | |||
| 138 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED 0 | ||
| 139 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED 1 | ||
| 140 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED 2 | ||
| 141 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED 3 | ||
| 142 | #define BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED 4 | ||
| 143 | struct btrfs_ioctl_dev_replace_status_params { | ||
| 144 | __u64 replace_state; /* out, see #define above */ | ||
| 145 | __u64 progress_1000; /* out, 0 <= x <= 1000 */ | ||
| 146 | __u64 time_started; /* out, seconds since 1-Jan-1970 */ | ||
| 147 | __u64 time_stopped; /* out, seconds since 1-Jan-1970 */ | ||
| 148 | __u64 num_write_errors; /* out */ | ||
| 149 | __u64 num_uncorrectable_read_errors; /* out */ | ||
| 150 | }; | ||
| 151 | |||
| 152 | #define BTRFS_IOCTL_DEV_REPLACE_CMD_START 0 | ||
| 153 | #define BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS 1 | ||
| 154 | #define BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL 2 | ||
| 155 | #define BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR 0 | ||
| 156 | #define BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED 1 | ||
| 157 | #define BTRFS_IOCTL_DEV_REPLACE_RESULT_ALREADY_STARTED 2 | ||
| 158 | struct btrfs_ioctl_dev_replace_args { | ||
| 159 | __u64 cmd; /* in */ | ||
| 160 | __u64 result; /* out */ | ||
| 161 | |||
| 162 | union { | ||
| 163 | struct btrfs_ioctl_dev_replace_start_params start; | ||
| 164 | struct btrfs_ioctl_dev_replace_status_params status; | ||
| 165 | }; /* in/out */ | ||
| 166 | |||
| 167 | __u64 spare[64]; | ||
| 168 | }; | ||
| 169 | |||
| 127 | struct btrfs_ioctl_dev_info_args { | 170 | struct btrfs_ioctl_dev_info_args { |
| 128 | __u64 devid; /* in/out */ | 171 | __u64 devid; /* in/out */ |
| 129 | __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ | 172 | __u8 uuid[BTRFS_UUID_SIZE]; /* in/out */ |
| @@ -453,4 +496,7 @@ struct btrfs_ioctl_send_args { | |||
| 453 | struct btrfs_ioctl_qgroup_limit_args) | 496 | struct btrfs_ioctl_qgroup_limit_args) |
| 454 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | 497 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ |
| 455 | struct btrfs_ioctl_get_dev_stats) | 498 | struct btrfs_ioctl_get_dev_stats) |
| 499 | #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ | ||
| 500 | struct btrfs_ioctl_dev_replace_args) | ||
| 501 | |||
| 456 | #endif | 502 | #endif |
diff --git a/fs/btrfs/math.h b/fs/btrfs/math.h new file mode 100644 index 000000000000..b7816cefbd13 --- /dev/null +++ b/fs/btrfs/math.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | |||
| 2 | /* | ||
| 3 | * Copyright (C) 2012 Fujitsu. All rights reserved. | ||
| 4 | * Written by Miao Xie <miaox@cn.fujitsu.com> | ||
| 5 | * | ||
| 6 | * This program is free software; you can redistribute it and/or | ||
| 7 | * modify it under the terms of the GNU General Public | ||
| 8 | * License v2 as published by the Free Software Foundation. | ||
| 9 | * | ||
| 10 | * This program is distributed in the hope that it will be useful, | ||
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 13 | * General Public License for more details. | ||
| 14 | * | ||
| 15 | * You should have received a copy of the GNU General Public | ||
| 16 | * License along with this program; if not, write to the | ||
| 17 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 18 | * Boston, MA 02111-1307, USA. | ||
| 19 | */ | ||
| 20 | |||
| 21 | #ifndef __BTRFS_MATH_H | ||
| 22 | #define __BTRFS_MATH_H | ||
| 23 | |||
| 24 | #include <asm/div64.h> | ||
| 25 | |||
| 26 | static inline u64 div_factor(u64 num, int factor) | ||
| 27 | { | ||
| 28 | if (factor == 10) | ||
| 29 | return num; | ||
| 30 | num *= factor; | ||
| 31 | do_div(num, 10); | ||
| 32 | return num; | ||
| 33 | } | ||
| 34 | |||
| 35 | static inline u64 div_factor_fine(u64 num, int factor) | ||
| 36 | { | ||
| 37 | if (factor == 100) | ||
| 38 | return num; | ||
| 39 | num *= factor; | ||
| 40 | do_div(num, 100); | ||
| 41 | return num; | ||
| 42 | } | ||
| 43 | |||
| 44 | #endif | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 7772f02ba28e..e5ed56729607 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -211,6 +211,8 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 211 | init_waitqueue_head(&entry->wait); | 211 | init_waitqueue_head(&entry->wait); |
| 212 | INIT_LIST_HEAD(&entry->list); | 212 | INIT_LIST_HEAD(&entry->list); |
| 213 | INIT_LIST_HEAD(&entry->root_extent_list); | 213 | INIT_LIST_HEAD(&entry->root_extent_list); |
| 214 | INIT_LIST_HEAD(&entry->work_list); | ||
| 215 | init_completion(&entry->completion); | ||
| 214 | 216 | ||
| 215 | trace_btrfs_ordered_extent_add(inode, entry); | 217 | trace_btrfs_ordered_extent_add(inode, entry); |
| 216 | 218 | ||
| @@ -464,18 +466,28 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
| 464 | wake_up(&entry->wait); | 466 | wake_up(&entry->wait); |
| 465 | } | 467 | } |
| 466 | 468 | ||
| 469 | static void btrfs_run_ordered_extent_work(struct btrfs_work *work) | ||
| 470 | { | ||
| 471 | struct btrfs_ordered_extent *ordered; | ||
| 472 | |||
| 473 | ordered = container_of(work, struct btrfs_ordered_extent, flush_work); | ||
| 474 | btrfs_start_ordered_extent(ordered->inode, ordered, 1); | ||
| 475 | complete(&ordered->completion); | ||
| 476 | } | ||
| 477 | |||
| 467 | /* | 478 | /* |
| 468 | * wait for all the ordered extents in a root. This is done when balancing | 479 | * wait for all the ordered extents in a root. This is done when balancing |
| 469 | * space between drives. | 480 | * space between drives. |
| 470 | */ | 481 | */ |
| 471 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | 482 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) |
| 472 | { | 483 | { |
| 473 | struct list_head splice; | 484 | struct list_head splice, works; |
| 474 | struct list_head *cur; | 485 | struct list_head *cur; |
| 475 | struct btrfs_ordered_extent *ordered; | 486 | struct btrfs_ordered_extent *ordered, *next; |
| 476 | struct inode *inode; | 487 | struct inode *inode; |
| 477 | 488 | ||
| 478 | INIT_LIST_HEAD(&splice); | 489 | INIT_LIST_HEAD(&splice); |
| 490 | INIT_LIST_HEAD(&works); | ||
| 479 | 491 | ||
| 480 | spin_lock(&root->fs_info->ordered_extent_lock); | 492 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 481 | list_splice_init(&root->fs_info->ordered_extents, &splice); | 493 | list_splice_init(&root->fs_info->ordered_extents, &splice); |
| @@ -494,19 +506,32 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
| 494 | spin_unlock(&root->fs_info->ordered_extent_lock); | 506 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 495 | 507 | ||
| 496 | if (inode) { | 508 | if (inode) { |
| 497 | btrfs_start_ordered_extent(inode, ordered, 1); | 509 | ordered->flush_work.func = btrfs_run_ordered_extent_work; |
| 498 | btrfs_put_ordered_extent(ordered); | 510 | list_add_tail(&ordered->work_list, &works); |
| 499 | if (delay_iput) | 511 | btrfs_queue_worker(&root->fs_info->flush_workers, |
| 500 | btrfs_add_delayed_iput(inode); | 512 | &ordered->flush_work); |
| 501 | else | ||
| 502 | iput(inode); | ||
| 503 | } else { | 513 | } else { |
| 504 | btrfs_put_ordered_extent(ordered); | 514 | btrfs_put_ordered_extent(ordered); |
| 505 | } | 515 | } |
| 506 | 516 | ||
| 517 | cond_resched(); | ||
| 507 | spin_lock(&root->fs_info->ordered_extent_lock); | 518 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 508 | } | 519 | } |
| 509 | spin_unlock(&root->fs_info->ordered_extent_lock); | 520 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 521 | |||
| 522 | list_for_each_entry_safe(ordered, next, &works, work_list) { | ||
| 523 | list_del_init(&ordered->work_list); | ||
| 524 | wait_for_completion(&ordered->completion); | ||
| 525 | |||
| 526 | inode = ordered->inode; | ||
| 527 | btrfs_put_ordered_extent(ordered); | ||
| 528 | if (delay_iput) | ||
| 529 | btrfs_add_delayed_iput(inode); | ||
| 530 | else | ||
| 531 | iput(inode); | ||
| 532 | |||
| 533 | cond_resched(); | ||
| 534 | } | ||
| 510 | } | 535 | } |
| 511 | 536 | ||
| 512 | /* | 537 | /* |
| @@ -519,13 +544,17 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
| 519 | * extra check to make sure the ordered operation list really is empty | 544 | * extra check to make sure the ordered operation list really is empty |
| 520 | * before we return | 545 | * before we return |
| 521 | */ | 546 | */ |
| 522 | void btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | 547 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) |
| 523 | { | 548 | { |
| 524 | struct btrfs_inode *btrfs_inode; | 549 | struct btrfs_inode *btrfs_inode; |
| 525 | struct inode *inode; | 550 | struct inode *inode; |
| 526 | struct list_head splice; | 551 | struct list_head splice; |
| 552 | struct list_head works; | ||
| 553 | struct btrfs_delalloc_work *work, *next; | ||
| 554 | int ret = 0; | ||
| 527 | 555 | ||
| 528 | INIT_LIST_HEAD(&splice); | 556 | INIT_LIST_HEAD(&splice); |
| 557 | INIT_LIST_HEAD(&works); | ||
| 529 | 558 | ||
| 530 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 559 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
| 531 | spin_lock(&root->fs_info->ordered_extent_lock); | 560 | spin_lock(&root->fs_info->ordered_extent_lock); |
| @@ -533,6 +562,7 @@ again: | |||
| 533 | list_splice_init(&root->fs_info->ordered_operations, &splice); | 562 | list_splice_init(&root->fs_info->ordered_operations, &splice); |
| 534 | 563 | ||
| 535 | while (!list_empty(&splice)) { | 564 | while (!list_empty(&splice)) { |
| 565 | |||
| 536 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 566 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
| 537 | ordered_operations); | 567 | ordered_operations); |
| 538 | 568 | ||
| @@ -549,15 +579,26 @@ again: | |||
| 549 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 579 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
| 550 | &root->fs_info->ordered_operations); | 580 | &root->fs_info->ordered_operations); |
| 551 | } | 581 | } |
| 582 | |||
| 583 | if (!inode) | ||
| 584 | continue; | ||
| 552 | spin_unlock(&root->fs_info->ordered_extent_lock); | 585 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 553 | 586 | ||
| 554 | if (inode) { | 587 | work = btrfs_alloc_delalloc_work(inode, wait, 1); |
| 555 | if (wait) | 588 | if (!work) { |
| 556 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | 589 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) |
| 557 | else | 590 | list_add_tail(&btrfs_inode->ordered_operations, |
| 558 | filemap_flush(inode->i_mapping); | 591 | &splice); |
| 559 | btrfs_add_delayed_iput(inode); | 592 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 593 | list_splice_tail(&splice, | ||
| 594 | &root->fs_info->ordered_operations); | ||
| 595 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
| 596 | ret = -ENOMEM; | ||
| 597 | goto out; | ||
| 560 | } | 598 | } |
| 599 | list_add_tail(&work->list, &works); | ||
| 600 | btrfs_queue_worker(&root->fs_info->flush_workers, | ||
| 601 | &work->work); | ||
| 561 | 602 | ||
| 562 | cond_resched(); | 603 | cond_resched(); |
| 563 | spin_lock(&root->fs_info->ordered_extent_lock); | 604 | spin_lock(&root->fs_info->ordered_extent_lock); |
| @@ -566,7 +607,13 @@ again: | |||
| 566 | goto again; | 607 | goto again; |
| 567 | 608 | ||
| 568 | spin_unlock(&root->fs_info->ordered_extent_lock); | 609 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 610 | out: | ||
| 611 | list_for_each_entry_safe(work, next, &works, list) { | ||
| 612 | list_del_init(&work->list); | ||
| 613 | btrfs_wait_and_free_delalloc_work(work); | ||
| 614 | } | ||
| 569 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 615 | mutex_unlock(&root->fs_info->ordered_operations_mutex); |
| 616 | return ret; | ||
| 570 | } | 617 | } |
| 571 | 618 | ||
| 572 | /* | 619 | /* |
| @@ -606,7 +653,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 606 | u64 end; | 653 | u64 end; |
| 607 | u64 orig_end; | 654 | u64 orig_end; |
| 608 | struct btrfs_ordered_extent *ordered; | 655 | struct btrfs_ordered_extent *ordered; |
| 609 | int found; | ||
| 610 | 656 | ||
| 611 | if (start + len < start) { | 657 | if (start + len < start) { |
| 612 | orig_end = INT_LIMIT(loff_t); | 658 | orig_end = INT_LIMIT(loff_t); |
| @@ -642,7 +688,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 642 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); | 688 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
| 643 | 689 | ||
| 644 | end = orig_end; | 690 | end = orig_end; |
| 645 | found = 0; | ||
| 646 | while (1) { | 691 | while (1) { |
| 647 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 692 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
| 648 | if (!ordered) | 693 | if (!ordered) |
| @@ -655,7 +700,6 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
| 655 | btrfs_put_ordered_extent(ordered); | 700 | btrfs_put_ordered_extent(ordered); |
| 656 | break; | 701 | break; |
| 657 | } | 702 | } |
| 658 | found++; | ||
| 659 | btrfs_start_ordered_extent(inode, ordered, 1); | 703 | btrfs_start_ordered_extent(inode, ordered, 1); |
| 660 | end = ordered->file_offset; | 704 | end = ordered->file_offset; |
| 661 | btrfs_put_ordered_extent(ordered); | 705 | btrfs_put_ordered_extent(ordered); |
| @@ -792,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 792 | * if the disk i_size is already at the inode->i_size, or | 836 | * if the disk i_size is already at the inode->i_size, or |
| 793 | * this ordered extent is inside the disk i_size, we're done | 837 | * this ordered extent is inside the disk i_size, we're done |
| 794 | */ | 838 | */ |
| 795 | if (disk_i_size == i_size || offset <= disk_i_size) { | 839 | if (disk_i_size == i_size) |
| 840 | goto out; | ||
| 841 | |||
| 842 | /* | ||
| 843 | * We still need to update disk_i_size if outstanding_isize is greater | ||
| 844 | * than disk_i_size. | ||
| 845 | */ | ||
| 846 | if (offset <= disk_i_size && | ||
| 847 | (!ordered || ordered->outstanding_isize <= disk_i_size)) | ||
| 796 | goto out; | 848 | goto out; |
| 797 | } | ||
| 798 | 849 | ||
| 799 | /* | 850 | /* |
| 800 | * walk backward from this ordered extent to disk_i_size. | 851 | * walk backward from this ordered extent to disk_i_size. |
| @@ -826,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | |||
| 826 | break; | 877 | break; |
| 827 | if (test->file_offset >= i_size) | 878 | if (test->file_offset >= i_size) |
| 828 | break; | 879 | break; |
| 829 | if (test->file_offset >= disk_i_size) { | 880 | if (entry_end(test) > disk_i_size) { |
| 830 | /* | 881 | /* |
| 831 | * we don't update disk_i_size now, so record this | 882 | * we don't update disk_i_size now, so record this |
| 832 | * undealt i_size. Or we will not know the real | 883 | * undealt i_size. Or we will not know the real |
| @@ -934,15 +985,6 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
| 934 | if (last_mod < root->fs_info->last_trans_committed) | 985 | if (last_mod < root->fs_info->last_trans_committed) |
| 935 | return; | 986 | return; |
| 936 | 987 | ||
| 937 | /* | ||
| 938 | * the transaction is already committing. Just start the IO and | ||
| 939 | * don't bother with all of this list nonsense | ||
| 940 | */ | ||
| 941 | if (trans && root->fs_info->running_transaction->blocked) { | ||
| 942 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
| 943 | return; | ||
| 944 | } | ||
| 945 | |||
| 946 | spin_lock(&root->fs_info->ordered_extent_lock); | 988 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 947 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | 989 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { |
| 948 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 990 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
| @@ -959,6 +1001,7 @@ int __init ordered_data_init(void) | |||
| 959 | NULL); | 1001 | NULL); |
| 960 | if (!btrfs_ordered_extent_cache) | 1002 | if (!btrfs_ordered_extent_cache) |
| 961 | return -ENOMEM; | 1003 | return -ENOMEM; |
| 1004 | |||
| 962 | return 0; | 1005 | return 0; |
| 963 | } | 1006 | } |
| 964 | 1007 | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index dd27a0b46a37..f29d4bf5fbe7 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -76,7 +76,7 @@ struct btrfs_ordered_sum { | |||
| 76 | 76 | ||
| 77 | #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ | 77 | #define BTRFS_ORDERED_IOERR 6 /* We had an io error when writing this out */ |
| 78 | 78 | ||
| 79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates wether this ordered extent | 79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent |
| 80 | * has done its due diligence in updating | 80 | * has done its due diligence in updating |
| 81 | * the isize. */ | 81 | * the isize. */ |
| 82 | 82 | ||
| @@ -128,8 +128,11 @@ struct btrfs_ordered_extent { | |||
| 128 | struct list_head root_extent_list; | 128 | struct list_head root_extent_list; |
| 129 | 129 | ||
| 130 | struct btrfs_work work; | 130 | struct btrfs_work work; |
| 131 | }; | ||
| 132 | 131 | ||
| 132 | struct completion completion; | ||
| 133 | struct btrfs_work flush_work; | ||
| 134 | struct list_head work_list; | ||
| 135 | }; | ||
| 133 | 136 | ||
| 134 | /* | 137 | /* |
| 135 | * calculates the total size you need to allocate for an ordered sum | 138 | * calculates the total size you need to allocate for an ordered sum |
| @@ -186,7 +189,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
| 186 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 189 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
| 187 | struct btrfs_ordered_extent *ordered); | 190 | struct btrfs_ordered_extent *ordered); |
| 188 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 191 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
| 189 | void btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 192 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); |
| 190 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 193 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
| 191 | struct btrfs_root *root, | 194 | struct btrfs_root *root, |
| 192 | struct inode *inode); | 195 | struct inode *inode); |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 5e23684887eb..50d95fd190a5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
| @@ -297,6 +297,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
| 297 | case BTRFS_DEV_STATS_KEY: | 297 | case BTRFS_DEV_STATS_KEY: |
| 298 | printk(KERN_INFO "\t\tdevice stats\n"); | 298 | printk(KERN_INFO "\t\tdevice stats\n"); |
| 299 | break; | 299 | break; |
| 300 | case BTRFS_DEV_REPLACE_KEY: | ||
| 301 | printk(KERN_INFO "\t\tdev replace\n"); | ||
| 302 | break; | ||
| 300 | }; | 303 | }; |
| 301 | } | 304 | } |
| 302 | } | 305 | } |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fe9d02c45f8e..a5c856234323 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
| @@ -379,6 +379,13 @@ next1: | |||
| 379 | 379 | ||
| 380 | ret = add_relation_rb(fs_info, found_key.objectid, | 380 | ret = add_relation_rb(fs_info, found_key.objectid, |
| 381 | found_key.offset); | 381 | found_key.offset); |
| 382 | if (ret == -ENOENT) { | ||
| 383 | printk(KERN_WARNING | ||
| 384 | "btrfs: orphan qgroup relation 0x%llx->0x%llx\n", | ||
| 385 | (unsigned long long)found_key.objectid, | ||
| 386 | (unsigned long long)found_key.offset); | ||
| 387 | ret = 0; /* ignore the error */ | ||
| 388 | } | ||
| 382 | if (ret) | 389 | if (ret) |
| 383 | goto out; | 390 | goto out; |
| 384 | next2: | 391 | next2: |
| @@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, | |||
| 956 | struct btrfs_fs_info *fs_info, u64 qgroupid) | 963 | struct btrfs_fs_info *fs_info, u64 qgroupid) |
| 957 | { | 964 | { |
| 958 | struct btrfs_root *quota_root; | 965 | struct btrfs_root *quota_root; |
| 966 | struct btrfs_qgroup *qgroup; | ||
| 959 | int ret = 0; | 967 | int ret = 0; |
| 960 | 968 | ||
| 961 | quota_root = fs_info->quota_root; | 969 | quota_root = fs_info->quota_root; |
| 962 | if (!quota_root) | 970 | if (!quota_root) |
| 963 | return -EINVAL; | 971 | return -EINVAL; |
| 964 | 972 | ||
| 973 | /* check if there are no relations to this qgroup */ | ||
| 974 | spin_lock(&fs_info->qgroup_lock); | ||
| 975 | qgroup = find_qgroup_rb(fs_info, qgroupid); | ||
| 976 | if (qgroup) { | ||
| 977 | if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) { | ||
| 978 | spin_unlock(&fs_info->qgroup_lock); | ||
| 979 | return -EBUSY; | ||
| 980 | } | ||
| 981 | } | ||
| 982 | spin_unlock(&fs_info->qgroup_lock); | ||
| 983 | |||
| 965 | ret = del_qgroup_item(trans, quota_root, qgroupid); | 984 | ret = del_qgroup_item(trans, quota_root, qgroupid); |
| 966 | 985 | ||
| 967 | spin_lock(&fs_info->qgroup_lock); | 986 | spin_lock(&fs_info->qgroup_lock); |
| 968 | del_qgroup_rb(quota_root->fs_info, qgroupid); | 987 | del_qgroup_rb(quota_root->fs_info, qgroupid); |
| 969 | |||
| 970 | spin_unlock(&fs_info->qgroup_lock); | 988 | spin_unlock(&fs_info->qgroup_lock); |
| 971 | 989 | ||
| 972 | return ret; | 990 | return ret; |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index a955669519a2..96b93daa0bbb 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include "volumes.h" | 27 | #include "volumes.h" |
| 28 | #include "disk-io.h" | 28 | #include "disk-io.h" |
| 29 | #include "transaction.h" | 29 | #include "transaction.h" |
| 30 | #include "dev-replace.h" | ||
| 30 | 31 | ||
| 31 | #undef DEBUG | 32 | #undef DEBUG |
| 32 | 33 | ||
| @@ -323,7 +324,6 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 323 | struct reada_extent *re = NULL; | 324 | struct reada_extent *re = NULL; |
| 324 | struct reada_extent *re_exist = NULL; | 325 | struct reada_extent *re_exist = NULL; |
| 325 | struct btrfs_fs_info *fs_info = root->fs_info; | 326 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 326 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
| 327 | struct btrfs_bio *bbio = NULL; | 327 | struct btrfs_bio *bbio = NULL; |
| 328 | struct btrfs_device *dev; | 328 | struct btrfs_device *dev; |
| 329 | struct btrfs_device *prev_dev; | 329 | struct btrfs_device *prev_dev; |
| @@ -332,6 +332,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 332 | int nzones = 0; | 332 | int nzones = 0; |
| 333 | int i; | 333 | int i; |
| 334 | unsigned long index = logical >> PAGE_CACHE_SHIFT; | 334 | unsigned long index = logical >> PAGE_CACHE_SHIFT; |
| 335 | int dev_replace_is_ongoing; | ||
| 335 | 336 | ||
| 336 | spin_lock(&fs_info->reada_lock); | 337 | spin_lock(&fs_info->reada_lock); |
| 337 | re = radix_tree_lookup(&fs_info->reada_tree, index); | 338 | re = radix_tree_lookup(&fs_info->reada_tree, index); |
| @@ -358,7 +359,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 358 | * map block | 359 | * map block |
| 359 | */ | 360 | */ |
| 360 | length = blocksize; | 361 | length = blocksize; |
| 361 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, &bbio, 0); | 362 | ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, &length, |
| 363 | &bbio, 0); | ||
| 362 | if (ret || !bbio || length < blocksize) | 364 | if (ret || !bbio || length < blocksize) |
| 363 | goto error; | 365 | goto error; |
| 364 | 366 | ||
| @@ -393,6 +395,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 393 | } | 395 | } |
| 394 | 396 | ||
| 395 | /* insert extent in reada_tree + all per-device trees, all or nothing */ | 397 | /* insert extent in reada_tree + all per-device trees, all or nothing */ |
| 398 | btrfs_dev_replace_lock(&fs_info->dev_replace); | ||
| 396 | spin_lock(&fs_info->reada_lock); | 399 | spin_lock(&fs_info->reada_lock); |
| 397 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); | 400 | ret = radix_tree_insert(&fs_info->reada_tree, index, re); |
| 398 | if (ret == -EEXIST) { | 401 | if (ret == -EEXIST) { |
| @@ -400,13 +403,17 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 400 | BUG_ON(!re_exist); | 403 | BUG_ON(!re_exist); |
| 401 | re_exist->refcnt++; | 404 | re_exist->refcnt++; |
| 402 | spin_unlock(&fs_info->reada_lock); | 405 | spin_unlock(&fs_info->reada_lock); |
| 406 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 403 | goto error; | 407 | goto error; |
| 404 | } | 408 | } |
| 405 | if (ret) { | 409 | if (ret) { |
| 406 | spin_unlock(&fs_info->reada_lock); | 410 | spin_unlock(&fs_info->reada_lock); |
| 411 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 407 | goto error; | 412 | goto error; |
| 408 | } | 413 | } |
| 409 | prev_dev = NULL; | 414 | prev_dev = NULL; |
| 415 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing( | ||
| 416 | &fs_info->dev_replace); | ||
| 410 | for (i = 0; i < nzones; ++i) { | 417 | for (i = 0; i < nzones; ++i) { |
| 411 | dev = bbio->stripes[i].dev; | 418 | dev = bbio->stripes[i].dev; |
| 412 | if (dev == prev_dev) { | 419 | if (dev == prev_dev) { |
| @@ -419,21 +426,36 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, | |||
| 419 | */ | 426 | */ |
| 420 | continue; | 427 | continue; |
| 421 | } | 428 | } |
| 429 | if (!dev->bdev) { | ||
| 430 | /* cannot read ahead on missing device */ | ||
| 431 | continue; | ||
| 432 | } | ||
| 433 | if (dev_replace_is_ongoing && | ||
| 434 | dev == fs_info->dev_replace.tgtdev) { | ||
| 435 | /* | ||
| 436 | * as this device is selected for reading only as | ||
| 437 | * a last resort, skip it for read ahead. | ||
| 438 | */ | ||
| 439 | continue; | ||
| 440 | } | ||
| 422 | prev_dev = dev; | 441 | prev_dev = dev; |
| 423 | ret = radix_tree_insert(&dev->reada_extents, index, re); | 442 | ret = radix_tree_insert(&dev->reada_extents, index, re); |
| 424 | if (ret) { | 443 | if (ret) { |
| 425 | while (--i >= 0) { | 444 | while (--i >= 0) { |
| 426 | dev = bbio->stripes[i].dev; | 445 | dev = bbio->stripes[i].dev; |
| 427 | BUG_ON(dev == NULL); | 446 | BUG_ON(dev == NULL); |
| 447 | /* ignore whether the entry was inserted */ | ||
| 428 | radix_tree_delete(&dev->reada_extents, index); | 448 | radix_tree_delete(&dev->reada_extents, index); |
| 429 | } | 449 | } |
| 430 | BUG_ON(fs_info == NULL); | 450 | BUG_ON(fs_info == NULL); |
| 431 | radix_tree_delete(&fs_info->reada_tree, index); | 451 | radix_tree_delete(&fs_info->reada_tree, index); |
| 432 | spin_unlock(&fs_info->reada_lock); | 452 | spin_unlock(&fs_info->reada_lock); |
| 453 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 433 | goto error; | 454 | goto error; |
| 434 | } | 455 | } |
| 435 | } | 456 | } |
| 436 | spin_unlock(&fs_info->reada_lock); | 457 | spin_unlock(&fs_info->reada_lock); |
| 458 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 437 | 459 | ||
| 438 | kfree(bbio); | 460 | kfree(bbio); |
| 439 | return re; | 461 | return re; |
| @@ -915,7 +937,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root, | |||
| 915 | generation = btrfs_header_generation(node); | 937 | generation = btrfs_header_generation(node); |
| 916 | free_extent_buffer(node); | 938 | free_extent_buffer(node); |
| 917 | 939 | ||
| 918 | reada_add_block(rc, start, &max_key, level, generation); | 940 | if (reada_add_block(rc, start, &max_key, level, generation)) { |
| 941 | kfree(rc); | ||
| 942 | return ERR_PTR(-ENOMEM); | ||
| 943 | } | ||
| 919 | 944 | ||
| 920 | reada_start_machine(root->fs_info); | 945 | reada_start_machine(root->fs_info); |
| 921 | 946 | ||
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 776f0aa128fc..300e09ac3659 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -2025,7 +2025,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 2025 | struct btrfs_root_item *root_item; | 2025 | struct btrfs_root_item *root_item; |
| 2026 | struct btrfs_path *path; | 2026 | struct btrfs_path *path; |
| 2027 | struct extent_buffer *leaf; | 2027 | struct extent_buffer *leaf; |
| 2028 | unsigned long nr; | ||
| 2029 | int level; | 2028 | int level; |
| 2030 | int max_level; | 2029 | int max_level; |
| 2031 | int replaced = 0; | 2030 | int replaced = 0; |
| @@ -2074,7 +2073,8 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 2074 | BUG_ON(IS_ERR(trans)); | 2073 | BUG_ON(IS_ERR(trans)); |
| 2075 | trans->block_rsv = rc->block_rsv; | 2074 | trans->block_rsv = rc->block_rsv; |
| 2076 | 2075 | ||
| 2077 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved); | 2076 | ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, |
| 2077 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 2078 | if (ret) { | 2078 | if (ret) { |
| 2079 | BUG_ON(ret != -EAGAIN); | 2079 | BUG_ON(ret != -EAGAIN); |
| 2080 | ret = btrfs_commit_transaction(trans, root); | 2080 | ret = btrfs_commit_transaction(trans, root); |
| @@ -2125,10 +2125,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 2125 | path->slots[level]); | 2125 | path->slots[level]); |
| 2126 | root_item->drop_level = level; | 2126 | root_item->drop_level = level; |
| 2127 | 2127 | ||
| 2128 | nr = trans->blocks_used; | ||
| 2129 | btrfs_end_transaction_throttle(trans, root); | 2128 | btrfs_end_transaction_throttle(trans, root); |
| 2130 | 2129 | ||
| 2131 | btrfs_btree_balance_dirty(root, nr); | 2130 | btrfs_btree_balance_dirty(root); |
| 2132 | 2131 | ||
| 2133 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2132 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 2134 | invalidate_extent_cache(root, &key, &next_key); | 2133 | invalidate_extent_cache(root, &key, &next_key); |
| @@ -2155,10 +2154,9 @@ out: | |||
| 2155 | btrfs_update_reloc_root(trans, root); | 2154 | btrfs_update_reloc_root(trans, root); |
| 2156 | } | 2155 | } |
| 2157 | 2156 | ||
| 2158 | nr = trans->blocks_used; | ||
| 2159 | btrfs_end_transaction_throttle(trans, root); | 2157 | btrfs_end_transaction_throttle(trans, root); |
| 2160 | 2158 | ||
| 2161 | btrfs_btree_balance_dirty(root, nr); | 2159 | btrfs_btree_balance_dirty(root); |
| 2162 | 2160 | ||
| 2163 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2161 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 2164 | invalidate_extent_cache(root, &key, &next_key); | 2162 | invalidate_extent_cache(root, &key, &next_key); |
| @@ -2184,7 +2182,8 @@ int prepare_to_merge(struct reloc_control *rc, int err) | |||
| 2184 | again: | 2182 | again: |
| 2185 | if (!err) { | 2183 | if (!err) { |
| 2186 | num_bytes = rc->merging_rsv_size; | 2184 | num_bytes = rc->merging_rsv_size; |
| 2187 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); | 2185 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, |
| 2186 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 2188 | if (ret) | 2187 | if (ret) |
| 2189 | err = ret; | 2188 | err = ret; |
| 2190 | } | 2189 | } |
| @@ -2459,7 +2458,8 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, | |||
| 2459 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; | 2458 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
| 2460 | 2459 | ||
| 2461 | trans->block_rsv = rc->block_rsv; | 2460 | trans->block_rsv = rc->block_rsv; |
| 2462 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes); | 2461 | ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, |
| 2462 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 2463 | if (ret) { | 2463 | if (ret) { |
| 2464 | if (ret == -EAGAIN) | 2464 | if (ret == -EAGAIN) |
| 2465 | rc->commit_transaction = 1; | 2465 | rc->commit_transaction = 1; |
| @@ -3259,7 +3259,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, | |||
| 3259 | struct btrfs_path *path; | 3259 | struct btrfs_path *path; |
| 3260 | struct btrfs_root *root = fs_info->tree_root; | 3260 | struct btrfs_root *root = fs_info->tree_root; |
| 3261 | struct btrfs_trans_handle *trans; | 3261 | struct btrfs_trans_handle *trans; |
| 3262 | unsigned long nr; | ||
| 3263 | int ret = 0; | 3262 | int ret = 0; |
| 3264 | 3263 | ||
| 3265 | if (inode) | 3264 | if (inode) |
| @@ -3293,9 +3292,8 @@ truncate: | |||
| 3293 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); | 3292 | ret = btrfs_truncate_free_space_cache(root, trans, path, inode); |
| 3294 | 3293 | ||
| 3295 | btrfs_free_path(path); | 3294 | btrfs_free_path(path); |
| 3296 | nr = trans->blocks_used; | ||
| 3297 | btrfs_end_transaction(trans, root); | 3295 | btrfs_end_transaction(trans, root); |
| 3298 | btrfs_btree_balance_dirty(root, nr); | 3296 | btrfs_btree_balance_dirty(root); |
| 3299 | out: | 3297 | out: |
| 3300 | iput(inode); | 3298 | iput(inode); |
| 3301 | return ret; | 3299 | return ret; |
| @@ -3685,7 +3683,8 @@ int prepare_to_relocate(struct reloc_control *rc) | |||
| 3685 | * is no reservation in transaction handle. | 3683 | * is no reservation in transaction handle. |
| 3686 | */ | 3684 | */ |
| 3687 | ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, | 3685 | ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, |
| 3688 | rc->extent_root->nodesize * 256); | 3686 | rc->extent_root->nodesize * 256, |
| 3687 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 3689 | if (ret) | 3688 | if (ret) |
| 3690 | return ret; | 3689 | return ret; |
| 3691 | 3690 | ||
| @@ -3711,7 +3710,6 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3711 | struct btrfs_trans_handle *trans = NULL; | 3710 | struct btrfs_trans_handle *trans = NULL; |
| 3712 | struct btrfs_path *path; | 3711 | struct btrfs_path *path; |
| 3713 | struct btrfs_extent_item *ei; | 3712 | struct btrfs_extent_item *ei; |
| 3714 | unsigned long nr; | ||
| 3715 | u64 flags; | 3713 | u64 flags; |
| 3716 | u32 item_size; | 3714 | u32 item_size; |
| 3717 | int ret; | 3715 | int ret; |
| @@ -3828,9 +3826,8 @@ restart: | |||
| 3828 | ret = btrfs_commit_transaction(trans, rc->extent_root); | 3826 | ret = btrfs_commit_transaction(trans, rc->extent_root); |
| 3829 | BUG_ON(ret); | 3827 | BUG_ON(ret); |
| 3830 | } else { | 3828 | } else { |
| 3831 | nr = trans->blocks_used; | ||
| 3832 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3829 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
| 3833 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3830 | btrfs_btree_balance_dirty(rc->extent_root); |
| 3834 | } | 3831 | } |
| 3835 | trans = NULL; | 3832 | trans = NULL; |
| 3836 | 3833 | ||
| @@ -3860,9 +3857,8 @@ restart: | |||
| 3860 | GFP_NOFS); | 3857 | GFP_NOFS); |
| 3861 | 3858 | ||
| 3862 | if (trans) { | 3859 | if (trans) { |
| 3863 | nr = trans->blocks_used; | ||
| 3864 | btrfs_end_transaction_throttle(trans, rc->extent_root); | 3860 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
| 3865 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3861 | btrfs_btree_balance_dirty(rc->extent_root); |
| 3866 | } | 3862 | } |
| 3867 | 3863 | ||
| 3868 | if (!err) { | 3864 | if (!err) { |
| @@ -3941,7 +3937,6 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3941 | struct btrfs_trans_handle *trans; | 3937 | struct btrfs_trans_handle *trans; |
| 3942 | struct btrfs_root *root; | 3938 | struct btrfs_root *root; |
| 3943 | struct btrfs_key key; | 3939 | struct btrfs_key key; |
| 3944 | unsigned long nr; | ||
| 3945 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; | 3940 | u64 objectid = BTRFS_FIRST_FREE_OBJECTID; |
| 3946 | int err = 0; | 3941 | int err = 0; |
| 3947 | 3942 | ||
| @@ -3969,9 +3964,8 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3969 | 3964 | ||
| 3970 | err = btrfs_orphan_add(trans, inode); | 3965 | err = btrfs_orphan_add(trans, inode); |
| 3971 | out: | 3966 | out: |
| 3972 | nr = trans->blocks_used; | ||
| 3973 | btrfs_end_transaction(trans, root); | 3967 | btrfs_end_transaction(trans, root); |
| 3974 | btrfs_btree_balance_dirty(root, nr); | 3968 | btrfs_btree_balance_dirty(root); |
| 3975 | if (err) { | 3969 | if (err) { |
| 3976 | if (inode) | 3970 | if (inode) |
| 3977 | iput(inode); | 3971 | iput(inode); |
| @@ -4057,7 +4051,11 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 4057 | (unsigned long long)rc->block_group->key.objectid, | 4051 | (unsigned long long)rc->block_group->key.objectid, |
| 4058 | (unsigned long long)rc->block_group->flags); | 4052 | (unsigned long long)rc->block_group->flags); |
| 4059 | 4053 | ||
| 4060 | btrfs_start_delalloc_inodes(fs_info->tree_root, 0); | 4054 | ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0); |
| 4055 | if (ret < 0) { | ||
| 4056 | err = ret; | ||
| 4057 | goto out; | ||
| 4058 | } | ||
| 4061 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); | 4059 | btrfs_wait_ordered_extents(fs_info->tree_root, 0); |
| 4062 | 4060 | ||
| 4063 | while (1) { | 4061 | while (1) { |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index eb923d087da7..668af537a3ea 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -548,9 +548,9 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans, | |||
| 548 | struct btrfs_root_item *item = &root->root_item; | 548 | struct btrfs_root_item *item = &root->root_item; |
| 549 | struct timespec ct = CURRENT_TIME; | 549 | struct timespec ct = CURRENT_TIME; |
| 550 | 550 | ||
| 551 | spin_lock(&root->root_times_lock); | 551 | spin_lock(&root->root_item_lock); |
| 552 | item->ctransid = cpu_to_le64(trans->transid); | 552 | item->ctransid = cpu_to_le64(trans->transid); |
| 553 | item->ctime.sec = cpu_to_le64(ct.tv_sec); | 553 | item->ctime.sec = cpu_to_le64(ct.tv_sec); |
| 554 | item->ctime.nsec = cpu_to_le32(ct.tv_nsec); | 554 | item->ctime.nsec = cpu_to_le32(ct.tv_nsec); |
| 555 | spin_unlock(&root->root_times_lock); | 555 | spin_unlock(&root->root_item_lock); |
| 556 | } | 556 | } |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 27892f67e69b..67783e03d121 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2011 STRATO. All rights reserved. | 2 | * Copyright (C) 2011, 2012 STRATO. All rights reserved. |
| 3 | * | 3 | * |
| 4 | * This program is free software; you can redistribute it and/or | 4 | * This program is free software; you can redistribute it and/or |
| 5 | * modify it under the terms of the GNU General Public | 5 | * modify it under the terms of the GNU General Public |
| @@ -25,6 +25,7 @@ | |||
| 25 | #include "transaction.h" | 25 | #include "transaction.h" |
| 26 | #include "backref.h" | 26 | #include "backref.h" |
| 27 | #include "extent_io.h" | 27 | #include "extent_io.h" |
| 28 | #include "dev-replace.h" | ||
| 28 | #include "check-integrity.h" | 29 | #include "check-integrity.h" |
| 29 | #include "rcu-string.h" | 30 | #include "rcu-string.h" |
| 30 | 31 | ||
| @@ -42,10 +43,23 @@ | |||
| 42 | */ | 43 | */ |
| 43 | 44 | ||
| 44 | struct scrub_block; | 45 | struct scrub_block; |
| 45 | struct scrub_dev; | 46 | struct scrub_ctx; |
| 46 | 47 | ||
| 47 | #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ | 48 | /* |
| 48 | #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ | 49 | * the following three values only influence the performance. |
| 50 | * The last one configures the number of parallel and outstanding I/O | ||
| 51 | * operations. The first two values configure an upper limit for the number | ||
| 52 | * of (dynamically allocated) pages that are added to a bio. | ||
| 53 | */ | ||
| 54 | #define SCRUB_PAGES_PER_RD_BIO 32 /* 128k per bio */ | ||
| 55 | #define SCRUB_PAGES_PER_WR_BIO 32 /* 128k per bio */ | ||
| 56 | #define SCRUB_BIOS_PER_SCTX 64 /* 8MB per device in flight */ | ||
| 57 | |||
| 58 | /* | ||
| 59 | * the following value times PAGE_SIZE needs to be large enough to match the | ||
| 60 | * largest node/leaf/sector size that shall be supported. | ||
| 61 | * Values larger than BTRFS_STRIPE_LEN are not supported. | ||
| 62 | */ | ||
| 49 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ | 63 | #define SCRUB_MAX_PAGES_PER_BLOCK 16 /* 64k per node/leaf/sector */ |
| 50 | 64 | ||
| 51 | struct scrub_page { | 65 | struct scrub_page { |
| @@ -56,6 +70,8 @@ struct scrub_page { | |||
| 56 | u64 generation; | 70 | u64 generation; |
| 57 | u64 logical; | 71 | u64 logical; |
| 58 | u64 physical; | 72 | u64 physical; |
| 73 | u64 physical_for_dev_replace; | ||
| 74 | atomic_t ref_count; | ||
| 59 | struct { | 75 | struct { |
| 60 | unsigned int mirror_num:8; | 76 | unsigned int mirror_num:8; |
| 61 | unsigned int have_csum:1; | 77 | unsigned int have_csum:1; |
| @@ -66,23 +82,28 @@ struct scrub_page { | |||
| 66 | 82 | ||
| 67 | struct scrub_bio { | 83 | struct scrub_bio { |
| 68 | int index; | 84 | int index; |
| 69 | struct scrub_dev *sdev; | 85 | struct scrub_ctx *sctx; |
| 86 | struct btrfs_device *dev; | ||
| 70 | struct bio *bio; | 87 | struct bio *bio; |
| 71 | int err; | 88 | int err; |
| 72 | u64 logical; | 89 | u64 logical; |
| 73 | u64 physical; | 90 | u64 physical; |
| 74 | struct scrub_page *pagev[SCRUB_PAGES_PER_BIO]; | 91 | #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO |
| 92 | struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO]; | ||
| 93 | #else | ||
| 94 | struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO]; | ||
| 95 | #endif | ||
| 75 | int page_count; | 96 | int page_count; |
| 76 | int next_free; | 97 | int next_free; |
| 77 | struct btrfs_work work; | 98 | struct btrfs_work work; |
| 78 | }; | 99 | }; |
| 79 | 100 | ||
| 80 | struct scrub_block { | 101 | struct scrub_block { |
| 81 | struct scrub_page pagev[SCRUB_MAX_PAGES_PER_BLOCK]; | 102 | struct scrub_page *pagev[SCRUB_MAX_PAGES_PER_BLOCK]; |
| 82 | int page_count; | 103 | int page_count; |
| 83 | atomic_t outstanding_pages; | 104 | atomic_t outstanding_pages; |
| 84 | atomic_t ref_count; /* free mem on transition to zero */ | 105 | atomic_t ref_count; /* free mem on transition to zero */ |
| 85 | struct scrub_dev *sdev; | 106 | struct scrub_ctx *sctx; |
| 86 | struct { | 107 | struct { |
| 87 | unsigned int header_error:1; | 108 | unsigned int header_error:1; |
| 88 | unsigned int checksum_error:1; | 109 | unsigned int checksum_error:1; |
| @@ -91,23 +112,35 @@ struct scrub_block { | |||
| 91 | }; | 112 | }; |
| 92 | }; | 113 | }; |
| 93 | 114 | ||
| 94 | struct scrub_dev { | 115 | struct scrub_wr_ctx { |
| 95 | struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; | 116 | struct scrub_bio *wr_curr_bio; |
| 96 | struct btrfs_device *dev; | 117 | struct btrfs_device *tgtdev; |
| 118 | int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ | ||
| 119 | atomic_t flush_all_writes; | ||
| 120 | struct mutex wr_lock; | ||
| 121 | }; | ||
| 122 | |||
| 123 | struct scrub_ctx { | ||
| 124 | struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; | ||
| 125 | struct btrfs_root *dev_root; | ||
| 97 | int first_free; | 126 | int first_free; |
| 98 | int curr; | 127 | int curr; |
| 99 | atomic_t in_flight; | 128 | atomic_t bios_in_flight; |
| 100 | atomic_t fixup_cnt; | 129 | atomic_t workers_pending; |
| 101 | spinlock_t list_lock; | 130 | spinlock_t list_lock; |
| 102 | wait_queue_head_t list_wait; | 131 | wait_queue_head_t list_wait; |
| 103 | u16 csum_size; | 132 | u16 csum_size; |
| 104 | struct list_head csum_list; | 133 | struct list_head csum_list; |
| 105 | atomic_t cancel_req; | 134 | atomic_t cancel_req; |
| 106 | int readonly; | 135 | int readonly; |
| 107 | int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */ | 136 | int pages_per_rd_bio; |
| 108 | u32 sectorsize; | 137 | u32 sectorsize; |
| 109 | u32 nodesize; | 138 | u32 nodesize; |
| 110 | u32 leafsize; | 139 | u32 leafsize; |
| 140 | |||
| 141 | int is_dev_replace; | ||
| 142 | struct scrub_wr_ctx wr_ctx; | ||
| 143 | |||
| 111 | /* | 144 | /* |
| 112 | * statistics | 145 | * statistics |
| 113 | */ | 146 | */ |
| @@ -116,13 +149,23 @@ struct scrub_dev { | |||
| 116 | }; | 149 | }; |
| 117 | 150 | ||
| 118 | struct scrub_fixup_nodatasum { | 151 | struct scrub_fixup_nodatasum { |
| 119 | struct scrub_dev *sdev; | 152 | struct scrub_ctx *sctx; |
| 153 | struct btrfs_device *dev; | ||
| 120 | u64 logical; | 154 | u64 logical; |
| 121 | struct btrfs_root *root; | 155 | struct btrfs_root *root; |
| 122 | struct btrfs_work work; | 156 | struct btrfs_work work; |
| 123 | int mirror_num; | 157 | int mirror_num; |
| 124 | }; | 158 | }; |
| 125 | 159 | ||
| 160 | struct scrub_copy_nocow_ctx { | ||
| 161 | struct scrub_ctx *sctx; | ||
| 162 | u64 logical; | ||
| 163 | u64 len; | ||
| 164 | int mirror_num; | ||
| 165 | u64 physical_for_dev_replace; | ||
| 166 | struct btrfs_work work; | ||
| 167 | }; | ||
| 168 | |||
| 126 | struct scrub_warning { | 169 | struct scrub_warning { |
| 127 | struct btrfs_path *path; | 170 | struct btrfs_path *path; |
| 128 | u64 extent_item_size; | 171 | u64 extent_item_size; |
| @@ -137,15 +180,20 @@ struct scrub_warning { | |||
| 137 | }; | 180 | }; |
| 138 | 181 | ||
| 139 | 182 | ||
| 183 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx); | ||
| 184 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx); | ||
| 185 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx); | ||
| 186 | static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); | ||
| 140 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); | 187 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); |
| 141 | static int scrub_setup_recheck_block(struct scrub_dev *sdev, | 188 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
| 142 | struct btrfs_mapping_tree *map_tree, | 189 | struct btrfs_fs_info *fs_info, |
| 190 | struct scrub_block *original_sblock, | ||
| 143 | u64 length, u64 logical, | 191 | u64 length, u64 logical, |
| 144 | struct scrub_block *sblock); | 192 | struct scrub_block *sblocks_for_recheck); |
| 145 | static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | 193 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
| 146 | struct scrub_block *sblock, int is_metadata, | 194 | struct scrub_block *sblock, int is_metadata, |
| 147 | int have_csum, u8 *csum, u64 generation, | 195 | int have_csum, u8 *csum, u64 generation, |
| 148 | u16 csum_size); | 196 | u16 csum_size); |
| 149 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 197 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
| 150 | struct scrub_block *sblock, | 198 | struct scrub_block *sblock, |
| 151 | int is_metadata, int have_csum, | 199 | int is_metadata, int have_csum, |
| @@ -158,118 +206,221 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | |||
| 158 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | 206 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, |
| 159 | struct scrub_block *sblock_good, | 207 | struct scrub_block *sblock_good, |
| 160 | int page_num, int force_write); | 208 | int page_num, int force_write); |
| 209 | static void scrub_write_block_to_dev_replace(struct scrub_block *sblock); | ||
| 210 | static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, | ||
| 211 | int page_num); | ||
| 161 | static int scrub_checksum_data(struct scrub_block *sblock); | 212 | static int scrub_checksum_data(struct scrub_block *sblock); |
| 162 | static int scrub_checksum_tree_block(struct scrub_block *sblock); | 213 | static int scrub_checksum_tree_block(struct scrub_block *sblock); |
| 163 | static int scrub_checksum_super(struct scrub_block *sblock); | 214 | static int scrub_checksum_super(struct scrub_block *sblock); |
| 164 | static void scrub_block_get(struct scrub_block *sblock); | 215 | static void scrub_block_get(struct scrub_block *sblock); |
| 165 | static void scrub_block_put(struct scrub_block *sblock); | 216 | static void scrub_block_put(struct scrub_block *sblock); |
| 166 | static int scrub_add_page_to_bio(struct scrub_dev *sdev, | 217 | static void scrub_page_get(struct scrub_page *spage); |
| 167 | struct scrub_page *spage); | 218 | static void scrub_page_put(struct scrub_page *spage); |
| 168 | static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | 219 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
| 169 | u64 physical, u64 flags, u64 gen, int mirror_num, | 220 | struct scrub_page *spage); |
| 170 | u8 *csum, int force); | 221 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 222 | u64 physical, struct btrfs_device *dev, u64 flags, | ||
| 223 | u64 gen, int mirror_num, u8 *csum, int force, | ||
| 224 | u64 physical_for_dev_replace); | ||
| 171 | static void scrub_bio_end_io(struct bio *bio, int err); | 225 | static void scrub_bio_end_io(struct bio *bio, int err); |
| 172 | static void scrub_bio_end_io_worker(struct btrfs_work *work); | 226 | static void scrub_bio_end_io_worker(struct btrfs_work *work); |
| 173 | static void scrub_block_complete(struct scrub_block *sblock); | 227 | static void scrub_block_complete(struct scrub_block *sblock); |
| 228 | static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | ||
| 229 | u64 extent_logical, u64 extent_len, | ||
| 230 | u64 *extent_physical, | ||
| 231 | struct btrfs_device **extent_dev, | ||
| 232 | int *extent_mirror_num); | ||
| 233 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, | ||
| 234 | struct scrub_wr_ctx *wr_ctx, | ||
| 235 | struct btrfs_fs_info *fs_info, | ||
| 236 | struct btrfs_device *dev, | ||
| 237 | int is_dev_replace); | ||
| 238 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); | ||
| 239 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | ||
| 240 | struct scrub_page *spage); | ||
| 241 | static void scrub_wr_submit(struct scrub_ctx *sctx); | ||
| 242 | static void scrub_wr_bio_end_io(struct bio *bio, int err); | ||
| 243 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); | ||
| 244 | static int write_page_nocow(struct scrub_ctx *sctx, | ||
| 245 | u64 physical_for_dev_replace, struct page *page); | ||
| 246 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, | ||
| 247 | void *ctx); | ||
| 248 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | ||
| 249 | int mirror_num, u64 physical_for_dev_replace); | ||
| 250 | static void copy_nocow_pages_worker(struct btrfs_work *work); | ||
| 251 | |||
| 252 | |||
| 253 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx) | ||
| 254 | { | ||
| 255 | atomic_inc(&sctx->bios_in_flight); | ||
| 256 | } | ||
| 174 | 257 | ||
| 258 | static void scrub_pending_bio_dec(struct scrub_ctx *sctx) | ||
| 259 | { | ||
| 260 | atomic_dec(&sctx->bios_in_flight); | ||
| 261 | wake_up(&sctx->list_wait); | ||
| 262 | } | ||
| 175 | 263 | ||
| 176 | static void scrub_free_csums(struct scrub_dev *sdev) | 264 | /* |
| 265 | * used for workers that require transaction commits (i.e., for the | ||
| 266 | * NOCOW case) | ||
| 267 | */ | ||
| 268 | static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | ||
| 177 | { | 269 | { |
| 178 | while (!list_empty(&sdev->csum_list)) { | 270 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
| 271 | |||
| 272 | /* | ||
| 273 | * increment scrubs_running to prevent cancel requests from | ||
| 274 | * completing as long as a worker is running. we must also | ||
| 275 | * increment scrubs_paused to prevent deadlocking on pause | ||
| 276 | * requests used for transactions commits (as the worker uses a | ||
| 277 | * transaction context). it is safe to regard the worker | ||
| 278 | * as paused for all matters practical. effectively, we only | ||
| 279 | * avoid cancellation requests from completing. | ||
| 280 | */ | ||
| 281 | mutex_lock(&fs_info->scrub_lock); | ||
| 282 | atomic_inc(&fs_info->scrubs_running); | ||
| 283 | atomic_inc(&fs_info->scrubs_paused); | ||
| 284 | mutex_unlock(&fs_info->scrub_lock); | ||
| 285 | atomic_inc(&sctx->workers_pending); | ||
| 286 | } | ||
| 287 | |||
| 288 | /* used for workers that require transaction commits */ | ||
| 289 | static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) | ||
| 290 | { | ||
| 291 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
| 292 | |||
| 293 | /* | ||
| 294 | * see scrub_pending_trans_workers_inc() why we're pretending | ||
| 295 | * to be paused in the scrub counters | ||
| 296 | */ | ||
| 297 | mutex_lock(&fs_info->scrub_lock); | ||
| 298 | atomic_dec(&fs_info->scrubs_running); | ||
| 299 | atomic_dec(&fs_info->scrubs_paused); | ||
| 300 | mutex_unlock(&fs_info->scrub_lock); | ||
| 301 | atomic_dec(&sctx->workers_pending); | ||
| 302 | wake_up(&fs_info->scrub_pause_wait); | ||
| 303 | wake_up(&sctx->list_wait); | ||
| 304 | } | ||
| 305 | |||
| 306 | static void scrub_free_csums(struct scrub_ctx *sctx) | ||
| 307 | { | ||
| 308 | while (!list_empty(&sctx->csum_list)) { | ||
| 179 | struct btrfs_ordered_sum *sum; | 309 | struct btrfs_ordered_sum *sum; |
| 180 | sum = list_first_entry(&sdev->csum_list, | 310 | sum = list_first_entry(&sctx->csum_list, |
| 181 | struct btrfs_ordered_sum, list); | 311 | struct btrfs_ordered_sum, list); |
| 182 | list_del(&sum->list); | 312 | list_del(&sum->list); |
| 183 | kfree(sum); | 313 | kfree(sum); |
| 184 | } | 314 | } |
| 185 | } | 315 | } |
| 186 | 316 | ||
| 187 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) | 317 | static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) |
| 188 | { | 318 | { |
| 189 | int i; | 319 | int i; |
| 190 | 320 | ||
| 191 | if (!sdev) | 321 | if (!sctx) |
| 192 | return; | 322 | return; |
| 193 | 323 | ||
| 324 | scrub_free_wr_ctx(&sctx->wr_ctx); | ||
| 325 | |||
| 194 | /* this can happen when scrub is cancelled */ | 326 | /* this can happen when scrub is cancelled */ |
| 195 | if (sdev->curr != -1) { | 327 | if (sctx->curr != -1) { |
| 196 | struct scrub_bio *sbio = sdev->bios[sdev->curr]; | 328 | struct scrub_bio *sbio = sctx->bios[sctx->curr]; |
| 197 | 329 | ||
| 198 | for (i = 0; i < sbio->page_count; i++) { | 330 | for (i = 0; i < sbio->page_count; i++) { |
| 199 | BUG_ON(!sbio->pagev[i]); | 331 | WARN_ON(!sbio->pagev[i]->page); |
| 200 | BUG_ON(!sbio->pagev[i]->page); | ||
| 201 | scrub_block_put(sbio->pagev[i]->sblock); | 332 | scrub_block_put(sbio->pagev[i]->sblock); |
| 202 | } | 333 | } |
| 203 | bio_put(sbio->bio); | 334 | bio_put(sbio->bio); |
| 204 | } | 335 | } |
| 205 | 336 | ||
| 206 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 337 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { |
| 207 | struct scrub_bio *sbio = sdev->bios[i]; | 338 | struct scrub_bio *sbio = sctx->bios[i]; |
| 208 | 339 | ||
| 209 | if (!sbio) | 340 | if (!sbio) |
| 210 | break; | 341 | break; |
| 211 | kfree(sbio); | 342 | kfree(sbio); |
| 212 | } | 343 | } |
| 213 | 344 | ||
| 214 | scrub_free_csums(sdev); | 345 | scrub_free_csums(sctx); |
| 215 | kfree(sdev); | 346 | kfree(sctx); |
| 216 | } | 347 | } |
| 217 | 348 | ||
| 218 | static noinline_for_stack | 349 | static noinline_for_stack |
| 219 | struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | 350 | struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) |
| 220 | { | 351 | { |
| 221 | struct scrub_dev *sdev; | 352 | struct scrub_ctx *sctx; |
| 222 | int i; | 353 | int i; |
| 223 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 354 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; |
| 224 | int pages_per_bio; | 355 | int pages_per_rd_bio; |
| 356 | int ret; | ||
| 225 | 357 | ||
| 226 | pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO, | 358 | /* |
| 227 | bio_get_nr_vecs(dev->bdev)); | 359 | * the setting of pages_per_rd_bio is correct for scrub but might |
| 228 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); | 360 | * be wrong for the dev_replace code where we might read from |
| 229 | if (!sdev) | 361 | * different devices in the initial huge bios. However, that |
| 362 | * code is able to correctly handle the case when adding a page | ||
| 363 | * to a bio fails. | ||
| 364 | */ | ||
| 365 | if (dev->bdev) | ||
| 366 | pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO, | ||
| 367 | bio_get_nr_vecs(dev->bdev)); | ||
| 368 | else | ||
| 369 | pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO; | ||
| 370 | sctx = kzalloc(sizeof(*sctx), GFP_NOFS); | ||
| 371 | if (!sctx) | ||
| 230 | goto nomem; | 372 | goto nomem; |
| 231 | sdev->dev = dev; | 373 | sctx->is_dev_replace = is_dev_replace; |
| 232 | sdev->pages_per_bio = pages_per_bio; | 374 | sctx->pages_per_rd_bio = pages_per_rd_bio; |
| 233 | sdev->curr = -1; | 375 | sctx->curr = -1; |
| 234 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | 376 | sctx->dev_root = dev->dev_root; |
| 377 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { | ||
| 235 | struct scrub_bio *sbio; | 378 | struct scrub_bio *sbio; |
| 236 | 379 | ||
| 237 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | 380 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); |
| 238 | if (!sbio) | 381 | if (!sbio) |
| 239 | goto nomem; | 382 | goto nomem; |
| 240 | sdev->bios[i] = sbio; | 383 | sctx->bios[i] = sbio; |
| 241 | 384 | ||
| 242 | sbio->index = i; | 385 | sbio->index = i; |
| 243 | sbio->sdev = sdev; | 386 | sbio->sctx = sctx; |
| 244 | sbio->page_count = 0; | 387 | sbio->page_count = 0; |
| 245 | sbio->work.func = scrub_bio_end_io_worker; | 388 | sbio->work.func = scrub_bio_end_io_worker; |
| 246 | 389 | ||
| 247 | if (i != SCRUB_BIOS_PER_DEV-1) | 390 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
| 248 | sdev->bios[i]->next_free = i + 1; | 391 | sctx->bios[i]->next_free = i + 1; |
| 249 | else | 392 | else |
| 250 | sdev->bios[i]->next_free = -1; | 393 | sctx->bios[i]->next_free = -1; |
| 251 | } | 394 | } |
| 252 | sdev->first_free = 0; | 395 | sctx->first_free = 0; |
| 253 | sdev->nodesize = dev->dev_root->nodesize; | 396 | sctx->nodesize = dev->dev_root->nodesize; |
| 254 | sdev->leafsize = dev->dev_root->leafsize; | 397 | sctx->leafsize = dev->dev_root->leafsize; |
| 255 | sdev->sectorsize = dev->dev_root->sectorsize; | 398 | sctx->sectorsize = dev->dev_root->sectorsize; |
| 256 | atomic_set(&sdev->in_flight, 0); | 399 | atomic_set(&sctx->bios_in_flight, 0); |
| 257 | atomic_set(&sdev->fixup_cnt, 0); | 400 | atomic_set(&sctx->workers_pending, 0); |
| 258 | atomic_set(&sdev->cancel_req, 0); | 401 | atomic_set(&sctx->cancel_req, 0); |
| 259 | sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy); | 402 | sctx->csum_size = btrfs_super_csum_size(fs_info->super_copy); |
| 260 | INIT_LIST_HEAD(&sdev->csum_list); | 403 | INIT_LIST_HEAD(&sctx->csum_list); |
| 261 | 404 | ||
| 262 | spin_lock_init(&sdev->list_lock); | 405 | spin_lock_init(&sctx->list_lock); |
| 263 | spin_lock_init(&sdev->stat_lock); | 406 | spin_lock_init(&sctx->stat_lock); |
| 264 | init_waitqueue_head(&sdev->list_wait); | 407 | init_waitqueue_head(&sctx->list_wait); |
| 265 | return sdev; | 408 | |
| 409 | ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info, | ||
| 410 | fs_info->dev_replace.tgtdev, is_dev_replace); | ||
| 411 | if (ret) { | ||
| 412 | scrub_free_ctx(sctx); | ||
| 413 | return ERR_PTR(ret); | ||
| 414 | } | ||
| 415 | return sctx; | ||
| 266 | 416 | ||
| 267 | nomem: | 417 | nomem: |
| 268 | scrub_free_dev(sdev); | 418 | scrub_free_ctx(sctx); |
| 269 | return ERR_PTR(-ENOMEM); | 419 | return ERR_PTR(-ENOMEM); |
| 270 | } | 420 | } |
| 271 | 421 | ||
| 272 | static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | 422 | static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, |
| 423 | void *warn_ctx) | ||
| 273 | { | 424 | { |
| 274 | u64 isize; | 425 | u64 isize; |
| 275 | u32 nlink; | 426 | u32 nlink; |
| @@ -277,7 +428,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 277 | int i; | 428 | int i; |
| 278 | struct extent_buffer *eb; | 429 | struct extent_buffer *eb; |
| 279 | struct btrfs_inode_item *inode_item; | 430 | struct btrfs_inode_item *inode_item; |
| 280 | struct scrub_warning *swarn = ctx; | 431 | struct scrub_warning *swarn = warn_ctx; |
| 281 | struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; | 432 | struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; |
| 282 | struct inode_fs_paths *ipath = NULL; | 433 | struct inode_fs_paths *ipath = NULL; |
| 283 | struct btrfs_root *local_root; | 434 | struct btrfs_root *local_root; |
| @@ -345,8 +496,8 @@ err: | |||
| 345 | 496 | ||
| 346 | static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | 497 | static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) |
| 347 | { | 498 | { |
| 348 | struct btrfs_device *dev = sblock->sdev->dev; | 499 | struct btrfs_device *dev; |
| 349 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 500 | struct btrfs_fs_info *fs_info; |
| 350 | struct btrfs_path *path; | 501 | struct btrfs_path *path; |
| 351 | struct btrfs_key found_key; | 502 | struct btrfs_key found_key; |
| 352 | struct extent_buffer *eb; | 503 | struct extent_buffer *eb; |
| @@ -361,15 +512,18 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 361 | const int bufsize = 4096; | 512 | const int bufsize = 4096; |
| 362 | int ret; | 513 | int ret; |
| 363 | 514 | ||
| 515 | WARN_ON(sblock->page_count < 1); | ||
| 516 | dev = sblock->pagev[0]->dev; | ||
| 517 | fs_info = sblock->sctx->dev_root->fs_info; | ||
| 518 | |||
| 364 | path = btrfs_alloc_path(); | 519 | path = btrfs_alloc_path(); |
| 365 | 520 | ||
| 366 | swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); | 521 | swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS); |
| 367 | swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); | 522 | swarn.msg_buf = kmalloc(bufsize, GFP_NOFS); |
| 368 | BUG_ON(sblock->page_count < 1); | 523 | swarn.sector = (sblock->pagev[0]->physical) >> 9; |
| 369 | swarn.sector = (sblock->pagev[0].physical) >> 9; | 524 | swarn.logical = sblock->pagev[0]->logical; |
| 370 | swarn.logical = sblock->pagev[0].logical; | ||
| 371 | swarn.errstr = errstr; | 525 | swarn.errstr = errstr; |
| 372 | swarn.dev = dev; | 526 | swarn.dev = NULL; |
| 373 | swarn.msg_bufsize = bufsize; | 527 | swarn.msg_bufsize = bufsize; |
| 374 | swarn.scratch_bufsize = bufsize; | 528 | swarn.scratch_bufsize = bufsize; |
| 375 | 529 | ||
| @@ -405,6 +559,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
| 405 | } while (ret != 1); | 559 | } while (ret != 1); |
| 406 | } else { | 560 | } else { |
| 407 | swarn.path = path; | 561 | swarn.path = path; |
| 562 | swarn.dev = dev; | ||
| 408 | iterate_extent_inodes(fs_info, found_key.objectid, | 563 | iterate_extent_inodes(fs_info, found_key.objectid, |
| 409 | extent_item_pos, 1, | 564 | extent_item_pos, 1, |
| 410 | scrub_print_warning_inode, &swarn); | 565 | scrub_print_warning_inode, &swarn); |
| @@ -416,29 +571,38 @@ out: | |||
| 416 | kfree(swarn.msg_buf); | 571 | kfree(swarn.msg_buf); |
| 417 | } | 572 | } |
| 418 | 573 | ||
| 419 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | 574 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) |
| 420 | { | 575 | { |
| 421 | struct page *page = NULL; | 576 | struct page *page = NULL; |
| 422 | unsigned long index; | 577 | unsigned long index; |
| 423 | struct scrub_fixup_nodatasum *fixup = ctx; | 578 | struct scrub_fixup_nodatasum *fixup = fixup_ctx; |
| 424 | int ret; | 579 | int ret; |
| 425 | int corrected = 0; | 580 | int corrected = 0; |
| 426 | struct btrfs_key key; | 581 | struct btrfs_key key; |
| 427 | struct inode *inode = NULL; | 582 | struct inode *inode = NULL; |
| 583 | struct btrfs_fs_info *fs_info; | ||
| 428 | u64 end = offset + PAGE_SIZE - 1; | 584 | u64 end = offset + PAGE_SIZE - 1; |
| 429 | struct btrfs_root *local_root; | 585 | struct btrfs_root *local_root; |
| 586 | int srcu_index; | ||
| 430 | 587 | ||
| 431 | key.objectid = root; | 588 | key.objectid = root; |
| 432 | key.type = BTRFS_ROOT_ITEM_KEY; | 589 | key.type = BTRFS_ROOT_ITEM_KEY; |
| 433 | key.offset = (u64)-1; | 590 | key.offset = (u64)-1; |
| 434 | local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); | 591 | |
| 435 | if (IS_ERR(local_root)) | 592 | fs_info = fixup->root->fs_info; |
| 593 | srcu_index = srcu_read_lock(&fs_info->subvol_srcu); | ||
| 594 | |||
| 595 | local_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 596 | if (IS_ERR(local_root)) { | ||
| 597 | srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); | ||
| 436 | return PTR_ERR(local_root); | 598 | return PTR_ERR(local_root); |
| 599 | } | ||
| 437 | 600 | ||
| 438 | key.type = BTRFS_INODE_ITEM_KEY; | 601 | key.type = BTRFS_INODE_ITEM_KEY; |
| 439 | key.objectid = inum; | 602 | key.objectid = inum; |
| 440 | key.offset = 0; | 603 | key.offset = 0; |
| 441 | inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); | 604 | inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); |
| 605 | srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); | ||
| 442 | if (IS_ERR(inode)) | 606 | if (IS_ERR(inode)) |
| 443 | return PTR_ERR(inode); | 607 | return PTR_ERR(inode); |
| 444 | 608 | ||
| @@ -451,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 451 | } | 615 | } |
| 452 | 616 | ||
| 453 | if (PageUptodate(page)) { | 617 | if (PageUptodate(page)) { |
| 454 | struct btrfs_mapping_tree *map_tree; | ||
| 455 | if (PageDirty(page)) { | 618 | if (PageDirty(page)) { |
| 456 | /* | 619 | /* |
| 457 | * we need to write the data to the defect sector. the | 620 | * we need to write the data to the defect sector. the |
| @@ -472,8 +635,8 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | |||
| 472 | ret = -EIO; | 635 | ret = -EIO; |
| 473 | goto out; | 636 | goto out; |
| 474 | } | 637 | } |
| 475 | map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree; | 638 | fs_info = BTRFS_I(inode)->root->fs_info; |
| 476 | ret = repair_io_failure(map_tree, offset, PAGE_SIZE, | 639 | ret = repair_io_failure(fs_info, offset, PAGE_SIZE, |
| 477 | fixup->logical, page, | 640 | fixup->logical, page, |
| 478 | fixup->mirror_num); | 641 | fixup->mirror_num); |
| 479 | unlock_page(page); | 642 | unlock_page(page); |
| @@ -530,21 +693,21 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) | |||
| 530 | { | 693 | { |
| 531 | int ret; | 694 | int ret; |
| 532 | struct scrub_fixup_nodatasum *fixup; | 695 | struct scrub_fixup_nodatasum *fixup; |
| 533 | struct scrub_dev *sdev; | 696 | struct scrub_ctx *sctx; |
| 534 | struct btrfs_trans_handle *trans = NULL; | 697 | struct btrfs_trans_handle *trans = NULL; |
| 535 | struct btrfs_fs_info *fs_info; | 698 | struct btrfs_fs_info *fs_info; |
| 536 | struct btrfs_path *path; | 699 | struct btrfs_path *path; |
| 537 | int uncorrectable = 0; | 700 | int uncorrectable = 0; |
| 538 | 701 | ||
| 539 | fixup = container_of(work, struct scrub_fixup_nodatasum, work); | 702 | fixup = container_of(work, struct scrub_fixup_nodatasum, work); |
| 540 | sdev = fixup->sdev; | 703 | sctx = fixup->sctx; |
| 541 | fs_info = fixup->root->fs_info; | 704 | fs_info = fixup->root->fs_info; |
| 542 | 705 | ||
| 543 | path = btrfs_alloc_path(); | 706 | path = btrfs_alloc_path(); |
| 544 | if (!path) { | 707 | if (!path) { |
| 545 | spin_lock(&sdev->stat_lock); | 708 | spin_lock(&sctx->stat_lock); |
| 546 | ++sdev->stat.malloc_errors; | 709 | ++sctx->stat.malloc_errors; |
| 547 | spin_unlock(&sdev->stat_lock); | 710 | spin_unlock(&sctx->stat_lock); |
| 548 | uncorrectable = 1; | 711 | uncorrectable = 1; |
| 549 | goto out; | 712 | goto out; |
| 550 | } | 713 | } |
| @@ -573,35 +736,30 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) | |||
| 573 | } | 736 | } |
| 574 | WARN_ON(ret != 1); | 737 | WARN_ON(ret != 1); |
| 575 | 738 | ||
| 576 | spin_lock(&sdev->stat_lock); | 739 | spin_lock(&sctx->stat_lock); |
| 577 | ++sdev->stat.corrected_errors; | 740 | ++sctx->stat.corrected_errors; |
| 578 | spin_unlock(&sdev->stat_lock); | 741 | spin_unlock(&sctx->stat_lock); |
| 579 | 742 | ||
| 580 | out: | 743 | out: |
| 581 | if (trans && !IS_ERR(trans)) | 744 | if (trans && !IS_ERR(trans)) |
| 582 | btrfs_end_transaction(trans, fixup->root); | 745 | btrfs_end_transaction(trans, fixup->root); |
| 583 | if (uncorrectable) { | 746 | if (uncorrectable) { |
| 584 | spin_lock(&sdev->stat_lock); | 747 | spin_lock(&sctx->stat_lock); |
| 585 | ++sdev->stat.uncorrectable_errors; | 748 | ++sctx->stat.uncorrectable_errors; |
| 586 | spin_unlock(&sdev->stat_lock); | 749 | spin_unlock(&sctx->stat_lock); |
| 587 | 750 | btrfs_dev_replace_stats_inc( | |
| 751 | &sctx->dev_root->fs_info->dev_replace. | ||
| 752 | num_uncorrectable_read_errors); | ||
| 588 | printk_ratelimited_in_rcu(KERN_ERR | 753 | printk_ratelimited_in_rcu(KERN_ERR |
| 589 | "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", | 754 | "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", |
| 590 | (unsigned long long)fixup->logical, | 755 | (unsigned long long)fixup->logical, |
| 591 | rcu_str_deref(sdev->dev->name)); | 756 | rcu_str_deref(fixup->dev->name)); |
| 592 | } | 757 | } |
| 593 | 758 | ||
| 594 | btrfs_free_path(path); | 759 | btrfs_free_path(path); |
| 595 | kfree(fixup); | 760 | kfree(fixup); |
| 596 | 761 | ||
| 597 | /* see caller why we're pretending to be paused in the scrub counters */ | 762 | scrub_pending_trans_workers_dec(sctx); |
| 598 | mutex_lock(&fs_info->scrub_lock); | ||
| 599 | atomic_dec(&fs_info->scrubs_running); | ||
| 600 | atomic_dec(&fs_info->scrubs_paused); | ||
| 601 | mutex_unlock(&fs_info->scrub_lock); | ||
| 602 | atomic_dec(&sdev->fixup_cnt); | ||
| 603 | wake_up(&fs_info->scrub_pause_wait); | ||
| 604 | wake_up(&sdev->list_wait); | ||
| 605 | } | 763 | } |
| 606 | 764 | ||
| 607 | /* | 765 | /* |
| @@ -614,7 +772,8 @@ out: | |||
| 614 | */ | 772 | */ |
| 615 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | 773 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) |
| 616 | { | 774 | { |
| 617 | struct scrub_dev *sdev = sblock_to_check->sdev; | 775 | struct scrub_ctx *sctx = sblock_to_check->sctx; |
| 776 | struct btrfs_device *dev; | ||
| 618 | struct btrfs_fs_info *fs_info; | 777 | struct btrfs_fs_info *fs_info; |
| 619 | u64 length; | 778 | u64 length; |
| 620 | u64 logical; | 779 | u64 logical; |
| @@ -633,16 +792,33 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 633 | DEFAULT_RATELIMIT_BURST); | 792 | DEFAULT_RATELIMIT_BURST); |
| 634 | 793 | ||
| 635 | BUG_ON(sblock_to_check->page_count < 1); | 794 | BUG_ON(sblock_to_check->page_count < 1); |
| 636 | fs_info = sdev->dev->dev_root->fs_info; | 795 | fs_info = sctx->dev_root->fs_info; |
| 796 | if (sblock_to_check->pagev[0]->flags & BTRFS_EXTENT_FLAG_SUPER) { | ||
| 797 | /* | ||
| 798 | * if we find an error in a super block, we just report it. | ||
| 799 | * They will get written with the next transaction commit | ||
| 800 | * anyway | ||
| 801 | */ | ||
| 802 | spin_lock(&sctx->stat_lock); | ||
| 803 | ++sctx->stat.super_errors; | ||
| 804 | spin_unlock(&sctx->stat_lock); | ||
| 805 | return 0; | ||
| 806 | } | ||
| 637 | length = sblock_to_check->page_count * PAGE_SIZE; | 807 | length = sblock_to_check->page_count * PAGE_SIZE; |
| 638 | logical = sblock_to_check->pagev[0].logical; | 808 | logical = sblock_to_check->pagev[0]->logical; |
| 639 | generation = sblock_to_check->pagev[0].generation; | 809 | generation = sblock_to_check->pagev[0]->generation; |
| 640 | BUG_ON(sblock_to_check->pagev[0].mirror_num < 1); | 810 | BUG_ON(sblock_to_check->pagev[0]->mirror_num < 1); |
| 641 | failed_mirror_index = sblock_to_check->pagev[0].mirror_num - 1; | 811 | failed_mirror_index = sblock_to_check->pagev[0]->mirror_num - 1; |
| 642 | is_metadata = !(sblock_to_check->pagev[0].flags & | 812 | is_metadata = !(sblock_to_check->pagev[0]->flags & |
| 643 | BTRFS_EXTENT_FLAG_DATA); | 813 | BTRFS_EXTENT_FLAG_DATA); |
| 644 | have_csum = sblock_to_check->pagev[0].have_csum; | 814 | have_csum = sblock_to_check->pagev[0]->have_csum; |
| 645 | csum = sblock_to_check->pagev[0].csum; | 815 | csum = sblock_to_check->pagev[0]->csum; |
| 816 | dev = sblock_to_check->pagev[0]->dev; | ||
| 817 | |||
| 818 | if (sctx->is_dev_replace && !is_metadata && !have_csum) { | ||
| 819 | sblocks_for_recheck = NULL; | ||
| 820 | goto nodatasum_case; | ||
| 821 | } | ||
| 646 | 822 | ||
| 647 | /* | 823 | /* |
| 648 | * read all mirrors one after the other. This includes to | 824 | * read all mirrors one after the other. This includes to |
| @@ -677,43 +853,32 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 677 | sizeof(*sblocks_for_recheck), | 853 | sizeof(*sblocks_for_recheck), |
| 678 | GFP_NOFS); | 854 | GFP_NOFS); |
| 679 | if (!sblocks_for_recheck) { | 855 | if (!sblocks_for_recheck) { |
| 680 | spin_lock(&sdev->stat_lock); | 856 | spin_lock(&sctx->stat_lock); |
| 681 | sdev->stat.malloc_errors++; | 857 | sctx->stat.malloc_errors++; |
| 682 | sdev->stat.read_errors++; | 858 | sctx->stat.read_errors++; |
| 683 | sdev->stat.uncorrectable_errors++; | 859 | sctx->stat.uncorrectable_errors++; |
| 684 | spin_unlock(&sdev->stat_lock); | 860 | spin_unlock(&sctx->stat_lock); |
| 685 | btrfs_dev_stat_inc_and_print(sdev->dev, | 861 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); |
| 686 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 687 | goto out; | 862 | goto out; |
| 688 | } | 863 | } |
| 689 | 864 | ||
| 690 | /* setup the context, map the logical blocks and alloc the pages */ | 865 | /* setup the context, map the logical blocks and alloc the pages */ |
| 691 | ret = scrub_setup_recheck_block(sdev, &fs_info->mapping_tree, length, | 866 | ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length, |
| 692 | logical, sblocks_for_recheck); | 867 | logical, sblocks_for_recheck); |
| 693 | if (ret) { | 868 | if (ret) { |
| 694 | spin_lock(&sdev->stat_lock); | 869 | spin_lock(&sctx->stat_lock); |
| 695 | sdev->stat.read_errors++; | 870 | sctx->stat.read_errors++; |
| 696 | sdev->stat.uncorrectable_errors++; | 871 | sctx->stat.uncorrectable_errors++; |
| 697 | spin_unlock(&sdev->stat_lock); | 872 | spin_unlock(&sctx->stat_lock); |
| 698 | btrfs_dev_stat_inc_and_print(sdev->dev, | 873 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); |
| 699 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 700 | goto out; | 874 | goto out; |
| 701 | } | 875 | } |
| 702 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); | 876 | BUG_ON(failed_mirror_index >= BTRFS_MAX_MIRRORS); |
| 703 | sblock_bad = sblocks_for_recheck + failed_mirror_index; | 877 | sblock_bad = sblocks_for_recheck + failed_mirror_index; |
| 704 | 878 | ||
| 705 | /* build and submit the bios for the failed mirror, check checksums */ | 879 | /* build and submit the bios for the failed mirror, check checksums */ |
| 706 | ret = scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, | 880 | scrub_recheck_block(fs_info, sblock_bad, is_metadata, have_csum, |
| 707 | csum, generation, sdev->csum_size); | 881 | csum, generation, sctx->csum_size); |
| 708 | if (ret) { | ||
| 709 | spin_lock(&sdev->stat_lock); | ||
| 710 | sdev->stat.read_errors++; | ||
| 711 | sdev->stat.uncorrectable_errors++; | ||
| 712 | spin_unlock(&sdev->stat_lock); | ||
| 713 | btrfs_dev_stat_inc_and_print(sdev->dev, | ||
| 714 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 715 | goto out; | ||
| 716 | } | ||
| 717 | 882 | ||
| 718 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && | 883 | if (!sblock_bad->header_error && !sblock_bad->checksum_error && |
| 719 | sblock_bad->no_io_error_seen) { | 884 | sblock_bad->no_io_error_seen) { |
| @@ -725,50 +890,54 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 725 | * different bio (usually one of the two latter cases is | 890 | * different bio (usually one of the two latter cases is |
| 726 | * the cause) | 891 | * the cause) |
| 727 | */ | 892 | */ |
| 728 | spin_lock(&sdev->stat_lock); | 893 | spin_lock(&sctx->stat_lock); |
| 729 | sdev->stat.unverified_errors++; | 894 | sctx->stat.unverified_errors++; |
| 730 | spin_unlock(&sdev->stat_lock); | 895 | spin_unlock(&sctx->stat_lock); |
| 731 | 896 | ||
| 897 | if (sctx->is_dev_replace) | ||
| 898 | scrub_write_block_to_dev_replace(sblock_bad); | ||
| 732 | goto out; | 899 | goto out; |
| 733 | } | 900 | } |
| 734 | 901 | ||
| 735 | if (!sblock_bad->no_io_error_seen) { | 902 | if (!sblock_bad->no_io_error_seen) { |
| 736 | spin_lock(&sdev->stat_lock); | 903 | spin_lock(&sctx->stat_lock); |
| 737 | sdev->stat.read_errors++; | 904 | sctx->stat.read_errors++; |
| 738 | spin_unlock(&sdev->stat_lock); | 905 | spin_unlock(&sctx->stat_lock); |
| 739 | if (__ratelimit(&_rs)) | 906 | if (__ratelimit(&_rs)) |
| 740 | scrub_print_warning("i/o error", sblock_to_check); | 907 | scrub_print_warning("i/o error", sblock_to_check); |
| 741 | btrfs_dev_stat_inc_and_print(sdev->dev, | 908 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); |
| 742 | BTRFS_DEV_STAT_READ_ERRS); | ||
| 743 | } else if (sblock_bad->checksum_error) { | 909 | } else if (sblock_bad->checksum_error) { |
| 744 | spin_lock(&sdev->stat_lock); | 910 | spin_lock(&sctx->stat_lock); |
| 745 | sdev->stat.csum_errors++; | 911 | sctx->stat.csum_errors++; |
| 746 | spin_unlock(&sdev->stat_lock); | 912 | spin_unlock(&sctx->stat_lock); |
| 747 | if (__ratelimit(&_rs)) | 913 | if (__ratelimit(&_rs)) |
| 748 | scrub_print_warning("checksum error", sblock_to_check); | 914 | scrub_print_warning("checksum error", sblock_to_check); |
| 749 | btrfs_dev_stat_inc_and_print(sdev->dev, | 915 | btrfs_dev_stat_inc_and_print(dev, |
| 750 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 916 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 751 | } else if (sblock_bad->header_error) { | 917 | } else if (sblock_bad->header_error) { |
| 752 | spin_lock(&sdev->stat_lock); | 918 | spin_lock(&sctx->stat_lock); |
| 753 | sdev->stat.verify_errors++; | 919 | sctx->stat.verify_errors++; |
| 754 | spin_unlock(&sdev->stat_lock); | 920 | spin_unlock(&sctx->stat_lock); |
| 755 | if (__ratelimit(&_rs)) | 921 | if (__ratelimit(&_rs)) |
| 756 | scrub_print_warning("checksum/header error", | 922 | scrub_print_warning("checksum/header error", |
| 757 | sblock_to_check); | 923 | sblock_to_check); |
| 758 | if (sblock_bad->generation_error) | 924 | if (sblock_bad->generation_error) |
| 759 | btrfs_dev_stat_inc_and_print(sdev->dev, | 925 | btrfs_dev_stat_inc_and_print(dev, |
| 760 | BTRFS_DEV_STAT_GENERATION_ERRS); | 926 | BTRFS_DEV_STAT_GENERATION_ERRS); |
| 761 | else | 927 | else |
| 762 | btrfs_dev_stat_inc_and_print(sdev->dev, | 928 | btrfs_dev_stat_inc_and_print(dev, |
| 763 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 929 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 764 | } | 930 | } |
| 765 | 931 | ||
| 766 | if (sdev->readonly) | 932 | if (sctx->readonly && !sctx->is_dev_replace) |
| 767 | goto did_not_correct_error; | 933 | goto did_not_correct_error; |
| 768 | 934 | ||
| 769 | if (!is_metadata && !have_csum) { | 935 | if (!is_metadata && !have_csum) { |
| 770 | struct scrub_fixup_nodatasum *fixup_nodatasum; | 936 | struct scrub_fixup_nodatasum *fixup_nodatasum; |
| 771 | 937 | ||
| 938 | nodatasum_case: | ||
| 939 | WARN_ON(sctx->is_dev_replace); | ||
| 940 | |||
| 772 | /* | 941 | /* |
| 773 | * !is_metadata and !have_csum, this means that the data | 942 | * !is_metadata and !have_csum, this means that the data |
| 774 | * might not be COW'ed, that it might be modified | 943 | * might not be COW'ed, that it might be modified |
| @@ -779,24 +948,12 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 779 | fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); | 948 | fixup_nodatasum = kzalloc(sizeof(*fixup_nodatasum), GFP_NOFS); |
| 780 | if (!fixup_nodatasum) | 949 | if (!fixup_nodatasum) |
| 781 | goto did_not_correct_error; | 950 | goto did_not_correct_error; |
| 782 | fixup_nodatasum->sdev = sdev; | 951 | fixup_nodatasum->sctx = sctx; |
| 952 | fixup_nodatasum->dev = dev; | ||
| 783 | fixup_nodatasum->logical = logical; | 953 | fixup_nodatasum->logical = logical; |
| 784 | fixup_nodatasum->root = fs_info->extent_root; | 954 | fixup_nodatasum->root = fs_info->extent_root; |
| 785 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 955 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
| 786 | /* | 956 | scrub_pending_trans_workers_inc(sctx); |
| 787 | * increment scrubs_running to prevent cancel requests from | ||
| 788 | * completing as long as a fixup worker is running. we must also | ||
| 789 | * increment scrubs_paused to prevent deadlocking on pause | ||
| 790 | * requests used for transactions commits (as the worker uses a | ||
| 791 | * transaction context). it is safe to regard the fixup worker | ||
| 792 | * as paused for all matters practical. effectively, we only | ||
| 793 | * avoid cancellation requests from completing. | ||
| 794 | */ | ||
| 795 | mutex_lock(&fs_info->scrub_lock); | ||
| 796 | atomic_inc(&fs_info->scrubs_running); | ||
| 797 | atomic_inc(&fs_info->scrubs_paused); | ||
| 798 | mutex_unlock(&fs_info->scrub_lock); | ||
| 799 | atomic_inc(&sdev->fixup_cnt); | ||
| 800 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; | 957 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; |
| 801 | btrfs_queue_worker(&fs_info->scrub_workers, | 958 | btrfs_queue_worker(&fs_info->scrub_workers, |
| 802 | &fixup_nodatasum->work); | 959 | &fixup_nodatasum->work); |
| @@ -805,26 +962,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 805 | 962 | ||
| 806 | /* | 963 | /* |
| 807 | * now build and submit the bios for the other mirrors, check | 964 | * now build and submit the bios for the other mirrors, check |
| 808 | * checksums | 965 | * checksums. |
| 809 | */ | 966 | * First try to pick the mirror which is completely without I/O |
| 810 | for (mirror_index = 0; | ||
| 811 | mirror_index < BTRFS_MAX_MIRRORS && | ||
| 812 | sblocks_for_recheck[mirror_index].page_count > 0; | ||
| 813 | mirror_index++) { | ||
| 814 | if (mirror_index == failed_mirror_index) | ||
| 815 | continue; | ||
| 816 | |||
| 817 | /* build and submit the bios, check checksums */ | ||
| 818 | ret = scrub_recheck_block(fs_info, | ||
| 819 | sblocks_for_recheck + mirror_index, | ||
| 820 | is_metadata, have_csum, csum, | ||
| 821 | generation, sdev->csum_size); | ||
| 822 | if (ret) | ||
| 823 | goto did_not_correct_error; | ||
| 824 | } | ||
| 825 | |||
| 826 | /* | ||
| 827 | * first try to pick the mirror which is completely without I/O | ||
| 828 | * errors and also does not have a checksum error. | 967 | * errors and also does not have a checksum error. |
| 829 | * If one is found, and if a checksum is present, the full block | 968 | * If one is found, and if a checksum is present, the full block |
| 830 | * that is known to contain an error is rewritten. Afterwards | 969 | * that is known to contain an error is rewritten. Afterwards |
| @@ -840,24 +979,93 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 840 | mirror_index < BTRFS_MAX_MIRRORS && | 979 | mirror_index < BTRFS_MAX_MIRRORS && |
| 841 | sblocks_for_recheck[mirror_index].page_count > 0; | 980 | sblocks_for_recheck[mirror_index].page_count > 0; |
| 842 | mirror_index++) { | 981 | mirror_index++) { |
| 843 | struct scrub_block *sblock_other = sblocks_for_recheck + | 982 | struct scrub_block *sblock_other; |
| 844 | mirror_index; | 983 | |
| 984 | if (mirror_index == failed_mirror_index) | ||
| 985 | continue; | ||
| 986 | sblock_other = sblocks_for_recheck + mirror_index; | ||
| 987 | |||
| 988 | /* build and submit the bios, check checksums */ | ||
| 989 | scrub_recheck_block(fs_info, sblock_other, is_metadata, | ||
| 990 | have_csum, csum, generation, | ||
| 991 | sctx->csum_size); | ||
| 845 | 992 | ||
| 846 | if (!sblock_other->header_error && | 993 | if (!sblock_other->header_error && |
| 847 | !sblock_other->checksum_error && | 994 | !sblock_other->checksum_error && |
| 848 | sblock_other->no_io_error_seen) { | 995 | sblock_other->no_io_error_seen) { |
| 849 | int force_write = is_metadata || have_csum; | 996 | if (sctx->is_dev_replace) { |
| 850 | 997 | scrub_write_block_to_dev_replace(sblock_other); | |
| 851 | ret = scrub_repair_block_from_good_copy(sblock_bad, | 998 | } else { |
| 852 | sblock_other, | 999 | int force_write = is_metadata || have_csum; |
| 853 | force_write); | 1000 | |
| 1001 | ret = scrub_repair_block_from_good_copy( | ||
| 1002 | sblock_bad, sblock_other, | ||
| 1003 | force_write); | ||
| 1004 | } | ||
| 854 | if (0 == ret) | 1005 | if (0 == ret) |
| 855 | goto corrected_error; | 1006 | goto corrected_error; |
| 856 | } | 1007 | } |
| 857 | } | 1008 | } |
| 858 | 1009 | ||
| 859 | /* | 1010 | /* |
| 860 | * in case of I/O errors in the area that is supposed to be | 1011 | * for dev_replace, pick good pages and write to the target device. |
| 1012 | */ | ||
| 1013 | if (sctx->is_dev_replace) { | ||
| 1014 | success = 1; | ||
| 1015 | for (page_num = 0; page_num < sblock_bad->page_count; | ||
| 1016 | page_num++) { | ||
| 1017 | int sub_success; | ||
| 1018 | |||
| 1019 | sub_success = 0; | ||
| 1020 | for (mirror_index = 0; | ||
| 1021 | mirror_index < BTRFS_MAX_MIRRORS && | ||
| 1022 | sblocks_for_recheck[mirror_index].page_count > 0; | ||
| 1023 | mirror_index++) { | ||
| 1024 | struct scrub_block *sblock_other = | ||
| 1025 | sblocks_for_recheck + mirror_index; | ||
| 1026 | struct scrub_page *page_other = | ||
| 1027 | sblock_other->pagev[page_num]; | ||
| 1028 | |||
| 1029 | if (!page_other->io_error) { | ||
| 1030 | ret = scrub_write_page_to_dev_replace( | ||
| 1031 | sblock_other, page_num); | ||
| 1032 | if (ret == 0) { | ||
| 1033 | /* succeeded for this page */ | ||
| 1034 | sub_success = 1; | ||
| 1035 | break; | ||
| 1036 | } else { | ||
| 1037 | btrfs_dev_replace_stats_inc( | ||
| 1038 | &sctx->dev_root-> | ||
| 1039 | fs_info->dev_replace. | ||
| 1040 | num_write_errors); | ||
| 1041 | } | ||
| 1042 | } | ||
| 1043 | } | ||
| 1044 | |||
| 1045 | if (!sub_success) { | ||
| 1046 | /* | ||
| 1047 | * did not find a mirror to fetch the page | ||
| 1048 | * from. scrub_write_page_to_dev_replace() | ||
| 1049 | * handles this case (page->io_error), by | ||
| 1050 | * filling the block with zeros before | ||
| 1051 | * submitting the write request | ||
| 1052 | */ | ||
| 1053 | success = 0; | ||
| 1054 | ret = scrub_write_page_to_dev_replace( | ||
| 1055 | sblock_bad, page_num); | ||
| 1056 | if (ret) | ||
| 1057 | btrfs_dev_replace_stats_inc( | ||
| 1058 | &sctx->dev_root->fs_info-> | ||
| 1059 | dev_replace.num_write_errors); | ||
| 1060 | } | ||
| 1061 | } | ||
| 1062 | |||
| 1063 | goto out; | ||
| 1064 | } | ||
| 1065 | |||
| 1066 | /* | ||
| 1067 | * for regular scrub, repair those pages that are errored. | ||
| 1068 | * In case of I/O errors in the area that is supposed to be | ||
| 861 | * repaired, continue by picking good copies of those pages. | 1069 | * repaired, continue by picking good copies of those pages. |
| 862 | * Select the good pages from mirrors to rewrite bad pages from | 1070 | * Select the good pages from mirrors to rewrite bad pages from |
| 863 | * the area to fix. Afterwards verify the checksum of the block | 1071 | * the area to fix. Afterwards verify the checksum of the block |
| @@ -887,7 +1095,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 887 | 1095 | ||
| 888 | success = 1; | 1096 | success = 1; |
| 889 | for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { | 1097 | for (page_num = 0; page_num < sblock_bad->page_count; page_num++) { |
| 890 | struct scrub_page *page_bad = sblock_bad->pagev + page_num; | 1098 | struct scrub_page *page_bad = sblock_bad->pagev[page_num]; |
| 891 | 1099 | ||
| 892 | if (!page_bad->io_error) | 1100 | if (!page_bad->io_error) |
| 893 | continue; | 1101 | continue; |
| @@ -898,8 +1106,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 898 | mirror_index++) { | 1106 | mirror_index++) { |
| 899 | struct scrub_block *sblock_other = sblocks_for_recheck + | 1107 | struct scrub_block *sblock_other = sblocks_for_recheck + |
| 900 | mirror_index; | 1108 | mirror_index; |
| 901 | struct scrub_page *page_other = sblock_other->pagev + | 1109 | struct scrub_page *page_other = sblock_other->pagev[ |
| 902 | page_num; | 1110 | page_num]; |
| 903 | 1111 | ||
| 904 | if (!page_other->io_error) { | 1112 | if (!page_other->io_error) { |
| 905 | ret = scrub_repair_page_from_good_copy( | 1113 | ret = scrub_repair_page_from_good_copy( |
| @@ -928,10 +1136,10 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 928 | * is verified, but most likely the data comes out | 1136 | * is verified, but most likely the data comes out |
| 929 | * of the page cache. | 1137 | * of the page cache. |
| 930 | */ | 1138 | */ |
| 931 | ret = scrub_recheck_block(fs_info, sblock_bad, | 1139 | scrub_recheck_block(fs_info, sblock_bad, |
| 932 | is_metadata, have_csum, csum, | 1140 | is_metadata, have_csum, csum, |
| 933 | generation, sdev->csum_size); | 1141 | generation, sctx->csum_size); |
| 934 | if (!ret && !sblock_bad->header_error && | 1142 | if (!sblock_bad->header_error && |
| 935 | !sblock_bad->checksum_error && | 1143 | !sblock_bad->checksum_error && |
| 936 | sblock_bad->no_io_error_seen) | 1144 | sblock_bad->no_io_error_seen) |
| 937 | goto corrected_error; | 1145 | goto corrected_error; |
| @@ -939,23 +1147,23 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
| 939 | goto did_not_correct_error; | 1147 | goto did_not_correct_error; |
| 940 | } else { | 1148 | } else { |
| 941 | corrected_error: | 1149 | corrected_error: |
| 942 | spin_lock(&sdev->stat_lock); | 1150 | spin_lock(&sctx->stat_lock); |
| 943 | sdev->stat.corrected_errors++; | 1151 | sctx->stat.corrected_errors++; |
| 944 | spin_unlock(&sdev->stat_lock); | 1152 | spin_unlock(&sctx->stat_lock); |
| 945 | printk_ratelimited_in_rcu(KERN_ERR | 1153 | printk_ratelimited_in_rcu(KERN_ERR |
| 946 | "btrfs: fixed up error at logical %llu on dev %s\n", | 1154 | "btrfs: fixed up error at logical %llu on dev %s\n", |
| 947 | (unsigned long long)logical, | 1155 | (unsigned long long)logical, |
| 948 | rcu_str_deref(sdev->dev->name)); | 1156 | rcu_str_deref(dev->name)); |
| 949 | } | 1157 | } |
| 950 | } else { | 1158 | } else { |
| 951 | did_not_correct_error: | 1159 | did_not_correct_error: |
| 952 | spin_lock(&sdev->stat_lock); | 1160 | spin_lock(&sctx->stat_lock); |
| 953 | sdev->stat.uncorrectable_errors++; | 1161 | sctx->stat.uncorrectable_errors++; |
| 954 | spin_unlock(&sdev->stat_lock); | 1162 | spin_unlock(&sctx->stat_lock); |
| 955 | printk_ratelimited_in_rcu(KERN_ERR | 1163 | printk_ratelimited_in_rcu(KERN_ERR |
| 956 | "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", | 1164 | "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n", |
| 957 | (unsigned long long)logical, | 1165 | (unsigned long long)logical, |
| 958 | rcu_str_deref(sdev->dev->name)); | 1166 | rcu_str_deref(dev->name)); |
| 959 | } | 1167 | } |
| 960 | 1168 | ||
| 961 | out: | 1169 | out: |
| @@ -966,11 +1174,11 @@ out: | |||
| 966 | mirror_index; | 1174 | mirror_index; |
| 967 | int page_index; | 1175 | int page_index; |
| 968 | 1176 | ||
| 969 | for (page_index = 0; page_index < SCRUB_PAGES_PER_BIO; | 1177 | for (page_index = 0; page_index < sblock->page_count; |
| 970 | page_index++) | 1178 | page_index++) { |
| 971 | if (sblock->pagev[page_index].page) | 1179 | sblock->pagev[page_index]->sblock = NULL; |
| 972 | __free_page( | 1180 | scrub_page_put(sblock->pagev[page_index]); |
| 973 | sblock->pagev[page_index].page); | 1181 | } |
| 974 | } | 1182 | } |
| 975 | kfree(sblocks_for_recheck); | 1183 | kfree(sblocks_for_recheck); |
| 976 | } | 1184 | } |
| @@ -978,8 +1186,9 @@ out: | |||
| 978 | return 0; | 1186 | return 0; |
| 979 | } | 1187 | } |
| 980 | 1188 | ||
| 981 | static int scrub_setup_recheck_block(struct scrub_dev *sdev, | 1189 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
| 982 | struct btrfs_mapping_tree *map_tree, | 1190 | struct btrfs_fs_info *fs_info, |
| 1191 | struct scrub_block *original_sblock, | ||
| 983 | u64 length, u64 logical, | 1192 | u64 length, u64 logical, |
| 984 | struct scrub_block *sblocks_for_recheck) | 1193 | struct scrub_block *sblocks_for_recheck) |
| 985 | { | 1194 | { |
| @@ -988,7 +1197,7 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 988 | int ret; | 1197 | int ret; |
| 989 | 1198 | ||
| 990 | /* | 1199 | /* |
| 991 | * note: the three members sdev, ref_count and outstanding_pages | 1200 | * note: the two members ref_count and outstanding_pages |
| 992 | * are not used (and not set) in the blocks that are used for | 1201 | * are not used (and not set) in the blocks that are used for |
| 993 | * the recheck procedure | 1202 | * the recheck procedure |
| 994 | */ | 1203 | */ |
| @@ -1003,14 +1212,14 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 1003 | * with a length of PAGE_SIZE, each returned stripe | 1212 | * with a length of PAGE_SIZE, each returned stripe |
| 1004 | * represents one mirror | 1213 | * represents one mirror |
| 1005 | */ | 1214 | */ |
| 1006 | ret = btrfs_map_block(map_tree, WRITE, logical, &mapped_length, | 1215 | ret = btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, logical, |
| 1007 | &bbio, 0); | 1216 | &mapped_length, &bbio, 0); |
| 1008 | if (ret || !bbio || mapped_length < sublen) { | 1217 | if (ret || !bbio || mapped_length < sublen) { |
| 1009 | kfree(bbio); | 1218 | kfree(bbio); |
| 1010 | return -EIO; | 1219 | return -EIO; |
| 1011 | } | 1220 | } |
| 1012 | 1221 | ||
| 1013 | BUG_ON(page_index >= SCRUB_PAGES_PER_BIO); | 1222 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); |
| 1014 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; | 1223 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; |
| 1015 | mirror_index++) { | 1224 | mirror_index++) { |
| 1016 | struct scrub_block *sblock; | 1225 | struct scrub_block *sblock; |
| @@ -1020,21 +1229,31 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 1020 | continue; | 1229 | continue; |
| 1021 | 1230 | ||
| 1022 | sblock = sblocks_for_recheck + mirror_index; | 1231 | sblock = sblocks_for_recheck + mirror_index; |
| 1023 | page = sblock->pagev + page_index; | 1232 | sblock->sctx = sctx; |
| 1233 | page = kzalloc(sizeof(*page), GFP_NOFS); | ||
| 1234 | if (!page) { | ||
| 1235 | leave_nomem: | ||
| 1236 | spin_lock(&sctx->stat_lock); | ||
| 1237 | sctx->stat.malloc_errors++; | ||
| 1238 | spin_unlock(&sctx->stat_lock); | ||
| 1239 | kfree(bbio); | ||
| 1240 | return -ENOMEM; | ||
| 1241 | } | ||
| 1242 | scrub_page_get(page); | ||
| 1243 | sblock->pagev[page_index] = page; | ||
| 1024 | page->logical = logical; | 1244 | page->logical = logical; |
| 1025 | page->physical = bbio->stripes[mirror_index].physical; | 1245 | page->physical = bbio->stripes[mirror_index].physical; |
| 1246 | BUG_ON(page_index >= original_sblock->page_count); | ||
| 1247 | page->physical_for_dev_replace = | ||
| 1248 | original_sblock->pagev[page_index]-> | ||
| 1249 | physical_for_dev_replace; | ||
| 1026 | /* for missing devices, dev->bdev is NULL */ | 1250 | /* for missing devices, dev->bdev is NULL */ |
| 1027 | page->dev = bbio->stripes[mirror_index].dev; | 1251 | page->dev = bbio->stripes[mirror_index].dev; |
| 1028 | page->mirror_num = mirror_index + 1; | 1252 | page->mirror_num = mirror_index + 1; |
| 1029 | page->page = alloc_page(GFP_NOFS); | ||
| 1030 | if (!page->page) { | ||
| 1031 | spin_lock(&sdev->stat_lock); | ||
| 1032 | sdev->stat.malloc_errors++; | ||
| 1033 | spin_unlock(&sdev->stat_lock); | ||
| 1034 | kfree(bbio); | ||
| 1035 | return -ENOMEM; | ||
| 1036 | } | ||
| 1037 | sblock->page_count++; | 1253 | sblock->page_count++; |
| 1254 | page->page = alloc_page(GFP_NOFS); | ||
| 1255 | if (!page->page) | ||
| 1256 | goto leave_nomem; | ||
| 1038 | } | 1257 | } |
| 1039 | kfree(bbio); | 1258 | kfree(bbio); |
| 1040 | length -= sublen; | 1259 | length -= sublen; |
| @@ -1052,10 +1271,10 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, | |||
| 1052 | * to take those pages that are not errored from all the mirrors so that | 1271 | * to take those pages that are not errored from all the mirrors so that |
| 1053 | * the pages that are errored in the just handled mirror can be repaired. | 1272 | * the pages that are errored in the just handled mirror can be repaired. |
| 1054 | */ | 1273 | */ |
| 1055 | static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | 1274 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
| 1056 | struct scrub_block *sblock, int is_metadata, | 1275 | struct scrub_block *sblock, int is_metadata, |
| 1057 | int have_csum, u8 *csum, u64 generation, | 1276 | int have_csum, u8 *csum, u64 generation, |
| 1058 | u16 csum_size) | 1277 | u16 csum_size) |
| 1059 | { | 1278 | { |
| 1060 | int page_num; | 1279 | int page_num; |
| 1061 | 1280 | ||
| @@ -1065,8 +1284,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1065 | 1284 | ||
| 1066 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | 1285 | for (page_num = 0; page_num < sblock->page_count; page_num++) { |
| 1067 | struct bio *bio; | 1286 | struct bio *bio; |
| 1068 | int ret; | 1287 | struct scrub_page *page = sblock->pagev[page_num]; |
| 1069 | struct scrub_page *page = sblock->pagev + page_num; | ||
| 1070 | DECLARE_COMPLETION_ONSTACK(complete); | 1288 | DECLARE_COMPLETION_ONSTACK(complete); |
| 1071 | 1289 | ||
| 1072 | if (page->dev->bdev == NULL) { | 1290 | if (page->dev->bdev == NULL) { |
| @@ -1075,20 +1293,19 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1075 | continue; | 1293 | continue; |
| 1076 | } | 1294 | } |
| 1077 | 1295 | ||
| 1078 | BUG_ON(!page->page); | 1296 | WARN_ON(!page->page); |
| 1079 | bio = bio_alloc(GFP_NOFS, 1); | 1297 | bio = bio_alloc(GFP_NOFS, 1); |
| 1080 | if (!bio) | 1298 | if (!bio) { |
| 1081 | return -EIO; | 1299 | page->io_error = 1; |
| 1300 | sblock->no_io_error_seen = 0; | ||
| 1301 | continue; | ||
| 1302 | } | ||
| 1082 | bio->bi_bdev = page->dev->bdev; | 1303 | bio->bi_bdev = page->dev->bdev; |
| 1083 | bio->bi_sector = page->physical >> 9; | 1304 | bio->bi_sector = page->physical >> 9; |
| 1084 | bio->bi_end_io = scrub_complete_bio_end_io; | 1305 | bio->bi_end_io = scrub_complete_bio_end_io; |
| 1085 | bio->bi_private = &complete; | 1306 | bio->bi_private = &complete; |
| 1086 | 1307 | ||
| 1087 | ret = bio_add_page(bio, page->page, PAGE_SIZE, 0); | 1308 | bio_add_page(bio, page->page, PAGE_SIZE, 0); |
| 1088 | if (PAGE_SIZE != ret) { | ||
| 1089 | bio_put(bio); | ||
| 1090 | return -EIO; | ||
| 1091 | } | ||
| 1092 | btrfsic_submit_bio(READ, bio); | 1309 | btrfsic_submit_bio(READ, bio); |
| 1093 | 1310 | ||
| 1094 | /* this will also unplug the queue */ | 1311 | /* this will also unplug the queue */ |
| @@ -1105,7 +1322,7 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, | |||
| 1105 | have_csum, csum, generation, | 1322 | have_csum, csum, generation, |
| 1106 | csum_size); | 1323 | csum_size); |
| 1107 | 1324 | ||
| 1108 | return 0; | 1325 | return; |
| 1109 | } | 1326 | } |
| 1110 | 1327 | ||
| 1111 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | 1328 | static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, |
| @@ -1120,14 +1337,14 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1120 | struct btrfs_root *root = fs_info->extent_root; | 1337 | struct btrfs_root *root = fs_info->extent_root; |
| 1121 | void *mapped_buffer; | 1338 | void *mapped_buffer; |
| 1122 | 1339 | ||
| 1123 | BUG_ON(!sblock->pagev[0].page); | 1340 | WARN_ON(!sblock->pagev[0]->page); |
| 1124 | if (is_metadata) { | 1341 | if (is_metadata) { |
| 1125 | struct btrfs_header *h; | 1342 | struct btrfs_header *h; |
| 1126 | 1343 | ||
| 1127 | mapped_buffer = kmap_atomic(sblock->pagev[0].page); | 1344 | mapped_buffer = kmap_atomic(sblock->pagev[0]->page); |
| 1128 | h = (struct btrfs_header *)mapped_buffer; | 1345 | h = (struct btrfs_header *)mapped_buffer; |
| 1129 | 1346 | ||
| 1130 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr) || | 1347 | if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) || |
| 1131 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || | 1348 | memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) || |
| 1132 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | 1349 | memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, |
| 1133 | BTRFS_UUID_SIZE)) { | 1350 | BTRFS_UUID_SIZE)) { |
| @@ -1141,7 +1358,7 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1141 | if (!have_csum) | 1358 | if (!have_csum) |
| 1142 | return; | 1359 | return; |
| 1143 | 1360 | ||
| 1144 | mapped_buffer = kmap_atomic(sblock->pagev[0].page); | 1361 | mapped_buffer = kmap_atomic(sblock->pagev[0]->page); |
| 1145 | } | 1362 | } |
| 1146 | 1363 | ||
| 1147 | for (page_num = 0;;) { | 1364 | for (page_num = 0;;) { |
| @@ -1157,9 +1374,9 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info, | |||
| 1157 | page_num++; | 1374 | page_num++; |
| 1158 | if (page_num >= sblock->page_count) | 1375 | if (page_num >= sblock->page_count) |
| 1159 | break; | 1376 | break; |
| 1160 | BUG_ON(!sblock->pagev[page_num].page); | 1377 | WARN_ON(!sblock->pagev[page_num]->page); |
| 1161 | 1378 | ||
| 1162 | mapped_buffer = kmap_atomic(sblock->pagev[page_num].page); | 1379 | mapped_buffer = kmap_atomic(sblock->pagev[page_num]->page); |
| 1163 | } | 1380 | } |
| 1164 | 1381 | ||
| 1165 | btrfs_csum_final(crc, calculated_csum); | 1382 | btrfs_csum_final(crc, calculated_csum); |
| @@ -1197,17 +1414,23 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1197 | struct scrub_block *sblock_good, | 1414 | struct scrub_block *sblock_good, |
| 1198 | int page_num, int force_write) | 1415 | int page_num, int force_write) |
| 1199 | { | 1416 | { |
| 1200 | struct scrub_page *page_bad = sblock_bad->pagev + page_num; | 1417 | struct scrub_page *page_bad = sblock_bad->pagev[page_num]; |
| 1201 | struct scrub_page *page_good = sblock_good->pagev + page_num; | 1418 | struct scrub_page *page_good = sblock_good->pagev[page_num]; |
| 1202 | 1419 | ||
| 1203 | BUG_ON(sblock_bad->pagev[page_num].page == NULL); | 1420 | BUG_ON(page_bad->page == NULL); |
| 1204 | BUG_ON(sblock_good->pagev[page_num].page == NULL); | 1421 | BUG_ON(page_good->page == NULL); |
| 1205 | if (force_write || sblock_bad->header_error || | 1422 | if (force_write || sblock_bad->header_error || |
| 1206 | sblock_bad->checksum_error || page_bad->io_error) { | 1423 | sblock_bad->checksum_error || page_bad->io_error) { |
| 1207 | struct bio *bio; | 1424 | struct bio *bio; |
| 1208 | int ret; | 1425 | int ret; |
| 1209 | DECLARE_COMPLETION_ONSTACK(complete); | 1426 | DECLARE_COMPLETION_ONSTACK(complete); |
| 1210 | 1427 | ||
| 1428 | if (!page_bad->dev->bdev) { | ||
| 1429 | printk_ratelimited(KERN_WARNING | ||
| 1430 | "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n"); | ||
| 1431 | return -EIO; | ||
| 1432 | } | ||
| 1433 | |||
| 1211 | bio = bio_alloc(GFP_NOFS, 1); | 1434 | bio = bio_alloc(GFP_NOFS, 1); |
| 1212 | if (!bio) | 1435 | if (!bio) |
| 1213 | return -EIO; | 1436 | return -EIO; |
| @@ -1228,6 +1451,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1228 | if (!bio_flagged(bio, BIO_UPTODATE)) { | 1451 | if (!bio_flagged(bio, BIO_UPTODATE)) { |
| 1229 | btrfs_dev_stat_inc_and_print(page_bad->dev, | 1452 | btrfs_dev_stat_inc_and_print(page_bad->dev, |
| 1230 | BTRFS_DEV_STAT_WRITE_ERRS); | 1453 | BTRFS_DEV_STAT_WRITE_ERRS); |
| 1454 | btrfs_dev_replace_stats_inc( | ||
| 1455 | &sblock_bad->sctx->dev_root->fs_info-> | ||
| 1456 | dev_replace.num_write_errors); | ||
| 1231 | bio_put(bio); | 1457 | bio_put(bio); |
| 1232 | return -EIO; | 1458 | return -EIO; |
| 1233 | } | 1459 | } |
| @@ -1237,13 +1463,174 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
| 1237 | return 0; | 1463 | return 0; |
| 1238 | } | 1464 | } |
| 1239 | 1465 | ||
| 1240 | static void scrub_checksum(struct scrub_block *sblock) | 1466 | static void scrub_write_block_to_dev_replace(struct scrub_block *sblock) |
| 1467 | { | ||
| 1468 | int page_num; | ||
| 1469 | |||
| 1470 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | ||
| 1471 | int ret; | ||
| 1472 | |||
| 1473 | ret = scrub_write_page_to_dev_replace(sblock, page_num); | ||
| 1474 | if (ret) | ||
| 1475 | btrfs_dev_replace_stats_inc( | ||
| 1476 | &sblock->sctx->dev_root->fs_info->dev_replace. | ||
| 1477 | num_write_errors); | ||
| 1478 | } | ||
| 1479 | } | ||
| 1480 | |||
| 1481 | static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, | ||
| 1482 | int page_num) | ||
| 1483 | { | ||
| 1484 | struct scrub_page *spage = sblock->pagev[page_num]; | ||
| 1485 | |||
| 1486 | BUG_ON(spage->page == NULL); | ||
| 1487 | if (spage->io_error) { | ||
| 1488 | void *mapped_buffer = kmap_atomic(spage->page); | ||
| 1489 | |||
| 1490 | memset(mapped_buffer, 0, PAGE_CACHE_SIZE); | ||
| 1491 | flush_dcache_page(spage->page); | ||
| 1492 | kunmap_atomic(mapped_buffer); | ||
| 1493 | } | ||
| 1494 | return scrub_add_page_to_wr_bio(sblock->sctx, spage); | ||
| 1495 | } | ||
| 1496 | |||
| 1497 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | ||
| 1498 | struct scrub_page *spage) | ||
| 1499 | { | ||
| 1500 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
| 1501 | struct scrub_bio *sbio; | ||
| 1502 | int ret; | ||
| 1503 | |||
| 1504 | mutex_lock(&wr_ctx->wr_lock); | ||
| 1505 | again: | ||
| 1506 | if (!wr_ctx->wr_curr_bio) { | ||
| 1507 | wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), | ||
| 1508 | GFP_NOFS); | ||
| 1509 | if (!wr_ctx->wr_curr_bio) { | ||
| 1510 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1511 | return -ENOMEM; | ||
| 1512 | } | ||
| 1513 | wr_ctx->wr_curr_bio->sctx = sctx; | ||
| 1514 | wr_ctx->wr_curr_bio->page_count = 0; | ||
| 1515 | } | ||
| 1516 | sbio = wr_ctx->wr_curr_bio; | ||
| 1517 | if (sbio->page_count == 0) { | ||
| 1518 | struct bio *bio; | ||
| 1519 | |||
| 1520 | sbio->physical = spage->physical_for_dev_replace; | ||
| 1521 | sbio->logical = spage->logical; | ||
| 1522 | sbio->dev = wr_ctx->tgtdev; | ||
| 1523 | bio = sbio->bio; | ||
| 1524 | if (!bio) { | ||
| 1525 | bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); | ||
| 1526 | if (!bio) { | ||
| 1527 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1528 | return -ENOMEM; | ||
| 1529 | } | ||
| 1530 | sbio->bio = bio; | ||
| 1531 | } | ||
| 1532 | |||
| 1533 | bio->bi_private = sbio; | ||
| 1534 | bio->bi_end_io = scrub_wr_bio_end_io; | ||
| 1535 | bio->bi_bdev = sbio->dev->bdev; | ||
| 1536 | bio->bi_sector = sbio->physical >> 9; | ||
| 1537 | sbio->err = 0; | ||
| 1538 | } else if (sbio->physical + sbio->page_count * PAGE_SIZE != | ||
| 1539 | spage->physical_for_dev_replace || | ||
| 1540 | sbio->logical + sbio->page_count * PAGE_SIZE != | ||
| 1541 | spage->logical) { | ||
| 1542 | scrub_wr_submit(sctx); | ||
| 1543 | goto again; | ||
| 1544 | } | ||
| 1545 | |||
| 1546 | ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0); | ||
| 1547 | if (ret != PAGE_SIZE) { | ||
| 1548 | if (sbio->page_count < 1) { | ||
| 1549 | bio_put(sbio->bio); | ||
| 1550 | sbio->bio = NULL; | ||
| 1551 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1552 | return -EIO; | ||
| 1553 | } | ||
| 1554 | scrub_wr_submit(sctx); | ||
| 1555 | goto again; | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | sbio->pagev[sbio->page_count] = spage; | ||
| 1559 | scrub_page_get(spage); | ||
| 1560 | sbio->page_count++; | ||
| 1561 | if (sbio->page_count == wr_ctx->pages_per_wr_bio) | ||
| 1562 | scrub_wr_submit(sctx); | ||
| 1563 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 1564 | |||
| 1565 | return 0; | ||
| 1566 | } | ||
| 1567 | |||
| 1568 | static void scrub_wr_submit(struct scrub_ctx *sctx) | ||
| 1569 | { | ||
| 1570 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
| 1571 | struct scrub_bio *sbio; | ||
| 1572 | |||
| 1573 | if (!wr_ctx->wr_curr_bio) | ||
| 1574 | return; | ||
| 1575 | |||
| 1576 | sbio = wr_ctx->wr_curr_bio; | ||
| 1577 | wr_ctx->wr_curr_bio = NULL; | ||
| 1578 | WARN_ON(!sbio->bio->bi_bdev); | ||
| 1579 | scrub_pending_bio_inc(sctx); | ||
| 1580 | /* process all writes in a single worker thread. Then the block layer | ||
| 1581 | * orders the requests before sending them to the driver which | ||
| 1582 | * doubled the write performance on spinning disks when measured | ||
| 1583 | * with Linux 3.5 */ | ||
| 1584 | btrfsic_submit_bio(WRITE, sbio->bio); | ||
| 1585 | } | ||
| 1586 | |||
| 1587 | static void scrub_wr_bio_end_io(struct bio *bio, int err) | ||
| 1588 | { | ||
| 1589 | struct scrub_bio *sbio = bio->bi_private; | ||
| 1590 | struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; | ||
| 1591 | |||
| 1592 | sbio->err = err; | ||
| 1593 | sbio->bio = bio; | ||
| 1594 | |||
| 1595 | sbio->work.func = scrub_wr_bio_end_io_worker; | ||
| 1596 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | ||
| 1597 | } | ||
| 1598 | |||
| 1599 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | ||
| 1600 | { | ||
| 1601 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); | ||
| 1602 | struct scrub_ctx *sctx = sbio->sctx; | ||
| 1603 | int i; | ||
| 1604 | |||
| 1605 | WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); | ||
| 1606 | if (sbio->err) { | ||
| 1607 | struct btrfs_dev_replace *dev_replace = | ||
| 1608 | &sbio->sctx->dev_root->fs_info->dev_replace; | ||
| 1609 | |||
| 1610 | for (i = 0; i < sbio->page_count; i++) { | ||
| 1611 | struct scrub_page *spage = sbio->pagev[i]; | ||
| 1612 | |||
| 1613 | spage->io_error = 1; | ||
| 1614 | btrfs_dev_replace_stats_inc(&dev_replace-> | ||
| 1615 | num_write_errors); | ||
| 1616 | } | ||
| 1617 | } | ||
| 1618 | |||
| 1619 | for (i = 0; i < sbio->page_count; i++) | ||
| 1620 | scrub_page_put(sbio->pagev[i]); | ||
| 1621 | |||
| 1622 | bio_put(sbio->bio); | ||
| 1623 | kfree(sbio); | ||
| 1624 | scrub_pending_bio_dec(sctx); | ||
| 1625 | } | ||
| 1626 | |||
| 1627 | static int scrub_checksum(struct scrub_block *sblock) | ||
| 1241 | { | 1628 | { |
| 1242 | u64 flags; | 1629 | u64 flags; |
| 1243 | int ret; | 1630 | int ret; |
| 1244 | 1631 | ||
| 1245 | BUG_ON(sblock->page_count < 1); | 1632 | WARN_ON(sblock->page_count < 1); |
| 1246 | flags = sblock->pagev[0].flags; | 1633 | flags = sblock->pagev[0]->flags; |
| 1247 | ret = 0; | 1634 | ret = 0; |
| 1248 | if (flags & BTRFS_EXTENT_FLAG_DATA) | 1635 | if (flags & BTRFS_EXTENT_FLAG_DATA) |
| 1249 | ret = scrub_checksum_data(sblock); | 1636 | ret = scrub_checksum_data(sblock); |
| @@ -1255,30 +1642,32 @@ static void scrub_checksum(struct scrub_block *sblock) | |||
| 1255 | WARN_ON(1); | 1642 | WARN_ON(1); |
| 1256 | if (ret) | 1643 | if (ret) |
| 1257 | scrub_handle_errored_block(sblock); | 1644 | scrub_handle_errored_block(sblock); |
| 1645 | |||
| 1646 | return ret; | ||
| 1258 | } | 1647 | } |
| 1259 | 1648 | ||
| 1260 | static int scrub_checksum_data(struct scrub_block *sblock) | 1649 | static int scrub_checksum_data(struct scrub_block *sblock) |
| 1261 | { | 1650 | { |
| 1262 | struct scrub_dev *sdev = sblock->sdev; | 1651 | struct scrub_ctx *sctx = sblock->sctx; |
| 1263 | u8 csum[BTRFS_CSUM_SIZE]; | 1652 | u8 csum[BTRFS_CSUM_SIZE]; |
| 1264 | u8 *on_disk_csum; | 1653 | u8 *on_disk_csum; |
| 1265 | struct page *page; | 1654 | struct page *page; |
| 1266 | void *buffer; | 1655 | void *buffer; |
| 1267 | u32 crc = ~(u32)0; | 1656 | u32 crc = ~(u32)0; |
| 1268 | int fail = 0; | 1657 | int fail = 0; |
| 1269 | struct btrfs_root *root = sdev->dev->dev_root; | 1658 | struct btrfs_root *root = sctx->dev_root; |
| 1270 | u64 len; | 1659 | u64 len; |
| 1271 | int index; | 1660 | int index; |
| 1272 | 1661 | ||
| 1273 | BUG_ON(sblock->page_count < 1); | 1662 | BUG_ON(sblock->page_count < 1); |
| 1274 | if (!sblock->pagev[0].have_csum) | 1663 | if (!sblock->pagev[0]->have_csum) |
| 1275 | return 0; | 1664 | return 0; |
| 1276 | 1665 | ||
| 1277 | on_disk_csum = sblock->pagev[0].csum; | 1666 | on_disk_csum = sblock->pagev[0]->csum; |
| 1278 | page = sblock->pagev[0].page; | 1667 | page = sblock->pagev[0]->page; |
| 1279 | buffer = kmap_atomic(page); | 1668 | buffer = kmap_atomic(page); |
| 1280 | 1669 | ||
| 1281 | len = sdev->sectorsize; | 1670 | len = sctx->sectorsize; |
| 1282 | index = 0; | 1671 | index = 0; |
| 1283 | for (;;) { | 1672 | for (;;) { |
| 1284 | u64 l = min_t(u64, len, PAGE_SIZE); | 1673 | u64 l = min_t(u64, len, PAGE_SIZE); |
| @@ -1290,13 +1679,13 @@ static int scrub_checksum_data(struct scrub_block *sblock) | |||
| 1290 | break; | 1679 | break; |
| 1291 | index++; | 1680 | index++; |
| 1292 | BUG_ON(index >= sblock->page_count); | 1681 | BUG_ON(index >= sblock->page_count); |
| 1293 | BUG_ON(!sblock->pagev[index].page); | 1682 | BUG_ON(!sblock->pagev[index]->page); |
| 1294 | page = sblock->pagev[index].page; | 1683 | page = sblock->pagev[index]->page; |
| 1295 | buffer = kmap_atomic(page); | 1684 | buffer = kmap_atomic(page); |
| 1296 | } | 1685 | } |
| 1297 | 1686 | ||
| 1298 | btrfs_csum_final(crc, csum); | 1687 | btrfs_csum_final(crc, csum); |
| 1299 | if (memcmp(csum, on_disk_csum, sdev->csum_size)) | 1688 | if (memcmp(csum, on_disk_csum, sctx->csum_size)) |
| 1300 | fail = 1; | 1689 | fail = 1; |
| 1301 | 1690 | ||
| 1302 | return fail; | 1691 | return fail; |
| @@ -1304,9 +1693,9 @@ static int scrub_checksum_data(struct scrub_block *sblock) | |||
| 1304 | 1693 | ||
| 1305 | static int scrub_checksum_tree_block(struct scrub_block *sblock) | 1694 | static int scrub_checksum_tree_block(struct scrub_block *sblock) |
| 1306 | { | 1695 | { |
| 1307 | struct scrub_dev *sdev = sblock->sdev; | 1696 | struct scrub_ctx *sctx = sblock->sctx; |
| 1308 | struct btrfs_header *h; | 1697 | struct btrfs_header *h; |
| 1309 | struct btrfs_root *root = sdev->dev->dev_root; | 1698 | struct btrfs_root *root = sctx->dev_root; |
| 1310 | struct btrfs_fs_info *fs_info = root->fs_info; | 1699 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1311 | u8 calculated_csum[BTRFS_CSUM_SIZE]; | 1700 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
| 1312 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; | 1701 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
| @@ -1321,10 +1710,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1321 | int index; | 1710 | int index; |
| 1322 | 1711 | ||
| 1323 | BUG_ON(sblock->page_count < 1); | 1712 | BUG_ON(sblock->page_count < 1); |
| 1324 | page = sblock->pagev[0].page; | 1713 | page = sblock->pagev[0]->page; |
| 1325 | mapped_buffer = kmap_atomic(page); | 1714 | mapped_buffer = kmap_atomic(page); |
| 1326 | h = (struct btrfs_header *)mapped_buffer; | 1715 | h = (struct btrfs_header *)mapped_buffer; |
| 1327 | memcpy(on_disk_csum, h->csum, sdev->csum_size); | 1716 | memcpy(on_disk_csum, h->csum, sctx->csum_size); |
| 1328 | 1717 | ||
| 1329 | /* | 1718 | /* |
| 1330 | * we don't use the getter functions here, as we | 1719 | * we don't use the getter functions here, as we |
| @@ -1332,10 +1721,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1332 | * b) the page is already kmapped | 1721 | * b) the page is already kmapped |
| 1333 | */ | 1722 | */ |
| 1334 | 1723 | ||
| 1335 | if (sblock->pagev[0].logical != le64_to_cpu(h->bytenr)) | 1724 | if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr)) |
| 1336 | ++fail; | 1725 | ++fail; |
| 1337 | 1726 | ||
| 1338 | if (sblock->pagev[0].generation != le64_to_cpu(h->generation)) | 1727 | if (sblock->pagev[0]->generation != le64_to_cpu(h->generation)) |
| 1339 | ++fail; | 1728 | ++fail; |
| 1340 | 1729 | ||
| 1341 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1730 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) |
| @@ -1345,8 +1734,8 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1345 | BTRFS_UUID_SIZE)) | 1734 | BTRFS_UUID_SIZE)) |
| 1346 | ++fail; | 1735 | ++fail; |
| 1347 | 1736 | ||
| 1348 | BUG_ON(sdev->nodesize != sdev->leafsize); | 1737 | WARN_ON(sctx->nodesize != sctx->leafsize); |
| 1349 | len = sdev->nodesize - BTRFS_CSUM_SIZE; | 1738 | len = sctx->nodesize - BTRFS_CSUM_SIZE; |
| 1350 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1739 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
| 1351 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; | 1740 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
| 1352 | index = 0; | 1741 | index = 0; |
| @@ -1360,15 +1749,15 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1360 | break; | 1749 | break; |
| 1361 | index++; | 1750 | index++; |
| 1362 | BUG_ON(index >= sblock->page_count); | 1751 | BUG_ON(index >= sblock->page_count); |
| 1363 | BUG_ON(!sblock->pagev[index].page); | 1752 | BUG_ON(!sblock->pagev[index]->page); |
| 1364 | page = sblock->pagev[index].page; | 1753 | page = sblock->pagev[index]->page; |
| 1365 | mapped_buffer = kmap_atomic(page); | 1754 | mapped_buffer = kmap_atomic(page); |
| 1366 | mapped_size = PAGE_SIZE; | 1755 | mapped_size = PAGE_SIZE; |
| 1367 | p = mapped_buffer; | 1756 | p = mapped_buffer; |
| 1368 | } | 1757 | } |
| 1369 | 1758 | ||
| 1370 | btrfs_csum_final(crc, calculated_csum); | 1759 | btrfs_csum_final(crc, calculated_csum); |
| 1371 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) | 1760 | if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) |
| 1372 | ++crc_fail; | 1761 | ++crc_fail; |
| 1373 | 1762 | ||
| 1374 | return fail || crc_fail; | 1763 | return fail || crc_fail; |
| @@ -1377,8 +1766,8 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
| 1377 | static int scrub_checksum_super(struct scrub_block *sblock) | 1766 | static int scrub_checksum_super(struct scrub_block *sblock) |
| 1378 | { | 1767 | { |
| 1379 | struct btrfs_super_block *s; | 1768 | struct btrfs_super_block *s; |
| 1380 | struct scrub_dev *sdev = sblock->sdev; | 1769 | struct scrub_ctx *sctx = sblock->sctx; |
| 1381 | struct btrfs_root *root = sdev->dev->dev_root; | 1770 | struct btrfs_root *root = sctx->dev_root; |
| 1382 | struct btrfs_fs_info *fs_info = root->fs_info; | 1771 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 1383 | u8 calculated_csum[BTRFS_CSUM_SIZE]; | 1772 | u8 calculated_csum[BTRFS_CSUM_SIZE]; |
| 1384 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; | 1773 | u8 on_disk_csum[BTRFS_CSUM_SIZE]; |
| @@ -1393,15 +1782,15 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1393 | int index; | 1782 | int index; |
| 1394 | 1783 | ||
| 1395 | BUG_ON(sblock->page_count < 1); | 1784 | BUG_ON(sblock->page_count < 1); |
| 1396 | page = sblock->pagev[0].page; | 1785 | page = sblock->pagev[0]->page; |
| 1397 | mapped_buffer = kmap_atomic(page); | 1786 | mapped_buffer = kmap_atomic(page); |
| 1398 | s = (struct btrfs_super_block *)mapped_buffer; | 1787 | s = (struct btrfs_super_block *)mapped_buffer; |
| 1399 | memcpy(on_disk_csum, s->csum, sdev->csum_size); | 1788 | memcpy(on_disk_csum, s->csum, sctx->csum_size); |
| 1400 | 1789 | ||
| 1401 | if (sblock->pagev[0].logical != le64_to_cpu(s->bytenr)) | 1790 | if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr)) |
| 1402 | ++fail_cor; | 1791 | ++fail_cor; |
| 1403 | 1792 | ||
| 1404 | if (sblock->pagev[0].generation != le64_to_cpu(s->generation)) | 1793 | if (sblock->pagev[0]->generation != le64_to_cpu(s->generation)) |
| 1405 | ++fail_gen; | 1794 | ++fail_gen; |
| 1406 | 1795 | ||
| 1407 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | 1796 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) |
| @@ -1421,15 +1810,15 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1421 | break; | 1810 | break; |
| 1422 | index++; | 1811 | index++; |
| 1423 | BUG_ON(index >= sblock->page_count); | 1812 | BUG_ON(index >= sblock->page_count); |
| 1424 | BUG_ON(!sblock->pagev[index].page); | 1813 | BUG_ON(!sblock->pagev[index]->page); |
| 1425 | page = sblock->pagev[index].page; | 1814 | page = sblock->pagev[index]->page; |
| 1426 | mapped_buffer = kmap_atomic(page); | 1815 | mapped_buffer = kmap_atomic(page); |
| 1427 | mapped_size = PAGE_SIZE; | 1816 | mapped_size = PAGE_SIZE; |
| 1428 | p = mapped_buffer; | 1817 | p = mapped_buffer; |
| 1429 | } | 1818 | } |
| 1430 | 1819 | ||
| 1431 | btrfs_csum_final(crc, calculated_csum); | 1820 | btrfs_csum_final(crc, calculated_csum); |
| 1432 | if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) | 1821 | if (memcmp(calculated_csum, on_disk_csum, sctx->csum_size)) |
| 1433 | ++fail_cor; | 1822 | ++fail_cor; |
| 1434 | 1823 | ||
| 1435 | if (fail_cor + fail_gen) { | 1824 | if (fail_cor + fail_gen) { |
| @@ -1438,14 +1827,14 @@ static int scrub_checksum_super(struct scrub_block *sblock) | |||
| 1438 | * They will get written with the next transaction commit | 1827 | * They will get written with the next transaction commit |
| 1439 | * anyway | 1828 | * anyway |
| 1440 | */ | 1829 | */ |
| 1441 | spin_lock(&sdev->stat_lock); | 1830 | spin_lock(&sctx->stat_lock); |
| 1442 | ++sdev->stat.super_errors; | 1831 | ++sctx->stat.super_errors; |
| 1443 | spin_unlock(&sdev->stat_lock); | 1832 | spin_unlock(&sctx->stat_lock); |
| 1444 | if (fail_cor) | 1833 | if (fail_cor) |
| 1445 | btrfs_dev_stat_inc_and_print(sdev->dev, | 1834 | btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev, |
| 1446 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 1835 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
| 1447 | else | 1836 | else |
| 1448 | btrfs_dev_stat_inc_and_print(sdev->dev, | 1837 | btrfs_dev_stat_inc_and_print(sblock->pagev[0]->dev, |
| 1449 | BTRFS_DEV_STAT_GENERATION_ERRS); | 1838 | BTRFS_DEV_STAT_GENERATION_ERRS); |
| 1450 | } | 1839 | } |
| 1451 | 1840 | ||
| @@ -1463,28 +1852,54 @@ static void scrub_block_put(struct scrub_block *sblock) | |||
| 1463 | int i; | 1852 | int i; |
| 1464 | 1853 | ||
| 1465 | for (i = 0; i < sblock->page_count; i++) | 1854 | for (i = 0; i < sblock->page_count; i++) |
| 1466 | if (sblock->pagev[i].page) | 1855 | scrub_page_put(sblock->pagev[i]); |
| 1467 | __free_page(sblock->pagev[i].page); | ||
| 1468 | kfree(sblock); | 1856 | kfree(sblock); |
| 1469 | } | 1857 | } |
| 1470 | } | 1858 | } |
| 1471 | 1859 | ||
| 1472 | static void scrub_submit(struct scrub_dev *sdev) | 1860 | static void scrub_page_get(struct scrub_page *spage) |
| 1861 | { | ||
| 1862 | atomic_inc(&spage->ref_count); | ||
| 1863 | } | ||
| 1864 | |||
| 1865 | static void scrub_page_put(struct scrub_page *spage) | ||
| 1866 | { | ||
| 1867 | if (atomic_dec_and_test(&spage->ref_count)) { | ||
| 1868 | if (spage->page) | ||
| 1869 | __free_page(spage->page); | ||
| 1870 | kfree(spage); | ||
| 1871 | } | ||
| 1872 | } | ||
| 1873 | |||
| 1874 | static void scrub_submit(struct scrub_ctx *sctx) | ||
| 1473 | { | 1875 | { |
| 1474 | struct scrub_bio *sbio; | 1876 | struct scrub_bio *sbio; |
| 1475 | 1877 | ||
| 1476 | if (sdev->curr == -1) | 1878 | if (sctx->curr == -1) |
| 1477 | return; | 1879 | return; |
| 1478 | 1880 | ||
| 1479 | sbio = sdev->bios[sdev->curr]; | 1881 | sbio = sctx->bios[sctx->curr]; |
| 1480 | sdev->curr = -1; | 1882 | sctx->curr = -1; |
| 1481 | atomic_inc(&sdev->in_flight); | 1883 | scrub_pending_bio_inc(sctx); |
| 1482 | 1884 | ||
| 1483 | btrfsic_submit_bio(READ, sbio->bio); | 1885 | if (!sbio->bio->bi_bdev) { |
| 1886 | /* | ||
| 1887 | * this case should not happen. If btrfs_map_block() is | ||
| 1888 | * wrong, it could happen for dev-replace operations on | ||
| 1889 | * missing devices when no mirrors are available, but in | ||
| 1890 | * this case it should already fail the mount. | ||
| 1891 | * This case is handled correctly (but _very_ slowly). | ||
| 1892 | */ | ||
| 1893 | printk_ratelimited(KERN_WARNING | ||
| 1894 | "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n"); | ||
| 1895 | bio_endio(sbio->bio, -EIO); | ||
| 1896 | } else { | ||
| 1897 | btrfsic_submit_bio(READ, sbio->bio); | ||
| 1898 | } | ||
| 1484 | } | 1899 | } |
| 1485 | 1900 | ||
| 1486 | static int scrub_add_page_to_bio(struct scrub_dev *sdev, | 1901 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
| 1487 | struct scrub_page *spage) | 1902 | struct scrub_page *spage) |
| 1488 | { | 1903 | { |
| 1489 | struct scrub_block *sblock = spage->sblock; | 1904 | struct scrub_block *sblock = spage->sblock; |
| 1490 | struct scrub_bio *sbio; | 1905 | struct scrub_bio *sbio; |
| @@ -1494,28 +1909,29 @@ again: | |||
| 1494 | /* | 1909 | /* |
| 1495 | * grab a fresh bio or wait for one to become available | 1910 | * grab a fresh bio or wait for one to become available |
| 1496 | */ | 1911 | */ |
| 1497 | while (sdev->curr == -1) { | 1912 | while (sctx->curr == -1) { |
| 1498 | spin_lock(&sdev->list_lock); | 1913 | spin_lock(&sctx->list_lock); |
| 1499 | sdev->curr = sdev->first_free; | 1914 | sctx->curr = sctx->first_free; |
| 1500 | if (sdev->curr != -1) { | 1915 | if (sctx->curr != -1) { |
| 1501 | sdev->first_free = sdev->bios[sdev->curr]->next_free; | 1916 | sctx->first_free = sctx->bios[sctx->curr]->next_free; |
| 1502 | sdev->bios[sdev->curr]->next_free = -1; | 1917 | sctx->bios[sctx->curr]->next_free = -1; |
| 1503 | sdev->bios[sdev->curr]->page_count = 0; | 1918 | sctx->bios[sctx->curr]->page_count = 0; |
| 1504 | spin_unlock(&sdev->list_lock); | 1919 | spin_unlock(&sctx->list_lock); |
| 1505 | } else { | 1920 | } else { |
| 1506 | spin_unlock(&sdev->list_lock); | 1921 | spin_unlock(&sctx->list_lock); |
| 1507 | wait_event(sdev->list_wait, sdev->first_free != -1); | 1922 | wait_event(sctx->list_wait, sctx->first_free != -1); |
| 1508 | } | 1923 | } |
| 1509 | } | 1924 | } |
| 1510 | sbio = sdev->bios[sdev->curr]; | 1925 | sbio = sctx->bios[sctx->curr]; |
| 1511 | if (sbio->page_count == 0) { | 1926 | if (sbio->page_count == 0) { |
| 1512 | struct bio *bio; | 1927 | struct bio *bio; |
| 1513 | 1928 | ||
| 1514 | sbio->physical = spage->physical; | 1929 | sbio->physical = spage->physical; |
| 1515 | sbio->logical = spage->logical; | 1930 | sbio->logical = spage->logical; |
| 1931 | sbio->dev = spage->dev; | ||
| 1516 | bio = sbio->bio; | 1932 | bio = sbio->bio; |
| 1517 | if (!bio) { | 1933 | if (!bio) { |
| 1518 | bio = bio_alloc(GFP_NOFS, sdev->pages_per_bio); | 1934 | bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); |
| 1519 | if (!bio) | 1935 | if (!bio) |
| 1520 | return -ENOMEM; | 1936 | return -ENOMEM; |
| 1521 | sbio->bio = bio; | 1937 | sbio->bio = bio; |
| @@ -1523,14 +1939,15 @@ again: | |||
| 1523 | 1939 | ||
| 1524 | bio->bi_private = sbio; | 1940 | bio->bi_private = sbio; |
| 1525 | bio->bi_end_io = scrub_bio_end_io; | 1941 | bio->bi_end_io = scrub_bio_end_io; |
| 1526 | bio->bi_bdev = sdev->dev->bdev; | 1942 | bio->bi_bdev = sbio->dev->bdev; |
| 1527 | bio->bi_sector = spage->physical >> 9; | 1943 | bio->bi_sector = sbio->physical >> 9; |
| 1528 | sbio->err = 0; | 1944 | sbio->err = 0; |
| 1529 | } else if (sbio->physical + sbio->page_count * PAGE_SIZE != | 1945 | } else if (sbio->physical + sbio->page_count * PAGE_SIZE != |
| 1530 | spage->physical || | 1946 | spage->physical || |
| 1531 | sbio->logical + sbio->page_count * PAGE_SIZE != | 1947 | sbio->logical + sbio->page_count * PAGE_SIZE != |
| 1532 | spage->logical) { | 1948 | spage->logical || |
| 1533 | scrub_submit(sdev); | 1949 | sbio->dev != spage->dev) { |
| 1950 | scrub_submit(sctx); | ||
| 1534 | goto again; | 1951 | goto again; |
| 1535 | } | 1952 | } |
| 1536 | 1953 | ||
| @@ -1542,81 +1959,87 @@ again: | |||
| 1542 | sbio->bio = NULL; | 1959 | sbio->bio = NULL; |
| 1543 | return -EIO; | 1960 | return -EIO; |
| 1544 | } | 1961 | } |
| 1545 | scrub_submit(sdev); | 1962 | scrub_submit(sctx); |
| 1546 | goto again; | 1963 | goto again; |
| 1547 | } | 1964 | } |
| 1548 | 1965 | ||
| 1549 | scrub_block_get(sblock); /* one for the added page */ | 1966 | scrub_block_get(sblock); /* one for the page added to the bio */ |
| 1550 | atomic_inc(&sblock->outstanding_pages); | 1967 | atomic_inc(&sblock->outstanding_pages); |
| 1551 | sbio->page_count++; | 1968 | sbio->page_count++; |
| 1552 | if (sbio->page_count == sdev->pages_per_bio) | 1969 | if (sbio->page_count == sctx->pages_per_rd_bio) |
| 1553 | scrub_submit(sdev); | 1970 | scrub_submit(sctx); |
| 1554 | 1971 | ||
| 1555 | return 0; | 1972 | return 0; |
| 1556 | } | 1973 | } |
| 1557 | 1974 | ||
| 1558 | static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | 1975 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 1559 | u64 physical, u64 flags, u64 gen, int mirror_num, | 1976 | u64 physical, struct btrfs_device *dev, u64 flags, |
| 1560 | u8 *csum, int force) | 1977 | u64 gen, int mirror_num, u8 *csum, int force, |
| 1978 | u64 physical_for_dev_replace) | ||
| 1561 | { | 1979 | { |
| 1562 | struct scrub_block *sblock; | 1980 | struct scrub_block *sblock; |
| 1563 | int index; | 1981 | int index; |
| 1564 | 1982 | ||
| 1565 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); | 1983 | sblock = kzalloc(sizeof(*sblock), GFP_NOFS); |
| 1566 | if (!sblock) { | 1984 | if (!sblock) { |
| 1567 | spin_lock(&sdev->stat_lock); | 1985 | spin_lock(&sctx->stat_lock); |
| 1568 | sdev->stat.malloc_errors++; | 1986 | sctx->stat.malloc_errors++; |
| 1569 | spin_unlock(&sdev->stat_lock); | 1987 | spin_unlock(&sctx->stat_lock); |
| 1570 | return -ENOMEM; | 1988 | return -ENOMEM; |
| 1571 | } | 1989 | } |
| 1572 | 1990 | ||
| 1573 | /* one ref inside this function, plus one for each page later on */ | 1991 | /* one ref inside this function, plus one for each page added to |
| 1992 | * a bio later on */ | ||
| 1574 | atomic_set(&sblock->ref_count, 1); | 1993 | atomic_set(&sblock->ref_count, 1); |
| 1575 | sblock->sdev = sdev; | 1994 | sblock->sctx = sctx; |
| 1576 | sblock->no_io_error_seen = 1; | 1995 | sblock->no_io_error_seen = 1; |
| 1577 | 1996 | ||
| 1578 | for (index = 0; len > 0; index++) { | 1997 | for (index = 0; len > 0; index++) { |
| 1579 | struct scrub_page *spage = sblock->pagev + index; | 1998 | struct scrub_page *spage; |
| 1580 | u64 l = min_t(u64, len, PAGE_SIZE); | 1999 | u64 l = min_t(u64, len, PAGE_SIZE); |
| 1581 | 2000 | ||
| 1582 | BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); | 2001 | spage = kzalloc(sizeof(*spage), GFP_NOFS); |
| 1583 | spage->page = alloc_page(GFP_NOFS); | 2002 | if (!spage) { |
| 1584 | if (!spage->page) { | 2003 | leave_nomem: |
| 1585 | spin_lock(&sdev->stat_lock); | 2004 | spin_lock(&sctx->stat_lock); |
| 1586 | sdev->stat.malloc_errors++; | 2005 | sctx->stat.malloc_errors++; |
| 1587 | spin_unlock(&sdev->stat_lock); | 2006 | spin_unlock(&sctx->stat_lock); |
| 1588 | while (index > 0) { | 2007 | scrub_block_put(sblock); |
| 1589 | index--; | ||
| 1590 | __free_page(sblock->pagev[index].page); | ||
| 1591 | } | ||
| 1592 | kfree(sblock); | ||
| 1593 | return -ENOMEM; | 2008 | return -ENOMEM; |
| 1594 | } | 2009 | } |
| 2010 | BUG_ON(index >= SCRUB_MAX_PAGES_PER_BLOCK); | ||
| 2011 | scrub_page_get(spage); | ||
| 2012 | sblock->pagev[index] = spage; | ||
| 1595 | spage->sblock = sblock; | 2013 | spage->sblock = sblock; |
| 1596 | spage->dev = sdev->dev; | 2014 | spage->dev = dev; |
| 1597 | spage->flags = flags; | 2015 | spage->flags = flags; |
| 1598 | spage->generation = gen; | 2016 | spage->generation = gen; |
| 1599 | spage->logical = logical; | 2017 | spage->logical = logical; |
| 1600 | spage->physical = physical; | 2018 | spage->physical = physical; |
| 2019 | spage->physical_for_dev_replace = physical_for_dev_replace; | ||
| 1601 | spage->mirror_num = mirror_num; | 2020 | spage->mirror_num = mirror_num; |
| 1602 | if (csum) { | 2021 | if (csum) { |
| 1603 | spage->have_csum = 1; | 2022 | spage->have_csum = 1; |
| 1604 | memcpy(spage->csum, csum, sdev->csum_size); | 2023 | memcpy(spage->csum, csum, sctx->csum_size); |
| 1605 | } else { | 2024 | } else { |
| 1606 | spage->have_csum = 0; | 2025 | spage->have_csum = 0; |
| 1607 | } | 2026 | } |
| 1608 | sblock->page_count++; | 2027 | sblock->page_count++; |
| 2028 | spage->page = alloc_page(GFP_NOFS); | ||
| 2029 | if (!spage->page) | ||
| 2030 | goto leave_nomem; | ||
| 1609 | len -= l; | 2031 | len -= l; |
| 1610 | logical += l; | 2032 | logical += l; |
| 1611 | physical += l; | 2033 | physical += l; |
| 2034 | physical_for_dev_replace += l; | ||
| 1612 | } | 2035 | } |
| 1613 | 2036 | ||
| 1614 | BUG_ON(sblock->page_count == 0); | 2037 | WARN_ON(sblock->page_count == 0); |
| 1615 | for (index = 0; index < sblock->page_count; index++) { | 2038 | for (index = 0; index < sblock->page_count; index++) { |
| 1616 | struct scrub_page *spage = sblock->pagev + index; | 2039 | struct scrub_page *spage = sblock->pagev[index]; |
| 1617 | int ret; | 2040 | int ret; |
| 1618 | 2041 | ||
| 1619 | ret = scrub_add_page_to_bio(sdev, spage); | 2042 | ret = scrub_add_page_to_rd_bio(sctx, spage); |
| 1620 | if (ret) { | 2043 | if (ret) { |
| 1621 | scrub_block_put(sblock); | 2044 | scrub_block_put(sblock); |
| 1622 | return ret; | 2045 | return ret; |
| @@ -1624,7 +2047,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1624 | } | 2047 | } |
| 1625 | 2048 | ||
| 1626 | if (force) | 2049 | if (force) |
| 1627 | scrub_submit(sdev); | 2050 | scrub_submit(sctx); |
| 1628 | 2051 | ||
| 1629 | /* last one frees, either here or in bio completion for last page */ | 2052 | /* last one frees, either here or in bio completion for last page */ |
| 1630 | scrub_block_put(sblock); | 2053 | scrub_block_put(sblock); |
| @@ -1634,8 +2057,7 @@ static int scrub_pages(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1634 | static void scrub_bio_end_io(struct bio *bio, int err) | 2057 | static void scrub_bio_end_io(struct bio *bio, int err) |
| 1635 | { | 2058 | { |
| 1636 | struct scrub_bio *sbio = bio->bi_private; | 2059 | struct scrub_bio *sbio = bio->bi_private; |
| 1637 | struct scrub_dev *sdev = sbio->sdev; | 2060 | struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; |
| 1638 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
| 1639 | 2061 | ||
| 1640 | sbio->err = err; | 2062 | sbio->err = err; |
| 1641 | sbio->bio = bio; | 2063 | sbio->bio = bio; |
| @@ -1646,10 +2068,10 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
| 1646 | static void scrub_bio_end_io_worker(struct btrfs_work *work) | 2068 | static void scrub_bio_end_io_worker(struct btrfs_work *work) |
| 1647 | { | 2069 | { |
| 1648 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); | 2070 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); |
| 1649 | struct scrub_dev *sdev = sbio->sdev; | 2071 | struct scrub_ctx *sctx = sbio->sctx; |
| 1650 | int i; | 2072 | int i; |
| 1651 | 2073 | ||
| 1652 | BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO); | 2074 | BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); |
| 1653 | if (sbio->err) { | 2075 | if (sbio->err) { |
| 1654 | for (i = 0; i < sbio->page_count; i++) { | 2076 | for (i = 0; i < sbio->page_count; i++) { |
| 1655 | struct scrub_page *spage = sbio->pagev[i]; | 2077 | struct scrub_page *spage = sbio->pagev[i]; |
| @@ -1671,23 +2093,37 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
| 1671 | 2093 | ||
| 1672 | bio_put(sbio->bio); | 2094 | bio_put(sbio->bio); |
| 1673 | sbio->bio = NULL; | 2095 | sbio->bio = NULL; |
| 1674 | spin_lock(&sdev->list_lock); | 2096 | spin_lock(&sctx->list_lock); |
| 1675 | sbio->next_free = sdev->first_free; | 2097 | sbio->next_free = sctx->first_free; |
| 1676 | sdev->first_free = sbio->index; | 2098 | sctx->first_free = sbio->index; |
| 1677 | spin_unlock(&sdev->list_lock); | 2099 | spin_unlock(&sctx->list_lock); |
| 1678 | atomic_dec(&sdev->in_flight); | 2100 | |
| 1679 | wake_up(&sdev->list_wait); | 2101 | if (sctx->is_dev_replace && |
| 2102 | atomic_read(&sctx->wr_ctx.flush_all_writes)) { | ||
| 2103 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
| 2104 | scrub_wr_submit(sctx); | ||
| 2105 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
| 2106 | } | ||
| 2107 | |||
| 2108 | scrub_pending_bio_dec(sctx); | ||
| 1680 | } | 2109 | } |
| 1681 | 2110 | ||
| 1682 | static void scrub_block_complete(struct scrub_block *sblock) | 2111 | static void scrub_block_complete(struct scrub_block *sblock) |
| 1683 | { | 2112 | { |
| 1684 | if (!sblock->no_io_error_seen) | 2113 | if (!sblock->no_io_error_seen) { |
| 1685 | scrub_handle_errored_block(sblock); | 2114 | scrub_handle_errored_block(sblock); |
| 1686 | else | 2115 | } else { |
| 1687 | scrub_checksum(sblock); | 2116 | /* |
| 2117 | * if has checksum error, write via repair mechanism in | ||
| 2118 | * dev replace case, otherwise write here in dev replace | ||
| 2119 | * case. | ||
| 2120 | */ | ||
| 2121 | if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) | ||
| 2122 | scrub_write_block_to_dev_replace(sblock); | ||
| 2123 | } | ||
| 1688 | } | 2124 | } |
| 1689 | 2125 | ||
| 1690 | static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | 2126 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 1691 | u8 *csum) | 2127 | u8 *csum) |
| 1692 | { | 2128 | { |
| 1693 | struct btrfs_ordered_sum *sum = NULL; | 2129 | struct btrfs_ordered_sum *sum = NULL; |
| @@ -1695,15 +2131,15 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1695 | unsigned long i; | 2131 | unsigned long i; |
| 1696 | unsigned long num_sectors; | 2132 | unsigned long num_sectors; |
| 1697 | 2133 | ||
| 1698 | while (!list_empty(&sdev->csum_list)) { | 2134 | while (!list_empty(&sctx->csum_list)) { |
| 1699 | sum = list_first_entry(&sdev->csum_list, | 2135 | sum = list_first_entry(&sctx->csum_list, |
| 1700 | struct btrfs_ordered_sum, list); | 2136 | struct btrfs_ordered_sum, list); |
| 1701 | if (sum->bytenr > logical) | 2137 | if (sum->bytenr > logical) |
| 1702 | return 0; | 2138 | return 0; |
| 1703 | if (sum->bytenr + sum->len > logical) | 2139 | if (sum->bytenr + sum->len > logical) |
| 1704 | break; | 2140 | break; |
| 1705 | 2141 | ||
| 1706 | ++sdev->stat.csum_discards; | 2142 | ++sctx->stat.csum_discards; |
| 1707 | list_del(&sum->list); | 2143 | list_del(&sum->list); |
| 1708 | kfree(sum); | 2144 | kfree(sum); |
| 1709 | sum = NULL; | 2145 | sum = NULL; |
| @@ -1711,10 +2147,10 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1711 | if (!sum) | 2147 | if (!sum) |
| 1712 | return 0; | 2148 | return 0; |
| 1713 | 2149 | ||
| 1714 | num_sectors = sum->len / sdev->sectorsize; | 2150 | num_sectors = sum->len / sctx->sectorsize; |
| 1715 | for (i = 0; i < num_sectors; ++i) { | 2151 | for (i = 0; i < num_sectors; ++i) { |
| 1716 | if (sum->sums[i].bytenr == logical) { | 2152 | if (sum->sums[i].bytenr == logical) { |
| 1717 | memcpy(csum, &sum->sums[i].sum, sdev->csum_size); | 2153 | memcpy(csum, &sum->sums[i].sum, sctx->csum_size); |
| 1718 | ret = 1; | 2154 | ret = 1; |
| 1719 | break; | 2155 | break; |
| 1720 | } | 2156 | } |
| @@ -1727,29 +2163,30 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1727 | } | 2163 | } |
| 1728 | 2164 | ||
| 1729 | /* scrub extent tries to collect up to 64 kB for each bio */ | 2165 | /* scrub extent tries to collect up to 64 kB for each bio */ |
| 1730 | static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, | 2166 | static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, |
| 1731 | u64 physical, u64 flags, u64 gen, int mirror_num) | 2167 | u64 physical, struct btrfs_device *dev, u64 flags, |
| 2168 | u64 gen, int mirror_num, u64 physical_for_dev_replace) | ||
| 1732 | { | 2169 | { |
| 1733 | int ret; | 2170 | int ret; |
| 1734 | u8 csum[BTRFS_CSUM_SIZE]; | 2171 | u8 csum[BTRFS_CSUM_SIZE]; |
| 1735 | u32 blocksize; | 2172 | u32 blocksize; |
| 1736 | 2173 | ||
| 1737 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | 2174 | if (flags & BTRFS_EXTENT_FLAG_DATA) { |
| 1738 | blocksize = sdev->sectorsize; | 2175 | blocksize = sctx->sectorsize; |
| 1739 | spin_lock(&sdev->stat_lock); | 2176 | spin_lock(&sctx->stat_lock); |
| 1740 | sdev->stat.data_extents_scrubbed++; | 2177 | sctx->stat.data_extents_scrubbed++; |
| 1741 | sdev->stat.data_bytes_scrubbed += len; | 2178 | sctx->stat.data_bytes_scrubbed += len; |
| 1742 | spin_unlock(&sdev->stat_lock); | 2179 | spin_unlock(&sctx->stat_lock); |
| 1743 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 2180 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 1744 | BUG_ON(sdev->nodesize != sdev->leafsize); | 2181 | WARN_ON(sctx->nodesize != sctx->leafsize); |
| 1745 | blocksize = sdev->nodesize; | 2182 | blocksize = sctx->nodesize; |
| 1746 | spin_lock(&sdev->stat_lock); | 2183 | spin_lock(&sctx->stat_lock); |
| 1747 | sdev->stat.tree_extents_scrubbed++; | 2184 | sctx->stat.tree_extents_scrubbed++; |
| 1748 | sdev->stat.tree_bytes_scrubbed += len; | 2185 | sctx->stat.tree_bytes_scrubbed += len; |
| 1749 | spin_unlock(&sdev->stat_lock); | 2186 | spin_unlock(&sctx->stat_lock); |
| 1750 | } else { | 2187 | } else { |
| 1751 | blocksize = sdev->sectorsize; | 2188 | blocksize = sctx->sectorsize; |
| 1752 | BUG_ON(1); | 2189 | WARN_ON(1); |
| 1753 | } | 2190 | } |
| 1754 | 2191 | ||
| 1755 | while (len) { | 2192 | while (len) { |
| @@ -1758,26 +2195,38 @@ static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, | |||
| 1758 | 2195 | ||
| 1759 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | 2196 | if (flags & BTRFS_EXTENT_FLAG_DATA) { |
| 1760 | /* push csums to sbio */ | 2197 | /* push csums to sbio */ |
| 1761 | have_csum = scrub_find_csum(sdev, logical, l, csum); | 2198 | have_csum = scrub_find_csum(sctx, logical, l, csum); |
| 1762 | if (have_csum == 0) | 2199 | if (have_csum == 0) |
| 1763 | ++sdev->stat.no_csum; | 2200 | ++sctx->stat.no_csum; |
| 2201 | if (sctx->is_dev_replace && !have_csum) { | ||
| 2202 | ret = copy_nocow_pages(sctx, logical, l, | ||
| 2203 | mirror_num, | ||
| 2204 | physical_for_dev_replace); | ||
| 2205 | goto behind_scrub_pages; | ||
| 2206 | } | ||
| 1764 | } | 2207 | } |
| 1765 | ret = scrub_pages(sdev, logical, l, physical, flags, gen, | 2208 | ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, |
| 1766 | mirror_num, have_csum ? csum : NULL, 0); | 2209 | mirror_num, have_csum ? csum : NULL, 0, |
| 2210 | physical_for_dev_replace); | ||
| 2211 | behind_scrub_pages: | ||
| 1767 | if (ret) | 2212 | if (ret) |
| 1768 | return ret; | 2213 | return ret; |
| 1769 | len -= l; | 2214 | len -= l; |
| 1770 | logical += l; | 2215 | logical += l; |
| 1771 | physical += l; | 2216 | physical += l; |
| 2217 | physical_for_dev_replace += l; | ||
| 1772 | } | 2218 | } |
| 1773 | return 0; | 2219 | return 0; |
| 1774 | } | 2220 | } |
| 1775 | 2221 | ||
| 1776 | static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | 2222 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, |
| 1777 | struct map_lookup *map, int num, u64 base, u64 length) | 2223 | struct map_lookup *map, |
| 2224 | struct btrfs_device *scrub_dev, | ||
| 2225 | int num, u64 base, u64 length, | ||
| 2226 | int is_dev_replace) | ||
| 1778 | { | 2227 | { |
| 1779 | struct btrfs_path *path; | 2228 | struct btrfs_path *path; |
| 1780 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | 2229 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
| 1781 | struct btrfs_root *root = fs_info->extent_root; | 2230 | struct btrfs_root *root = fs_info->extent_root; |
| 1782 | struct btrfs_root *csum_root = fs_info->csum_root; | 2231 | struct btrfs_root *csum_root = fs_info->csum_root; |
| 1783 | struct btrfs_extent_item *extent; | 2232 | struct btrfs_extent_item *extent; |
| @@ -1797,9 +2246,13 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1797 | struct reada_control *reada2; | 2246 | struct reada_control *reada2; |
| 1798 | struct btrfs_key key_start; | 2247 | struct btrfs_key key_start; |
| 1799 | struct btrfs_key key_end; | 2248 | struct btrfs_key key_end; |
| 1800 | |||
| 1801 | u64 increment = map->stripe_len; | 2249 | u64 increment = map->stripe_len; |
| 1802 | u64 offset; | 2250 | u64 offset; |
| 2251 | u64 extent_logical; | ||
| 2252 | u64 extent_physical; | ||
| 2253 | u64 extent_len; | ||
| 2254 | struct btrfs_device *extent_dev; | ||
| 2255 | int extent_mirror_num; | ||
| 1803 | 2256 | ||
| 1804 | nstripes = length; | 2257 | nstripes = length; |
| 1805 | offset = 0; | 2258 | offset = 0; |
| @@ -1843,8 +2296,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1843 | */ | 2296 | */ |
| 1844 | logical = base + offset; | 2297 | logical = base + offset; |
| 1845 | 2298 | ||
| 1846 | wait_event(sdev->list_wait, | 2299 | wait_event(sctx->list_wait, |
| 1847 | atomic_read(&sdev->in_flight) == 0); | 2300 | atomic_read(&sctx->bios_in_flight) == 0); |
| 1848 | atomic_inc(&fs_info->scrubs_paused); | 2301 | atomic_inc(&fs_info->scrubs_paused); |
| 1849 | wake_up(&fs_info->scrub_pause_wait); | 2302 | wake_up(&fs_info->scrub_pause_wait); |
| 1850 | 2303 | ||
| @@ -1898,7 +2351,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1898 | * canceled? | 2351 | * canceled? |
| 1899 | */ | 2352 | */ |
| 1900 | if (atomic_read(&fs_info->scrub_cancel_req) || | 2353 | if (atomic_read(&fs_info->scrub_cancel_req) || |
| 1901 | atomic_read(&sdev->cancel_req)) { | 2354 | atomic_read(&sctx->cancel_req)) { |
| 1902 | ret = -ECANCELED; | 2355 | ret = -ECANCELED; |
| 1903 | goto out; | 2356 | goto out; |
| 1904 | } | 2357 | } |
| @@ -1907,9 +2360,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1907 | */ | 2360 | */ |
| 1908 | if (atomic_read(&fs_info->scrub_pause_req)) { | 2361 | if (atomic_read(&fs_info->scrub_pause_req)) { |
| 1909 | /* push queued extents */ | 2362 | /* push queued extents */ |
| 1910 | scrub_submit(sdev); | 2363 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); |
| 1911 | wait_event(sdev->list_wait, | 2364 | scrub_submit(sctx); |
| 1912 | atomic_read(&sdev->in_flight) == 0); | 2365 | mutex_lock(&sctx->wr_ctx.wr_lock); |
| 2366 | scrub_wr_submit(sctx); | ||
| 2367 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
| 2368 | wait_event(sctx->list_wait, | ||
| 2369 | atomic_read(&sctx->bios_in_flight) == 0); | ||
| 2370 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | ||
| 1913 | atomic_inc(&fs_info->scrubs_paused); | 2371 | atomic_inc(&fs_info->scrubs_paused); |
| 1914 | wake_up(&fs_info->scrub_pause_wait); | 2372 | wake_up(&fs_info->scrub_pause_wait); |
| 1915 | mutex_lock(&fs_info->scrub_lock); | 2373 | mutex_lock(&fs_info->scrub_lock); |
| @@ -1926,7 +2384,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 1926 | 2384 | ||
| 1927 | ret = btrfs_lookup_csums_range(csum_root, logical, | 2385 | ret = btrfs_lookup_csums_range(csum_root, logical, |
| 1928 | logical + map->stripe_len - 1, | 2386 | logical + map->stripe_len - 1, |
| 1929 | &sdev->csum_list, 1); | 2387 | &sctx->csum_list, 1); |
| 1930 | if (ret) | 2388 | if (ret) |
| 1931 | goto out; | 2389 | goto out; |
| 1932 | 2390 | ||
| @@ -2004,9 +2462,20 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | |||
| 2004 | key.objectid; | 2462 | key.objectid; |
| 2005 | } | 2463 | } |
| 2006 | 2464 | ||
| 2007 | ret = scrub_extent(sdev, key.objectid, key.offset, | 2465 | extent_logical = key.objectid; |
| 2008 | key.objectid - logical + physical, | 2466 | extent_physical = key.objectid - logical + physical; |
| 2009 | flags, generation, mirror_num); | 2467 | extent_len = key.offset; |
| 2468 | extent_dev = scrub_dev; | ||
| 2469 | extent_mirror_num = mirror_num; | ||
| 2470 | if (is_dev_replace) | ||
| 2471 | scrub_remap_extent(fs_info, extent_logical, | ||
| 2472 | extent_len, &extent_physical, | ||
| 2473 | &extent_dev, | ||
| 2474 | &extent_mirror_num); | ||
| 2475 | ret = scrub_extent(sctx, extent_logical, extent_len, | ||
| 2476 | extent_physical, extent_dev, flags, | ||
| 2477 | generation, extent_mirror_num, | ||
| 2478 | key.objectid - logical + physical); | ||
| 2010 | if (ret) | 2479 | if (ret) |
| 2011 | goto out; | 2480 | goto out; |
| 2012 | 2481 | ||
| @@ -2016,29 +2485,34 @@ next: | |||
| 2016 | btrfs_release_path(path); | 2485 | btrfs_release_path(path); |
| 2017 | logical += increment; | 2486 | logical += increment; |
| 2018 | physical += map->stripe_len; | 2487 | physical += map->stripe_len; |
| 2019 | spin_lock(&sdev->stat_lock); | 2488 | spin_lock(&sctx->stat_lock); |
| 2020 | sdev->stat.last_physical = physical; | 2489 | sctx->stat.last_physical = physical; |
| 2021 | spin_unlock(&sdev->stat_lock); | 2490 | spin_unlock(&sctx->stat_lock); |
| 2022 | } | 2491 | } |
| 2492 | out: | ||
| 2023 | /* push queued extents */ | 2493 | /* push queued extents */ |
| 2024 | scrub_submit(sdev); | 2494 | scrub_submit(sctx); |
| 2495 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
| 2496 | scrub_wr_submit(sctx); | ||
| 2497 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
| 2025 | 2498 | ||
| 2026 | out: | ||
| 2027 | blk_finish_plug(&plug); | 2499 | blk_finish_plug(&plug); |
| 2028 | btrfs_free_path(path); | 2500 | btrfs_free_path(path); |
| 2029 | return ret < 0 ? ret : 0; | 2501 | return ret < 0 ? ret : 0; |
| 2030 | } | 2502 | } |
| 2031 | 2503 | ||
| 2032 | static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, | 2504 | static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, |
| 2033 | u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length, | 2505 | struct btrfs_device *scrub_dev, |
| 2034 | u64 dev_offset) | 2506 | u64 chunk_tree, u64 chunk_objectid, |
| 2507 | u64 chunk_offset, u64 length, | ||
| 2508 | u64 dev_offset, int is_dev_replace) | ||
| 2035 | { | 2509 | { |
| 2036 | struct btrfs_mapping_tree *map_tree = | 2510 | struct btrfs_mapping_tree *map_tree = |
| 2037 | &sdev->dev->dev_root->fs_info->mapping_tree; | 2511 | &sctx->dev_root->fs_info->mapping_tree; |
| 2038 | struct map_lookup *map; | 2512 | struct map_lookup *map; |
| 2039 | struct extent_map *em; | 2513 | struct extent_map *em; |
| 2040 | int i; | 2514 | int i; |
| 2041 | int ret = -EINVAL; | 2515 | int ret = 0; |
| 2042 | 2516 | ||
| 2043 | read_lock(&map_tree->map_tree.lock); | 2517 | read_lock(&map_tree->map_tree.lock); |
| 2044 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2518 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
| @@ -2055,9 +2529,11 @@ static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, | |||
| 2055 | goto out; | 2529 | goto out; |
| 2056 | 2530 | ||
| 2057 | for (i = 0; i < map->num_stripes; ++i) { | 2531 | for (i = 0; i < map->num_stripes; ++i) { |
| 2058 | if (map->stripes[i].dev == sdev->dev && | 2532 | if (map->stripes[i].dev->bdev == scrub_dev->bdev && |
| 2059 | map->stripes[i].physical == dev_offset) { | 2533 | map->stripes[i].physical == dev_offset) { |
| 2060 | ret = scrub_stripe(sdev, map, i, chunk_offset, length); | 2534 | ret = scrub_stripe(sctx, map, scrub_dev, i, |
| 2535 | chunk_offset, length, | ||
| 2536 | is_dev_replace); | ||
| 2061 | if (ret) | 2537 | if (ret) |
| 2062 | goto out; | 2538 | goto out; |
| 2063 | } | 2539 | } |
| @@ -2069,11 +2545,13 @@ out: | |||
| 2069 | } | 2545 | } |
| 2070 | 2546 | ||
| 2071 | static noinline_for_stack | 2547 | static noinline_for_stack |
| 2072 | int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | 2548 | int scrub_enumerate_chunks(struct scrub_ctx *sctx, |
| 2549 | struct btrfs_device *scrub_dev, u64 start, u64 end, | ||
| 2550 | int is_dev_replace) | ||
| 2073 | { | 2551 | { |
| 2074 | struct btrfs_dev_extent *dev_extent = NULL; | 2552 | struct btrfs_dev_extent *dev_extent = NULL; |
| 2075 | struct btrfs_path *path; | 2553 | struct btrfs_path *path; |
| 2076 | struct btrfs_root *root = sdev->dev->dev_root; | 2554 | struct btrfs_root *root = sctx->dev_root; |
| 2077 | struct btrfs_fs_info *fs_info = root->fs_info; | 2555 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 2078 | u64 length; | 2556 | u64 length; |
| 2079 | u64 chunk_tree; | 2557 | u64 chunk_tree; |
| @@ -2085,6 +2563,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2085 | struct btrfs_key key; | 2563 | struct btrfs_key key; |
| 2086 | struct btrfs_key found_key; | 2564 | struct btrfs_key found_key; |
| 2087 | struct btrfs_block_group_cache *cache; | 2565 | struct btrfs_block_group_cache *cache; |
| 2566 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 2088 | 2567 | ||
| 2089 | path = btrfs_alloc_path(); | 2568 | path = btrfs_alloc_path(); |
| 2090 | if (!path) | 2569 | if (!path) |
| @@ -2094,11 +2573,10 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2094 | path->search_commit_root = 1; | 2573 | path->search_commit_root = 1; |
| 2095 | path->skip_locking = 1; | 2574 | path->skip_locking = 1; |
| 2096 | 2575 | ||
| 2097 | key.objectid = sdev->dev->devid; | 2576 | key.objectid = scrub_dev->devid; |
| 2098 | key.offset = 0ull; | 2577 | key.offset = 0ull; |
| 2099 | key.type = BTRFS_DEV_EXTENT_KEY; | 2578 | key.type = BTRFS_DEV_EXTENT_KEY; |
| 2100 | 2579 | ||
| 2101 | |||
| 2102 | while (1) { | 2580 | while (1) { |
| 2103 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 2581 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 2104 | if (ret < 0) | 2582 | if (ret < 0) |
| @@ -2117,7 +2595,7 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2117 | 2595 | ||
| 2118 | btrfs_item_key_to_cpu(l, &found_key, slot); | 2596 | btrfs_item_key_to_cpu(l, &found_key, slot); |
| 2119 | 2597 | ||
| 2120 | if (found_key.objectid != sdev->dev->devid) | 2598 | if (found_key.objectid != scrub_dev->devid) |
| 2121 | break; | 2599 | break; |
| 2122 | 2600 | ||
| 2123 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) | 2601 | if (btrfs_key_type(&found_key) != BTRFS_DEV_EXTENT_KEY) |
| @@ -2151,11 +2629,62 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2151 | ret = -ENOENT; | 2629 | ret = -ENOENT; |
| 2152 | break; | 2630 | break; |
| 2153 | } | 2631 | } |
| 2154 | ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, | 2632 | dev_replace->cursor_right = found_key.offset + length; |
| 2155 | chunk_offset, length, found_key.offset); | 2633 | dev_replace->cursor_left = found_key.offset; |
| 2634 | dev_replace->item_needs_writeback = 1; | ||
| 2635 | ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid, | ||
| 2636 | chunk_offset, length, found_key.offset, | ||
| 2637 | is_dev_replace); | ||
| 2638 | |||
| 2639 | /* | ||
| 2640 | * flush, submit all pending read and write bios, afterwards | ||
| 2641 | * wait for them. | ||
| 2642 | * Note that in the dev replace case, a read request causes | ||
| 2643 | * write requests that are submitted in the read completion | ||
| 2644 | * worker. Therefore in the current situation, it is required | ||
| 2645 | * that all write requests are flushed, so that all read and | ||
| 2646 | * write requests are really completed when bios_in_flight | ||
| 2647 | * changes to 0. | ||
| 2648 | */ | ||
| 2649 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); | ||
| 2650 | scrub_submit(sctx); | ||
| 2651 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
| 2652 | scrub_wr_submit(sctx); | ||
| 2653 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
| 2654 | |||
| 2655 | wait_event(sctx->list_wait, | ||
| 2656 | atomic_read(&sctx->bios_in_flight) == 0); | ||
| 2657 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | ||
| 2658 | atomic_inc(&fs_info->scrubs_paused); | ||
| 2659 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2660 | wait_event(sctx->list_wait, | ||
| 2661 | atomic_read(&sctx->workers_pending) == 0); | ||
| 2662 | |||
| 2663 | mutex_lock(&fs_info->scrub_lock); | ||
| 2664 | while (atomic_read(&fs_info->scrub_pause_req)) { | ||
| 2665 | mutex_unlock(&fs_info->scrub_lock); | ||
| 2666 | wait_event(fs_info->scrub_pause_wait, | ||
| 2667 | atomic_read(&fs_info->scrub_pause_req) == 0); | ||
| 2668 | mutex_lock(&fs_info->scrub_lock); | ||
| 2669 | } | ||
| 2670 | atomic_dec(&fs_info->scrubs_paused); | ||
| 2671 | mutex_unlock(&fs_info->scrub_lock); | ||
| 2672 | wake_up(&fs_info->scrub_pause_wait); | ||
| 2673 | |||
| 2674 | dev_replace->cursor_left = dev_replace->cursor_right; | ||
| 2675 | dev_replace->item_needs_writeback = 1; | ||
| 2156 | btrfs_put_block_group(cache); | 2676 | btrfs_put_block_group(cache); |
| 2157 | if (ret) | 2677 | if (ret) |
| 2158 | break; | 2678 | break; |
| 2679 | if (is_dev_replace && | ||
| 2680 | atomic64_read(&dev_replace->num_write_errors) > 0) { | ||
| 2681 | ret = -EIO; | ||
| 2682 | break; | ||
| 2683 | } | ||
| 2684 | if (sctx->stat.malloc_errors > 0) { | ||
| 2685 | ret = -ENOMEM; | ||
| 2686 | break; | ||
| 2687 | } | ||
| 2159 | 2688 | ||
| 2160 | key.offset = found_key.offset + length; | 2689 | key.offset = found_key.offset + length; |
| 2161 | btrfs_release_path(path); | 2690 | btrfs_release_path(path); |
| @@ -2170,14 +2699,14 @@ int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | |||
| 2170 | return ret < 0 ? ret : 0; | 2699 | return ret < 0 ? ret : 0; |
| 2171 | } | 2700 | } |
| 2172 | 2701 | ||
| 2173 | static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | 2702 | static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, |
| 2703 | struct btrfs_device *scrub_dev) | ||
| 2174 | { | 2704 | { |
| 2175 | int i; | 2705 | int i; |
| 2176 | u64 bytenr; | 2706 | u64 bytenr; |
| 2177 | u64 gen; | 2707 | u64 gen; |
| 2178 | int ret; | 2708 | int ret; |
| 2179 | struct btrfs_device *device = sdev->dev; | 2709 | struct btrfs_root *root = sctx->dev_root; |
| 2180 | struct btrfs_root *root = device->dev_root; | ||
| 2181 | 2710 | ||
| 2182 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 2711 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) |
| 2183 | return -EIO; | 2712 | return -EIO; |
| @@ -2186,15 +2715,16 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | |||
| 2186 | 2715 | ||
| 2187 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | 2716 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { |
| 2188 | bytenr = btrfs_sb_offset(i); | 2717 | bytenr = btrfs_sb_offset(i); |
| 2189 | if (bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes) | 2718 | if (bytenr + BTRFS_SUPER_INFO_SIZE > scrub_dev->total_bytes) |
| 2190 | break; | 2719 | break; |
| 2191 | 2720 | ||
| 2192 | ret = scrub_pages(sdev, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, | 2721 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, |
| 2193 | BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); | 2722 | scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i, |
| 2723 | NULL, 1, bytenr); | ||
| 2194 | if (ret) | 2724 | if (ret) |
| 2195 | return ret; | 2725 | return ret; |
| 2196 | } | 2726 | } |
| 2197 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | 2727 | wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); |
| 2198 | 2728 | ||
| 2199 | return 0; | 2729 | return 0; |
| 2200 | } | 2730 | } |
| @@ -2202,19 +2732,38 @@ static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | |||
| 2202 | /* | 2732 | /* |
| 2203 | * get a reference count on fs_info->scrub_workers. start worker if necessary | 2733 | * get a reference count on fs_info->scrub_workers. start worker if necessary |
| 2204 | */ | 2734 | */ |
| 2205 | static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) | 2735 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, |
| 2736 | int is_dev_replace) | ||
| 2206 | { | 2737 | { |
| 2207 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2208 | int ret = 0; | 2738 | int ret = 0; |
| 2209 | 2739 | ||
| 2210 | mutex_lock(&fs_info->scrub_lock); | 2740 | mutex_lock(&fs_info->scrub_lock); |
| 2211 | if (fs_info->scrub_workers_refcnt == 0) { | 2741 | if (fs_info->scrub_workers_refcnt == 0) { |
| 2212 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2742 | if (is_dev_replace) |
| 2213 | fs_info->thread_pool_size, &fs_info->generic_worker); | 2743 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, |
| 2744 | &fs_info->generic_worker); | ||
| 2745 | else | ||
| 2746 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | ||
| 2747 | fs_info->thread_pool_size, | ||
| 2748 | &fs_info->generic_worker); | ||
| 2214 | fs_info->scrub_workers.idle_thresh = 4; | 2749 | fs_info->scrub_workers.idle_thresh = 4; |
| 2215 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2750 | ret = btrfs_start_workers(&fs_info->scrub_workers); |
| 2216 | if (ret) | 2751 | if (ret) |
| 2217 | goto out; | 2752 | goto out; |
| 2753 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | ||
| 2754 | "scrubwrc", | ||
| 2755 | fs_info->thread_pool_size, | ||
| 2756 | &fs_info->generic_worker); | ||
| 2757 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | ||
| 2758 | ret = btrfs_start_workers( | ||
| 2759 | &fs_info->scrub_wr_completion_workers); | ||
| 2760 | if (ret) | ||
| 2761 | goto out; | ||
| 2762 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | ||
| 2763 | &fs_info->generic_worker); | ||
| 2764 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | ||
| 2765 | if (ret) | ||
| 2766 | goto out; | ||
| 2218 | } | 2767 | } |
| 2219 | ++fs_info->scrub_workers_refcnt; | 2768 | ++fs_info->scrub_workers_refcnt; |
| 2220 | out: | 2769 | out: |
| @@ -2223,40 +2772,41 @@ out: | |||
| 2223 | return ret; | 2772 | return ret; |
| 2224 | } | 2773 | } |
| 2225 | 2774 | ||
| 2226 | static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) | 2775 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
| 2227 | { | 2776 | { |
| 2228 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2229 | |||
| 2230 | mutex_lock(&fs_info->scrub_lock); | 2777 | mutex_lock(&fs_info->scrub_lock); |
| 2231 | if (--fs_info->scrub_workers_refcnt == 0) | 2778 | if (--fs_info->scrub_workers_refcnt == 0) { |
| 2232 | btrfs_stop_workers(&fs_info->scrub_workers); | 2779 | btrfs_stop_workers(&fs_info->scrub_workers); |
| 2780 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | ||
| 2781 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | ||
| 2782 | } | ||
| 2233 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2783 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
| 2234 | mutex_unlock(&fs_info->scrub_lock); | 2784 | mutex_unlock(&fs_info->scrub_lock); |
| 2235 | } | 2785 | } |
| 2236 | 2786 | ||
| 2237 | 2787 | int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |
| 2238 | int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | 2788 | u64 end, struct btrfs_scrub_progress *progress, |
| 2239 | struct btrfs_scrub_progress *progress, int readonly) | 2789 | int readonly, int is_dev_replace) |
| 2240 | { | 2790 | { |
| 2241 | struct scrub_dev *sdev; | 2791 | struct scrub_ctx *sctx; |
| 2242 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2243 | int ret; | 2792 | int ret; |
| 2244 | struct btrfs_device *dev; | 2793 | struct btrfs_device *dev; |
| 2245 | 2794 | ||
| 2246 | if (btrfs_fs_closing(root->fs_info)) | 2795 | if (btrfs_fs_closing(fs_info)) |
| 2247 | return -EINVAL; | 2796 | return -EINVAL; |
| 2248 | 2797 | ||
| 2249 | /* | 2798 | /* |
| 2250 | * check some assumptions | 2799 | * check some assumptions |
| 2251 | */ | 2800 | */ |
| 2252 | if (root->nodesize != root->leafsize) { | 2801 | if (fs_info->chunk_root->nodesize != fs_info->chunk_root->leafsize) { |
| 2253 | printk(KERN_ERR | 2802 | printk(KERN_ERR |
| 2254 | "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n", | 2803 | "btrfs_scrub: size assumption nodesize == leafsize (%d == %d) fails\n", |
| 2255 | root->nodesize, root->leafsize); | 2804 | fs_info->chunk_root->nodesize, |
| 2805 | fs_info->chunk_root->leafsize); | ||
| 2256 | return -EINVAL; | 2806 | return -EINVAL; |
| 2257 | } | 2807 | } |
| 2258 | 2808 | ||
| 2259 | if (root->nodesize > BTRFS_STRIPE_LEN) { | 2809 | if (fs_info->chunk_root->nodesize > BTRFS_STRIPE_LEN) { |
| 2260 | /* | 2810 | /* |
| 2261 | * in this case scrub is unable to calculate the checksum | 2811 | * in this case scrub is unable to calculate the checksum |
| 2262 | * the way scrub is implemented. Do not handle this | 2812 | * the way scrub is implemented. Do not handle this |
| @@ -2264,80 +2814,105 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | |||
| 2264 | */ | 2814 | */ |
| 2265 | printk(KERN_ERR | 2815 | printk(KERN_ERR |
| 2266 | "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n", | 2816 | "btrfs_scrub: size assumption nodesize <= BTRFS_STRIPE_LEN (%d <= %d) fails\n", |
| 2267 | root->nodesize, BTRFS_STRIPE_LEN); | 2817 | fs_info->chunk_root->nodesize, BTRFS_STRIPE_LEN); |
| 2268 | return -EINVAL; | 2818 | return -EINVAL; |
| 2269 | } | 2819 | } |
| 2270 | 2820 | ||
| 2271 | if (root->sectorsize != PAGE_SIZE) { | 2821 | if (fs_info->chunk_root->sectorsize != PAGE_SIZE) { |
| 2272 | /* not supported for data w/o checksums */ | 2822 | /* not supported for data w/o checksums */ |
| 2273 | printk(KERN_ERR | 2823 | printk(KERN_ERR |
| 2274 | "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", | 2824 | "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n", |
| 2275 | root->sectorsize, (unsigned long long)PAGE_SIZE); | 2825 | fs_info->chunk_root->sectorsize, |
| 2826 | (unsigned long long)PAGE_SIZE); | ||
| 2276 | return -EINVAL; | 2827 | return -EINVAL; |
| 2277 | } | 2828 | } |
| 2278 | 2829 | ||
| 2279 | ret = scrub_workers_get(root); | 2830 | if (fs_info->chunk_root->nodesize > |
| 2831 | PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK || | ||
| 2832 | fs_info->chunk_root->sectorsize > | ||
| 2833 | PAGE_SIZE * SCRUB_MAX_PAGES_PER_BLOCK) { | ||
| 2834 | /* | ||
| 2835 | * would exhaust the array bounds of pagev member in | ||
| 2836 | * struct scrub_block | ||
| 2837 | */ | ||
| 2838 | pr_err("btrfs_scrub: size assumption nodesize and sectorsize <= SCRUB_MAX_PAGES_PER_BLOCK (%d <= %d && %d <= %d) fails\n", | ||
| 2839 | fs_info->chunk_root->nodesize, | ||
| 2840 | SCRUB_MAX_PAGES_PER_BLOCK, | ||
| 2841 | fs_info->chunk_root->sectorsize, | ||
| 2842 | SCRUB_MAX_PAGES_PER_BLOCK); | ||
| 2843 | return -EINVAL; | ||
| 2844 | } | ||
| 2845 | |||
| 2846 | ret = scrub_workers_get(fs_info, is_dev_replace); | ||
| 2280 | if (ret) | 2847 | if (ret) |
| 2281 | return ret; | 2848 | return ret; |
| 2282 | 2849 | ||
| 2283 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2850 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
| 2284 | dev = btrfs_find_device(root, devid, NULL, NULL); | 2851 | dev = btrfs_find_device(fs_info, devid, NULL, NULL); |
| 2285 | if (!dev || dev->missing) { | 2852 | if (!dev || (dev->missing && !is_dev_replace)) { |
| 2286 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2853 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2287 | scrub_workers_put(root); | 2854 | scrub_workers_put(fs_info); |
| 2288 | return -ENODEV; | 2855 | return -ENODEV; |
| 2289 | } | 2856 | } |
| 2290 | mutex_lock(&fs_info->scrub_lock); | 2857 | mutex_lock(&fs_info->scrub_lock); |
| 2291 | 2858 | ||
| 2292 | if (!dev->in_fs_metadata) { | 2859 | if (!dev->in_fs_metadata || dev->is_tgtdev_for_dev_replace) { |
| 2293 | mutex_unlock(&fs_info->scrub_lock); | 2860 | mutex_unlock(&fs_info->scrub_lock); |
| 2294 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2861 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2295 | scrub_workers_put(root); | 2862 | scrub_workers_put(fs_info); |
| 2296 | return -ENODEV; | 2863 | return -EIO; |
| 2297 | } | 2864 | } |
| 2298 | 2865 | ||
| 2299 | if (dev->scrub_device) { | 2866 | btrfs_dev_replace_lock(&fs_info->dev_replace); |
| 2867 | if (dev->scrub_device || | ||
| 2868 | (!is_dev_replace && | ||
| 2869 | btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) { | ||
| 2870 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 2300 | mutex_unlock(&fs_info->scrub_lock); | 2871 | mutex_unlock(&fs_info->scrub_lock); |
| 2301 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2872 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2302 | scrub_workers_put(root); | 2873 | scrub_workers_put(fs_info); |
| 2303 | return -EINPROGRESS; | 2874 | return -EINPROGRESS; |
| 2304 | } | 2875 | } |
| 2305 | sdev = scrub_setup_dev(dev); | 2876 | btrfs_dev_replace_unlock(&fs_info->dev_replace); |
| 2306 | if (IS_ERR(sdev)) { | 2877 | sctx = scrub_setup_ctx(dev, is_dev_replace); |
| 2878 | if (IS_ERR(sctx)) { | ||
| 2307 | mutex_unlock(&fs_info->scrub_lock); | 2879 | mutex_unlock(&fs_info->scrub_lock); |
| 2308 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2880 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2309 | scrub_workers_put(root); | 2881 | scrub_workers_put(fs_info); |
| 2310 | return PTR_ERR(sdev); | 2882 | return PTR_ERR(sctx); |
| 2311 | } | 2883 | } |
| 2312 | sdev->readonly = readonly; | 2884 | sctx->readonly = readonly; |
| 2313 | dev->scrub_device = sdev; | 2885 | dev->scrub_device = sctx; |
| 2314 | 2886 | ||
| 2315 | atomic_inc(&fs_info->scrubs_running); | 2887 | atomic_inc(&fs_info->scrubs_running); |
| 2316 | mutex_unlock(&fs_info->scrub_lock); | 2888 | mutex_unlock(&fs_info->scrub_lock); |
| 2317 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2889 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2318 | 2890 | ||
| 2319 | down_read(&fs_info->scrub_super_lock); | 2891 | if (!is_dev_replace) { |
| 2320 | ret = scrub_supers(sdev); | 2892 | down_read(&fs_info->scrub_super_lock); |
| 2321 | up_read(&fs_info->scrub_super_lock); | 2893 | ret = scrub_supers(sctx, dev); |
| 2894 | up_read(&fs_info->scrub_super_lock); | ||
| 2895 | } | ||
| 2322 | 2896 | ||
| 2323 | if (!ret) | 2897 | if (!ret) |
| 2324 | ret = scrub_enumerate_chunks(sdev, start, end); | 2898 | ret = scrub_enumerate_chunks(sctx, dev, start, end, |
| 2899 | is_dev_replace); | ||
| 2325 | 2900 | ||
| 2326 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | 2901 | wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); |
| 2327 | atomic_dec(&fs_info->scrubs_running); | 2902 | atomic_dec(&fs_info->scrubs_running); |
| 2328 | wake_up(&fs_info->scrub_pause_wait); | 2903 | wake_up(&fs_info->scrub_pause_wait); |
| 2329 | 2904 | ||
| 2330 | wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0); | 2905 | wait_event(sctx->list_wait, atomic_read(&sctx->workers_pending) == 0); |
| 2331 | 2906 | ||
| 2332 | if (progress) | 2907 | if (progress) |
| 2333 | memcpy(progress, &sdev->stat, sizeof(*progress)); | 2908 | memcpy(progress, &sctx->stat, sizeof(*progress)); |
| 2334 | 2909 | ||
| 2335 | mutex_lock(&fs_info->scrub_lock); | 2910 | mutex_lock(&fs_info->scrub_lock); |
| 2336 | dev->scrub_device = NULL; | 2911 | dev->scrub_device = NULL; |
| 2337 | mutex_unlock(&fs_info->scrub_lock); | 2912 | mutex_unlock(&fs_info->scrub_lock); |
| 2338 | 2913 | ||
| 2339 | scrub_free_dev(sdev); | 2914 | scrub_free_ctx(sctx); |
| 2340 | scrub_workers_put(root); | 2915 | scrub_workers_put(fs_info); |
| 2341 | 2916 | ||
| 2342 | return ret; | 2917 | return ret; |
| 2343 | } | 2918 | } |
| @@ -2377,9 +2952,8 @@ void btrfs_scrub_continue_super(struct btrfs_root *root) | |||
| 2377 | up_write(&root->fs_info->scrub_super_lock); | 2952 | up_write(&root->fs_info->scrub_super_lock); |
| 2378 | } | 2953 | } |
| 2379 | 2954 | ||
| 2380 | int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) | 2955 | int btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) |
| 2381 | { | 2956 | { |
| 2382 | |||
| 2383 | mutex_lock(&fs_info->scrub_lock); | 2957 | mutex_lock(&fs_info->scrub_lock); |
| 2384 | if (!atomic_read(&fs_info->scrubs_running)) { | 2958 | if (!atomic_read(&fs_info->scrubs_running)) { |
| 2385 | mutex_unlock(&fs_info->scrub_lock); | 2959 | mutex_unlock(&fs_info->scrub_lock); |
| @@ -2399,23 +2973,18 @@ int __btrfs_scrub_cancel(struct btrfs_fs_info *fs_info) | |||
| 2399 | return 0; | 2973 | return 0; |
| 2400 | } | 2974 | } |
| 2401 | 2975 | ||
| 2402 | int btrfs_scrub_cancel(struct btrfs_root *root) | 2976 | int btrfs_scrub_cancel_dev(struct btrfs_fs_info *fs_info, |
| 2977 | struct btrfs_device *dev) | ||
| 2403 | { | 2978 | { |
| 2404 | return __btrfs_scrub_cancel(root->fs_info); | 2979 | struct scrub_ctx *sctx; |
| 2405 | } | ||
| 2406 | |||
| 2407 | int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) | ||
| 2408 | { | ||
| 2409 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2410 | struct scrub_dev *sdev; | ||
| 2411 | 2980 | ||
| 2412 | mutex_lock(&fs_info->scrub_lock); | 2981 | mutex_lock(&fs_info->scrub_lock); |
| 2413 | sdev = dev->scrub_device; | 2982 | sctx = dev->scrub_device; |
| 2414 | if (!sdev) { | 2983 | if (!sctx) { |
| 2415 | mutex_unlock(&fs_info->scrub_lock); | 2984 | mutex_unlock(&fs_info->scrub_lock); |
| 2416 | return -ENOTCONN; | 2985 | return -ENOTCONN; |
| 2417 | } | 2986 | } |
| 2418 | atomic_inc(&sdev->cancel_req); | 2987 | atomic_inc(&sctx->cancel_req); |
| 2419 | while (dev->scrub_device) { | 2988 | while (dev->scrub_device) { |
| 2420 | mutex_unlock(&fs_info->scrub_lock); | 2989 | mutex_unlock(&fs_info->scrub_lock); |
| 2421 | wait_event(fs_info->scrub_pause_wait, | 2990 | wait_event(fs_info->scrub_pause_wait, |
| @@ -2438,12 +3007,12 @@ int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) | |||
| 2438 | * does not go away in cancel_dev. FIXME: find a better solution | 3007 | * does not go away in cancel_dev. FIXME: find a better solution |
| 2439 | */ | 3008 | */ |
| 2440 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | 3009 | mutex_lock(&fs_info->fs_devices->device_list_mutex); |
| 2441 | dev = btrfs_find_device(root, devid, NULL, NULL); | 3010 | dev = btrfs_find_device(fs_info, devid, NULL, NULL); |
| 2442 | if (!dev) { | 3011 | if (!dev) { |
| 2443 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 3012 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2444 | return -ENODEV; | 3013 | return -ENODEV; |
| 2445 | } | 3014 | } |
| 2446 | ret = btrfs_scrub_cancel_dev(root, dev); | 3015 | ret = btrfs_scrub_cancel_dev(fs_info, dev); |
| 2447 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 3016 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
| 2448 | 3017 | ||
| 2449 | return ret; | 3018 | return ret; |
| @@ -2453,15 +3022,291 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | |||
| 2453 | struct btrfs_scrub_progress *progress) | 3022 | struct btrfs_scrub_progress *progress) |
| 2454 | { | 3023 | { |
| 2455 | struct btrfs_device *dev; | 3024 | struct btrfs_device *dev; |
| 2456 | struct scrub_dev *sdev = NULL; | 3025 | struct scrub_ctx *sctx = NULL; |
| 2457 | 3026 | ||
| 2458 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 3027 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 2459 | dev = btrfs_find_device(root, devid, NULL, NULL); | 3028 | dev = btrfs_find_device(root->fs_info, devid, NULL, NULL); |
| 2460 | if (dev) | 3029 | if (dev) |
| 2461 | sdev = dev->scrub_device; | 3030 | sctx = dev->scrub_device; |
| 2462 | if (sdev) | 3031 | if (sctx) |
| 2463 | memcpy(progress, &sdev->stat, sizeof(*progress)); | 3032 | memcpy(progress, &sctx->stat, sizeof(*progress)); |
| 2464 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 3033 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
| 2465 | 3034 | ||
| 2466 | return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV; | 3035 | return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV; |
| 3036 | } | ||
| 3037 | |||
| 3038 | static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | ||
| 3039 | u64 extent_logical, u64 extent_len, | ||
| 3040 | u64 *extent_physical, | ||
| 3041 | struct btrfs_device **extent_dev, | ||
| 3042 | int *extent_mirror_num) | ||
| 3043 | { | ||
| 3044 | u64 mapped_length; | ||
| 3045 | struct btrfs_bio *bbio = NULL; | ||
| 3046 | int ret; | ||
| 3047 | |||
| 3048 | mapped_length = extent_len; | ||
| 3049 | ret = btrfs_map_block(fs_info, READ, extent_logical, | ||
| 3050 | &mapped_length, &bbio, 0); | ||
| 3051 | if (ret || !bbio || mapped_length < extent_len || | ||
| 3052 | !bbio->stripes[0].dev->bdev) { | ||
| 3053 | kfree(bbio); | ||
| 3054 | return; | ||
| 3055 | } | ||
| 3056 | |||
| 3057 | *extent_physical = bbio->stripes[0].physical; | ||
| 3058 | *extent_mirror_num = bbio->mirror_num; | ||
| 3059 | *extent_dev = bbio->stripes[0].dev; | ||
| 3060 | kfree(bbio); | ||
| 3061 | } | ||
| 3062 | |||
| 3063 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, | ||
| 3064 | struct scrub_wr_ctx *wr_ctx, | ||
| 3065 | struct btrfs_fs_info *fs_info, | ||
| 3066 | struct btrfs_device *dev, | ||
| 3067 | int is_dev_replace) | ||
| 3068 | { | ||
| 3069 | WARN_ON(wr_ctx->wr_curr_bio != NULL); | ||
| 3070 | |||
| 3071 | mutex_init(&wr_ctx->wr_lock); | ||
| 3072 | wr_ctx->wr_curr_bio = NULL; | ||
| 3073 | if (!is_dev_replace) | ||
| 3074 | return 0; | ||
| 3075 | |||
| 3076 | WARN_ON(!dev->bdev); | ||
| 3077 | wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO, | ||
| 3078 | bio_get_nr_vecs(dev->bdev)); | ||
| 3079 | wr_ctx->tgtdev = dev; | ||
| 3080 | atomic_set(&wr_ctx->flush_all_writes, 0); | ||
| 3081 | return 0; | ||
| 3082 | } | ||
| 3083 | |||
| 3084 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx) | ||
| 3085 | { | ||
| 3086 | mutex_lock(&wr_ctx->wr_lock); | ||
| 3087 | kfree(wr_ctx->wr_curr_bio); | ||
| 3088 | wr_ctx->wr_curr_bio = NULL; | ||
| 3089 | mutex_unlock(&wr_ctx->wr_lock); | ||
| 3090 | } | ||
| 3091 | |||
| 3092 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | ||
| 3093 | int mirror_num, u64 physical_for_dev_replace) | ||
| 3094 | { | ||
| 3095 | struct scrub_copy_nocow_ctx *nocow_ctx; | ||
| 3096 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
| 3097 | |||
| 3098 | nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS); | ||
| 3099 | if (!nocow_ctx) { | ||
| 3100 | spin_lock(&sctx->stat_lock); | ||
| 3101 | sctx->stat.malloc_errors++; | ||
| 3102 | spin_unlock(&sctx->stat_lock); | ||
| 3103 | return -ENOMEM; | ||
| 3104 | } | ||
| 3105 | |||
| 3106 | scrub_pending_trans_workers_inc(sctx); | ||
| 3107 | |||
| 3108 | nocow_ctx->sctx = sctx; | ||
| 3109 | nocow_ctx->logical = logical; | ||
| 3110 | nocow_ctx->len = len; | ||
| 3111 | nocow_ctx->mirror_num = mirror_num; | ||
| 3112 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | ||
| 3113 | nocow_ctx->work.func = copy_nocow_pages_worker; | ||
| 3114 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | ||
| 3115 | &nocow_ctx->work); | ||
| 3116 | |||
| 3117 | return 0; | ||
| 3118 | } | ||
| 3119 | |||
| 3120 | static void copy_nocow_pages_worker(struct btrfs_work *work) | ||
| 3121 | { | ||
| 3122 | struct scrub_copy_nocow_ctx *nocow_ctx = | ||
| 3123 | container_of(work, struct scrub_copy_nocow_ctx, work); | ||
| 3124 | struct scrub_ctx *sctx = nocow_ctx->sctx; | ||
| 3125 | u64 logical = nocow_ctx->logical; | ||
| 3126 | u64 len = nocow_ctx->len; | ||
| 3127 | int mirror_num = nocow_ctx->mirror_num; | ||
| 3128 | u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | ||
| 3129 | int ret; | ||
| 3130 | struct btrfs_trans_handle *trans = NULL; | ||
| 3131 | struct btrfs_fs_info *fs_info; | ||
| 3132 | struct btrfs_path *path; | ||
| 3133 | struct btrfs_root *root; | ||
| 3134 | int not_written = 0; | ||
| 3135 | |||
| 3136 | fs_info = sctx->dev_root->fs_info; | ||
| 3137 | root = fs_info->extent_root; | ||
| 3138 | |||
| 3139 | path = btrfs_alloc_path(); | ||
| 3140 | if (!path) { | ||
| 3141 | spin_lock(&sctx->stat_lock); | ||
| 3142 | sctx->stat.malloc_errors++; | ||
| 3143 | spin_unlock(&sctx->stat_lock); | ||
| 3144 | not_written = 1; | ||
| 3145 | goto out; | ||
| 3146 | } | ||
| 3147 | |||
| 3148 | trans = btrfs_join_transaction(root); | ||
| 3149 | if (IS_ERR(trans)) { | ||
| 3150 | not_written = 1; | ||
| 3151 | goto out; | ||
| 3152 | } | ||
| 3153 | |||
| 3154 | ret = iterate_inodes_from_logical(logical, fs_info, path, | ||
| 3155 | copy_nocow_pages_for_inode, | ||
| 3156 | nocow_ctx); | ||
| 3157 | if (ret != 0 && ret != -ENOENT) { | ||
| 3158 | pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", | ||
| 3159 | (unsigned long long)logical, | ||
| 3160 | (unsigned long long)physical_for_dev_replace, | ||
| 3161 | (unsigned long long)len, | ||
| 3162 | (unsigned long long)mirror_num, ret); | ||
| 3163 | not_written = 1; | ||
| 3164 | goto out; | ||
| 3165 | } | ||
| 3166 | |||
| 3167 | out: | ||
| 3168 | if (trans && !IS_ERR(trans)) | ||
| 3169 | btrfs_end_transaction(trans, root); | ||
| 3170 | if (not_written) | ||
| 3171 | btrfs_dev_replace_stats_inc(&fs_info->dev_replace. | ||
| 3172 | num_uncorrectable_read_errors); | ||
| 3173 | |||
| 3174 | btrfs_free_path(path); | ||
| 3175 | kfree(nocow_ctx); | ||
| 3176 | |||
| 3177 | scrub_pending_trans_workers_dec(sctx); | ||
| 3178 | } | ||
| 3179 | |||
| 3180 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | ||
| 3181 | { | ||
| 3182 | unsigned long index; | ||
| 3183 | struct scrub_copy_nocow_ctx *nocow_ctx = ctx; | ||
| 3184 | int ret = 0; | ||
| 3185 | struct btrfs_key key; | ||
| 3186 | struct inode *inode = NULL; | ||
| 3187 | struct btrfs_root *local_root; | ||
| 3188 | u64 physical_for_dev_replace; | ||
| 3189 | u64 len; | ||
| 3190 | struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; | ||
| 3191 | int srcu_index; | ||
| 3192 | |||
| 3193 | key.objectid = root; | ||
| 3194 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 3195 | key.offset = (u64)-1; | ||
| 3196 | |||
| 3197 | srcu_index = srcu_read_lock(&fs_info->subvol_srcu); | ||
| 3198 | |||
| 3199 | local_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
| 3200 | if (IS_ERR(local_root)) { | ||
| 3201 | srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); | ||
| 3202 | return PTR_ERR(local_root); | ||
| 3203 | } | ||
| 3204 | |||
| 3205 | key.type = BTRFS_INODE_ITEM_KEY; | ||
| 3206 | key.objectid = inum; | ||
| 3207 | key.offset = 0; | ||
| 3208 | inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); | ||
| 3209 | srcu_read_unlock(&fs_info->subvol_srcu, srcu_index); | ||
| 3210 | if (IS_ERR(inode)) | ||
| 3211 | return PTR_ERR(inode); | ||
| 3212 | |||
| 3213 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | ||
| 3214 | len = nocow_ctx->len; | ||
| 3215 | while (len >= PAGE_CACHE_SIZE) { | ||
| 3216 | struct page *page = NULL; | ||
| 3217 | int ret_sub; | ||
| 3218 | |||
| 3219 | index = offset >> PAGE_CACHE_SHIFT; | ||
| 3220 | |||
| 3221 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
| 3222 | if (!page) { | ||
| 3223 | pr_err("find_or_create_page() failed\n"); | ||
| 3224 | ret = -ENOMEM; | ||
| 3225 | goto next_page; | ||
| 3226 | } | ||
| 3227 | |||
| 3228 | if (PageUptodate(page)) { | ||
| 3229 | if (PageDirty(page)) | ||
| 3230 | goto next_page; | ||
| 3231 | } else { | ||
| 3232 | ClearPageError(page); | ||
| 3233 | ret_sub = extent_read_full_page(&BTRFS_I(inode)-> | ||
| 3234 | io_tree, | ||
| 3235 | page, btrfs_get_extent, | ||
| 3236 | nocow_ctx->mirror_num); | ||
| 3237 | if (ret_sub) { | ||
| 3238 | ret = ret_sub; | ||
| 3239 | goto next_page; | ||
| 3240 | } | ||
| 3241 | wait_on_page_locked(page); | ||
| 3242 | if (!PageUptodate(page)) { | ||
| 3243 | ret = -EIO; | ||
| 3244 | goto next_page; | ||
| 3245 | } | ||
| 3246 | } | ||
| 3247 | ret_sub = write_page_nocow(nocow_ctx->sctx, | ||
| 3248 | physical_for_dev_replace, page); | ||
| 3249 | if (ret_sub) { | ||
| 3250 | ret = ret_sub; | ||
| 3251 | goto next_page; | ||
| 3252 | } | ||
| 3253 | |||
| 3254 | next_page: | ||
| 3255 | if (page) { | ||
| 3256 | unlock_page(page); | ||
| 3257 | put_page(page); | ||
| 3258 | } | ||
| 3259 | offset += PAGE_CACHE_SIZE; | ||
| 3260 | physical_for_dev_replace += PAGE_CACHE_SIZE; | ||
| 3261 | len -= PAGE_CACHE_SIZE; | ||
| 3262 | } | ||
| 3263 | |||
| 3264 | if (inode) | ||
| 3265 | iput(inode); | ||
| 3266 | return ret; | ||
| 3267 | } | ||
| 3268 | |||
| 3269 | static int write_page_nocow(struct scrub_ctx *sctx, | ||
| 3270 | u64 physical_for_dev_replace, struct page *page) | ||
| 3271 | { | ||
| 3272 | struct bio *bio; | ||
| 3273 | struct btrfs_device *dev; | ||
| 3274 | int ret; | ||
| 3275 | DECLARE_COMPLETION_ONSTACK(compl); | ||
| 3276 | |||
| 3277 | dev = sctx->wr_ctx.tgtdev; | ||
| 3278 | if (!dev) | ||
| 3279 | return -EIO; | ||
| 3280 | if (!dev->bdev) { | ||
| 3281 | printk_ratelimited(KERN_WARNING | ||
| 3282 | "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); | ||
| 3283 | return -EIO; | ||
| 3284 | } | ||
| 3285 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 3286 | if (!bio) { | ||
| 3287 | spin_lock(&sctx->stat_lock); | ||
| 3288 | sctx->stat.malloc_errors++; | ||
| 3289 | spin_unlock(&sctx->stat_lock); | ||
| 3290 | return -ENOMEM; | ||
| 3291 | } | ||
| 3292 | bio->bi_private = &compl; | ||
| 3293 | bio->bi_end_io = scrub_complete_bio_end_io; | ||
| 3294 | bio->bi_size = 0; | ||
| 3295 | bio->bi_sector = physical_for_dev_replace >> 9; | ||
| 3296 | bio->bi_bdev = dev->bdev; | ||
| 3297 | ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); | ||
| 3298 | if (ret != PAGE_CACHE_SIZE) { | ||
| 3299 | leave_with_eio: | ||
| 3300 | bio_put(bio); | ||
| 3301 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | ||
| 3302 | return -EIO; | ||
| 3303 | } | ||
| 3304 | btrfsic_submit_bio(WRITE_SYNC, bio); | ||
| 3305 | wait_for_completion(&compl); | ||
| 3306 | |||
| 3307 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
| 3308 | goto leave_with_eio; | ||
| 3309 | |||
| 3310 | bio_put(bio); | ||
| 3311 | return 0; | ||
| 2467 | } | 3312 | } |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index e78b297b0b00..321b7fb4e441 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
| @@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx, | |||
| 1814 | (unsigned long)nce->ino); | 1814 | (unsigned long)nce->ino); |
| 1815 | if (!nce_head) { | 1815 | if (!nce_head) { |
| 1816 | nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); | 1816 | nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); |
| 1817 | if (!nce_head) | 1817 | if (!nce_head) { |
| 1818 | kfree(nce); | ||
| 1818 | return -ENOMEM; | 1819 | return -ENOMEM; |
| 1820 | } | ||
| 1819 | INIT_LIST_HEAD(nce_head); | 1821 | INIT_LIST_HEAD(nce_head); |
| 1820 | 1822 | ||
| 1821 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); | 1823 | ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); |
| @@ -4397,9 +4399,9 @@ static int full_send_tree(struct send_ctx *sctx) | |||
| 4397 | if (!path) | 4399 | if (!path) |
| 4398 | return -ENOMEM; | 4400 | return -ENOMEM; |
| 4399 | 4401 | ||
| 4400 | spin_lock(&send_root->root_times_lock); | 4402 | spin_lock(&send_root->root_item_lock); |
| 4401 | start_ctransid = btrfs_root_ctransid(&send_root->root_item); | 4403 | start_ctransid = btrfs_root_ctransid(&send_root->root_item); |
| 4402 | spin_unlock(&send_root->root_times_lock); | 4404 | spin_unlock(&send_root->root_item_lock); |
| 4403 | 4405 | ||
| 4404 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; | 4406 | key.objectid = BTRFS_FIRST_FREE_OBJECTID; |
| 4405 | key.type = BTRFS_INODE_ITEM_KEY; | 4407 | key.type = BTRFS_INODE_ITEM_KEY; |
| @@ -4422,9 +4424,9 @@ join_trans: | |||
| 4422 | * Make sure the tree has not changed after re-joining. We detect this | 4424 | * Make sure the tree has not changed after re-joining. We detect this |
| 4423 | * by comparing start_ctransid and ctransid. They should always match. | 4425 | * by comparing start_ctransid and ctransid. They should always match. |
| 4424 | */ | 4426 | */ |
| 4425 | spin_lock(&send_root->root_times_lock); | 4427 | spin_lock(&send_root->root_item_lock); |
| 4426 | ctransid = btrfs_root_ctransid(&send_root->root_item); | 4428 | ctransid = btrfs_root_ctransid(&send_root->root_item); |
| 4427 | spin_unlock(&send_root->root_times_lock); | 4429 | spin_unlock(&send_root->root_item_lock); |
| 4428 | 4430 | ||
| 4429 | if (ctransid != start_ctransid) { | 4431 | if (ctransid != start_ctransid) { |
| 4430 | WARN(1, KERN_WARNING "btrfs: the root that you're trying to " | 4432 | WARN(1, KERN_WARNING "btrfs: the root that you're trying to " |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 915ac14c2064..d8982e9601d3 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | #include "export.h" | 55 | #include "export.h" |
| 56 | #include "compression.h" | 56 | #include "compression.h" |
| 57 | #include "rcu-string.h" | 57 | #include "rcu-string.h" |
| 58 | #include "dev-replace.h" | ||
| 58 | 59 | ||
| 59 | #define CREATE_TRACE_POINTS | 60 | #define CREATE_TRACE_POINTS |
| 60 | #include <trace/events/btrfs.h> | 61 | #include <trace/events/btrfs.h> |
| @@ -116,7 +117,16 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | |||
| 116 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 117 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
| 117 | sb->s_flags |= MS_RDONLY; | 118 | sb->s_flags |= MS_RDONLY; |
| 118 | printk(KERN_INFO "btrfs is forced readonly\n"); | 119 | printk(KERN_INFO "btrfs is forced readonly\n"); |
| 119 | __btrfs_scrub_cancel(fs_info); | 120 | /* |
| 121 | * Note that a running device replace operation is not | ||
| 122 | * canceled here although there is no way to update | ||
| 123 | * the progress. It would add the risk of a deadlock, | ||
| 124 | * therefore the canceling is ommited. The only penalty | ||
| 125 | * is that some I/O remains active until the procedure | ||
| 126 | * completes. The next time when the filesystem is | ||
| 127 | * mounted writeable again, the device replace | ||
| 128 | * operation continues. | ||
| 129 | */ | ||
| 120 | // WARN_ON(1); | 130 | // WARN_ON(1); |
| 121 | } | 131 | } |
| 122 | } | 132 | } |
| @@ -257,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
| 257 | function, line, errstr); | 267 | function, line, errstr); |
| 258 | return; | 268 | return; |
| 259 | } | 269 | } |
| 260 | trans->transaction->aborted = errno; | 270 | ACCESS_ONCE(trans->transaction->aborted) = errno; |
| 261 | __btrfs_std_error(root->fs_info, function, line, errno, NULL); | 271 | __btrfs_std_error(root->fs_info, function, line, errno, NULL); |
| 262 | } | 272 | } |
| 263 | /* | 273 | /* |
| @@ -1186,7 +1196,8 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | |||
| 1186 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | 1196 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); |
| 1187 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | 1197 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); |
| 1188 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | 1198 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); |
| 1189 | btrfs_set_max_workers(&fs_info->scrub_workers, new_pool_size); | 1199 | btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers, |
| 1200 | new_pool_size); | ||
| 1190 | } | 1201 | } |
| 1191 | 1202 | ||
| 1192 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) | 1203 | static int btrfs_remount(struct super_block *sb, int *flags, char *data) |
| @@ -1215,8 +1226,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1215 | return 0; | 1226 | return 0; |
| 1216 | 1227 | ||
| 1217 | if (*flags & MS_RDONLY) { | 1228 | if (*flags & MS_RDONLY) { |
| 1229 | /* | ||
| 1230 | * this also happens on 'umount -rf' or on shutdown, when | ||
| 1231 | * the filesystem is busy. | ||
| 1232 | */ | ||
| 1218 | sb->s_flags |= MS_RDONLY; | 1233 | sb->s_flags |= MS_RDONLY; |
| 1219 | 1234 | ||
| 1235 | btrfs_dev_replace_suspend_for_unmount(fs_info); | ||
| 1236 | btrfs_scrub_cancel(fs_info); | ||
| 1237 | |||
| 1220 | ret = btrfs_commit_super(root); | 1238 | ret = btrfs_commit_super(root); |
| 1221 | if (ret) | 1239 | if (ret) |
| 1222 | goto restore; | 1240 | goto restore; |
| @@ -1226,6 +1244,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1226 | goto restore; | 1244 | goto restore; |
| 1227 | } | 1245 | } |
| 1228 | 1246 | ||
| 1247 | if (fs_info->fs_devices->missing_devices > | ||
| 1248 | fs_info->num_tolerated_disk_barrier_failures && | ||
| 1249 | !(*flags & MS_RDONLY)) { | ||
| 1250 | printk(KERN_WARNING | ||
| 1251 | "Btrfs: too many missing devices, writeable remount is not allowed\n"); | ||
| 1252 | ret = -EACCES; | ||
| 1253 | goto restore; | ||
| 1254 | } | ||
| 1255 | |||
| 1229 | if (btrfs_super_log_root(fs_info->super_copy) != 0) { | 1256 | if (btrfs_super_log_root(fs_info->super_copy) != 0) { |
| 1230 | ret = -EINVAL; | 1257 | ret = -EINVAL; |
| 1231 | goto restore; | 1258 | goto restore; |
| @@ -1244,6 +1271,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 1244 | if (ret) | 1271 | if (ret) |
| 1245 | goto restore; | 1272 | goto restore; |
| 1246 | 1273 | ||
| 1274 | ret = btrfs_resume_dev_replace_async(fs_info); | ||
| 1275 | if (ret) { | ||
| 1276 | pr_warn("btrfs: failed to resume dev_replace\n"); | ||
| 1277 | goto restore; | ||
| 1278 | } | ||
| 1247 | sb->s_flags &= ~MS_RDONLY; | 1279 | sb->s_flags &= ~MS_RDONLY; |
| 1248 | } | 1280 | } |
| 1249 | 1281 | ||
| @@ -1336,7 +1368,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes) | |||
| 1336 | min_stripe_size = BTRFS_STRIPE_LEN; | 1368 | min_stripe_size = BTRFS_STRIPE_LEN; |
| 1337 | 1369 | ||
| 1338 | list_for_each_entry(device, &fs_devices->devices, dev_list) { | 1370 | list_for_each_entry(device, &fs_devices->devices, dev_list) { |
| 1339 | if (!device->in_fs_metadata || !device->bdev) | 1371 | if (!device->in_fs_metadata || !device->bdev || |
| 1372 | device->is_tgtdev_for_dev_replace) | ||
| 1340 | continue; | 1373 | continue; |
| 1341 | 1374 | ||
| 1342 | avail_space = device->total_bytes - device->bytes_used; | 1375 | avail_space = device->total_bytes - device->bytes_used; |
| @@ -1647,10 +1680,14 @@ static int __init init_btrfs_fs(void) | |||
| 1647 | if (err) | 1680 | if (err) |
| 1648 | goto free_ordered_data; | 1681 | goto free_ordered_data; |
| 1649 | 1682 | ||
| 1650 | err = btrfs_interface_init(); | 1683 | err = btrfs_auto_defrag_init(); |
| 1651 | if (err) | 1684 | if (err) |
| 1652 | goto free_delayed_inode; | 1685 | goto free_delayed_inode; |
| 1653 | 1686 | ||
| 1687 | err = btrfs_interface_init(); | ||
| 1688 | if (err) | ||
| 1689 | goto free_auto_defrag; | ||
| 1690 | |||
| 1654 | err = register_filesystem(&btrfs_fs_type); | 1691 | err = register_filesystem(&btrfs_fs_type); |
| 1655 | if (err) | 1692 | if (err) |
| 1656 | goto unregister_ioctl; | 1693 | goto unregister_ioctl; |
| @@ -1662,6 +1699,8 @@ static int __init init_btrfs_fs(void) | |||
| 1662 | 1699 | ||
| 1663 | unregister_ioctl: | 1700 | unregister_ioctl: |
| 1664 | btrfs_interface_exit(); | 1701 | btrfs_interface_exit(); |
| 1702 | free_auto_defrag: | ||
| 1703 | btrfs_auto_defrag_exit(); | ||
| 1665 | free_delayed_inode: | 1704 | free_delayed_inode: |
| 1666 | btrfs_delayed_inode_exit(); | 1705 | btrfs_delayed_inode_exit(); |
| 1667 | free_ordered_data: | 1706 | free_ordered_data: |
| @@ -1681,6 +1720,7 @@ free_compress: | |||
| 1681 | static void __exit exit_btrfs_fs(void) | 1720 | static void __exit exit_btrfs_fs(void) |
| 1682 | { | 1721 | { |
| 1683 | btrfs_destroy_cachep(); | 1722 | btrfs_destroy_cachep(); |
| 1723 | btrfs_auto_defrag_exit(); | ||
| 1684 | btrfs_delayed_inode_exit(); | 1724 | btrfs_delayed_inode_exit(); |
| 1685 | ordered_data_exit(); | 1725 | ordered_data_exit(); |
| 1686 | extent_map_exit(); | 1726 | extent_map_exit(); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 04bbfb1052eb..fc03aa60b684 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include "tree-log.h" | 30 | #include "tree-log.h" |
| 31 | #include "inode-map.h" | 31 | #include "inode-map.h" |
| 32 | #include "volumes.h" | 32 | #include "volumes.h" |
| 33 | #include "dev-replace.h" | ||
| 33 | 34 | ||
| 34 | #define BTRFS_ROOT_TRANS_TAG 0 | 35 | #define BTRFS_ROOT_TRANS_TAG 0 |
| 35 | 36 | ||
| @@ -145,16 +146,12 @@ loop: | |||
| 145 | * the log must never go across transaction boundaries. | 146 | * the log must never go across transaction boundaries. |
| 146 | */ | 147 | */ |
| 147 | smp_mb(); | 148 | smp_mb(); |
| 148 | if (!list_empty(&fs_info->tree_mod_seq_list)) { | 149 | if (!list_empty(&fs_info->tree_mod_seq_list)) |
| 149 | printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when " | 150 | WARN(1, KERN_ERR "btrfs: tree_mod_seq_list not empty when " |
| 150 | "creating a fresh transaction\n"); | 151 | "creating a fresh transaction\n"); |
| 151 | WARN_ON(1); | 152 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) |
| 152 | } | 153 | WARN(1, KERN_ERR "btrfs: tree_mod_log rb tree not empty when " |
| 153 | if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) { | ||
| 154 | printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when " | ||
| 155 | "creating a fresh transaction\n"); | 154 | "creating a fresh transaction\n"); |
| 156 | WARN_ON(1); | ||
| 157 | } | ||
| 158 | atomic_set(&fs_info->tree_mod_seq, 0); | 155 | atomic_set(&fs_info->tree_mod_seq, 0); |
| 159 | 156 | ||
| 160 | spin_lock_init(&cur_trans->commit_lock); | 157 | spin_lock_init(&cur_trans->commit_lock); |
| @@ -295,9 +292,9 @@ static int may_wait_transaction(struct btrfs_root *root, int type) | |||
| 295 | return 0; | 292 | return 0; |
| 296 | } | 293 | } |
| 297 | 294 | ||
| 298 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 295 | static struct btrfs_trans_handle * |
| 299 | u64 num_items, int type, | 296 | start_transaction(struct btrfs_root *root, u64 num_items, int type, |
| 300 | int noflush) | 297 | enum btrfs_reserve_flush_enum flush) |
| 301 | { | 298 | { |
| 302 | struct btrfs_trans_handle *h; | 299 | struct btrfs_trans_handle *h; |
| 303 | struct btrfs_transaction *cur_trans; | 300 | struct btrfs_transaction *cur_trans; |
| @@ -312,6 +309,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 312 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); | 309 | WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK); |
| 313 | h = current->journal_info; | 310 | h = current->journal_info; |
| 314 | h->use_count++; | 311 | h->use_count++; |
| 312 | WARN_ON(h->use_count > 2); | ||
| 315 | h->orig_rsv = h->block_rsv; | 313 | h->orig_rsv = h->block_rsv; |
| 316 | h->block_rsv = NULL; | 314 | h->block_rsv = NULL; |
| 317 | goto got_it; | 315 | goto got_it; |
| @@ -331,21 +329,18 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
| 331 | } | 329 | } |
| 332 | 330 | ||
| 333 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | 331 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); |
| 334 | if (noflush) | 332 | ret = btrfs_block_rsv_add(root, |
| 335 | ret = btrfs_block_rsv_add_noflush(root, | 333 | &root->fs_info->trans_block_rsv, |
| 336 | &root->fs_info->trans_block_rsv, | 334 | num_bytes, flush); |
| 337 | num_bytes); | ||
| 338 | else | ||
| 339 | ret = btrfs_block_rsv_add(root, | ||
| 340 | &root->fs_info->trans_block_rsv, | ||
| 341 | num_bytes); | ||
| 342 | if (ret) | 335 | if (ret) |
| 343 | return ERR_PTR(ret); | 336 | goto reserve_fail; |
| 344 | } | 337 | } |
| 345 | again: | 338 | again: |
| 346 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 339 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); |
| 347 | if (!h) | 340 | if (!h) { |
| 348 | return ERR_PTR(-ENOMEM); | 341 | ret = -ENOMEM; |
| 342 | goto alloc_fail; | ||
| 343 | } | ||
| 349 | 344 | ||
| 350 | /* | 345 | /* |
| 351 | * If we are JOIN_NOLOCK we're already committing a transaction and | 346 | * If we are JOIN_NOLOCK we're already committing a transaction and |
| @@ -372,11 +367,7 @@ again: | |||
| 372 | if (ret < 0) { | 367 | if (ret < 0) { |
| 373 | /* We must get the transaction if we are JOIN_NOLOCK. */ | 368 | /* We must get the transaction if we are JOIN_NOLOCK. */ |
| 374 | BUG_ON(type == TRANS_JOIN_NOLOCK); | 369 | BUG_ON(type == TRANS_JOIN_NOLOCK); |
| 375 | 370 | goto join_fail; | |
| 376 | if (type < TRANS_JOIN_NOLOCK) | ||
| 377 | sb_end_intwrite(root->fs_info->sb); | ||
| 378 | kmem_cache_free(btrfs_trans_handle_cachep, h); | ||
| 379 | return ERR_PTR(ret); | ||
| 380 | } | 371 | } |
| 381 | 372 | ||
| 382 | cur_trans = root->fs_info->running_transaction; | 373 | cur_trans = root->fs_info->running_transaction; |
| @@ -417,18 +408,33 @@ got_it: | |||
| 417 | if (!current->journal_info && type != TRANS_USERSPACE) | 408 | if (!current->journal_info && type != TRANS_USERSPACE) |
| 418 | current->journal_info = h; | 409 | current->journal_info = h; |
| 419 | return h; | 410 | return h; |
| 411 | |||
| 412 | join_fail: | ||
| 413 | if (type < TRANS_JOIN_NOLOCK) | ||
| 414 | sb_end_intwrite(root->fs_info->sb); | ||
| 415 | kmem_cache_free(btrfs_trans_handle_cachep, h); | ||
| 416 | alloc_fail: | ||
| 417 | if (num_bytes) | ||
| 418 | btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, | ||
| 419 | num_bytes); | ||
| 420 | reserve_fail: | ||
| 421 | if (qgroup_reserved) | ||
| 422 | btrfs_qgroup_free(root, qgroup_reserved); | ||
| 423 | return ERR_PTR(ret); | ||
| 420 | } | 424 | } |
| 421 | 425 | ||
| 422 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 426 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 423 | int num_items) | 427 | int num_items) |
| 424 | { | 428 | { |
| 425 | return start_transaction(root, num_items, TRANS_START, 0); | 429 | return start_transaction(root, num_items, TRANS_START, |
| 430 | BTRFS_RESERVE_FLUSH_ALL); | ||
| 426 | } | 431 | } |
| 427 | 432 | ||
| 428 | struct btrfs_trans_handle *btrfs_start_transaction_noflush( | 433 | struct btrfs_trans_handle *btrfs_start_transaction_lflush( |
| 429 | struct btrfs_root *root, int num_items) | 434 | struct btrfs_root *root, int num_items) |
| 430 | { | 435 | { |
| 431 | return start_transaction(root, num_items, TRANS_START, 1); | 436 | return start_transaction(root, num_items, TRANS_START, |
| 437 | BTRFS_RESERVE_FLUSH_LIMIT); | ||
| 432 | } | 438 | } |
| 433 | 439 | ||
| 434 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) | 440 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root) |
| @@ -461,28 +467,31 @@ static noinline void wait_for_commit(struct btrfs_root *root, | |||
| 461 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | 467 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) |
| 462 | { | 468 | { |
| 463 | struct btrfs_transaction *cur_trans = NULL, *t; | 469 | struct btrfs_transaction *cur_trans = NULL, *t; |
| 464 | int ret; | 470 | int ret = 0; |
| 465 | 471 | ||
| 466 | ret = 0; | ||
| 467 | if (transid) { | 472 | if (transid) { |
| 468 | if (transid <= root->fs_info->last_trans_committed) | 473 | if (transid <= root->fs_info->last_trans_committed) |
| 469 | goto out; | 474 | goto out; |
| 470 | 475 | ||
| 476 | ret = -EINVAL; | ||
| 471 | /* find specified transaction */ | 477 | /* find specified transaction */ |
| 472 | spin_lock(&root->fs_info->trans_lock); | 478 | spin_lock(&root->fs_info->trans_lock); |
| 473 | list_for_each_entry(t, &root->fs_info->trans_list, list) { | 479 | list_for_each_entry(t, &root->fs_info->trans_list, list) { |
| 474 | if (t->transid == transid) { | 480 | if (t->transid == transid) { |
| 475 | cur_trans = t; | 481 | cur_trans = t; |
| 476 | atomic_inc(&cur_trans->use_count); | 482 | atomic_inc(&cur_trans->use_count); |
| 483 | ret = 0; | ||
| 477 | break; | 484 | break; |
| 478 | } | 485 | } |
| 479 | if (t->transid > transid) | 486 | if (t->transid > transid) { |
| 487 | ret = 0; | ||
| 480 | break; | 488 | break; |
| 489 | } | ||
| 481 | } | 490 | } |
| 482 | spin_unlock(&root->fs_info->trans_lock); | 491 | spin_unlock(&root->fs_info->trans_lock); |
| 483 | ret = -EINVAL; | 492 | /* The specified transaction doesn't exist */ |
| 484 | if (!cur_trans) | 493 | if (!cur_trans) |
| 485 | goto out; /* bad transid */ | 494 | goto out; |
| 486 | } else { | 495 | } else { |
| 487 | /* find newest transaction that is committing | committed */ | 496 | /* find newest transaction that is committing | committed */ |
| 488 | spin_lock(&root->fs_info->trans_lock); | 497 | spin_lock(&root->fs_info->trans_lock); |
| @@ -502,9 +511,7 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid) | |||
| 502 | } | 511 | } |
| 503 | 512 | ||
| 504 | wait_for_commit(root, cur_trans); | 513 | wait_for_commit(root, cur_trans); |
| 505 | |||
| 506 | put_transaction(cur_trans); | 514 | put_transaction(cur_trans); |
| 507 | ret = 0; | ||
| 508 | out: | 515 | out: |
| 509 | return ret; | 516 | return ret; |
| 510 | } | 517 | } |
| @@ -851,7 +858,9 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 851 | return ret; | 858 | return ret; |
| 852 | 859 | ||
| 853 | ret = btrfs_run_dev_stats(trans, root->fs_info); | 860 | ret = btrfs_run_dev_stats(trans, root->fs_info); |
| 854 | BUG_ON(ret); | 861 | WARN_ON(ret); |
| 862 | ret = btrfs_run_dev_replace(trans, root->fs_info); | ||
| 863 | WARN_ON(ret); | ||
| 855 | 864 | ||
| 856 | ret = btrfs_run_qgroups(trans, root->fs_info); | 865 | ret = btrfs_run_qgroups(trans, root->fs_info); |
| 857 | BUG_ON(ret); | 866 | BUG_ON(ret); |
| @@ -874,6 +883,8 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans, | |||
| 874 | switch_commit_root(fs_info->extent_root); | 883 | switch_commit_root(fs_info->extent_root); |
| 875 | up_write(&fs_info->extent_commit_sem); | 884 | up_write(&fs_info->extent_commit_sem); |
| 876 | 885 | ||
| 886 | btrfs_after_dev_replace_commit(fs_info); | ||
| 887 | |||
| 877 | return 0; | 888 | return 0; |
| 878 | } | 889 | } |
| 879 | 890 | ||
| @@ -958,7 +969,6 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
| 958 | struct btrfs_fs_info *info = root->fs_info; | 969 | struct btrfs_fs_info *info = root->fs_info; |
| 959 | struct btrfs_trans_handle *trans; | 970 | struct btrfs_trans_handle *trans; |
| 960 | int ret; | 971 | int ret; |
| 961 | unsigned long nr; | ||
| 962 | 972 | ||
| 963 | if (xchg(&root->defrag_running, 1)) | 973 | if (xchg(&root->defrag_running, 1)) |
| 964 | return 0; | 974 | return 0; |
| @@ -970,9 +980,8 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
| 970 | 980 | ||
| 971 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 981 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
| 972 | 982 | ||
| 973 | nr = trans->blocks_used; | ||
| 974 | btrfs_end_transaction(trans, root); | 983 | btrfs_end_transaction(trans, root); |
| 975 | btrfs_btree_balance_dirty(info->tree_root, nr); | 984 | btrfs_btree_balance_dirty(info->tree_root); |
| 976 | cond_resched(); | 985 | cond_resched(); |
| 977 | 986 | ||
| 978 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) | 987 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) |
| @@ -1032,8 +1041,9 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1032 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | 1041 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); |
| 1033 | 1042 | ||
| 1034 | if (to_reserve > 0) { | 1043 | if (to_reserve > 0) { |
| 1035 | ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv, | 1044 | ret = btrfs_block_rsv_add(root, &pending->block_rsv, |
| 1036 | to_reserve); | 1045 | to_reserve, |
| 1046 | BTRFS_RESERVE_NO_FLUSH); | ||
| 1037 | if (ret) { | 1047 | if (ret) { |
| 1038 | pending->error = ret; | 1048 | pending->error = ret; |
| 1039 | goto no_free_objectid; | 1049 | goto no_free_objectid; |
| @@ -1191,7 +1201,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 1191 | parent_inode, &key, | 1201 | parent_inode, &key, |
| 1192 | BTRFS_FT_DIR, index); | 1202 | BTRFS_FT_DIR, index); |
| 1193 | /* We have check then name at the beginning, so it is impossible. */ | 1203 | /* We have check then name at the beginning, so it is impossible. */ |
| 1194 | BUG_ON(ret == -EEXIST); | 1204 | BUG_ON(ret == -EEXIST || ret == -EOVERFLOW); |
| 1195 | if (ret) { | 1205 | if (ret) { |
| 1196 | btrfs_abort_transaction(trans, root, ret); | 1206 | btrfs_abort_transaction(trans, root, ret); |
| 1197 | goto fail; | 1207 | goto fail; |
| @@ -1309,9 +1319,10 @@ static void do_async_commit(struct work_struct *work) | |||
| 1309 | * We've got freeze protection passed with the transaction. | 1319 | * We've got freeze protection passed with the transaction. |
| 1310 | * Tell lockdep about it. | 1320 | * Tell lockdep about it. |
| 1311 | */ | 1321 | */ |
| 1312 | rwsem_acquire_read( | 1322 | if (ac->newtrans->type < TRANS_JOIN_NOLOCK) |
| 1313 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | 1323 | rwsem_acquire_read( |
| 1314 | 0, 1, _THIS_IP_); | 1324 | &ac->root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], |
| 1325 | 0, 1, _THIS_IP_); | ||
| 1315 | 1326 | ||
| 1316 | current->journal_info = ac->newtrans; | 1327 | current->journal_info = ac->newtrans; |
| 1317 | 1328 | ||
| @@ -1349,8 +1360,10 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
| 1349 | * Tell lockdep we've released the freeze rwsem, since the | 1360 | * Tell lockdep we've released the freeze rwsem, since the |
| 1350 | * async commit thread will be the one to unlock it. | 1361 | * async commit thread will be the one to unlock it. |
| 1351 | */ | 1362 | */ |
| 1352 | rwsem_release(&root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | 1363 | if (trans->type < TRANS_JOIN_NOLOCK) |
| 1353 | 1, _THIS_IP_); | 1364 | rwsem_release( |
| 1365 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | ||
| 1366 | 1, _THIS_IP_); | ||
| 1354 | 1367 | ||
| 1355 | schedule_delayed_work(&ac->work, 0); | 1368 | schedule_delayed_work(&ac->work, 0); |
| 1356 | 1369 | ||
| @@ -1400,6 +1413,48 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
| 1400 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1413 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
| 1401 | } | 1414 | } |
| 1402 | 1415 | ||
| 1416 | static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | ||
| 1417 | struct btrfs_root *root) | ||
| 1418 | { | ||
| 1419 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
| 1420 | int snap_pending = 0; | ||
| 1421 | int ret; | ||
| 1422 | |||
| 1423 | if (!flush_on_commit) { | ||
| 1424 | spin_lock(&root->fs_info->trans_lock); | ||
| 1425 | if (!list_empty(&trans->transaction->pending_snapshots)) | ||
| 1426 | snap_pending = 1; | ||
| 1427 | spin_unlock(&root->fs_info->trans_lock); | ||
| 1428 | } | ||
| 1429 | |||
| 1430 | if (flush_on_commit || snap_pending) { | ||
| 1431 | btrfs_start_delalloc_inodes(root, 1); | ||
| 1432 | btrfs_wait_ordered_extents(root, 1); | ||
| 1433 | } | ||
| 1434 | |||
| 1435 | ret = btrfs_run_delayed_items(trans, root); | ||
| 1436 | if (ret) | ||
| 1437 | return ret; | ||
| 1438 | |||
| 1439 | /* | ||
| 1440 | * running the delayed items may have added new refs. account | ||
| 1441 | * them now so that they hinder processing of more delayed refs | ||
| 1442 | * as little as possible. | ||
| 1443 | */ | ||
| 1444 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 1445 | |||
| 1446 | /* | ||
| 1447 | * rename don't use btrfs_join_transaction, so, once we | ||
| 1448 | * set the transaction to blocked above, we aren't going | ||
| 1449 | * to get any new ordered operations. We can safely run | ||
| 1450 | * it here and no for sure that nothing new will be added | ||
| 1451 | * to the list | ||
| 1452 | */ | ||
| 1453 | btrfs_run_ordered_operations(root, 1); | ||
| 1454 | |||
| 1455 | return 0; | ||
| 1456 | } | ||
| 1457 | |||
| 1403 | /* | 1458 | /* |
| 1404 | * btrfs_transaction state sequence: | 1459 | * btrfs_transaction state sequence: |
| 1405 | * in_commit = 0, blocked = 0 (initial) | 1460 | * in_commit = 0, blocked = 0 (initial) |
| @@ -1414,15 +1469,21 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1414 | struct btrfs_transaction *cur_trans = trans->transaction; | 1469 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 1415 | struct btrfs_transaction *prev_trans = NULL; | 1470 | struct btrfs_transaction *prev_trans = NULL; |
| 1416 | DEFINE_WAIT(wait); | 1471 | DEFINE_WAIT(wait); |
| 1417 | int ret = -EIO; | 1472 | int ret; |
| 1418 | int should_grow = 0; | 1473 | int should_grow = 0; |
| 1419 | unsigned long now = get_seconds(); | 1474 | unsigned long now = get_seconds(); |
| 1420 | int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT); | ||
| 1421 | 1475 | ||
| 1422 | btrfs_run_ordered_operations(root, 0); | 1476 | ret = btrfs_run_ordered_operations(root, 0); |
| 1477 | if (ret) { | ||
| 1478 | btrfs_abort_transaction(trans, root, ret); | ||
| 1479 | goto cleanup_transaction; | ||
| 1480 | } | ||
| 1423 | 1481 | ||
| 1424 | if (cur_trans->aborted) | 1482 | /* Stop the commit early if ->aborted is set */ |
| 1483 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | ||
| 1484 | ret = cur_trans->aborted; | ||
| 1425 | goto cleanup_transaction; | 1485 | goto cleanup_transaction; |
| 1486 | } | ||
| 1426 | 1487 | ||
| 1427 | /* make a pass through all the delayed refs we have so far | 1488 | /* make a pass through all the delayed refs we have so far |
| 1428 | * any runnings procs may add more while we are here | 1489 | * any runnings procs may add more while we are here |
| @@ -1490,39 +1551,14 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1490 | should_grow = 1; | 1551 | should_grow = 1; |
| 1491 | 1552 | ||
| 1492 | do { | 1553 | do { |
| 1493 | int snap_pending = 0; | ||
| 1494 | |||
| 1495 | joined = cur_trans->num_joined; | 1554 | joined = cur_trans->num_joined; |
| 1496 | if (!list_empty(&trans->transaction->pending_snapshots)) | ||
| 1497 | snap_pending = 1; | ||
| 1498 | 1555 | ||
| 1499 | WARN_ON(cur_trans != trans->transaction); | 1556 | WARN_ON(cur_trans != trans->transaction); |
| 1500 | 1557 | ||
| 1501 | if (flush_on_commit || snap_pending) { | 1558 | ret = btrfs_flush_all_pending_stuffs(trans, root); |
| 1502 | btrfs_start_delalloc_inodes(root, 1); | ||
| 1503 | btrfs_wait_ordered_extents(root, 1); | ||
| 1504 | } | ||
| 1505 | |||
| 1506 | ret = btrfs_run_delayed_items(trans, root); | ||
| 1507 | if (ret) | 1559 | if (ret) |
| 1508 | goto cleanup_transaction; | 1560 | goto cleanup_transaction; |
| 1509 | 1561 | ||
| 1510 | /* | ||
| 1511 | * running the delayed items may have added new refs. account | ||
| 1512 | * them now so that they hinder processing of more delayed refs | ||
| 1513 | * as little as possible. | ||
| 1514 | */ | ||
| 1515 | btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info); | ||
| 1516 | |||
| 1517 | /* | ||
| 1518 | * rename don't use btrfs_join_transaction, so, once we | ||
| 1519 | * set the transaction to blocked above, we aren't going | ||
| 1520 | * to get any new ordered operations. We can safely run | ||
| 1521 | * it here and no for sure that nothing new will be added | ||
| 1522 | * to the list | ||
| 1523 | */ | ||
| 1524 | btrfs_run_ordered_operations(root, 1); | ||
| 1525 | |||
| 1526 | prepare_to_wait(&cur_trans->writer_wait, &wait, | 1562 | prepare_to_wait(&cur_trans->writer_wait, &wait, |
| 1527 | TASK_UNINTERRUPTIBLE); | 1563 | TASK_UNINTERRUPTIBLE); |
| 1528 | 1564 | ||
| @@ -1535,6 +1571,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1535 | } while (atomic_read(&cur_trans->num_writers) > 1 || | 1571 | } while (atomic_read(&cur_trans->num_writers) > 1 || |
| 1536 | (should_grow && cur_trans->num_joined != joined)); | 1572 | (should_grow && cur_trans->num_joined != joined)); |
| 1537 | 1573 | ||
| 1574 | ret = btrfs_flush_all_pending_stuffs(trans, root); | ||
| 1575 | if (ret) | ||
| 1576 | goto cleanup_transaction; | ||
| 1577 | |||
| 1538 | /* | 1578 | /* |
| 1539 | * Ok now we need to make sure to block out any other joins while we | 1579 | * Ok now we need to make sure to block out any other joins while we |
| 1540 | * commit the transaction. We could have started a join before setting | 1580 | * commit the transaction. We could have started a join before setting |
| @@ -1546,6 +1586,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1546 | wait_event(cur_trans->writer_wait, | 1586 | wait_event(cur_trans->writer_wait, |
| 1547 | atomic_read(&cur_trans->num_writers) == 1); | 1587 | atomic_read(&cur_trans->num_writers) == 1); |
| 1548 | 1588 | ||
| 1589 | /* ->aborted might be set after the previous check, so check it */ | ||
| 1590 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | ||
| 1591 | ret = cur_trans->aborted; | ||
| 1592 | goto cleanup_transaction; | ||
| 1593 | } | ||
| 1549 | /* | 1594 | /* |
| 1550 | * the reloc mutex makes sure that we stop | 1595 | * the reloc mutex makes sure that we stop |
| 1551 | * the balancing code from coming in and moving | 1596 | * the balancing code from coming in and moving |
| @@ -1629,6 +1674,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1629 | goto cleanup_transaction; | 1674 | goto cleanup_transaction; |
| 1630 | } | 1675 | } |
| 1631 | 1676 | ||
| 1677 | /* | ||
| 1678 | * The tasks which save the space cache and inode cache may also | ||
| 1679 | * update ->aborted, check it. | ||
| 1680 | */ | ||
| 1681 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | ||
| 1682 | ret = cur_trans->aborted; | ||
| 1683 | mutex_unlock(&root->fs_info->tree_log_mutex); | ||
| 1684 | mutex_unlock(&root->fs_info->reloc_mutex); | ||
| 1685 | goto cleanup_transaction; | ||
| 1686 | } | ||
| 1687 | |||
| 1632 | btrfs_prepare_extent_commit(trans, root); | 1688 | btrfs_prepare_extent_commit(trans, root); |
| 1633 | 1689 | ||
| 1634 | cur_trans = root->fs_info->running_transaction; | 1690 | cur_trans = root->fs_info->running_transaction; |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 80961947a6b2..0e8aa1e6c287 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -105,7 +105,7 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 105 | struct btrfs_root *root); | 105 | struct btrfs_root *root); |
| 106 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 106 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 107 | int num_items); | 107 | int num_items); |
| 108 | struct btrfs_trans_handle *btrfs_start_transaction_noflush( | 108 | struct btrfs_trans_handle *btrfs_start_transaction_lflush( |
| 109 | struct btrfs_root *root, int num_items); | 109 | struct btrfs_root *root, int num_items); |
| 110 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); | 110 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
| 111 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); | 111 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 81e407d9677a..9027bb1e7466 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -2952,33 +2952,9 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
| 2952 | struct btrfs_inode_item *item, | 2952 | struct btrfs_inode_item *item, |
| 2953 | struct inode *inode, int log_inode_only) | 2953 | struct inode *inode, int log_inode_only) |
| 2954 | { | 2954 | { |
| 2955 | btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); | 2955 | struct btrfs_map_token token; |
| 2956 | btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); | 2956 | |
| 2957 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | 2957 | btrfs_init_map_token(&token); |
| 2958 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | ||
| 2959 | |||
| 2960 | btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), | ||
| 2961 | inode->i_atime.tv_sec); | ||
| 2962 | btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
| 2963 | inode->i_atime.tv_nsec); | ||
| 2964 | |||
| 2965 | btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
| 2966 | inode->i_mtime.tv_sec); | ||
| 2967 | btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
| 2968 | inode->i_mtime.tv_nsec); | ||
| 2969 | |||
| 2970 | btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
| 2971 | inode->i_ctime.tv_sec); | ||
| 2972 | btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
| 2973 | inode->i_ctime.tv_nsec); | ||
| 2974 | |||
| 2975 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | ||
| 2976 | |||
| 2977 | btrfs_set_inode_sequence(leaf, item, inode->i_version); | ||
| 2978 | btrfs_set_inode_transid(leaf, item, trans->transid); | ||
| 2979 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | ||
| 2980 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | ||
| 2981 | btrfs_set_inode_block_group(leaf, item, 0); | ||
| 2982 | 2958 | ||
| 2983 | if (log_inode_only) { | 2959 | if (log_inode_only) { |
| 2984 | /* set the generation to zero so the recover code | 2960 | /* set the generation to zero so the recover code |
| @@ -2986,14 +2962,63 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
| 2986 | * just to say 'this inode exists' and a logging | 2962 | * just to say 'this inode exists' and a logging |
| 2987 | * to say 'update this inode with these values' | 2963 | * to say 'update this inode with these values' |
| 2988 | */ | 2964 | */ |
| 2989 | btrfs_set_inode_generation(leaf, item, 0); | 2965 | btrfs_set_token_inode_generation(leaf, item, 0, &token); |
| 2990 | btrfs_set_inode_size(leaf, item, 0); | 2966 | btrfs_set_token_inode_size(leaf, item, 0, &token); |
| 2991 | } else { | 2967 | } else { |
| 2992 | btrfs_set_inode_generation(leaf, item, | 2968 | btrfs_set_token_inode_generation(leaf, item, |
| 2993 | BTRFS_I(inode)->generation); | 2969 | BTRFS_I(inode)->generation, |
| 2994 | btrfs_set_inode_size(leaf, item, inode->i_size); | 2970 | &token); |
| 2995 | } | 2971 | btrfs_set_token_inode_size(leaf, item, inode->i_size, &token); |
| 2972 | } | ||
| 2973 | |||
| 2974 | btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); | ||
| 2975 | btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); | ||
| 2976 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); | ||
| 2977 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); | ||
| 2978 | |||
| 2979 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), | ||
| 2980 | inode->i_atime.tv_sec, &token); | ||
| 2981 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), | ||
| 2982 | inode->i_atime.tv_nsec, &token); | ||
| 2983 | |||
| 2984 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), | ||
| 2985 | inode->i_mtime.tv_sec, &token); | ||
| 2986 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), | ||
| 2987 | inode->i_mtime.tv_nsec, &token); | ||
| 2988 | |||
| 2989 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), | ||
| 2990 | inode->i_ctime.tv_sec, &token); | ||
| 2991 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), | ||
| 2992 | inode->i_ctime.tv_nsec, &token); | ||
| 2993 | |||
| 2994 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), | ||
| 2995 | &token); | ||
| 2996 | |||
| 2997 | btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); | ||
| 2998 | btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); | ||
| 2999 | btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); | ||
| 3000 | btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); | ||
| 3001 | btrfs_set_token_inode_block_group(leaf, item, 0, &token); | ||
| 3002 | } | ||
| 2996 | 3003 | ||
| 3004 | static int log_inode_item(struct btrfs_trans_handle *trans, | ||
| 3005 | struct btrfs_root *log, struct btrfs_path *path, | ||
| 3006 | struct inode *inode) | ||
| 3007 | { | ||
| 3008 | struct btrfs_inode_item *inode_item; | ||
| 3009 | struct btrfs_key key; | ||
| 3010 | int ret; | ||
| 3011 | |||
| 3012 | memcpy(&key, &BTRFS_I(inode)->location, sizeof(key)); | ||
| 3013 | ret = btrfs_insert_empty_item(trans, log, path, &key, | ||
| 3014 | sizeof(*inode_item)); | ||
| 3015 | if (ret && ret != -EEXIST) | ||
| 3016 | return ret; | ||
| 3017 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 3018 | struct btrfs_inode_item); | ||
| 3019 | fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); | ||
| 3020 | btrfs_release_path(path); | ||
| 3021 | return 0; | ||
| 2997 | } | 3022 | } |
| 2998 | 3023 | ||
| 2999 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 3024 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
| @@ -3130,151 +3155,239 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
| 3130 | return 0; | 3155 | return 0; |
| 3131 | } | 3156 | } |
| 3132 | 3157 | ||
| 3133 | struct log_args { | 3158 | static int drop_adjacent_extents(struct btrfs_trans_handle *trans, |
| 3134 | struct extent_buffer *src; | 3159 | struct btrfs_root *root, struct inode *inode, |
| 3135 | u64 next_offset; | 3160 | struct extent_map *em, |
| 3136 | int start_slot; | 3161 | struct btrfs_path *path) |
| 3137 | int nr; | 3162 | { |
| 3138 | }; | 3163 | struct btrfs_file_extent_item *fi; |
| 3164 | struct extent_buffer *leaf; | ||
| 3165 | struct btrfs_key key, new_key; | ||
| 3166 | struct btrfs_map_token token; | ||
| 3167 | u64 extent_end; | ||
| 3168 | u64 extent_offset = 0; | ||
| 3169 | int extent_type; | ||
| 3170 | int del_slot = 0; | ||
| 3171 | int del_nr = 0; | ||
| 3172 | int ret = 0; | ||
| 3173 | |||
| 3174 | while (1) { | ||
| 3175 | btrfs_init_map_token(&token); | ||
| 3176 | leaf = path->nodes[0]; | ||
| 3177 | path->slots[0]++; | ||
| 3178 | if (path->slots[0] >= btrfs_header_nritems(leaf)) { | ||
| 3179 | if (del_nr) { | ||
| 3180 | ret = btrfs_del_items(trans, root, path, | ||
| 3181 | del_slot, del_nr); | ||
| 3182 | if (ret) | ||
| 3183 | return ret; | ||
| 3184 | del_nr = 0; | ||
| 3185 | } | ||
| 3186 | |||
| 3187 | ret = btrfs_next_leaf_write(trans, root, path, 1); | ||
| 3188 | if (ret < 0) | ||
| 3189 | return ret; | ||
| 3190 | if (ret > 0) | ||
| 3191 | return 0; | ||
| 3192 | leaf = path->nodes[0]; | ||
| 3193 | } | ||
| 3194 | |||
| 3195 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
| 3196 | if (key.objectid != btrfs_ino(inode) || | ||
| 3197 | key.type != BTRFS_EXTENT_DATA_KEY || | ||
| 3198 | key.offset >= em->start + em->len) | ||
| 3199 | break; | ||
| 3200 | |||
| 3201 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3202 | struct btrfs_file_extent_item); | ||
| 3203 | extent_type = btrfs_token_file_extent_type(leaf, fi, &token); | ||
| 3204 | if (extent_type == BTRFS_FILE_EXTENT_REG || | ||
| 3205 | extent_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 3206 | extent_offset = btrfs_token_file_extent_offset(leaf, | ||
| 3207 | fi, &token); | ||
| 3208 | extent_end = key.offset + | ||
| 3209 | btrfs_token_file_extent_num_bytes(leaf, fi, | ||
| 3210 | &token); | ||
| 3211 | } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { | ||
| 3212 | extent_end = key.offset + | ||
| 3213 | btrfs_file_extent_inline_len(leaf, fi); | ||
| 3214 | } else { | ||
| 3215 | BUG(); | ||
| 3216 | } | ||
| 3217 | |||
| 3218 | if (extent_end <= em->len + em->start) { | ||
| 3219 | if (!del_nr) { | ||
| 3220 | del_slot = path->slots[0]; | ||
| 3221 | } | ||
| 3222 | del_nr++; | ||
| 3223 | continue; | ||
| 3224 | } | ||
| 3225 | |||
| 3226 | /* | ||
| 3227 | * Ok so we'll ignore previous items if we log a new extent, | ||
| 3228 | * which can lead to overlapping extents, so if we have an | ||
| 3229 | * existing extent we want to adjust we _have_ to check the next | ||
| 3230 | * guy to make sure we even need this extent anymore, this keeps | ||
| 3231 | * us from panicing in set_item_key_safe. | ||
| 3232 | */ | ||
| 3233 | if (path->slots[0] < btrfs_header_nritems(leaf) - 1) { | ||
| 3234 | struct btrfs_key tmp_key; | ||
| 3235 | |||
| 3236 | btrfs_item_key_to_cpu(leaf, &tmp_key, | ||
| 3237 | path->slots[0] + 1); | ||
| 3238 | if (tmp_key.objectid == btrfs_ino(inode) && | ||
| 3239 | tmp_key.type == BTRFS_EXTENT_DATA_KEY && | ||
| 3240 | tmp_key.offset <= em->start + em->len) { | ||
| 3241 | if (!del_nr) | ||
| 3242 | del_slot = path->slots[0]; | ||
| 3243 | del_nr++; | ||
| 3244 | continue; | ||
| 3245 | } | ||
| 3246 | } | ||
| 3247 | |||
| 3248 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | ||
| 3249 | memcpy(&new_key, &key, sizeof(new_key)); | ||
| 3250 | new_key.offset = em->start + em->len; | ||
| 3251 | btrfs_set_item_key_safe(trans, root, path, &new_key); | ||
| 3252 | extent_offset += em->start + em->len - key.offset; | ||
| 3253 | btrfs_set_token_file_extent_offset(leaf, fi, extent_offset, | ||
| 3254 | &token); | ||
| 3255 | btrfs_set_token_file_extent_num_bytes(leaf, fi, extent_end - | ||
| 3256 | (em->start + em->len), | ||
| 3257 | &token); | ||
| 3258 | btrfs_mark_buffer_dirty(leaf); | ||
| 3259 | } | ||
| 3260 | |||
| 3261 | if (del_nr) | ||
| 3262 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | ||
| 3263 | |||
| 3264 | return ret; | ||
| 3265 | } | ||
| 3139 | 3266 | ||
| 3140 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3267 | static int log_one_extent(struct btrfs_trans_handle *trans, |
| 3141 | struct inode *inode, struct btrfs_root *root, | 3268 | struct inode *inode, struct btrfs_root *root, |
| 3142 | struct extent_map *em, struct btrfs_path *path, | 3269 | struct extent_map *em, struct btrfs_path *path) |
| 3143 | struct btrfs_path *dst_path, struct log_args *args) | ||
| 3144 | { | 3270 | { |
| 3145 | struct btrfs_root *log = root->log_root; | 3271 | struct btrfs_root *log = root->log_root; |
| 3146 | struct btrfs_file_extent_item *fi; | 3272 | struct btrfs_file_extent_item *fi; |
| 3273 | struct extent_buffer *leaf; | ||
| 3274 | struct list_head ordered_sums; | ||
| 3275 | struct btrfs_map_token token; | ||
| 3147 | struct btrfs_key key; | 3276 | struct btrfs_key key; |
| 3148 | u64 start = em->mod_start; | 3277 | u64 csum_offset = em->mod_start - em->start; |
| 3149 | u64 search_start = start; | 3278 | u64 csum_len = em->mod_len; |
| 3150 | u64 len = em->mod_len; | 3279 | u64 extent_offset = em->start - em->orig_start; |
| 3151 | u64 num_bytes; | 3280 | u64 block_len; |
| 3152 | int nritems; | ||
| 3153 | int ret; | 3281 | int ret; |
| 3282 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
| 3154 | 3283 | ||
| 3155 | if (BTRFS_I(inode)->logged_trans == trans->transid) { | 3284 | INIT_LIST_HEAD(&ordered_sums); |
| 3156 | ret = __btrfs_drop_extents(trans, log, inode, dst_path, start, | 3285 | btrfs_init_map_token(&token); |
| 3157 | start + len, NULL, 0); | 3286 | key.objectid = btrfs_ino(inode); |
| 3158 | if (ret) | 3287 | key.type = BTRFS_EXTENT_DATA_KEY; |
| 3159 | return ret; | 3288 | key.offset = em->start; |
| 3289 | path->really_keep_locks = 1; | ||
| 3290 | |||
| 3291 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); | ||
| 3292 | if (ret && ret != -EEXIST) { | ||
| 3293 | path->really_keep_locks = 0; | ||
| 3294 | return ret; | ||
| 3160 | } | 3295 | } |
| 3296 | leaf = path->nodes[0]; | ||
| 3297 | fi = btrfs_item_ptr(leaf, path->slots[0], | ||
| 3298 | struct btrfs_file_extent_item); | ||
| 3299 | btrfs_set_token_file_extent_generation(leaf, fi, em->generation, | ||
| 3300 | &token); | ||
| 3301 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) { | ||
| 3302 | skip_csum = true; | ||
| 3303 | btrfs_set_token_file_extent_type(leaf, fi, | ||
| 3304 | BTRFS_FILE_EXTENT_PREALLOC, | ||
| 3305 | &token); | ||
| 3306 | } else { | ||
| 3307 | btrfs_set_token_file_extent_type(leaf, fi, | ||
| 3308 | BTRFS_FILE_EXTENT_REG, | ||
| 3309 | &token); | ||
| 3310 | if (em->block_start == 0) | ||
| 3311 | skip_csum = true; | ||
| 3312 | } | ||
| 3313 | |||
| 3314 | block_len = max(em->block_len, em->orig_block_len); | ||
| 3315 | if (em->compress_type != BTRFS_COMPRESS_NONE) { | ||
| 3316 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
| 3317 | em->block_start, | ||
| 3318 | &token); | ||
| 3319 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
| 3320 | &token); | ||
| 3321 | } else if (em->block_start < EXTENT_MAP_LAST_BYTE) { | ||
| 3322 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, | ||
| 3323 | em->block_start - | ||
| 3324 | extent_offset, &token); | ||
| 3325 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, block_len, | ||
| 3326 | &token); | ||
| 3327 | } else { | ||
| 3328 | btrfs_set_token_file_extent_disk_bytenr(leaf, fi, 0, &token); | ||
| 3329 | btrfs_set_token_file_extent_disk_num_bytes(leaf, fi, 0, | ||
| 3330 | &token); | ||
| 3331 | } | ||
| 3332 | |||
| 3333 | btrfs_set_token_file_extent_offset(leaf, fi, | ||
| 3334 | em->start - em->orig_start, | ||
| 3335 | &token); | ||
| 3336 | btrfs_set_token_file_extent_num_bytes(leaf, fi, em->len, &token); | ||
| 3337 | btrfs_set_token_file_extent_ram_bytes(leaf, fi, em->len, &token); | ||
| 3338 | btrfs_set_token_file_extent_compression(leaf, fi, em->compress_type, | ||
| 3339 | &token); | ||
| 3340 | btrfs_set_token_file_extent_encryption(leaf, fi, 0, &token); | ||
| 3341 | btrfs_set_token_file_extent_other_encoding(leaf, fi, 0, &token); | ||
| 3342 | btrfs_mark_buffer_dirty(leaf); | ||
| 3161 | 3343 | ||
| 3162 | while (len) { | 3344 | /* |
| 3163 | if (args->nr) | 3345 | * Have to check the extent to the right of us to make sure it doesn't |
| 3164 | goto next_slot; | 3346 | * fall in our current range. We're ok if the previous extent is in our |
| 3165 | again: | 3347 | * range since the recovery stuff will run us in key order and thus just |
| 3166 | key.objectid = btrfs_ino(inode); | 3348 | * drop the part we overwrote. |
| 3167 | key.type = BTRFS_EXTENT_DATA_KEY; | 3349 | */ |
| 3168 | key.offset = search_start; | 3350 | ret = drop_adjacent_extents(trans, log, inode, em, path); |
| 3169 | 3351 | btrfs_release_path(path); | |
| 3170 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 3352 | path->really_keep_locks = 0; |
| 3171 | if (ret < 0) | 3353 | if (ret) { |
| 3172 | return ret; | 3354 | return ret; |
| 3173 | 3355 | } | |
| 3174 | if (ret) { | ||
| 3175 | /* | ||
| 3176 | * A rare case were we can have an em for a section of a | ||
| 3177 | * larger extent so we need to make sure that this em | ||
| 3178 | * falls within the extent we've found. If not we just | ||
| 3179 | * bail and go back to ye-olde way of doing things but | ||
| 3180 | * it happens often enough in testing that we need to do | ||
| 3181 | * this dance to make sure. | ||
| 3182 | */ | ||
| 3183 | do { | ||
| 3184 | if (path->slots[0] == 0) { | ||
| 3185 | btrfs_release_path(path); | ||
| 3186 | if (search_start == 0) | ||
| 3187 | return -ENOENT; | ||
| 3188 | search_start--; | ||
| 3189 | goto again; | ||
| 3190 | } | ||
| 3191 | 3356 | ||
| 3192 | path->slots[0]--; | 3357 | if (skip_csum) |
| 3193 | btrfs_item_key_to_cpu(path->nodes[0], &key, | 3358 | return 0; |
| 3194 | path->slots[0]); | ||
| 3195 | if (key.objectid != btrfs_ino(inode) || | ||
| 3196 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
| 3197 | btrfs_release_path(path); | ||
| 3198 | return -ENOENT; | ||
| 3199 | } | ||
| 3200 | } while (key.offset > start); | ||
| 3201 | 3359 | ||
| 3202 | fi = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3360 | if (em->compress_type) { |
| 3203 | struct btrfs_file_extent_item); | 3361 | csum_offset = 0; |
| 3204 | num_bytes = btrfs_file_extent_num_bytes(path->nodes[0], | 3362 | csum_len = block_len; |
| 3205 | fi); | 3363 | } |
| 3206 | if (key.offset + num_bytes <= start) { | ||
| 3207 | btrfs_release_path(path); | ||
| 3208 | return -ENOENT; | ||
| 3209 | } | ||
| 3210 | } | ||
| 3211 | args->src = path->nodes[0]; | ||
| 3212 | next_slot: | ||
| 3213 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | ||
| 3214 | fi = btrfs_item_ptr(args->src, path->slots[0], | ||
| 3215 | struct btrfs_file_extent_item); | ||
| 3216 | if (args->nr && | ||
| 3217 | args->start_slot + args->nr == path->slots[0]) { | ||
| 3218 | args->nr++; | ||
| 3219 | } else if (args->nr) { | ||
| 3220 | ret = copy_items(trans, inode, dst_path, args->src, | ||
| 3221 | args->start_slot, args->nr, | ||
| 3222 | LOG_INODE_ALL); | ||
| 3223 | if (ret) | ||
| 3224 | return ret; | ||
| 3225 | args->nr = 1; | ||
| 3226 | args->start_slot = path->slots[0]; | ||
| 3227 | } else if (!args->nr) { | ||
| 3228 | args->nr = 1; | ||
| 3229 | args->start_slot = path->slots[0]; | ||
| 3230 | } | ||
| 3231 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 3232 | path->slots[0]++; | ||
| 3233 | num_bytes = btrfs_file_extent_num_bytes(args->src, fi); | ||
| 3234 | if (len < num_bytes) { | ||
| 3235 | /* I _think_ this is ok, envision we write to a | ||
| 3236 | * preallocated space that is adjacent to a previously | ||
| 3237 | * written preallocated space that gets merged when we | ||
| 3238 | * mark this preallocated space written. If we do not | ||
| 3239 | * have the adjacent extent in cache then when we copy | ||
| 3240 | * this extent it could end up being larger than our EM | ||
| 3241 | * thinks it is, which is a-ok, so just set len to 0. | ||
| 3242 | */ | ||
| 3243 | len = 0; | ||
| 3244 | } else { | ||
| 3245 | len -= num_bytes; | ||
| 3246 | } | ||
| 3247 | start = key.offset + num_bytes; | ||
| 3248 | args->next_offset = start; | ||
| 3249 | search_start = start; | ||
| 3250 | 3364 | ||
| 3251 | if (path->slots[0] < nritems) { | 3365 | /* block start is already adjusted for the file extent offset. */ |
| 3252 | if (len) | 3366 | ret = btrfs_lookup_csums_range(log->fs_info->csum_root, |
| 3253 | goto next_slot; | 3367 | em->block_start + csum_offset, |
| 3254 | break; | 3368 | em->block_start + csum_offset + |
| 3255 | } | 3369 | csum_len - 1, &ordered_sums, 0); |
| 3370 | if (ret) | ||
| 3371 | return ret; | ||
| 3256 | 3372 | ||
| 3257 | if (args->nr) { | 3373 | while (!list_empty(&ordered_sums)) { |
| 3258 | ret = copy_items(trans, inode, dst_path, args->src, | 3374 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, |
| 3259 | args->start_slot, args->nr, | 3375 | struct btrfs_ordered_sum, |
| 3260 | LOG_INODE_ALL); | 3376 | list); |
| 3261 | if (ret) | 3377 | if (!ret) |
| 3262 | return ret; | 3378 | ret = btrfs_csum_file_blocks(trans, log, sums); |
| 3263 | args->nr = 0; | 3379 | list_del(&sums->list); |
| 3264 | btrfs_release_path(path); | 3380 | kfree(sums); |
| 3265 | } | ||
| 3266 | } | 3381 | } |
| 3267 | 3382 | ||
| 3268 | return 0; | 3383 | return ret; |
| 3269 | } | 3384 | } |
| 3270 | 3385 | ||
| 3271 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3386 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
| 3272 | struct btrfs_root *root, | 3387 | struct btrfs_root *root, |
| 3273 | struct inode *inode, | 3388 | struct inode *inode, |
| 3274 | struct btrfs_path *path, | 3389 | struct btrfs_path *path) |
| 3275 | struct btrfs_path *dst_path) | ||
| 3276 | { | 3390 | { |
| 3277 | struct log_args args; | ||
| 3278 | struct extent_map *em, *n; | 3391 | struct extent_map *em, *n; |
| 3279 | struct list_head extents; | 3392 | struct list_head extents; |
| 3280 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | 3393 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; |
| @@ -3283,8 +3396,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
| 3283 | 3396 | ||
| 3284 | INIT_LIST_HEAD(&extents); | 3397 | INIT_LIST_HEAD(&extents); |
| 3285 | 3398 | ||
| 3286 | memset(&args, 0, sizeof(args)); | ||
| 3287 | |||
| 3288 | write_lock(&tree->lock); | 3399 | write_lock(&tree->lock); |
| 3289 | test_gen = root->fs_info->last_trans_committed; | 3400 | test_gen = root->fs_info->last_trans_committed; |
| 3290 | 3401 | ||
| @@ -3304,47 +3415,27 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
| 3304 | em = list_entry(extents.next, struct extent_map, list); | 3415 | em = list_entry(extents.next, struct extent_map, list); |
| 3305 | 3416 | ||
| 3306 | list_del_init(&em->list); | 3417 | list_del_init(&em->list); |
| 3307 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | ||
| 3308 | 3418 | ||
| 3309 | /* | 3419 | /* |
| 3310 | * If we had an error we just need to delete everybody from our | 3420 | * If we had an error we just need to delete everybody from our |
| 3311 | * private list. | 3421 | * private list. |
| 3312 | */ | 3422 | */ |
| 3313 | if (ret) { | 3423 | if (ret) { |
| 3424 | clear_em_logging(tree, em); | ||
| 3314 | free_extent_map(em); | 3425 | free_extent_map(em); |
| 3315 | continue; | 3426 | continue; |
| 3316 | } | 3427 | } |
| 3317 | 3428 | ||
| 3318 | write_unlock(&tree->lock); | 3429 | write_unlock(&tree->lock); |
| 3319 | 3430 | ||
| 3320 | /* | 3431 | ret = log_one_extent(trans, inode, root, em, path); |
| 3321 | * If the previous EM and the last extent we left off on aren't | ||
| 3322 | * sequential then we need to copy the items we have and redo | ||
| 3323 | * our search | ||
| 3324 | */ | ||
| 3325 | if (args.nr && em->mod_start != args.next_offset) { | ||
| 3326 | ret = copy_items(trans, inode, dst_path, args.src, | ||
| 3327 | args.start_slot, args.nr, | ||
| 3328 | LOG_INODE_ALL); | ||
| 3329 | if (ret) { | ||
| 3330 | free_extent_map(em); | ||
| 3331 | write_lock(&tree->lock); | ||
| 3332 | continue; | ||
| 3333 | } | ||
| 3334 | btrfs_release_path(path); | ||
| 3335 | args.nr = 0; | ||
| 3336 | } | ||
| 3337 | |||
| 3338 | ret = log_one_extent(trans, inode, root, em, path, dst_path, &args); | ||
| 3339 | free_extent_map(em); | ||
| 3340 | write_lock(&tree->lock); | 3432 | write_lock(&tree->lock); |
| 3433 | clear_em_logging(tree, em); | ||
| 3434 | free_extent_map(em); | ||
| 3341 | } | 3435 | } |
| 3342 | WARN_ON(!list_empty(&extents)); | 3436 | WARN_ON(!list_empty(&extents)); |
| 3343 | write_unlock(&tree->lock); | 3437 | write_unlock(&tree->lock); |
| 3344 | 3438 | ||
| 3345 | if (!ret && args.nr) | ||
| 3346 | ret = copy_items(trans, inode, dst_path, args.src, | ||
| 3347 | args.start_slot, args.nr, LOG_INODE_ALL); | ||
| 3348 | btrfs_release_path(path); | 3439 | btrfs_release_path(path); |
| 3349 | return ret; | 3440 | return ret; |
| 3350 | } | 3441 | } |
| @@ -3400,7 +3491,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3400 | 3491 | ||
| 3401 | 3492 | ||
| 3402 | /* today the code can only do partial logging of directories */ | 3493 | /* today the code can only do partial logging of directories */ |
| 3403 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 3494 | if (S_ISDIR(inode->i_mode) || |
| 3495 | (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
| 3496 | &BTRFS_I(inode)->runtime_flags) && | ||
| 3497 | inode_only == LOG_INODE_EXISTS)) | ||
| 3404 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3498 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
| 3405 | else | 3499 | else |
| 3406 | max_key.type = (u8)-1; | 3500 | max_key.type = (u8)-1; |
| @@ -3432,14 +3526,28 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 3432 | } else { | 3526 | } else { |
| 3433 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 3527 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
| 3434 | &BTRFS_I(inode)->runtime_flags)) { | 3528 | &BTRFS_I(inode)->runtime_flags)) { |
| 3529 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
| 3530 | &BTRFS_I(inode)->runtime_flags); | ||
| 3435 | ret = btrfs_truncate_inode_items(trans, log, | 3531 | ret = btrfs_truncate_inode_items(trans, log, |
| 3436 | inode, 0, 0); | 3532 | inode, 0, 0); |
| 3437 | } else { | 3533 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, |
| 3438 | fast_search = true; | 3534 | &BTRFS_I(inode)->runtime_flags)) { |
| 3535 | if (inode_only == LOG_INODE_ALL) | ||
| 3536 | fast_search = true; | ||
| 3439 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 3537 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
| 3440 | ret = drop_objectid_items(trans, log, path, ino, | 3538 | ret = drop_objectid_items(trans, log, path, ino, |
| 3441 | BTRFS_XATTR_ITEM_KEY); | 3539 | max_key.type); |
| 3540 | } else { | ||
| 3541 | if (inode_only == LOG_INODE_ALL) | ||
| 3542 | fast_search = true; | ||
| 3543 | ret = log_inode_item(trans, log, dst_path, inode); | ||
| 3544 | if (ret) { | ||
| 3545 | err = ret; | ||
| 3546 | goto out_unlock; | ||
| 3547 | } | ||
| 3548 | goto log_extents; | ||
| 3442 | } | 3549 | } |
| 3550 | |||
| 3443 | } | 3551 | } |
| 3444 | if (ret) { | 3552 | if (ret) { |
| 3445 | err = ret; | 3553 | err = ret; |
| @@ -3518,11 +3626,10 @@ next_slot: | |||
| 3518 | ins_nr = 0; | 3626 | ins_nr = 0; |
| 3519 | } | 3627 | } |
| 3520 | 3628 | ||
| 3629 | log_extents: | ||
| 3521 | if (fast_search) { | 3630 | if (fast_search) { |
| 3522 | btrfs_release_path(path); | ||
| 3523 | btrfs_release_path(dst_path); | 3631 | btrfs_release_path(dst_path); |
| 3524 | ret = btrfs_log_changed_extents(trans, root, inode, path, | 3632 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); |
| 3525 | dst_path); | ||
| 3526 | if (ret) { | 3633 | if (ret) { |
| 3527 | err = ret; | 3634 | err = ret; |
| 3528 | goto out_unlock; | 3635 | goto out_unlock; |
| @@ -3531,8 +3638,10 @@ next_slot: | |||
| 3531 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | 3638 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; |
| 3532 | struct extent_map *em, *n; | 3639 | struct extent_map *em, *n; |
| 3533 | 3640 | ||
| 3641 | write_lock(&tree->lock); | ||
| 3534 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) | 3642 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) |
| 3535 | list_del_init(&em->list); | 3643 | list_del_init(&em->list); |
| 3644 | write_unlock(&tree->lock); | ||
| 3536 | } | 3645 | } |
| 3537 | 3646 | ||
| 3538 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 3647 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0f5ebb72a5ea..5cbb7f4b1672 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -25,7 +25,6 @@ | |||
| 25 | #include <linux/capability.h> | 25 | #include <linux/capability.h> |
| 26 | #include <linux/ratelimit.h> | 26 | #include <linux/ratelimit.h> |
| 27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
| 28 | #include <asm/div64.h> | ||
| 29 | #include "compat.h" | 28 | #include "compat.h" |
| 30 | #include "ctree.h" | 29 | #include "ctree.h" |
| 31 | #include "extent_map.h" | 30 | #include "extent_map.h" |
| @@ -36,6 +35,8 @@ | |||
| 36 | #include "async-thread.h" | 35 | #include "async-thread.h" |
| 37 | #include "check-integrity.h" | 36 | #include "check-integrity.h" |
| 38 | #include "rcu-string.h" | 37 | #include "rcu-string.h" |
| 38 | #include "math.h" | ||
| 39 | #include "dev-replace.h" | ||
| 39 | 40 | ||
| 40 | static int init_first_rw_device(struct btrfs_trans_handle *trans, | 41 | static int init_first_rw_device(struct btrfs_trans_handle *trans, |
| 41 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
| @@ -71,6 +72,19 @@ static void free_fs_devices(struct btrfs_fs_devices *fs_devices) | |||
| 71 | kfree(fs_devices); | 72 | kfree(fs_devices); |
| 72 | } | 73 | } |
| 73 | 74 | ||
| 75 | static void btrfs_kobject_uevent(struct block_device *bdev, | ||
| 76 | enum kobject_action action) | ||
| 77 | { | ||
| 78 | int ret; | ||
| 79 | |||
| 80 | ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); | ||
| 81 | if (ret) | ||
| 82 | pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n", | ||
| 83 | action, | ||
| 84 | kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), | ||
| 85 | &disk_to_dev(bdev->bd_disk)->kobj); | ||
| 86 | } | ||
| 87 | |||
| 74 | void btrfs_cleanup_fs_uuids(void) | 88 | void btrfs_cleanup_fs_uuids(void) |
| 75 | { | 89 | { |
| 76 | struct btrfs_fs_devices *fs_devices; | 90 | struct btrfs_fs_devices *fs_devices; |
| @@ -108,6 +122,44 @@ static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid) | |||
| 108 | return NULL; | 122 | return NULL; |
| 109 | } | 123 | } |
| 110 | 124 | ||
| 125 | static int | ||
| 126 | btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, | ||
| 127 | int flush, struct block_device **bdev, | ||
| 128 | struct buffer_head **bh) | ||
| 129 | { | ||
| 130 | int ret; | ||
| 131 | |||
| 132 | *bdev = blkdev_get_by_path(device_path, flags, holder); | ||
| 133 | |||
| 134 | if (IS_ERR(*bdev)) { | ||
| 135 | ret = PTR_ERR(*bdev); | ||
| 136 | printk(KERN_INFO "btrfs: open %s failed\n", device_path); | ||
| 137 | goto error; | ||
| 138 | } | ||
| 139 | |||
| 140 | if (flush) | ||
| 141 | filemap_write_and_wait((*bdev)->bd_inode->i_mapping); | ||
| 142 | ret = set_blocksize(*bdev, 4096); | ||
| 143 | if (ret) { | ||
| 144 | blkdev_put(*bdev, flags); | ||
| 145 | goto error; | ||
| 146 | } | ||
| 147 | invalidate_bdev(*bdev); | ||
| 148 | *bh = btrfs_read_dev_super(*bdev); | ||
| 149 | if (!*bh) { | ||
| 150 | ret = -EINVAL; | ||
| 151 | blkdev_put(*bdev, flags); | ||
| 152 | goto error; | ||
| 153 | } | ||
| 154 | |||
| 155 | return 0; | ||
| 156 | |||
| 157 | error: | ||
| 158 | *bdev = NULL; | ||
| 159 | *bh = NULL; | ||
| 160 | return ret; | ||
| 161 | } | ||
| 162 | |||
| 111 | static void requeue_list(struct btrfs_pending_bios *pending_bios, | 163 | static void requeue_list(struct btrfs_pending_bios *pending_bios, |
| 112 | struct bio *head, struct bio *tail) | 164 | struct bio *head, struct bio *tail) |
| 113 | { | 165 | { |
| @@ -467,7 +519,8 @@ error: | |||
| 467 | return ERR_PTR(-ENOMEM); | 519 | return ERR_PTR(-ENOMEM); |
| 468 | } | 520 | } |
| 469 | 521 | ||
| 470 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) | 522 | void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, |
| 523 | struct btrfs_fs_devices *fs_devices, int step) | ||
| 471 | { | 524 | { |
| 472 | struct btrfs_device *device, *next; | 525 | struct btrfs_device *device, *next; |
| 473 | 526 | ||
| @@ -480,8 +533,9 @@ again: | |||
| 480 | /* This is the initialized path, it is safe to release the devices. */ | 533 | /* This is the initialized path, it is safe to release the devices. */ |
| 481 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { | 534 | list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { |
| 482 | if (device->in_fs_metadata) { | 535 | if (device->in_fs_metadata) { |
| 483 | if (!latest_transid || | 536 | if (!device->is_tgtdev_for_dev_replace && |
| 484 | device->generation > latest_transid) { | 537 | (!latest_transid || |
| 538 | device->generation > latest_transid)) { | ||
| 485 | latest_devid = device->devid; | 539 | latest_devid = device->devid; |
| 486 | latest_transid = device->generation; | 540 | latest_transid = device->generation; |
| 487 | latest_bdev = device->bdev; | 541 | latest_bdev = device->bdev; |
| @@ -489,6 +543,21 @@ again: | |||
| 489 | continue; | 543 | continue; |
| 490 | } | 544 | } |
| 491 | 545 | ||
| 546 | if (device->devid == BTRFS_DEV_REPLACE_DEVID) { | ||
| 547 | /* | ||
| 548 | * In the first step, keep the device which has | ||
| 549 | * the correct fsid and the devid that is used | ||
| 550 | * for the dev_replace procedure. | ||
| 551 | * In the second step, the dev_replace state is | ||
| 552 | * read from the device tree and it is known | ||
| 553 | * whether the procedure is really active or | ||
| 554 | * not, which means whether this device is | ||
| 555 | * used or whether it should be removed. | ||
| 556 | */ | ||
| 557 | if (step == 0 || device->is_tgtdev_for_dev_replace) { | ||
| 558 | continue; | ||
| 559 | } | ||
| 560 | } | ||
| 492 | if (device->bdev) { | 561 | if (device->bdev) { |
| 493 | blkdev_put(device->bdev, device->mode); | 562 | blkdev_put(device->bdev, device->mode); |
| 494 | device->bdev = NULL; | 563 | device->bdev = NULL; |
| @@ -497,7 +566,8 @@ again: | |||
| 497 | if (device->writeable) { | 566 | if (device->writeable) { |
| 498 | list_del_init(&device->dev_alloc_list); | 567 | list_del_init(&device->dev_alloc_list); |
| 499 | device->writeable = 0; | 568 | device->writeable = 0; |
| 500 | fs_devices->rw_devices--; | 569 | if (!device->is_tgtdev_for_dev_replace) |
| 570 | fs_devices->rw_devices--; | ||
| 501 | } | 571 | } |
| 502 | list_del_init(&device->dev_list); | 572 | list_del_init(&device->dev_list); |
| 503 | fs_devices->num_devices--; | 573 | fs_devices->num_devices--; |
| @@ -555,7 +625,7 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) | |||
| 555 | if (device->bdev) | 625 | if (device->bdev) |
| 556 | fs_devices->open_devices--; | 626 | fs_devices->open_devices--; |
| 557 | 627 | ||
| 558 | if (device->writeable) { | 628 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
| 559 | list_del_init(&device->dev_alloc_list); | 629 | list_del_init(&device->dev_alloc_list); |
| 560 | fs_devices->rw_devices--; | 630 | fs_devices->rw_devices--; |
| 561 | } | 631 | } |
| @@ -637,18 +707,10 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 637 | if (!device->name) | 707 | if (!device->name) |
| 638 | continue; | 708 | continue; |
| 639 | 709 | ||
| 640 | bdev = blkdev_get_by_path(device->name->str, flags, holder); | 710 | ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1, |
| 641 | if (IS_ERR(bdev)) { | 711 | &bdev, &bh); |
| 642 | printk(KERN_INFO "btrfs: open %s failed\n", device->name->str); | 712 | if (ret) |
| 643 | goto error; | 713 | continue; |
| 644 | } | ||
| 645 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | ||
| 646 | invalidate_bdev(bdev); | ||
| 647 | set_blocksize(bdev, 4096); | ||
| 648 | |||
| 649 | bh = btrfs_read_dev_super(bdev); | ||
| 650 | if (!bh) | ||
| 651 | goto error_close; | ||
| 652 | 714 | ||
| 653 | disk_super = (struct btrfs_super_block *)bh->b_data; | 715 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 654 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 716 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| @@ -687,7 +749,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 687 | fs_devices->rotating = 1; | 749 | fs_devices->rotating = 1; |
| 688 | 750 | ||
| 689 | fs_devices->open_devices++; | 751 | fs_devices->open_devices++; |
| 690 | if (device->writeable) { | 752 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
| 691 | fs_devices->rw_devices++; | 753 | fs_devices->rw_devices++; |
| 692 | list_add(&device->dev_alloc_list, | 754 | list_add(&device->dev_alloc_list, |
| 693 | &fs_devices->alloc_list); | 755 | &fs_devices->alloc_list); |
| @@ -697,9 +759,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 697 | 759 | ||
| 698 | error_brelse: | 760 | error_brelse: |
| 699 | brelse(bh); | 761 | brelse(bh); |
| 700 | error_close: | ||
| 701 | blkdev_put(bdev, flags); | 762 | blkdev_put(bdev, flags); |
| 702 | error: | ||
| 703 | continue; | 763 | continue; |
| 704 | } | 764 | } |
| 705 | if (fs_devices->open_devices == 0) { | 765 | if (fs_devices->open_devices == 0) { |
| @@ -744,40 +804,30 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
| 744 | u64 total_devices; | 804 | u64 total_devices; |
| 745 | 805 | ||
| 746 | flags |= FMODE_EXCL; | 806 | flags |= FMODE_EXCL; |
| 747 | bdev = blkdev_get_by_path(path, flags, holder); | ||
| 748 | |||
| 749 | if (IS_ERR(bdev)) { | ||
| 750 | ret = PTR_ERR(bdev); | ||
| 751 | goto error; | ||
| 752 | } | ||
| 753 | |||
| 754 | mutex_lock(&uuid_mutex); | 807 | mutex_lock(&uuid_mutex); |
| 755 | ret = set_blocksize(bdev, 4096); | 808 | ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh); |
| 756 | if (ret) | 809 | if (ret) |
| 757 | goto error_close; | 810 | goto error; |
| 758 | bh = btrfs_read_dev_super(bdev); | ||
| 759 | if (!bh) { | ||
| 760 | ret = -EINVAL; | ||
| 761 | goto error_close; | ||
| 762 | } | ||
| 763 | disk_super = (struct btrfs_super_block *)bh->b_data; | 811 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 764 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 812 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| 765 | transid = btrfs_super_generation(disk_super); | 813 | transid = btrfs_super_generation(disk_super); |
| 766 | total_devices = btrfs_super_num_devices(disk_super); | 814 | total_devices = btrfs_super_num_devices(disk_super); |
| 767 | if (disk_super->label[0]) | 815 | if (disk_super->label[0]) { |
| 816 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) | ||
| 817 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; | ||
| 768 | printk(KERN_INFO "device label %s ", disk_super->label); | 818 | printk(KERN_INFO "device label %s ", disk_super->label); |
| 769 | else | 819 | } else { |
| 770 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); | 820 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); |
| 821 | } | ||
| 771 | printk(KERN_CONT "devid %llu transid %llu %s\n", | 822 | printk(KERN_CONT "devid %llu transid %llu %s\n", |
| 772 | (unsigned long long)devid, (unsigned long long)transid, path); | 823 | (unsigned long long)devid, (unsigned long long)transid, path); |
| 773 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 824 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
| 774 | if (!ret && fs_devices_ret) | 825 | if (!ret && fs_devices_ret) |
| 775 | (*fs_devices_ret)->total_devices = total_devices; | 826 | (*fs_devices_ret)->total_devices = total_devices; |
| 776 | brelse(bh); | 827 | brelse(bh); |
| 777 | error_close: | ||
| 778 | mutex_unlock(&uuid_mutex); | ||
| 779 | blkdev_put(bdev, flags); | 828 | blkdev_put(bdev, flags); |
| 780 | error: | 829 | error: |
| 830 | mutex_unlock(&uuid_mutex); | ||
| 781 | return ret; | 831 | return ret; |
| 782 | } | 832 | } |
| 783 | 833 | ||
| @@ -796,7 +846,7 @@ int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start, | |||
| 796 | 846 | ||
| 797 | *length = 0; | 847 | *length = 0; |
| 798 | 848 | ||
| 799 | if (start >= device->total_bytes) | 849 | if (start >= device->total_bytes || device->is_tgtdev_for_dev_replace) |
| 800 | return 0; | 850 | return 0; |
| 801 | 851 | ||
| 802 | path = btrfs_alloc_path(); | 852 | path = btrfs_alloc_path(); |
| @@ -913,7 +963,7 @@ int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes, | |||
| 913 | max_hole_size = 0; | 963 | max_hole_size = 0; |
| 914 | hole_size = 0; | 964 | hole_size = 0; |
| 915 | 965 | ||
| 916 | if (search_start >= search_end) { | 966 | if (search_start >= search_end || device->is_tgtdev_for_dev_replace) { |
| 917 | ret = -ENOSPC; | 967 | ret = -ENOSPC; |
| 918 | goto error; | 968 | goto error; |
| 919 | } | 969 | } |
| @@ -1096,6 +1146,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | |||
| 1096 | struct btrfs_key key; | 1146 | struct btrfs_key key; |
| 1097 | 1147 | ||
| 1098 | WARN_ON(!device->in_fs_metadata); | 1148 | WARN_ON(!device->in_fs_metadata); |
| 1149 | WARN_ON(device->is_tgtdev_for_dev_replace); | ||
| 1099 | path = btrfs_alloc_path(); | 1150 | path = btrfs_alloc_path(); |
| 1100 | if (!path) | 1151 | if (!path) |
| 1101 | return -ENOMEM; | 1152 | return -ENOMEM; |
| @@ -1330,16 +1381,22 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1330 | root->fs_info->avail_system_alloc_bits | | 1381 | root->fs_info->avail_system_alloc_bits | |
| 1331 | root->fs_info->avail_metadata_alloc_bits; | 1382 | root->fs_info->avail_metadata_alloc_bits; |
| 1332 | 1383 | ||
| 1333 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && | 1384 | num_devices = root->fs_info->fs_devices->num_devices; |
| 1334 | root->fs_info->fs_devices->num_devices <= 4) { | 1385 | btrfs_dev_replace_lock(&root->fs_info->dev_replace); |
| 1386 | if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) { | ||
| 1387 | WARN_ON(num_devices < 1); | ||
| 1388 | num_devices--; | ||
| 1389 | } | ||
| 1390 | btrfs_dev_replace_unlock(&root->fs_info->dev_replace); | ||
| 1391 | |||
| 1392 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) { | ||
| 1335 | printk(KERN_ERR "btrfs: unable to go below four devices " | 1393 | printk(KERN_ERR "btrfs: unable to go below four devices " |
| 1336 | "on raid10\n"); | 1394 | "on raid10\n"); |
| 1337 | ret = -EINVAL; | 1395 | ret = -EINVAL; |
| 1338 | goto out; | 1396 | goto out; |
| 1339 | } | 1397 | } |
| 1340 | 1398 | ||
| 1341 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && | 1399 | if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && num_devices <= 2) { |
| 1342 | root->fs_info->fs_devices->num_devices <= 2) { | ||
| 1343 | printk(KERN_ERR "btrfs: unable to go below two " | 1400 | printk(KERN_ERR "btrfs: unable to go below two " |
| 1344 | "devices on raid1\n"); | 1401 | "devices on raid1\n"); |
| 1345 | ret = -EINVAL; | 1402 | ret = -EINVAL; |
| @@ -1357,7 +1414,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1357 | * is held. | 1414 | * is held. |
| 1358 | */ | 1415 | */ |
| 1359 | list_for_each_entry(tmp, devices, dev_list) { | 1416 | list_for_each_entry(tmp, devices, dev_list) { |
| 1360 | if (tmp->in_fs_metadata && !tmp->bdev) { | 1417 | if (tmp->in_fs_metadata && |
| 1418 | !tmp->is_tgtdev_for_dev_replace && | ||
| 1419 | !tmp->bdev) { | ||
| 1361 | device = tmp; | 1420 | device = tmp; |
| 1362 | break; | 1421 | break; |
| 1363 | } | 1422 | } |
| @@ -1371,24 +1430,16 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1371 | goto out; | 1430 | goto out; |
| 1372 | } | 1431 | } |
| 1373 | } else { | 1432 | } else { |
| 1374 | bdev = blkdev_get_by_path(device_path, FMODE_READ | FMODE_EXCL, | 1433 | ret = btrfs_get_bdev_and_sb(device_path, |
| 1375 | root->fs_info->bdev_holder); | 1434 | FMODE_WRITE | FMODE_EXCL, |
| 1376 | if (IS_ERR(bdev)) { | 1435 | root->fs_info->bdev_holder, 0, |
| 1377 | ret = PTR_ERR(bdev); | 1436 | &bdev, &bh); |
| 1437 | if (ret) | ||
| 1378 | goto out; | 1438 | goto out; |
| 1379 | } | ||
| 1380 | |||
| 1381 | set_blocksize(bdev, 4096); | ||
| 1382 | invalidate_bdev(bdev); | ||
| 1383 | bh = btrfs_read_dev_super(bdev); | ||
| 1384 | if (!bh) { | ||
| 1385 | ret = -EINVAL; | ||
| 1386 | goto error_close; | ||
| 1387 | } | ||
| 1388 | disk_super = (struct btrfs_super_block *)bh->b_data; | 1439 | disk_super = (struct btrfs_super_block *)bh->b_data; |
| 1389 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 1440 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
| 1390 | dev_uuid = disk_super->dev_item.uuid; | 1441 | dev_uuid = disk_super->dev_item.uuid; |
| 1391 | device = btrfs_find_device(root, devid, dev_uuid, | 1442 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, |
| 1392 | disk_super->fsid); | 1443 | disk_super->fsid); |
| 1393 | if (!device) { | 1444 | if (!device) { |
| 1394 | ret = -ENOENT; | 1445 | ret = -ENOENT; |
| @@ -1396,6 +1447,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1396 | } | 1447 | } |
| 1397 | } | 1448 | } |
| 1398 | 1449 | ||
| 1450 | if (device->is_tgtdev_for_dev_replace) { | ||
| 1451 | pr_err("btrfs: unable to remove the dev_replace target dev\n"); | ||
| 1452 | ret = -EINVAL; | ||
| 1453 | goto error_brelse; | ||
| 1454 | } | ||
| 1455 | |||
| 1399 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { | 1456 | if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { |
| 1400 | printk(KERN_ERR "btrfs: unable to remove the only writeable " | 1457 | printk(KERN_ERR "btrfs: unable to remove the only writeable " |
| 1401 | "device\n"); | 1458 | "device\n"); |
| @@ -1415,6 +1472,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1415 | if (ret) | 1472 | if (ret) |
| 1416 | goto error_undo; | 1473 | goto error_undo; |
| 1417 | 1474 | ||
| 1475 | /* | ||
| 1476 | * TODO: the superblock still includes this device in its num_devices | ||
| 1477 | * counter although write_all_supers() is not locked out. This | ||
| 1478 | * could give a filesystem state which requires a degraded mount. | ||
| 1479 | */ | ||
| 1418 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); | 1480 | ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); |
| 1419 | if (ret) | 1481 | if (ret) |
| 1420 | goto error_undo; | 1482 | goto error_undo; |
| @@ -1425,7 +1487,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1425 | spin_unlock(&root->fs_info->free_chunk_lock); | 1487 | spin_unlock(&root->fs_info->free_chunk_lock); |
| 1426 | 1488 | ||
| 1427 | device->in_fs_metadata = 0; | 1489 | device->in_fs_metadata = 0; |
| 1428 | btrfs_scrub_cancel_dev(root, device); | 1490 | btrfs_scrub_cancel_dev(root->fs_info, device); |
| 1429 | 1491 | ||
| 1430 | /* | 1492 | /* |
| 1431 | * the device list mutex makes sure that we don't change | 1493 | * the device list mutex makes sure that we don't change |
| @@ -1482,7 +1544,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1482 | * at this point, the device is zero sized. We want to | 1544 | * at this point, the device is zero sized. We want to |
| 1483 | * remove it from the devices list and zero out the old super | 1545 | * remove it from the devices list and zero out the old super |
| 1484 | */ | 1546 | */ |
| 1485 | if (clear_super) { | 1547 | if (clear_super && disk_super) { |
| 1486 | /* make sure this device isn't detected as part of | 1548 | /* make sure this device isn't detected as part of |
| 1487 | * the FS anymore | 1549 | * the FS anymore |
| 1488 | */ | 1550 | */ |
| @@ -1493,9 +1555,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
| 1493 | 1555 | ||
| 1494 | ret = 0; | 1556 | ret = 0; |
| 1495 | 1557 | ||
| 1558 | /* Notify udev that device has changed */ | ||
| 1559 | if (bdev) | ||
| 1560 | btrfs_kobject_uevent(bdev, KOBJ_CHANGE); | ||
| 1561 | |||
| 1496 | error_brelse: | 1562 | error_brelse: |
| 1497 | brelse(bh); | 1563 | brelse(bh); |
| 1498 | error_close: | ||
| 1499 | if (bdev) | 1564 | if (bdev) |
| 1500 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); | 1565 | blkdev_put(bdev, FMODE_READ | FMODE_EXCL); |
| 1501 | out: | 1566 | out: |
| @@ -1512,6 +1577,112 @@ error_undo: | |||
| 1512 | goto error_brelse; | 1577 | goto error_brelse; |
| 1513 | } | 1578 | } |
| 1514 | 1579 | ||
| 1580 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | ||
| 1581 | struct btrfs_device *srcdev) | ||
| 1582 | { | ||
| 1583 | WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); | ||
| 1584 | list_del_rcu(&srcdev->dev_list); | ||
| 1585 | list_del_rcu(&srcdev->dev_alloc_list); | ||
| 1586 | fs_info->fs_devices->num_devices--; | ||
| 1587 | if (srcdev->missing) { | ||
| 1588 | fs_info->fs_devices->missing_devices--; | ||
| 1589 | fs_info->fs_devices->rw_devices++; | ||
| 1590 | } | ||
| 1591 | if (srcdev->can_discard) | ||
| 1592 | fs_info->fs_devices->num_can_discard--; | ||
| 1593 | if (srcdev->bdev) | ||
| 1594 | fs_info->fs_devices->open_devices--; | ||
| 1595 | |||
| 1596 | call_rcu(&srcdev->rcu, free_device); | ||
| 1597 | } | ||
| 1598 | |||
| 1599 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | ||
| 1600 | struct btrfs_device *tgtdev) | ||
| 1601 | { | ||
| 1602 | struct btrfs_device *next_device; | ||
| 1603 | |||
| 1604 | WARN_ON(!tgtdev); | ||
| 1605 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
| 1606 | if (tgtdev->bdev) { | ||
| 1607 | btrfs_scratch_superblock(tgtdev); | ||
| 1608 | fs_info->fs_devices->open_devices--; | ||
| 1609 | } | ||
| 1610 | fs_info->fs_devices->num_devices--; | ||
| 1611 | if (tgtdev->can_discard) | ||
| 1612 | fs_info->fs_devices->num_can_discard++; | ||
| 1613 | |||
| 1614 | next_device = list_entry(fs_info->fs_devices->devices.next, | ||
| 1615 | struct btrfs_device, dev_list); | ||
| 1616 | if (tgtdev->bdev == fs_info->sb->s_bdev) | ||
| 1617 | fs_info->sb->s_bdev = next_device->bdev; | ||
| 1618 | if (tgtdev->bdev == fs_info->fs_devices->latest_bdev) | ||
| 1619 | fs_info->fs_devices->latest_bdev = next_device->bdev; | ||
| 1620 | list_del_rcu(&tgtdev->dev_list); | ||
| 1621 | |||
| 1622 | call_rcu(&tgtdev->rcu, free_device); | ||
| 1623 | |||
| 1624 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
| 1625 | } | ||
| 1626 | |||
| 1627 | int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | ||
| 1628 | struct btrfs_device **device) | ||
| 1629 | { | ||
| 1630 | int ret = 0; | ||
| 1631 | struct btrfs_super_block *disk_super; | ||
| 1632 | u64 devid; | ||
| 1633 | u8 *dev_uuid; | ||
| 1634 | struct block_device *bdev; | ||
| 1635 | struct buffer_head *bh; | ||
| 1636 | |||
| 1637 | *device = NULL; | ||
| 1638 | ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ, | ||
| 1639 | root->fs_info->bdev_holder, 0, &bdev, &bh); | ||
| 1640 | if (ret) | ||
| 1641 | return ret; | ||
| 1642 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 1643 | devid = btrfs_stack_device_id(&disk_super->dev_item); | ||
| 1644 | dev_uuid = disk_super->dev_item.uuid; | ||
| 1645 | *device = btrfs_find_device(root->fs_info, devid, dev_uuid, | ||
| 1646 | disk_super->fsid); | ||
| 1647 | brelse(bh); | ||
| 1648 | if (!*device) | ||
| 1649 | ret = -ENOENT; | ||
| 1650 | blkdev_put(bdev, FMODE_READ); | ||
| 1651 | return ret; | ||
| 1652 | } | ||
| 1653 | |||
| 1654 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | ||
| 1655 | char *device_path, | ||
| 1656 | struct btrfs_device **device) | ||
| 1657 | { | ||
| 1658 | *device = NULL; | ||
| 1659 | if (strcmp(device_path, "missing") == 0) { | ||
| 1660 | struct list_head *devices; | ||
| 1661 | struct btrfs_device *tmp; | ||
| 1662 | |||
| 1663 | devices = &root->fs_info->fs_devices->devices; | ||
| 1664 | /* | ||
| 1665 | * It is safe to read the devices since the volume_mutex | ||
| 1666 | * is held by the caller. | ||
| 1667 | */ | ||
| 1668 | list_for_each_entry(tmp, devices, dev_list) { | ||
| 1669 | if (tmp->in_fs_metadata && !tmp->bdev) { | ||
| 1670 | *device = tmp; | ||
| 1671 | break; | ||
| 1672 | } | ||
| 1673 | } | ||
| 1674 | |||
| 1675 | if (!*device) { | ||
| 1676 | pr_err("btrfs: no missing device found\n"); | ||
| 1677 | return -ENOENT; | ||
| 1678 | } | ||
| 1679 | |||
| 1680 | return 0; | ||
| 1681 | } else { | ||
| 1682 | return btrfs_find_device_by_path(root, device_path, device); | ||
| 1683 | } | ||
| 1684 | } | ||
| 1685 | |||
| 1515 | /* | 1686 | /* |
| 1516 | * does all the dirty work required for changing file system's UUID. | 1687 | * does all the dirty work required for changing file system's UUID. |
| 1517 | */ | 1688 | */ |
| @@ -1630,7 +1801,8 @@ next_slot: | |||
| 1630 | read_extent_buffer(leaf, fs_uuid, | 1801 | read_extent_buffer(leaf, fs_uuid, |
| 1631 | (unsigned long)btrfs_device_fsid(dev_item), | 1802 | (unsigned long)btrfs_device_fsid(dev_item), |
| 1632 | BTRFS_UUID_SIZE); | 1803 | BTRFS_UUID_SIZE); |
| 1633 | device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); | 1804 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, |
| 1805 | fs_uuid); | ||
| 1634 | BUG_ON(!device); /* Logic error */ | 1806 | BUG_ON(!device); /* Logic error */ |
| 1635 | 1807 | ||
| 1636 | if (device->fs_devices->seeding) { | 1808 | if (device->fs_devices->seeding) { |
| @@ -1678,16 +1850,17 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1678 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | 1850 | filemap_write_and_wait(bdev->bd_inode->i_mapping); |
| 1679 | 1851 | ||
| 1680 | devices = &root->fs_info->fs_devices->devices; | 1852 | devices = &root->fs_info->fs_devices->devices; |
| 1681 | /* | 1853 | |
| 1682 | * we have the volume lock, so we don't need the extra | 1854 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
| 1683 | * device list mutex while reading the list here. | ||
| 1684 | */ | ||
| 1685 | list_for_each_entry(device, devices, dev_list) { | 1855 | list_for_each_entry(device, devices, dev_list) { |
| 1686 | if (device->bdev == bdev) { | 1856 | if (device->bdev == bdev) { |
| 1687 | ret = -EEXIST; | 1857 | ret = -EEXIST; |
| 1858 | mutex_unlock( | ||
| 1859 | &root->fs_info->fs_devices->device_list_mutex); | ||
| 1688 | goto error; | 1860 | goto error; |
| 1689 | } | 1861 | } |
| 1690 | } | 1862 | } |
| 1863 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 1691 | 1864 | ||
| 1692 | device = kzalloc(sizeof(*device), GFP_NOFS); | 1865 | device = kzalloc(sizeof(*device), GFP_NOFS); |
| 1693 | if (!device) { | 1866 | if (!device) { |
| @@ -1737,6 +1910,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1737 | device->dev_root = root->fs_info->dev_root; | 1910 | device->dev_root = root->fs_info->dev_root; |
| 1738 | device->bdev = bdev; | 1911 | device->bdev = bdev; |
| 1739 | device->in_fs_metadata = 1; | 1912 | device->in_fs_metadata = 1; |
| 1913 | device->is_tgtdev_for_dev_replace = 0; | ||
| 1740 | device->mode = FMODE_EXCL; | 1914 | device->mode = FMODE_EXCL; |
| 1741 | set_blocksize(device->bdev, 4096); | 1915 | set_blocksize(device->bdev, 4096); |
| 1742 | 1916 | ||
| @@ -1844,6 +2018,98 @@ error: | |||
| 1844 | return ret; | 2018 | return ret; |
| 1845 | } | 2019 | } |
| 1846 | 2020 | ||
| 2021 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | ||
| 2022 | struct btrfs_device **device_out) | ||
| 2023 | { | ||
| 2024 | struct request_queue *q; | ||
| 2025 | struct btrfs_device *device; | ||
| 2026 | struct block_device *bdev; | ||
| 2027 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 2028 | struct list_head *devices; | ||
| 2029 | struct rcu_string *name; | ||
| 2030 | int ret = 0; | ||
| 2031 | |||
| 2032 | *device_out = NULL; | ||
| 2033 | if (fs_info->fs_devices->seeding) | ||
| 2034 | return -EINVAL; | ||
| 2035 | |||
| 2036 | bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL, | ||
| 2037 | fs_info->bdev_holder); | ||
| 2038 | if (IS_ERR(bdev)) | ||
| 2039 | return PTR_ERR(bdev); | ||
| 2040 | |||
| 2041 | filemap_write_and_wait(bdev->bd_inode->i_mapping); | ||
| 2042 | |||
| 2043 | devices = &fs_info->fs_devices->devices; | ||
| 2044 | list_for_each_entry(device, devices, dev_list) { | ||
| 2045 | if (device->bdev == bdev) { | ||
| 2046 | ret = -EEXIST; | ||
| 2047 | goto error; | ||
| 2048 | } | ||
| 2049 | } | ||
| 2050 | |||
| 2051 | device = kzalloc(sizeof(*device), GFP_NOFS); | ||
| 2052 | if (!device) { | ||
| 2053 | ret = -ENOMEM; | ||
| 2054 | goto error; | ||
| 2055 | } | ||
| 2056 | |||
| 2057 | name = rcu_string_strdup(device_path, GFP_NOFS); | ||
| 2058 | if (!name) { | ||
| 2059 | kfree(device); | ||
| 2060 | ret = -ENOMEM; | ||
| 2061 | goto error; | ||
| 2062 | } | ||
| 2063 | rcu_assign_pointer(device->name, name); | ||
| 2064 | |||
| 2065 | q = bdev_get_queue(bdev); | ||
| 2066 | if (blk_queue_discard(q)) | ||
| 2067 | device->can_discard = 1; | ||
| 2068 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 2069 | device->writeable = 1; | ||
| 2070 | device->work.func = pending_bios_fn; | ||
| 2071 | generate_random_uuid(device->uuid); | ||
| 2072 | device->devid = BTRFS_DEV_REPLACE_DEVID; | ||
| 2073 | spin_lock_init(&device->io_lock); | ||
| 2074 | device->generation = 0; | ||
| 2075 | device->io_width = root->sectorsize; | ||
| 2076 | device->io_align = root->sectorsize; | ||
| 2077 | device->sector_size = root->sectorsize; | ||
| 2078 | device->total_bytes = i_size_read(bdev->bd_inode); | ||
| 2079 | device->disk_total_bytes = device->total_bytes; | ||
| 2080 | device->dev_root = fs_info->dev_root; | ||
| 2081 | device->bdev = bdev; | ||
| 2082 | device->in_fs_metadata = 1; | ||
| 2083 | device->is_tgtdev_for_dev_replace = 1; | ||
| 2084 | device->mode = FMODE_EXCL; | ||
| 2085 | set_blocksize(device->bdev, 4096); | ||
| 2086 | device->fs_devices = fs_info->fs_devices; | ||
| 2087 | list_add(&device->dev_list, &fs_info->fs_devices->devices); | ||
| 2088 | fs_info->fs_devices->num_devices++; | ||
| 2089 | fs_info->fs_devices->open_devices++; | ||
| 2090 | if (device->can_discard) | ||
| 2091 | fs_info->fs_devices->num_can_discard++; | ||
| 2092 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
| 2093 | |||
| 2094 | *device_out = device; | ||
| 2095 | return ret; | ||
| 2096 | |||
| 2097 | error: | ||
| 2098 | blkdev_put(bdev, FMODE_EXCL); | ||
| 2099 | return ret; | ||
| 2100 | } | ||
| 2101 | |||
| 2102 | void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, | ||
| 2103 | struct btrfs_device *tgtdev) | ||
| 2104 | { | ||
| 2105 | WARN_ON(fs_info->fs_devices->rw_devices == 0); | ||
| 2106 | tgtdev->io_width = fs_info->dev_root->sectorsize; | ||
| 2107 | tgtdev->io_align = fs_info->dev_root->sectorsize; | ||
| 2108 | tgtdev->sector_size = fs_info->dev_root->sectorsize; | ||
| 2109 | tgtdev->dev_root = fs_info->dev_root; | ||
| 2110 | tgtdev->in_fs_metadata = 1; | ||
| 2111 | } | ||
| 2112 | |||
| 1847 | static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, | 2113 | static noinline int btrfs_update_device(struct btrfs_trans_handle *trans, |
| 1848 | struct btrfs_device *device) | 2114 | struct btrfs_device *device) |
| 1849 | { | 2115 | { |
| @@ -1900,7 +2166,8 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, | |||
| 1900 | 2166 | ||
| 1901 | if (!device->writeable) | 2167 | if (!device->writeable) |
| 1902 | return -EACCES; | 2168 | return -EACCES; |
| 1903 | if (new_size <= device->total_bytes) | 2169 | if (new_size <= device->total_bytes || |
| 2170 | device->is_tgtdev_for_dev_replace) | ||
| 1904 | return -EINVAL; | 2171 | return -EINVAL; |
| 1905 | 2172 | ||
| 1906 | btrfs_set_super_total_bytes(super_copy, old_total + diff); | 2173 | btrfs_set_super_total_bytes(super_copy, old_total + diff); |
| @@ -2338,18 +2605,6 @@ static int chunk_profiles_filter(u64 chunk_type, | |||
| 2338 | return 1; | 2605 | return 1; |
| 2339 | } | 2606 | } |
| 2340 | 2607 | ||
| 2341 | static u64 div_factor_fine(u64 num, int factor) | ||
| 2342 | { | ||
| 2343 | if (factor <= 0) | ||
| 2344 | return 0; | ||
| 2345 | if (factor >= 100) | ||
| 2346 | return num; | ||
| 2347 | |||
| 2348 | num *= factor; | ||
| 2349 | do_div(num, 100); | ||
| 2350 | return num; | ||
| 2351 | } | ||
| 2352 | |||
| 2353 | static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, | 2608 | static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, |
| 2354 | struct btrfs_balance_args *bargs) | 2609 | struct btrfs_balance_args *bargs) |
| 2355 | { | 2610 | { |
| @@ -2360,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, | |||
| 2360 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); | 2615 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); |
| 2361 | chunk_used = btrfs_block_group_used(&cache->item); | 2616 | chunk_used = btrfs_block_group_used(&cache->item); |
| 2362 | 2617 | ||
| 2363 | user_thresh = div_factor_fine(cache->key.offset, bargs->usage); | 2618 | if (bargs->usage == 0) |
| 2619 | user_thresh = 0; | ||
| 2620 | else if (bargs->usage > 100) | ||
| 2621 | user_thresh = cache->key.offset; | ||
| 2622 | else | ||
| 2623 | user_thresh = div_factor_fine(cache->key.offset, | ||
| 2624 | bargs->usage); | ||
| 2625 | |||
| 2364 | if (chunk_used < user_thresh) | 2626 | if (chunk_used < user_thresh) |
| 2365 | ret = 0; | 2627 | ret = 0; |
| 2366 | 2628 | ||
| @@ -2514,15 +2776,6 @@ static int should_balance_chunk(struct btrfs_root *root, | |||
| 2514 | return 1; | 2776 | return 1; |
| 2515 | } | 2777 | } |
| 2516 | 2778 | ||
| 2517 | static u64 div_factor(u64 num, int factor) | ||
| 2518 | { | ||
| 2519 | if (factor == 10) | ||
| 2520 | return num; | ||
| 2521 | num *= factor; | ||
| 2522 | do_div(num, 10); | ||
| 2523 | return num; | ||
| 2524 | } | ||
| 2525 | |||
| 2526 | static int __btrfs_balance(struct btrfs_fs_info *fs_info) | 2779 | static int __btrfs_balance(struct btrfs_fs_info *fs_info) |
| 2527 | { | 2780 | { |
| 2528 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; | 2781 | struct btrfs_balance_control *bctl = fs_info->balance_ctl; |
| @@ -2550,7 +2803,8 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) | |||
| 2550 | size_to_free = div_factor(old_size, 1); | 2803 | size_to_free = div_factor(old_size, 1); |
| 2551 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); | 2804 | size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); |
| 2552 | if (!device->writeable || | 2805 | if (!device->writeable || |
| 2553 | device->total_bytes - device->bytes_used > size_to_free) | 2806 | device->total_bytes - device->bytes_used > size_to_free || |
| 2807 | device->is_tgtdev_for_dev_replace) | ||
| 2554 | continue; | 2808 | continue; |
| 2555 | 2809 | ||
| 2556 | ret = btrfs_shrink_device(device, old_size - size_to_free); | 2810 | ret = btrfs_shrink_device(device, old_size - size_to_free); |
| @@ -2713,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) | |||
| 2713 | unset_balance_control(fs_info); | 2967 | unset_balance_control(fs_info); |
| 2714 | ret = del_balance_item(fs_info->tree_root); | 2968 | ret = del_balance_item(fs_info->tree_root); |
| 2715 | BUG_ON(ret); | 2969 | BUG_ON(ret); |
| 2970 | |||
| 2971 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
| 2716 | } | 2972 | } |
| 2717 | 2973 | ||
| 2718 | void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, | 2974 | void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, |
| @@ -2728,6 +2984,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2728 | u64 allowed; | 2984 | u64 allowed; |
| 2729 | int mixed = 0; | 2985 | int mixed = 0; |
| 2730 | int ret; | 2986 | int ret; |
| 2987 | u64 num_devices; | ||
| 2731 | 2988 | ||
| 2732 | if (btrfs_fs_closing(fs_info) || | 2989 | if (btrfs_fs_closing(fs_info) || |
| 2733 | atomic_read(&fs_info->balance_pause_req) || | 2990 | atomic_read(&fs_info->balance_pause_req) || |
| @@ -2756,10 +3013,17 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2756 | } | 3013 | } |
| 2757 | } | 3014 | } |
| 2758 | 3015 | ||
| 3016 | num_devices = fs_info->fs_devices->num_devices; | ||
| 3017 | btrfs_dev_replace_lock(&fs_info->dev_replace); | ||
| 3018 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) { | ||
| 3019 | BUG_ON(num_devices < 1); | ||
| 3020 | num_devices--; | ||
| 3021 | } | ||
| 3022 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 2759 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; | 3023 | allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE; |
| 2760 | if (fs_info->fs_devices->num_devices == 1) | 3024 | if (num_devices == 1) |
| 2761 | allowed |= BTRFS_BLOCK_GROUP_DUP; | 3025 | allowed |= BTRFS_BLOCK_GROUP_DUP; |
| 2762 | else if (fs_info->fs_devices->num_devices < 4) | 3026 | else if (num_devices < 4) |
| 2763 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); | 3027 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1); |
| 2764 | else | 3028 | else |
| 2765 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | | 3029 | allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1 | |
| @@ -2884,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
| 2884 | out: | 3148 | out: |
| 2885 | if (bctl->flags & BTRFS_BALANCE_RESUME) | 3149 | if (bctl->flags & BTRFS_BALANCE_RESUME) |
| 2886 | __cancel_balance(fs_info); | 3150 | __cancel_balance(fs_info); |
| 2887 | else | 3151 | else { |
| 2888 | kfree(bctl); | 3152 | kfree(bctl); |
| 3153 | atomic_set(&fs_info->mutually_exclusive_operation_running, 0); | ||
| 3154 | } | ||
| 2889 | return ret; | 3155 | return ret; |
| 2890 | } | 3156 | } |
| 2891 | 3157 | ||
| @@ -2977,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info) | |||
| 2977 | btrfs_balance_sys(leaf, item, &disk_bargs); | 3243 | btrfs_balance_sys(leaf, item, &disk_bargs); |
| 2978 | btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); | 3244 | btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); |
| 2979 | 3245 | ||
| 3246 | WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)); | ||
| 3247 | |||
| 2980 | mutex_lock(&fs_info->volume_mutex); | 3248 | mutex_lock(&fs_info->volume_mutex); |
| 2981 | mutex_lock(&fs_info->balance_mutex); | 3249 | mutex_lock(&fs_info->balance_mutex); |
| 2982 | 3250 | ||
| @@ -3080,7 +3348,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) | |||
| 3080 | u64 old_size = device->total_bytes; | 3348 | u64 old_size = device->total_bytes; |
| 3081 | u64 diff = device->total_bytes - new_size; | 3349 | u64 diff = device->total_bytes - new_size; |
| 3082 | 3350 | ||
| 3083 | if (new_size >= device->total_bytes) | 3351 | if (device->is_tgtdev_for_dev_replace) |
| 3084 | return -EINVAL; | 3352 | return -EINVAL; |
| 3085 | 3353 | ||
| 3086 | path = btrfs_alloc_path(); | 3354 | path = btrfs_alloc_path(); |
| @@ -3235,6 +3503,14 @@ static int btrfs_cmp_device_info(const void *a, const void *b) | |||
| 3235 | return 0; | 3503 | return 0; |
| 3236 | } | 3504 | } |
| 3237 | 3505 | ||
| 3506 | struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { | ||
| 3507 | { 2, 1, 0, 4, 2, 2 /* raid10 */ }, | ||
| 3508 | { 1, 1, 2, 2, 2, 2 /* raid1 */ }, | ||
| 3509 | { 1, 2, 1, 1, 1, 2 /* dup */ }, | ||
| 3510 | { 1, 1, 0, 2, 1, 1 /* raid0 */ }, | ||
| 3511 | { 1, 1, 1, 1, 1, 1 /* single */ }, | ||
| 3512 | }; | ||
| 3513 | |||
| 3238 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 3514 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
| 3239 | struct btrfs_root *extent_root, | 3515 | struct btrfs_root *extent_root, |
| 3240 | struct map_lookup **map_ret, | 3516 | struct map_lookup **map_ret, |
| @@ -3264,43 +3540,21 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3264 | int ndevs; | 3540 | int ndevs; |
| 3265 | int i; | 3541 | int i; |
| 3266 | int j; | 3542 | int j; |
| 3543 | int index; | ||
| 3267 | 3544 | ||
| 3268 | BUG_ON(!alloc_profile_is_valid(type, 0)); | 3545 | BUG_ON(!alloc_profile_is_valid(type, 0)); |
| 3269 | 3546 | ||
| 3270 | if (list_empty(&fs_devices->alloc_list)) | 3547 | if (list_empty(&fs_devices->alloc_list)) |
| 3271 | return -ENOSPC; | 3548 | return -ENOSPC; |
| 3272 | 3549 | ||
| 3273 | sub_stripes = 1; | 3550 | index = __get_raid_index(type); |
| 3274 | dev_stripes = 1; | ||
| 3275 | devs_increment = 1; | ||
| 3276 | ncopies = 1; | ||
| 3277 | devs_max = 0; /* 0 == as many as possible */ | ||
| 3278 | devs_min = 1; | ||
| 3279 | 3551 | ||
| 3280 | /* | 3552 | sub_stripes = btrfs_raid_array[index].sub_stripes; |
| 3281 | * define the properties of each RAID type. | 3553 | dev_stripes = btrfs_raid_array[index].dev_stripes; |
| 3282 | * FIXME: move this to a global table and use it in all RAID | 3554 | devs_max = btrfs_raid_array[index].devs_max; |
| 3283 | * calculation code | 3555 | devs_min = btrfs_raid_array[index].devs_min; |
| 3284 | */ | 3556 | devs_increment = btrfs_raid_array[index].devs_increment; |
| 3285 | if (type & (BTRFS_BLOCK_GROUP_DUP)) { | 3557 | ncopies = btrfs_raid_array[index].ncopies; |
| 3286 | dev_stripes = 2; | ||
| 3287 | ncopies = 2; | ||
| 3288 | devs_max = 1; | ||
| 3289 | } else if (type & (BTRFS_BLOCK_GROUP_RAID0)) { | ||
| 3290 | devs_min = 2; | ||
| 3291 | } else if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 3292 | devs_increment = 2; | ||
| 3293 | ncopies = 2; | ||
| 3294 | devs_max = 2; | ||
| 3295 | devs_min = 2; | ||
| 3296 | } else if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | ||
| 3297 | sub_stripes = 2; | ||
| 3298 | devs_increment = 2; | ||
| 3299 | ncopies = 2; | ||
| 3300 | devs_min = 4; | ||
| 3301 | } else { | ||
| 3302 | devs_max = 1; | ||
| 3303 | } | ||
| 3304 | 3558 | ||
| 3305 | if (type & BTRFS_BLOCK_GROUP_DATA) { | 3559 | if (type & BTRFS_BLOCK_GROUP_DATA) { |
| 3306 | max_stripe_size = 1024 * 1024 * 1024; | 3560 | max_stripe_size = 1024 * 1024 * 1024; |
| @@ -3347,13 +3601,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3347 | cur = cur->next; | 3601 | cur = cur->next; |
| 3348 | 3602 | ||
| 3349 | if (!device->writeable) { | 3603 | if (!device->writeable) { |
| 3350 | printk(KERN_ERR | 3604 | WARN(1, KERN_ERR |
| 3351 | "btrfs: read-only device in alloc_list\n"); | 3605 | "btrfs: read-only device in alloc_list\n"); |
| 3352 | WARN_ON(1); | ||
| 3353 | continue; | 3606 | continue; |
| 3354 | } | 3607 | } |
| 3355 | 3608 | ||
| 3356 | if (!device->in_fs_metadata) | 3609 | if (!device->in_fs_metadata || |
| 3610 | device->is_tgtdev_for_dev_replace) | ||
| 3357 | continue; | 3611 | continue; |
| 3358 | 3612 | ||
| 3359 | if (device->total_bytes > device->bytes_used) | 3613 | if (device->total_bytes > device->bytes_used) |
| @@ -3382,6 +3636,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 3382 | devices_info[ndevs].total_avail = total_avail; | 3636 | devices_info[ndevs].total_avail = total_avail; |
| 3383 | devices_info[ndevs].dev = device; | 3637 | devices_info[ndevs].dev = device; |
| 3384 | ++ndevs; | 3638 | ++ndevs; |
| 3639 | WARN_ON(ndevs > fs_devices->rw_devices); | ||
| 3385 | } | 3640 | } |
| 3386 | 3641 | ||
| 3387 | /* | 3642 | /* |
| @@ -3740,8 +3995,9 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) | |||
| 3740 | } | 3995 | } |
| 3741 | } | 3996 | } |
| 3742 | 3997 | ||
| 3743 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | 3998 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) |
| 3744 | { | 3999 | { |
| 4000 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
| 3745 | struct extent_map *em; | 4001 | struct extent_map *em; |
| 3746 | struct map_lookup *map; | 4002 | struct map_lookup *map; |
| 3747 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 4003 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
| @@ -3761,32 +4017,60 @@ int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len) | |||
| 3761 | else | 4017 | else |
| 3762 | ret = 1; | 4018 | ret = 1; |
| 3763 | free_extent_map(em); | 4019 | free_extent_map(em); |
| 4020 | |||
| 4021 | btrfs_dev_replace_lock(&fs_info->dev_replace); | ||
| 4022 | if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) | ||
| 4023 | ret++; | ||
| 4024 | btrfs_dev_replace_unlock(&fs_info->dev_replace); | ||
| 4025 | |||
| 3764 | return ret; | 4026 | return ret; |
| 3765 | } | 4027 | } |
| 3766 | 4028 | ||
| 3767 | static int find_live_mirror(struct map_lookup *map, int first, int num, | 4029 | static int find_live_mirror(struct btrfs_fs_info *fs_info, |
| 3768 | int optimal) | 4030 | struct map_lookup *map, int first, int num, |
| 4031 | int optimal, int dev_replace_is_ongoing) | ||
| 3769 | { | 4032 | { |
| 3770 | int i; | 4033 | int i; |
| 3771 | if (map->stripes[optimal].dev->bdev) | 4034 | int tolerance; |
| 3772 | return optimal; | 4035 | struct btrfs_device *srcdev; |
| 3773 | for (i = first; i < first + num; i++) { | 4036 | |
| 3774 | if (map->stripes[i].dev->bdev) | 4037 | if (dev_replace_is_ongoing && |
| 3775 | return i; | 4038 | fs_info->dev_replace.cont_reading_from_srcdev_mode == |
| 4039 | BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID) | ||
| 4040 | srcdev = fs_info->dev_replace.srcdev; | ||
| 4041 | else | ||
| 4042 | srcdev = NULL; | ||
| 4043 | |||
| 4044 | /* | ||
| 4045 | * try to avoid the drive that is the source drive for a | ||
| 4046 | * dev-replace procedure, only choose it if no other non-missing | ||
| 4047 | * mirror is available | ||
| 4048 | */ | ||
| 4049 | for (tolerance = 0; tolerance < 2; tolerance++) { | ||
| 4050 | if (map->stripes[optimal].dev->bdev && | ||
| 4051 | (tolerance || map->stripes[optimal].dev != srcdev)) | ||
| 4052 | return optimal; | ||
| 4053 | for (i = first; i < first + num; i++) { | ||
| 4054 | if (map->stripes[i].dev->bdev && | ||
| 4055 | (tolerance || map->stripes[i].dev != srcdev)) | ||
| 4056 | return i; | ||
| 4057 | } | ||
| 3776 | } | 4058 | } |
| 4059 | |||
| 3777 | /* we couldn't find one that doesn't fail. Just return something | 4060 | /* we couldn't find one that doesn't fail. Just return something |
| 3778 | * and the io error handling code will clean up eventually | 4061 | * and the io error handling code will clean up eventually |
| 3779 | */ | 4062 | */ |
| 3780 | return optimal; | 4063 | return optimal; |
| 3781 | } | 4064 | } |
| 3782 | 4065 | ||
| 3783 | static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 4066 | static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
| 3784 | u64 logical, u64 *length, | 4067 | u64 logical, u64 *length, |
| 3785 | struct btrfs_bio **bbio_ret, | 4068 | struct btrfs_bio **bbio_ret, |
| 3786 | int mirror_num) | 4069 | int mirror_num) |
| 3787 | { | 4070 | { |
| 3788 | struct extent_map *em; | 4071 | struct extent_map *em; |
| 3789 | struct map_lookup *map; | 4072 | struct map_lookup *map; |
| 4073 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
| 3790 | struct extent_map_tree *em_tree = &map_tree->map_tree; | 4074 | struct extent_map_tree *em_tree = &map_tree->map_tree; |
| 3791 | u64 offset; | 4075 | u64 offset; |
| 3792 | u64 stripe_offset; | 4076 | u64 stripe_offset; |
| @@ -3800,6 +4084,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3800 | int num_stripes; | 4084 | int num_stripes; |
| 3801 | int max_errors = 0; | 4085 | int max_errors = 0; |
| 3802 | struct btrfs_bio *bbio = NULL; | 4086 | struct btrfs_bio *bbio = NULL; |
| 4087 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
| 4088 | int dev_replace_is_ongoing = 0; | ||
| 4089 | int num_alloc_stripes; | ||
| 4090 | int patch_the_first_stripe_for_dev_replace = 0; | ||
| 4091 | u64 physical_to_patch_in_first_stripe = 0; | ||
| 3803 | 4092 | ||
| 3804 | read_lock(&em_tree->lock); | 4093 | read_lock(&em_tree->lock); |
| 3805 | em = lookup_extent_mapping(em_tree, logical, *length); | 4094 | em = lookup_extent_mapping(em_tree, logical, *length); |
| @@ -3816,9 +4105,6 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3816 | map = (struct map_lookup *)em->bdev; | 4105 | map = (struct map_lookup *)em->bdev; |
| 3817 | offset = logical - em->start; | 4106 | offset = logical - em->start; |
| 3818 | 4107 | ||
| 3819 | if (mirror_num > map->num_stripes) | ||
| 3820 | mirror_num = 0; | ||
| 3821 | |||
| 3822 | stripe_nr = offset; | 4108 | stripe_nr = offset; |
| 3823 | /* | 4109 | /* |
| 3824 | * stripe_nr counts the total number of stripes we have to stride | 4110 | * stripe_nr counts the total number of stripes we have to stride |
| @@ -3845,6 +4131,93 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3845 | if (!bbio_ret) | 4131 | if (!bbio_ret) |
| 3846 | goto out; | 4132 | goto out; |
| 3847 | 4133 | ||
| 4134 | btrfs_dev_replace_lock(dev_replace); | ||
| 4135 | dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); | ||
| 4136 | if (!dev_replace_is_ongoing) | ||
| 4137 | btrfs_dev_replace_unlock(dev_replace); | ||
| 4138 | |||
| 4139 | if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 && | ||
| 4140 | !(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) && | ||
| 4141 | dev_replace->tgtdev != NULL) { | ||
| 4142 | /* | ||
| 4143 | * in dev-replace case, for repair case (that's the only | ||
| 4144 | * case where the mirror is selected explicitly when | ||
| 4145 | * calling btrfs_map_block), blocks left of the left cursor | ||
| 4146 | * can also be read from the target drive. | ||
| 4147 | * For REQ_GET_READ_MIRRORS, the target drive is added as | ||
| 4148 | * the last one to the array of stripes. For READ, it also | ||
| 4149 | * needs to be supported using the same mirror number. | ||
| 4150 | * If the requested block is not left of the left cursor, | ||
| 4151 | * EIO is returned. This can happen because btrfs_num_copies() | ||
| 4152 | * returns one more in the dev-replace case. | ||
| 4153 | */ | ||
| 4154 | u64 tmp_length = *length; | ||
| 4155 | struct btrfs_bio *tmp_bbio = NULL; | ||
| 4156 | int tmp_num_stripes; | ||
| 4157 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
| 4158 | int index_srcdev = 0; | ||
| 4159 | int found = 0; | ||
| 4160 | u64 physical_of_found = 0; | ||
| 4161 | |||
| 4162 | ret = __btrfs_map_block(fs_info, REQ_GET_READ_MIRRORS, | ||
| 4163 | logical, &tmp_length, &tmp_bbio, 0); | ||
| 4164 | if (ret) { | ||
| 4165 | WARN_ON(tmp_bbio != NULL); | ||
| 4166 | goto out; | ||
| 4167 | } | ||
| 4168 | |||
| 4169 | tmp_num_stripes = tmp_bbio->num_stripes; | ||
| 4170 | if (mirror_num > tmp_num_stripes) { | ||
| 4171 | /* | ||
| 4172 | * REQ_GET_READ_MIRRORS does not contain this | ||
| 4173 | * mirror, that means that the requested area | ||
| 4174 | * is not left of the left cursor | ||
| 4175 | */ | ||
| 4176 | ret = -EIO; | ||
| 4177 | kfree(tmp_bbio); | ||
| 4178 | goto out; | ||
| 4179 | } | ||
| 4180 | |||
| 4181 | /* | ||
| 4182 | * process the rest of the function using the mirror_num | ||
| 4183 | * of the source drive. Therefore look it up first. | ||
| 4184 | * At the end, patch the device pointer to the one of the | ||
| 4185 | * target drive. | ||
| 4186 | */ | ||
| 4187 | for (i = 0; i < tmp_num_stripes; i++) { | ||
| 4188 | if (tmp_bbio->stripes[i].dev->devid == srcdev_devid) { | ||
| 4189 | /* | ||
| 4190 | * In case of DUP, in order to keep it | ||
| 4191 | * simple, only add the mirror with the | ||
| 4192 | * lowest physical address | ||
| 4193 | */ | ||
| 4194 | if (found && | ||
| 4195 | physical_of_found <= | ||
| 4196 | tmp_bbio->stripes[i].physical) | ||
| 4197 | continue; | ||
| 4198 | index_srcdev = i; | ||
| 4199 | found = 1; | ||
| 4200 | physical_of_found = | ||
| 4201 | tmp_bbio->stripes[i].physical; | ||
| 4202 | } | ||
| 4203 | } | ||
| 4204 | |||
| 4205 | if (found) { | ||
| 4206 | mirror_num = index_srcdev + 1; | ||
| 4207 | patch_the_first_stripe_for_dev_replace = 1; | ||
| 4208 | physical_to_patch_in_first_stripe = physical_of_found; | ||
| 4209 | } else { | ||
| 4210 | WARN_ON(1); | ||
| 4211 | ret = -EIO; | ||
| 4212 | kfree(tmp_bbio); | ||
| 4213 | goto out; | ||
| 4214 | } | ||
| 4215 | |||
| 4216 | kfree(tmp_bbio); | ||
| 4217 | } else if (mirror_num > map->num_stripes) { | ||
| 4218 | mirror_num = 0; | ||
| 4219 | } | ||
| 4220 | |||
| 3848 | num_stripes = 1; | 4221 | num_stripes = 1; |
| 3849 | stripe_index = 0; | 4222 | stripe_index = 0; |
| 3850 | stripe_nr_orig = stripe_nr; | 4223 | stripe_nr_orig = stripe_nr; |
| @@ -3859,19 +4232,20 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3859 | stripe_nr_end - stripe_nr_orig); | 4232 | stripe_nr_end - stripe_nr_orig); |
| 3860 | stripe_index = do_div(stripe_nr, map->num_stripes); | 4233 | stripe_index = do_div(stripe_nr, map->num_stripes); |
| 3861 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 4234 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
| 3862 | if (rw & (REQ_WRITE | REQ_DISCARD)) | 4235 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) |
| 3863 | num_stripes = map->num_stripes; | 4236 | num_stripes = map->num_stripes; |
| 3864 | else if (mirror_num) | 4237 | else if (mirror_num) |
| 3865 | stripe_index = mirror_num - 1; | 4238 | stripe_index = mirror_num - 1; |
| 3866 | else { | 4239 | else { |
| 3867 | stripe_index = find_live_mirror(map, 0, | 4240 | stripe_index = find_live_mirror(fs_info, map, 0, |
| 3868 | map->num_stripes, | 4241 | map->num_stripes, |
| 3869 | current->pid % map->num_stripes); | 4242 | current->pid % map->num_stripes, |
| 4243 | dev_replace_is_ongoing); | ||
| 3870 | mirror_num = stripe_index + 1; | 4244 | mirror_num = stripe_index + 1; |
| 3871 | } | 4245 | } |
| 3872 | 4246 | ||
| 3873 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 4247 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
| 3874 | if (rw & (REQ_WRITE | REQ_DISCARD)) { | 4248 | if (rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) { |
| 3875 | num_stripes = map->num_stripes; | 4249 | num_stripes = map->num_stripes; |
| 3876 | } else if (mirror_num) { | 4250 | } else if (mirror_num) { |
| 3877 | stripe_index = mirror_num - 1; | 4251 | stripe_index = mirror_num - 1; |
| @@ -3885,7 +4259,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3885 | stripe_index = do_div(stripe_nr, factor); | 4259 | stripe_index = do_div(stripe_nr, factor); |
| 3886 | stripe_index *= map->sub_stripes; | 4260 | stripe_index *= map->sub_stripes; |
| 3887 | 4261 | ||
| 3888 | if (rw & REQ_WRITE) | 4262 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) |
| 3889 | num_stripes = map->sub_stripes; | 4263 | num_stripes = map->sub_stripes; |
| 3890 | else if (rw & REQ_DISCARD) | 4264 | else if (rw & REQ_DISCARD) |
| 3891 | num_stripes = min_t(u64, map->sub_stripes * | 4265 | num_stripes = min_t(u64, map->sub_stripes * |
| @@ -3895,9 +4269,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3895 | stripe_index += mirror_num - 1; | 4269 | stripe_index += mirror_num - 1; |
| 3896 | else { | 4270 | else { |
| 3897 | int old_stripe_index = stripe_index; | 4271 | int old_stripe_index = stripe_index; |
| 3898 | stripe_index = find_live_mirror(map, stripe_index, | 4272 | stripe_index = find_live_mirror(fs_info, map, |
| 4273 | stripe_index, | ||
| 3899 | map->sub_stripes, stripe_index + | 4274 | map->sub_stripes, stripe_index + |
| 3900 | current->pid % map->sub_stripes); | 4275 | current->pid % map->sub_stripes, |
| 4276 | dev_replace_is_ongoing); | ||
| 3901 | mirror_num = stripe_index - old_stripe_index + 1; | 4277 | mirror_num = stripe_index - old_stripe_index + 1; |
| 3902 | } | 4278 | } |
| 3903 | } else { | 4279 | } else { |
| @@ -3911,7 +4287,14 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3911 | } | 4287 | } |
| 3912 | BUG_ON(stripe_index >= map->num_stripes); | 4288 | BUG_ON(stripe_index >= map->num_stripes); |
| 3913 | 4289 | ||
| 3914 | bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS); | 4290 | num_alloc_stripes = num_stripes; |
| 4291 | if (dev_replace_is_ongoing) { | ||
| 4292 | if (rw & (REQ_WRITE | REQ_DISCARD)) | ||
| 4293 | num_alloc_stripes <<= 1; | ||
| 4294 | if (rw & REQ_GET_READ_MIRRORS) | ||
| 4295 | num_alloc_stripes++; | ||
| 4296 | } | ||
| 4297 | bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); | ||
| 3915 | if (!bbio) { | 4298 | if (!bbio) { |
| 3916 | ret = -ENOMEM; | 4299 | ret = -ENOMEM; |
| 3917 | goto out; | 4300 | goto out; |
| @@ -3998,7 +4381,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 3998 | } | 4381 | } |
| 3999 | } | 4382 | } |
| 4000 | 4383 | ||
| 4001 | if (rw & REQ_WRITE) { | 4384 | if (rw & (REQ_WRITE | REQ_GET_READ_MIRRORS)) { |
| 4002 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | | 4385 | if (map->type & (BTRFS_BLOCK_GROUP_RAID1 | |
| 4003 | BTRFS_BLOCK_GROUP_RAID10 | | 4386 | BTRFS_BLOCK_GROUP_RAID10 | |
| 4004 | BTRFS_BLOCK_GROUP_DUP)) { | 4387 | BTRFS_BLOCK_GROUP_DUP)) { |
| @@ -4006,20 +4389,115 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | |||
| 4006 | } | 4389 | } |
| 4007 | } | 4390 | } |
| 4008 | 4391 | ||
| 4392 | if (dev_replace_is_ongoing && (rw & (REQ_WRITE | REQ_DISCARD)) && | ||
| 4393 | dev_replace->tgtdev != NULL) { | ||
| 4394 | int index_where_to_add; | ||
| 4395 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
| 4396 | |||
| 4397 | /* | ||
| 4398 | * duplicate the write operations while the dev replace | ||
| 4399 | * procedure is running. Since the copying of the old disk | ||
| 4400 | * to the new disk takes place at run time while the | ||
| 4401 | * filesystem is mounted writable, the regular write | ||
| 4402 | * operations to the old disk have to be duplicated to go | ||
| 4403 | * to the new disk as well. | ||
| 4404 | * Note that device->missing is handled by the caller, and | ||
| 4405 | * that the write to the old disk is already set up in the | ||
| 4406 | * stripes array. | ||
| 4407 | */ | ||
| 4408 | index_where_to_add = num_stripes; | ||
| 4409 | for (i = 0; i < num_stripes; i++) { | ||
| 4410 | if (bbio->stripes[i].dev->devid == srcdev_devid) { | ||
| 4411 | /* write to new disk, too */ | ||
| 4412 | struct btrfs_bio_stripe *new = | ||
| 4413 | bbio->stripes + index_where_to_add; | ||
| 4414 | struct btrfs_bio_stripe *old = | ||
| 4415 | bbio->stripes + i; | ||
| 4416 | |||
| 4417 | new->physical = old->physical; | ||
| 4418 | new->length = old->length; | ||
| 4419 | new->dev = dev_replace->tgtdev; | ||
| 4420 | index_where_to_add++; | ||
| 4421 | max_errors++; | ||
| 4422 | } | ||
| 4423 | } | ||
| 4424 | num_stripes = index_where_to_add; | ||
| 4425 | } else if (dev_replace_is_ongoing && (rw & REQ_GET_READ_MIRRORS) && | ||
| 4426 | dev_replace->tgtdev != NULL) { | ||
| 4427 | u64 srcdev_devid = dev_replace->srcdev->devid; | ||
| 4428 | int index_srcdev = 0; | ||
| 4429 | int found = 0; | ||
| 4430 | u64 physical_of_found = 0; | ||
| 4431 | |||
| 4432 | /* | ||
| 4433 | * During the dev-replace procedure, the target drive can | ||
| 4434 | * also be used to read data in case it is needed to repair | ||
| 4435 | * a corrupt block elsewhere. This is possible if the | ||
| 4436 | * requested area is left of the left cursor. In this area, | ||
| 4437 | * the target drive is a full copy of the source drive. | ||
| 4438 | */ | ||
| 4439 | for (i = 0; i < num_stripes; i++) { | ||
| 4440 | if (bbio->stripes[i].dev->devid == srcdev_devid) { | ||
| 4441 | /* | ||
| 4442 | * In case of DUP, in order to keep it | ||
| 4443 | * simple, only add the mirror with the | ||
| 4444 | * lowest physical address | ||
| 4445 | */ | ||
| 4446 | if (found && | ||
| 4447 | physical_of_found <= | ||
| 4448 | bbio->stripes[i].physical) | ||
| 4449 | continue; | ||
| 4450 | index_srcdev = i; | ||
| 4451 | found = 1; | ||
| 4452 | physical_of_found = bbio->stripes[i].physical; | ||
| 4453 | } | ||
| 4454 | } | ||
| 4455 | if (found) { | ||
| 4456 | u64 length = map->stripe_len; | ||
| 4457 | |||
| 4458 | if (physical_of_found + length <= | ||
| 4459 | dev_replace->cursor_left) { | ||
| 4460 | struct btrfs_bio_stripe *tgtdev_stripe = | ||
| 4461 | bbio->stripes + num_stripes; | ||
| 4462 | |||
| 4463 | tgtdev_stripe->physical = physical_of_found; | ||
| 4464 | tgtdev_stripe->length = | ||
| 4465 | bbio->stripes[index_srcdev].length; | ||
| 4466 | tgtdev_stripe->dev = dev_replace->tgtdev; | ||
| 4467 | |||
| 4468 | num_stripes++; | ||
| 4469 | } | ||
| 4470 | } | ||
| 4471 | } | ||
| 4472 | |||
| 4009 | *bbio_ret = bbio; | 4473 | *bbio_ret = bbio; |
| 4010 | bbio->num_stripes = num_stripes; | 4474 | bbio->num_stripes = num_stripes; |
| 4011 | bbio->max_errors = max_errors; | 4475 | bbio->max_errors = max_errors; |
| 4012 | bbio->mirror_num = mirror_num; | 4476 | bbio->mirror_num = mirror_num; |
| 4477 | |||
| 4478 | /* | ||
| 4479 | * this is the case that REQ_READ && dev_replace_is_ongoing && | ||
| 4480 | * mirror_num == num_stripes + 1 && dev_replace target drive is | ||
| 4481 | * available as a mirror | ||
| 4482 | */ | ||
| 4483 | if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) { | ||
| 4484 | WARN_ON(num_stripes > 1); | ||
| 4485 | bbio->stripes[0].dev = dev_replace->tgtdev; | ||
| 4486 | bbio->stripes[0].physical = physical_to_patch_in_first_stripe; | ||
| 4487 | bbio->mirror_num = map->num_stripes + 1; | ||
| 4488 | } | ||
| 4013 | out: | 4489 | out: |
| 4490 | if (dev_replace_is_ongoing) | ||
| 4491 | btrfs_dev_replace_unlock(dev_replace); | ||
| 4014 | free_extent_map(em); | 4492 | free_extent_map(em); |
| 4015 | return ret; | 4493 | return ret; |
| 4016 | } | 4494 | } |
| 4017 | 4495 | ||
| 4018 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 4496 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
| 4019 | u64 logical, u64 *length, | 4497 | u64 logical, u64 *length, |
| 4020 | struct btrfs_bio **bbio_ret, int mirror_num) | 4498 | struct btrfs_bio **bbio_ret, int mirror_num) |
| 4021 | { | 4499 | { |
| 4022 | return __btrfs_map_block(map_tree, rw, logical, length, bbio_ret, | 4500 | return __btrfs_map_block(fs_info, rw, logical, length, bbio_ret, |
| 4023 | mirror_num); | 4501 | mirror_num); |
| 4024 | } | 4502 | } |
| 4025 | 4503 | ||
| @@ -4238,10 +4716,116 @@ static noinline void schedule_bio(struct btrfs_root *root, | |||
| 4238 | &device->work); | 4716 | &device->work); |
| 4239 | } | 4717 | } |
| 4240 | 4718 | ||
| 4719 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | ||
| 4720 | sector_t sector) | ||
| 4721 | { | ||
| 4722 | struct bio_vec *prev; | ||
| 4723 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 4724 | unsigned short max_sectors = queue_max_sectors(q); | ||
| 4725 | struct bvec_merge_data bvm = { | ||
| 4726 | .bi_bdev = bdev, | ||
| 4727 | .bi_sector = sector, | ||
| 4728 | .bi_rw = bio->bi_rw, | ||
| 4729 | }; | ||
| 4730 | |||
| 4731 | if (bio->bi_vcnt == 0) { | ||
| 4732 | WARN_ON(1); | ||
| 4733 | return 1; | ||
| 4734 | } | ||
| 4735 | |||
| 4736 | prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; | ||
| 4737 | if ((bio->bi_size >> 9) > max_sectors) | ||
| 4738 | return 0; | ||
| 4739 | |||
| 4740 | if (!q->merge_bvec_fn) | ||
| 4741 | return 1; | ||
| 4742 | |||
| 4743 | bvm.bi_size = bio->bi_size - prev->bv_len; | ||
| 4744 | if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) | ||
| 4745 | return 0; | ||
| 4746 | return 1; | ||
| 4747 | } | ||
| 4748 | |||
| 4749 | static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | ||
| 4750 | struct bio *bio, u64 physical, int dev_nr, | ||
| 4751 | int rw, int async) | ||
| 4752 | { | ||
| 4753 | struct btrfs_device *dev = bbio->stripes[dev_nr].dev; | ||
| 4754 | |||
| 4755 | bio->bi_private = bbio; | ||
| 4756 | bio->bi_private = merge_stripe_index_into_bio_private( | ||
| 4757 | bio->bi_private, (unsigned int)dev_nr); | ||
| 4758 | bio->bi_end_io = btrfs_end_bio; | ||
| 4759 | bio->bi_sector = physical >> 9; | ||
| 4760 | #ifdef DEBUG | ||
| 4761 | { | ||
| 4762 | struct rcu_string *name; | ||
| 4763 | |||
| 4764 | rcu_read_lock(); | ||
| 4765 | name = rcu_dereference(dev->name); | ||
| 4766 | pr_debug("btrfs_map_bio: rw %d, sector=%llu, dev=%lu " | ||
| 4767 | "(%s id %llu), size=%u\n", rw, | ||
| 4768 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | ||
| 4769 | name->str, dev->devid, bio->bi_size); | ||
| 4770 | rcu_read_unlock(); | ||
| 4771 | } | ||
| 4772 | #endif | ||
| 4773 | bio->bi_bdev = dev->bdev; | ||
| 4774 | if (async) | ||
| 4775 | schedule_bio(root, dev, rw, bio); | ||
| 4776 | else | ||
| 4777 | btrfsic_submit_bio(rw, bio); | ||
| 4778 | } | ||
| 4779 | |||
| 4780 | static int breakup_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | ||
| 4781 | struct bio *first_bio, struct btrfs_device *dev, | ||
| 4782 | int dev_nr, int rw, int async) | ||
| 4783 | { | ||
| 4784 | struct bio_vec *bvec = first_bio->bi_io_vec; | ||
| 4785 | struct bio *bio; | ||
| 4786 | int nr_vecs = bio_get_nr_vecs(dev->bdev); | ||
| 4787 | u64 physical = bbio->stripes[dev_nr].physical; | ||
| 4788 | |||
| 4789 | again: | ||
| 4790 | bio = btrfs_bio_alloc(dev->bdev, physical >> 9, nr_vecs, GFP_NOFS); | ||
| 4791 | if (!bio) | ||
| 4792 | return -ENOMEM; | ||
| 4793 | |||
| 4794 | while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { | ||
| 4795 | if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, | ||
| 4796 | bvec->bv_offset) < bvec->bv_len) { | ||
| 4797 | u64 len = bio->bi_size; | ||
| 4798 | |||
| 4799 | atomic_inc(&bbio->stripes_pending); | ||
| 4800 | submit_stripe_bio(root, bbio, bio, physical, dev_nr, | ||
| 4801 | rw, async); | ||
| 4802 | physical += len; | ||
| 4803 | goto again; | ||
| 4804 | } | ||
| 4805 | bvec++; | ||
| 4806 | } | ||
| 4807 | |||
| 4808 | submit_stripe_bio(root, bbio, bio, physical, dev_nr, rw, async); | ||
| 4809 | return 0; | ||
| 4810 | } | ||
| 4811 | |||
| 4812 | static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) | ||
| 4813 | { | ||
| 4814 | atomic_inc(&bbio->error); | ||
| 4815 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | ||
| 4816 | bio->bi_private = bbio->private; | ||
| 4817 | bio->bi_end_io = bbio->end_io; | ||
| 4818 | bio->bi_bdev = (struct block_device *) | ||
| 4819 | (unsigned long)bbio->mirror_num; | ||
| 4820 | bio->bi_sector = logical >> 9; | ||
| 4821 | kfree(bbio); | ||
| 4822 | bio_endio(bio, -EIO); | ||
| 4823 | } | ||
| 4824 | } | ||
| 4825 | |||
| 4241 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | 4826 | int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, |
| 4242 | int mirror_num, int async_submit) | 4827 | int mirror_num, int async_submit) |
| 4243 | { | 4828 | { |
| 4244 | struct btrfs_mapping_tree *map_tree; | ||
| 4245 | struct btrfs_device *dev; | 4829 | struct btrfs_device *dev; |
| 4246 | struct bio *first_bio = bio; | 4830 | struct bio *first_bio = bio; |
| 4247 | u64 logical = (u64)bio->bi_sector << 9; | 4831 | u64 logical = (u64)bio->bi_sector << 9; |
| @@ -4253,12 +4837,11 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 4253 | struct btrfs_bio *bbio = NULL; | 4837 | struct btrfs_bio *bbio = NULL; |
| 4254 | 4838 | ||
| 4255 | length = bio->bi_size; | 4839 | length = bio->bi_size; |
| 4256 | map_tree = &root->fs_info->mapping_tree; | ||
| 4257 | map_length = length; | 4840 | map_length = length; |
| 4258 | 4841 | ||
| 4259 | ret = btrfs_map_block(map_tree, rw, logical, &map_length, &bbio, | 4842 | ret = btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
| 4260 | mirror_num); | 4843 | mirror_num); |
| 4261 | if (ret) /* -ENOMEM */ | 4844 | if (ret) |
| 4262 | return ret; | 4845 | return ret; |
| 4263 | 4846 | ||
| 4264 | total_devs = bbio->num_stripes; | 4847 | total_devs = bbio->num_stripes; |
| @@ -4276,52 +4859,48 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
| 4276 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 4859 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
| 4277 | 4860 | ||
| 4278 | while (dev_nr < total_devs) { | 4861 | while (dev_nr < total_devs) { |
| 4862 | dev = bbio->stripes[dev_nr].dev; | ||
| 4863 | if (!dev || !dev->bdev || (rw & WRITE && !dev->writeable)) { | ||
| 4864 | bbio_error(bbio, first_bio, logical); | ||
| 4865 | dev_nr++; | ||
| 4866 | continue; | ||
| 4867 | } | ||
| 4868 | |||
| 4869 | /* | ||
| 4870 | * Check and see if we're ok with this bio based on it's size | ||
| 4871 | * and offset with the given device. | ||
| 4872 | */ | ||
| 4873 | if (!bio_size_ok(dev->bdev, first_bio, | ||
| 4874 | bbio->stripes[dev_nr].physical >> 9)) { | ||
| 4875 | ret = breakup_stripe_bio(root, bbio, first_bio, dev, | ||
| 4876 | dev_nr, rw, async_submit); | ||
| 4877 | BUG_ON(ret); | ||
| 4878 | dev_nr++; | ||
| 4879 | continue; | ||
| 4880 | } | ||
| 4881 | |||
| 4279 | if (dev_nr < total_devs - 1) { | 4882 | if (dev_nr < total_devs - 1) { |
| 4280 | bio = bio_clone(first_bio, GFP_NOFS); | 4883 | bio = bio_clone(first_bio, GFP_NOFS); |
| 4281 | BUG_ON(!bio); /* -ENOMEM */ | 4884 | BUG_ON(!bio); /* -ENOMEM */ |
| 4282 | } else { | 4885 | } else { |
| 4283 | bio = first_bio; | 4886 | bio = first_bio; |
| 4284 | } | 4887 | } |
| 4285 | bio->bi_private = bbio; | 4888 | |
| 4286 | bio->bi_private = merge_stripe_index_into_bio_private( | 4889 | submit_stripe_bio(root, bbio, bio, |
| 4287 | bio->bi_private, (unsigned int)dev_nr); | 4890 | bbio->stripes[dev_nr].physical, dev_nr, rw, |
| 4288 | bio->bi_end_io = btrfs_end_bio; | 4891 | async_submit); |
| 4289 | bio->bi_sector = bbio->stripes[dev_nr].physical >> 9; | ||
| 4290 | dev = bbio->stripes[dev_nr].dev; | ||
| 4291 | if (dev && dev->bdev && (rw != WRITE || dev->writeable)) { | ||
| 4292 | #ifdef DEBUG | ||
| 4293 | struct rcu_string *name; | ||
| 4294 | |||
| 4295 | rcu_read_lock(); | ||
| 4296 | name = rcu_dereference(dev->name); | ||
| 4297 | pr_debug("btrfs_map_bio: rw %d, secor=%llu, dev=%lu " | ||
| 4298 | "(%s id %llu), size=%u\n", rw, | ||
| 4299 | (u64)bio->bi_sector, (u_long)dev->bdev->bd_dev, | ||
| 4300 | name->str, dev->devid, bio->bi_size); | ||
| 4301 | rcu_read_unlock(); | ||
| 4302 | #endif | ||
| 4303 | bio->bi_bdev = dev->bdev; | ||
| 4304 | if (async_submit) | ||
| 4305 | schedule_bio(root, dev, rw, bio); | ||
| 4306 | else | ||
| 4307 | btrfsic_submit_bio(rw, bio); | ||
| 4308 | } else { | ||
| 4309 | bio->bi_bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 4310 | bio->bi_sector = logical >> 9; | ||
| 4311 | bio_endio(bio, -EIO); | ||
| 4312 | } | ||
| 4313 | dev_nr++; | 4892 | dev_nr++; |
| 4314 | } | 4893 | } |
| 4315 | return 0; | 4894 | return 0; |
| 4316 | } | 4895 | } |
| 4317 | 4896 | ||
| 4318 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | 4897 | struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, |
| 4319 | u8 *uuid, u8 *fsid) | 4898 | u8 *uuid, u8 *fsid) |
| 4320 | { | 4899 | { |
| 4321 | struct btrfs_device *device; | 4900 | struct btrfs_device *device; |
| 4322 | struct btrfs_fs_devices *cur_devices; | 4901 | struct btrfs_fs_devices *cur_devices; |
| 4323 | 4902 | ||
| 4324 | cur_devices = root->fs_info->fs_devices; | 4903 | cur_devices = fs_info->fs_devices; |
| 4325 | while (cur_devices) { | 4904 | while (cur_devices) { |
| 4326 | if (!fsid || | 4905 | if (!fsid || |
| 4327 | !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { | 4906 | !memcmp(cur_devices->fsid, fsid, BTRFS_UUID_SIZE)) { |
| @@ -4402,6 +4981,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 4402 | em->bdev = (struct block_device *)map; | 4981 | em->bdev = (struct block_device *)map; |
| 4403 | em->start = logical; | 4982 | em->start = logical; |
| 4404 | em->len = length; | 4983 | em->len = length; |
| 4984 | em->orig_start = 0; | ||
| 4405 | em->block_start = 0; | 4985 | em->block_start = 0; |
| 4406 | em->block_len = em->len; | 4986 | em->block_len = em->len; |
| 4407 | 4987 | ||
| @@ -4419,8 +4999,8 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, | |||
| 4419 | read_extent_buffer(leaf, uuid, (unsigned long) | 4999 | read_extent_buffer(leaf, uuid, (unsigned long) |
| 4420 | btrfs_stripe_dev_uuid_nr(chunk, i), | 5000 | btrfs_stripe_dev_uuid_nr(chunk, i), |
| 4421 | BTRFS_UUID_SIZE); | 5001 | BTRFS_UUID_SIZE); |
| 4422 | map->stripes[i].dev = btrfs_find_device(root, devid, uuid, | 5002 | map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, |
| 4423 | NULL); | 5003 | uuid, NULL); |
| 4424 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { | 5004 | if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { |
| 4425 | kfree(map); | 5005 | kfree(map); |
| 4426 | free_extent_map(em); | 5006 | free_extent_map(em); |
| @@ -4461,6 +5041,8 @@ static void fill_device_from_item(struct extent_buffer *leaf, | |||
| 4461 | device->io_align = btrfs_device_io_align(leaf, dev_item); | 5041 | device->io_align = btrfs_device_io_align(leaf, dev_item); |
| 4462 | device->io_width = btrfs_device_io_width(leaf, dev_item); | 5042 | device->io_width = btrfs_device_io_width(leaf, dev_item); |
| 4463 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); | 5043 | device->sector_size = btrfs_device_sector_size(leaf, dev_item); |
| 5044 | WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); | ||
| 5045 | device->is_tgtdev_for_dev_replace = 0; | ||
| 4464 | 5046 | ||
| 4465 | ptr = (unsigned long)btrfs_device_uuid(dev_item); | 5047 | ptr = (unsigned long)btrfs_device_uuid(dev_item); |
| 4466 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); | 5048 | read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); |
| @@ -4538,7 +5120,7 @@ static int read_one_dev(struct btrfs_root *root, | |||
| 4538 | return ret; | 5120 | return ret; |
| 4539 | } | 5121 | } |
| 4540 | 5122 | ||
| 4541 | device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); | 5123 | device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); |
| 4542 | if (!device || !device->bdev) { | 5124 | if (!device || !device->bdev) { |
| 4543 | if (!btrfs_test_opt(root, DEGRADED)) | 5125 | if (!btrfs_test_opt(root, DEGRADED)) |
| 4544 | return -EIO; | 5126 | return -EIO; |
| @@ -4571,7 +5153,7 @@ static int read_one_dev(struct btrfs_root *root, | |||
| 4571 | fill_device_from_item(leaf, dev_item, device); | 5153 | fill_device_from_item(leaf, dev_item, device); |
| 4572 | device->dev_root = root->fs_info->dev_root; | 5154 | device->dev_root = root->fs_info->dev_root; |
| 4573 | device->in_fs_metadata = 1; | 5155 | device->in_fs_metadata = 1; |
| 4574 | if (device->writeable) { | 5156 | if (device->writeable && !device->is_tgtdev_for_dev_replace) { |
| 4575 | device->fs_devices->total_rw_bytes += device->total_bytes; | 5157 | device->fs_devices->total_rw_bytes += device->total_bytes; |
| 4576 | spin_lock(&root->fs_info->free_chunk_lock); | 5158 | spin_lock(&root->fs_info->free_chunk_lock); |
| 4577 | root->fs_info->free_chunk_space += device->total_bytes - | 5159 | root->fs_info->free_chunk_space += device->total_bytes - |
| @@ -4930,7 +5512,7 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
| 4930 | int i; | 5512 | int i; |
| 4931 | 5513 | ||
| 4932 | mutex_lock(&fs_devices->device_list_mutex); | 5514 | mutex_lock(&fs_devices->device_list_mutex); |
| 4933 | dev = btrfs_find_device(root, stats->devid, NULL, NULL); | 5515 | dev = btrfs_find_device(root->fs_info, stats->devid, NULL, NULL); |
| 4934 | mutex_unlock(&fs_devices->device_list_mutex); | 5516 | mutex_unlock(&fs_devices->device_list_mutex); |
| 4935 | 5517 | ||
| 4936 | if (!dev) { | 5518 | if (!dev) { |
| @@ -4958,3 +5540,21 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
| 4958 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; | 5540 | stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX; |
| 4959 | return 0; | 5541 | return 0; |
| 4960 | } | 5542 | } |
| 5543 | |||
| 5544 | int btrfs_scratch_superblock(struct btrfs_device *device) | ||
| 5545 | { | ||
| 5546 | struct buffer_head *bh; | ||
| 5547 | struct btrfs_super_block *disk_super; | ||
| 5548 | |||
| 5549 | bh = btrfs_read_dev_super(device->bdev); | ||
| 5550 | if (!bh) | ||
| 5551 | return -EINVAL; | ||
| 5552 | disk_super = (struct btrfs_super_block *)bh->b_data; | ||
| 5553 | |||
| 5554 | memset(&disk_super->magic, 0, sizeof(disk_super->magic)); | ||
| 5555 | set_buffer_dirty(bh); | ||
| 5556 | sync_dirty_buffer(bh); | ||
| 5557 | brelse(bh); | ||
| 5558 | |||
| 5559 | return 0; | ||
| 5560 | } | ||
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 53c06af92e8d..d3c3939ac751 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
| @@ -50,6 +50,7 @@ struct btrfs_device { | |||
| 50 | int in_fs_metadata; | 50 | int in_fs_metadata; |
| 51 | int missing; | 51 | int missing; |
| 52 | int can_discard; | 52 | int can_discard; |
| 53 | int is_tgtdev_for_dev_replace; | ||
| 53 | 54 | ||
| 54 | spinlock_t io_lock; | 55 | spinlock_t io_lock; |
| 55 | 56 | ||
| @@ -88,7 +89,7 @@ struct btrfs_device { | |||
| 88 | u8 uuid[BTRFS_UUID_SIZE]; | 89 | u8 uuid[BTRFS_UUID_SIZE]; |
| 89 | 90 | ||
| 90 | /* per-device scrub information */ | 91 | /* per-device scrub information */ |
| 91 | struct scrub_dev *scrub_device; | 92 | struct scrub_ctx *scrub_device; |
| 92 | 93 | ||
| 93 | struct btrfs_work work; | 94 | struct btrfs_work work; |
| 94 | struct rcu_head rcu; | 95 | struct rcu_head rcu; |
| @@ -179,6 +180,15 @@ struct btrfs_device_info { | |||
| 179 | u64 total_avail; | 180 | u64 total_avail; |
| 180 | }; | 181 | }; |
| 181 | 182 | ||
| 183 | struct btrfs_raid_attr { | ||
| 184 | int sub_stripes; /* sub_stripes info for map */ | ||
| 185 | int dev_stripes; /* stripes per dev */ | ||
| 186 | int devs_max; /* max devs to use */ | ||
| 187 | int devs_min; /* min devs needed */ | ||
| 188 | int devs_increment; /* ndevs has to be a multiple of this */ | ||
| 189 | int ncopies; /* how many copies to data has */ | ||
| 190 | }; | ||
| 191 | |||
| 182 | struct map_lookup { | 192 | struct map_lookup { |
| 183 | u64 type; | 193 | u64 type; |
| 184 | int io_align; | 194 | int io_align; |
| @@ -248,7 +258,7 @@ int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, | |||
| 248 | struct btrfs_device *device, | 258 | struct btrfs_device *device, |
| 249 | u64 chunk_tree, u64 chunk_objectid, | 259 | u64 chunk_tree, u64 chunk_objectid, |
| 250 | u64 chunk_offset, u64 start, u64 num_bytes); | 260 | u64 chunk_offset, u64 start, u64 num_bytes); |
| 251 | int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, | 261 | int btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, |
| 252 | u64 logical, u64 *length, | 262 | u64 logical, u64 *length, |
| 253 | struct btrfs_bio **bbio_ret, int mirror_num); | 263 | struct btrfs_bio **bbio_ret, int mirror_num); |
| 254 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | 264 | int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, |
| @@ -267,19 +277,27 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
| 267 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | 277 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, |
| 268 | struct btrfs_fs_devices **fs_devices_ret); | 278 | struct btrfs_fs_devices **fs_devices_ret); |
| 269 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); | 279 | int btrfs_close_devices(struct btrfs_fs_devices *fs_devices); |
| 270 | void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices); | 280 | void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info, |
| 281 | struct btrfs_fs_devices *fs_devices, int step); | ||
| 282 | int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, | ||
| 283 | char *device_path, | ||
| 284 | struct btrfs_device **device); | ||
| 285 | int btrfs_find_device_by_path(struct btrfs_root *root, char *device_path, | ||
| 286 | struct btrfs_device **device); | ||
| 271 | int btrfs_add_device(struct btrfs_trans_handle *trans, | 287 | int btrfs_add_device(struct btrfs_trans_handle *trans, |
| 272 | struct btrfs_root *root, | 288 | struct btrfs_root *root, |
| 273 | struct btrfs_device *device); | 289 | struct btrfs_device *device); |
| 274 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); | 290 | int btrfs_rm_device(struct btrfs_root *root, char *device_path); |
| 275 | void btrfs_cleanup_fs_uuids(void); | 291 | void btrfs_cleanup_fs_uuids(void); |
| 276 | int btrfs_num_copies(struct btrfs_mapping_tree *map_tree, u64 logical, u64 len); | 292 | int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len); |
| 277 | int btrfs_grow_device(struct btrfs_trans_handle *trans, | 293 | int btrfs_grow_device(struct btrfs_trans_handle *trans, |
| 278 | struct btrfs_device *device, u64 new_size); | 294 | struct btrfs_device *device, u64 new_size); |
| 279 | struct btrfs_device *btrfs_find_device(struct btrfs_root *root, u64 devid, | 295 | struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid, |
| 280 | u8 *uuid, u8 *fsid); | 296 | u8 *uuid, u8 *fsid); |
| 281 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); | 297 | int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); |
| 282 | int btrfs_init_new_device(struct btrfs_root *root, char *path); | 298 | int btrfs_init_new_device(struct btrfs_root *root, char *path); |
| 299 | int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, | ||
| 300 | struct btrfs_device **device_out); | ||
| 283 | int btrfs_balance(struct btrfs_balance_control *bctl, | 301 | int btrfs_balance(struct btrfs_balance_control *bctl, |
| 284 | struct btrfs_ioctl_balance_args *bargs); | 302 | struct btrfs_ioctl_balance_args *bargs); |
| 285 | int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); | 303 | int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); |
| @@ -296,6 +314,13 @@ int btrfs_get_dev_stats(struct btrfs_root *root, | |||
| 296 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); | 314 | int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info); |
| 297 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, | 315 | int btrfs_run_dev_stats(struct btrfs_trans_handle *trans, |
| 298 | struct btrfs_fs_info *fs_info); | 316 | struct btrfs_fs_info *fs_info); |
| 317 | void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, | ||
| 318 | struct btrfs_device *srcdev); | ||
| 319 | void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, | ||
| 320 | struct btrfs_device *tgtdev); | ||
| 321 | void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info, | ||
| 322 | struct btrfs_device *tgtdev); | ||
| 323 | int btrfs_scratch_superblock(struct btrfs_device *device); | ||
| 299 | 324 | ||
| 300 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, | 325 | static inline void btrfs_dev_stat_inc(struct btrfs_device *dev, |
| 301 | int index) | 326 | int index) |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 3f4e2d69e83a..446a6848c554 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -122,6 +122,16 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
| 122 | */ | 122 | */ |
| 123 | if (!value) | 123 | if (!value) |
| 124 | goto out; | 124 | goto out; |
| 125 | } else { | ||
| 126 | di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), | ||
| 127 | name, name_len, 0); | ||
| 128 | if (IS_ERR(di)) { | ||
| 129 | ret = PTR_ERR(di); | ||
| 130 | goto out; | ||
| 131 | } | ||
| 132 | if (!di && !value) | ||
| 133 | goto out; | ||
| 134 | btrfs_release_path(path); | ||
| 125 | } | 135 | } |
| 126 | 136 | ||
| 127 | again: | 137 | again: |
| @@ -198,6 +208,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
| 198 | 208 | ||
| 199 | inode_inc_iversion(inode); | 209 | inode_inc_iversion(inode); |
| 200 | inode->i_ctime = CURRENT_TIME; | 210 | inode->i_ctime = CURRENT_TIME; |
| 211 | set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags); | ||
| 201 | ret = btrfs_update_inode(trans, root, inode); | 212 | ret = btrfs_update_inode(trans, root, inode); |
| 202 | BUG_ON(ret); | 213 | BUG_ON(ret); |
| 203 | out: | 214 | out: |
| @@ -265,7 +276,7 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
| 265 | 276 | ||
| 266 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 277 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
| 267 | if (verify_dir_item(root, leaf, di)) | 278 | if (verify_dir_item(root, leaf, di)) |
| 268 | continue; | 279 | goto next; |
| 269 | 280 | ||
| 270 | name_len = btrfs_dir_name_len(leaf, di); | 281 | name_len = btrfs_dir_name_len(leaf, di); |
| 271 | total_size += name_len + 1; | 282 | total_size += name_len + 1; |
