diff options
| author | Russell King <rmk+kernel@arm.linux.org.uk> | 2010-08-06 13:13:54 -0400 |
|---|---|---|
| committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2010-08-06 13:13:54 -0400 |
| commit | 11e4afb49b7fa1fc8e1ffd850c1806dd86a08204 (patch) | |
| tree | 9e57efcb106ae912f7bec718feb3f8ec607559bb /fs/btrfs | |
| parent | 162500b3a3ff39d941d29db49b41a16667ae44f0 (diff) | |
| parent | 9b2a606d3898fcb2eedb6faded3bb37549590ac4 (diff) | |
Merge branches 'gemini' and 'misc' into devel
Diffstat (limited to 'fs/btrfs')
35 files changed, 5240 insertions, 2961 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 6df6d6ed74fd..2222d161c7b6 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/posix_acl_xattr.h> | 22 | #include <linux/posix_acl_xattr.h> |
| 23 | #include <linux/posix_acl.h> | 23 | #include <linux/posix_acl.h> |
| 24 | #include <linux/sched.h> | 24 | #include <linux/sched.h> |
| 25 | #include <linux/slab.h> | ||
| 25 | 26 | ||
| 26 | #include "ctree.h" | 27 | #include "ctree.h" |
| 27 | #include "btrfs_inode.h" | 28 | #include "btrfs_inode.h" |
| @@ -59,6 +60,8 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) | |||
| 59 | size = __btrfs_getxattr(inode, name, value, size); | 60 | size = __btrfs_getxattr(inode, name, value, size); |
| 60 | if (size > 0) { | 61 | if (size > 0) { |
| 61 | acl = posix_acl_from_xattr(value, size); | 62 | acl = posix_acl_from_xattr(value, size); |
| 63 | if (IS_ERR(acl)) | ||
| 64 | return acl; | ||
| 62 | set_cached_acl(inode, type, acl); | 65 | set_cached_acl(inode, type, acl); |
| 63 | } | 66 | } |
| 64 | kfree(value); | 67 | kfree(value); |
| @@ -159,6 +162,12 @@ static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name, | |||
| 159 | int ret; | 162 | int ret; |
| 160 | struct posix_acl *acl = NULL; | 163 | struct posix_acl *acl = NULL; |
| 161 | 164 | ||
| 165 | if (!is_owner_or_cap(dentry->d_inode)) | ||
| 166 | return -EPERM; | ||
| 167 | |||
| 168 | if (!IS_POSIXACL(dentry->d_inode)) | ||
| 169 | return -EOPNOTSUPP; | ||
| 170 | |||
| 162 | if (value) { | 171 | if (value) { |
| 163 | acl = posix_acl_from_xattr(value, size); | 172 | acl = posix_acl_from_xattr(value, size); |
| 164 | if (acl == NULL) { | 173 | if (acl == NULL) { |
| @@ -281,14 +290,14 @@ int btrfs_acl_chmod(struct inode *inode) | |||
| 281 | return ret; | 290 | return ret; |
| 282 | } | 291 | } |
| 283 | 292 | ||
| 284 | struct xattr_handler btrfs_xattr_acl_default_handler = { | 293 | const struct xattr_handler btrfs_xattr_acl_default_handler = { |
| 285 | .prefix = POSIX_ACL_XATTR_DEFAULT, | 294 | .prefix = POSIX_ACL_XATTR_DEFAULT, |
| 286 | .flags = ACL_TYPE_DEFAULT, | 295 | .flags = ACL_TYPE_DEFAULT, |
| 287 | .get = btrfs_xattr_acl_get, | 296 | .get = btrfs_xattr_acl_get, |
| 288 | .set = btrfs_xattr_acl_set, | 297 | .set = btrfs_xattr_acl_set, |
| 289 | }; | 298 | }; |
| 290 | 299 | ||
| 291 | struct xattr_handler btrfs_xattr_acl_access_handler = { | 300 | const struct xattr_handler btrfs_xattr_acl_access_handler = { |
| 292 | .prefix = POSIX_ACL_XATTR_ACCESS, | 301 | .prefix = POSIX_ACL_XATTR_ACCESS, |
| 293 | .flags = ACL_TYPE_ACCESS, | 302 | .flags = ACL_TYPE_ACCESS, |
| 294 | .get = btrfs_xattr_acl_get, | 303 | .get = btrfs_xattr_acl_get, |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c0861e781cdb..7ec14097fef1 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include <linux/list.h> | 21 | #include <linux/list.h> |
| 21 | #include <linux/spinlock.h> | 22 | #include <linux/spinlock.h> |
| 22 | #include <linux/freezer.h> | 23 | #include <linux/freezer.h> |
| @@ -376,6 +377,7 @@ again: | |||
| 376 | if (!list_empty(&worker->pending) || | 377 | if (!list_empty(&worker->pending) || |
| 377 | !list_empty(&worker->prio_pending)) { | 378 | !list_empty(&worker->prio_pending)) { |
| 378 | spin_unlock_irq(&worker->lock); | 379 | spin_unlock_irq(&worker->lock); |
| 380 | set_current_state(TASK_RUNNING); | ||
| 379 | goto again; | 381 | goto again; |
| 380 | } | 382 | } |
| 381 | 383 | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 7a4dee199832..6ad63f17eca0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -137,8 +137,8 @@ struct btrfs_inode { | |||
| 137 | * of extent items we've reserved metadata for. | 137 | * of extent items we've reserved metadata for. |
| 138 | */ | 138 | */ |
| 139 | spinlock_t accounting_lock; | 139 | spinlock_t accounting_lock; |
| 140 | atomic_t outstanding_extents; | ||
| 140 | int reserved_extents; | 141 | int reserved_extents; |
| 141 | int outstanding_extents; | ||
| 142 | 142 | ||
| 143 | /* | 143 | /* |
| 144 | * ordered_data_close is set by truncate when a file that used | 144 | * ordered_data_close is set by truncate when a file that used |
| @@ -151,6 +151,7 @@ struct btrfs_inode { | |||
| 151 | * of these. | 151 | * of these. |
| 152 | */ | 152 | */ |
| 153 | unsigned ordered_data_close:1; | 153 | unsigned ordered_data_close:1; |
| 154 | unsigned orphan_meta_reserved:1; | ||
| 154 | unsigned dummy_inode:1; | 155 | unsigned dummy_inode:1; |
| 155 | 156 | ||
| 156 | /* | 157 | /* |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 28b92a7218ab..396039b3a8a2 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
| @@ -31,7 +31,7 @@ | |||
| 31 | #include <linux/swap.h> | 31 | #include <linux/swap.h> |
| 32 | #include <linux/writeback.h> | 32 | #include <linux/writeback.h> |
| 33 | #include <linux/bit_spinlock.h> | 33 | #include <linux/bit_spinlock.h> |
| 34 | #include <linux/pagevec.h> | 34 | #include <linux/slab.h> |
| 35 | #include "compat.h" | 35 | #include "compat.h" |
| 36 | #include "ctree.h" | 36 | #include "ctree.h" |
| 37 | #include "disk-io.h" | 37 | #include "disk-io.h" |
| @@ -445,7 +445,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 445 | unsigned long nr_pages = 0; | 445 | unsigned long nr_pages = 0; |
| 446 | struct extent_map *em; | 446 | struct extent_map *em; |
| 447 | struct address_space *mapping = inode->i_mapping; | 447 | struct address_space *mapping = inode->i_mapping; |
| 448 | struct pagevec pvec; | ||
| 449 | struct extent_map_tree *em_tree; | 448 | struct extent_map_tree *em_tree; |
| 450 | struct extent_io_tree *tree; | 449 | struct extent_io_tree *tree; |
| 451 | u64 end; | 450 | u64 end; |
| @@ -461,7 +460,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 461 | 460 | ||
| 462 | end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 461 | end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; |
| 463 | 462 | ||
| 464 | pagevec_init(&pvec, 0); | ||
| 465 | while (last_offset < compressed_end) { | 463 | while (last_offset < compressed_end) { |
| 466 | page_index = last_offset >> PAGE_CACHE_SHIFT; | 464 | page_index = last_offset >> PAGE_CACHE_SHIFT; |
| 467 | 465 | ||
| @@ -478,26 +476,17 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 478 | goto next; | 476 | goto next; |
| 479 | } | 477 | } |
| 480 | 478 | ||
| 481 | page = alloc_page(mapping_gfp_mask(mapping) & ~__GFP_FS); | 479 | page = __page_cache_alloc(mapping_gfp_mask(mapping) & |
| 480 | ~__GFP_FS); | ||
| 482 | if (!page) | 481 | if (!page) |
| 483 | break; | 482 | break; |
| 484 | 483 | ||
| 485 | page->index = page_index; | 484 | if (add_to_page_cache_lru(page, mapping, page_index, |
| 486 | /* | 485 | GFP_NOFS)) { |
| 487 | * what we want to do here is call add_to_page_cache_lru, | ||
| 488 | * but that isn't exported, so we reproduce it here | ||
| 489 | */ | ||
| 490 | if (add_to_page_cache(page, mapping, | ||
| 491 | page->index, GFP_NOFS)) { | ||
| 492 | page_cache_release(page); | 486 | page_cache_release(page); |
| 493 | goto next; | 487 | goto next; |
| 494 | } | 488 | } |
| 495 | 489 | ||
| 496 | /* open coding of lru_cache_add, also not exported */ | ||
| 497 | page_cache_get(page); | ||
| 498 | if (!pagevec_add(&pvec, page)) | ||
| 499 | __pagevec_lru_add_file(&pvec); | ||
| 500 | |||
| 501 | end = last_offset + PAGE_CACHE_SIZE - 1; | 490 | end = last_offset + PAGE_CACHE_SIZE - 1; |
| 502 | /* | 491 | /* |
| 503 | * at this point, we have a locked page in the page cache | 492 | * at this point, we have a locked page in the page cache |
| @@ -551,8 +540,6 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
| 551 | next: | 540 | next: |
| 552 | last_offset += PAGE_CACHE_SIZE; | 541 | last_offset += PAGE_CACHE_SIZE; |
| 553 | } | 542 | } |
| 554 | if (pagevec_count(&pvec)) | ||
| 555 | __pagevec_lru_add_file(&pvec); | ||
| 556 | return 0; | 543 | return 0; |
| 557 | } | 544 | } |
| 558 | 545 | ||
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index c4bc570a396e..c3df14ce2cc2 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include "ctree.h" | 21 | #include "ctree.h" |
| 21 | #include "disk-io.h" | 22 | #include "disk-io.h" |
| 22 | #include "transaction.h" | 23 | #include "transaction.h" |
| @@ -279,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, | |||
| 279 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | 280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, |
| 280 | struct btrfs_root *root, | 281 | struct btrfs_root *root, |
| 281 | struct extent_buffer *buf, | 282 | struct extent_buffer *buf, |
| 282 | struct extent_buffer *cow) | 283 | struct extent_buffer *cow, |
| 284 | int *last_ref) | ||
| 283 | { | 285 | { |
| 284 | u64 refs; | 286 | u64 refs; |
| 285 | u64 owner; | 287 | u64 owner; |
| @@ -365,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
| 365 | BUG_ON(ret); | 367 | BUG_ON(ret); |
| 366 | } | 368 | } |
| 367 | clean_tree_block(trans, root, buf); | 369 | clean_tree_block(trans, root, buf); |
| 370 | *last_ref = 1; | ||
| 368 | } | 371 | } |
| 369 | return 0; | 372 | return 0; |
| 370 | } | 373 | } |
| @@ -391,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 391 | struct btrfs_disk_key disk_key; | 394 | struct btrfs_disk_key disk_key; |
| 392 | struct extent_buffer *cow; | 395 | struct extent_buffer *cow; |
| 393 | int level; | 396 | int level; |
| 397 | int last_ref = 0; | ||
| 394 | int unlock_orig = 0; | 398 | int unlock_orig = 0; |
| 395 | u64 parent_start; | 399 | u64 parent_start; |
| 396 | 400 | ||
| @@ -441,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 441 | (unsigned long)btrfs_header_fsid(cow), | 445 | (unsigned long)btrfs_header_fsid(cow), |
| 442 | BTRFS_FSID_SIZE); | 446 | BTRFS_FSID_SIZE); |
| 443 | 447 | ||
| 444 | update_ref_for_cow(trans, root, buf, cow); | 448 | update_ref_for_cow(trans, root, buf, cow, &last_ref); |
| 449 | |||
| 450 | if (root->ref_cows) | ||
| 451 | btrfs_reloc_cow_block(trans, root, buf, cow); | ||
| 445 | 452 | ||
| 446 | if (buf == root->node) { | 453 | if (buf == root->node) { |
| 447 | WARN_ON(parent && parent != buf); | 454 | WARN_ON(parent && parent != buf); |
| @@ -456,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 456 | extent_buffer_get(cow); | 463 | extent_buffer_get(cow); |
| 457 | spin_unlock(&root->node_lock); | 464 | spin_unlock(&root->node_lock); |
| 458 | 465 | ||
| 459 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 466 | btrfs_free_tree_block(trans, root, buf, parent_start, |
| 460 | parent_start, root->root_key.objectid, level); | 467 | last_ref); |
| 461 | free_extent_buffer(buf); | 468 | free_extent_buffer(buf); |
| 462 | add_root_to_dirty_list(root); | 469 | add_root_to_dirty_list(root); |
| 463 | } else { | 470 | } else { |
| @@ -472,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 472 | btrfs_set_node_ptr_generation(parent, parent_slot, | 479 | btrfs_set_node_ptr_generation(parent, parent_slot, |
| 473 | trans->transid); | 480 | trans->transid); |
| 474 | btrfs_mark_buffer_dirty(parent); | 481 | btrfs_mark_buffer_dirty(parent); |
| 475 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 482 | btrfs_free_tree_block(trans, root, buf, parent_start, |
| 476 | parent_start, root->root_key.objectid, level); | 483 | last_ref); |
| 477 | } | 484 | } |
| 478 | if (unlock_orig) | 485 | if (unlock_orig) |
| 479 | btrfs_tree_unlock(buf); | 486 | btrfs_tree_unlock(buf); |
| @@ -948,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
| 948 | return bin_search(eb, key, level, slot); | 955 | return bin_search(eb, key, level, slot); |
| 949 | } | 956 | } |
| 950 | 957 | ||
| 958 | static void root_add_used(struct btrfs_root *root, u32 size) | ||
| 959 | { | ||
| 960 | spin_lock(&root->accounting_lock); | ||
| 961 | btrfs_set_root_used(&root->root_item, | ||
| 962 | btrfs_root_used(&root->root_item) + size); | ||
| 963 | spin_unlock(&root->accounting_lock); | ||
| 964 | } | ||
| 965 | |||
| 966 | static void root_sub_used(struct btrfs_root *root, u32 size) | ||
| 967 | { | ||
| 968 | spin_lock(&root->accounting_lock); | ||
| 969 | btrfs_set_root_used(&root->root_item, | ||
| 970 | btrfs_root_used(&root->root_item) - size); | ||
| 971 | spin_unlock(&root->accounting_lock); | ||
| 972 | } | ||
| 973 | |||
| 951 | /* given a node and slot number, this reads the blocks it points to. The | 974 | /* given a node and slot number, this reads the blocks it points to. The |
| 952 | * extent buffer is returned with a reference taken (but unlocked). | 975 | * extent buffer is returned with a reference taken (but unlocked). |
| 953 | * NULL is returned on error. | 976 | * NULL is returned on error. |
| @@ -1018,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1018 | btrfs_tree_lock(child); | 1041 | btrfs_tree_lock(child); |
| 1019 | btrfs_set_lock_blocking(child); | 1042 | btrfs_set_lock_blocking(child); |
| 1020 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); | 1043 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); |
| 1021 | BUG_ON(ret); | 1044 | if (ret) { |
| 1045 | btrfs_tree_unlock(child); | ||
| 1046 | free_extent_buffer(child); | ||
| 1047 | goto enospc; | ||
| 1048 | } | ||
| 1022 | 1049 | ||
| 1023 | spin_lock(&root->node_lock); | 1050 | spin_lock(&root->node_lock); |
| 1024 | root->node = child; | 1051 | root->node = child; |
| @@ -1033,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1033 | btrfs_tree_unlock(mid); | 1060 | btrfs_tree_unlock(mid); |
| 1034 | /* once for the path */ | 1061 | /* once for the path */ |
| 1035 | free_extent_buffer(mid); | 1062 | free_extent_buffer(mid); |
| 1036 | ret = btrfs_free_tree_block(trans, root, mid->start, mid->len, | 1063 | |
| 1037 | 0, root->root_key.objectid, level); | 1064 | root_sub_used(root, mid->len); |
| 1065 | btrfs_free_tree_block(trans, root, mid, 0, 1); | ||
| 1038 | /* once for the root ptr */ | 1066 | /* once for the root ptr */ |
| 1039 | free_extent_buffer(mid); | 1067 | free_extent_buffer(mid); |
| 1040 | return ret; | 1068 | return 0; |
| 1041 | } | 1069 | } |
| 1042 | if (btrfs_header_nritems(mid) > | 1070 | if (btrfs_header_nritems(mid) > |
| 1043 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 1071 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
| @@ -1087,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1087 | if (wret < 0 && wret != -ENOSPC) | 1115 | if (wret < 0 && wret != -ENOSPC) |
| 1088 | ret = wret; | 1116 | ret = wret; |
| 1089 | if (btrfs_header_nritems(right) == 0) { | 1117 | if (btrfs_header_nritems(right) == 0) { |
| 1090 | u64 bytenr = right->start; | ||
| 1091 | u32 blocksize = right->len; | ||
| 1092 | |||
| 1093 | clean_tree_block(trans, root, right); | 1118 | clean_tree_block(trans, root, right); |
| 1094 | btrfs_tree_unlock(right); | 1119 | btrfs_tree_unlock(right); |
| 1095 | free_extent_buffer(right); | ||
| 1096 | right = NULL; | ||
| 1097 | wret = del_ptr(trans, root, path, level + 1, pslot + | 1120 | wret = del_ptr(trans, root, path, level + 1, pslot + |
| 1098 | 1); | 1121 | 1); |
| 1099 | if (wret) | 1122 | if (wret) |
| 1100 | ret = wret; | 1123 | ret = wret; |
| 1101 | wret = btrfs_free_tree_block(trans, root, | 1124 | root_sub_used(root, right->len); |
| 1102 | bytenr, blocksize, 0, | 1125 | btrfs_free_tree_block(trans, root, right, 0, 1); |
| 1103 | root->root_key.objectid, | 1126 | free_extent_buffer(right); |
| 1104 | level); | 1127 | right = NULL; |
| 1105 | if (wret) | ||
| 1106 | ret = wret; | ||
| 1107 | } else { | 1128 | } else { |
| 1108 | struct btrfs_disk_key right_key; | 1129 | struct btrfs_disk_key right_key; |
| 1109 | btrfs_node_key(right, &right_key, 0); | 1130 | btrfs_node_key(right, &right_key, 0); |
| @@ -1135,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1135 | BUG_ON(wret == 1); | 1156 | BUG_ON(wret == 1); |
| 1136 | } | 1157 | } |
| 1137 | if (btrfs_header_nritems(mid) == 0) { | 1158 | if (btrfs_header_nritems(mid) == 0) { |
| 1138 | /* we've managed to empty the middle node, drop it */ | ||
| 1139 | u64 bytenr = mid->start; | ||
| 1140 | u32 blocksize = mid->len; | ||
| 1141 | |||
| 1142 | clean_tree_block(trans, root, mid); | 1159 | clean_tree_block(trans, root, mid); |
| 1143 | btrfs_tree_unlock(mid); | 1160 | btrfs_tree_unlock(mid); |
| 1144 | free_extent_buffer(mid); | ||
| 1145 | mid = NULL; | ||
| 1146 | wret = del_ptr(trans, root, path, level + 1, pslot); | 1161 | wret = del_ptr(trans, root, path, level + 1, pslot); |
| 1147 | if (wret) | 1162 | if (wret) |
| 1148 | ret = wret; | 1163 | ret = wret; |
| 1149 | wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, | 1164 | root_sub_used(root, mid->len); |
| 1150 | 0, root->root_key.objectid, level); | 1165 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
| 1151 | if (wret) | 1166 | free_extent_buffer(mid); |
| 1152 | ret = wret; | 1167 | mid = NULL; |
| 1153 | } else { | 1168 | } else { |
| 1154 | /* update the parent key to reflect our changes */ | 1169 | /* update the parent key to reflect our changes */ |
| 1155 | struct btrfs_disk_key mid_key; | 1170 | struct btrfs_disk_key mid_key; |
| @@ -1589,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
| 1589 | btrfs_release_path(NULL, p); | 1604 | btrfs_release_path(NULL, p); |
| 1590 | 1605 | ||
| 1591 | ret = -EAGAIN; | 1606 | ret = -EAGAIN; |
| 1592 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1607 | tmp = read_tree_block(root, blocknr, blocksize, 0); |
| 1593 | if (tmp) { | 1608 | if (tmp) { |
| 1594 | /* | 1609 | /* |
| 1595 | * If the read above didn't mark this buffer up to date, | 1610 | * If the read above didn't mark this buffer up to date, |
| @@ -1739,7 +1754,6 @@ again: | |||
| 1739 | p->nodes[level + 1], | 1754 | p->nodes[level + 1], |
| 1740 | p->slots[level + 1], &b); | 1755 | p->slots[level + 1], &b); |
| 1741 | if (err) { | 1756 | if (err) { |
| 1742 | free_extent_buffer(b); | ||
| 1743 | ret = err; | 1757 | ret = err; |
| 1744 | goto done; | 1758 | goto done; |
| 1745 | } | 1759 | } |
| @@ -2075,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
| 2075 | if (IS_ERR(c)) | 2089 | if (IS_ERR(c)) |
| 2076 | return PTR_ERR(c); | 2090 | return PTR_ERR(c); |
| 2077 | 2091 | ||
| 2092 | root_add_used(root, root->nodesize); | ||
| 2093 | |||
| 2078 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); | 2094 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); |
| 2079 | btrfs_set_header_nritems(c, 1); | 2095 | btrfs_set_header_nritems(c, 1); |
| 2080 | btrfs_set_header_level(c, level); | 2096 | btrfs_set_header_level(c, level); |
| @@ -2133,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2133 | int nritems; | 2149 | int nritems; |
| 2134 | 2150 | ||
| 2135 | BUG_ON(!path->nodes[level]); | 2151 | BUG_ON(!path->nodes[level]); |
| 2152 | btrfs_assert_tree_locked(path->nodes[level]); | ||
| 2136 | lower = path->nodes[level]; | 2153 | lower = path->nodes[level]; |
| 2137 | nritems = btrfs_header_nritems(lower); | 2154 | nritems = btrfs_header_nritems(lower); |
| 2138 | BUG_ON(slot > nritems); | 2155 | BUG_ON(slot > nritems); |
| @@ -2201,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 2201 | if (IS_ERR(split)) | 2218 | if (IS_ERR(split)) |
| 2202 | return PTR_ERR(split); | 2219 | return PTR_ERR(split); |
| 2203 | 2220 | ||
| 2221 | root_add_used(root, root->nodesize); | ||
| 2222 | |||
| 2204 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); | 2223 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); |
| 2205 | btrfs_set_header_level(split, btrfs_header_level(c)); | 2224 | btrfs_set_header_level(split, btrfs_header_level(c)); |
| 2206 | btrfs_set_header_bytenr(split, split->start); | 2225 | btrfs_set_header_bytenr(split, split->start); |
| @@ -2285,12 +2304,17 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, | |||
| 2285 | return ret; | 2304 | return ret; |
| 2286 | } | 2305 | } |
| 2287 | 2306 | ||
| 2307 | /* | ||
| 2308 | * min slot controls the lowest index we're willing to push to the | ||
| 2309 | * right. We'll push up to and including min_slot, but no lower | ||
| 2310 | */ | ||
| 2288 | static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | 2311 | static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, |
| 2289 | struct btrfs_root *root, | 2312 | struct btrfs_root *root, |
| 2290 | struct btrfs_path *path, | 2313 | struct btrfs_path *path, |
| 2291 | int data_size, int empty, | 2314 | int data_size, int empty, |
| 2292 | struct extent_buffer *right, | 2315 | struct extent_buffer *right, |
| 2293 | int free_space, u32 left_nritems) | 2316 | int free_space, u32 left_nritems, |
| 2317 | u32 min_slot) | ||
| 2294 | { | 2318 | { |
| 2295 | struct extent_buffer *left = path->nodes[0]; | 2319 | struct extent_buffer *left = path->nodes[0]; |
| 2296 | struct extent_buffer *upper = path->nodes[1]; | 2320 | struct extent_buffer *upper = path->nodes[1]; |
| @@ -2308,7 +2332,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
| 2308 | if (empty) | 2332 | if (empty) |
| 2309 | nr = 0; | 2333 | nr = 0; |
| 2310 | else | 2334 | else |
| 2311 | nr = 1; | 2335 | nr = max_t(u32, 1, min_slot); |
| 2312 | 2336 | ||
| 2313 | if (path->slots[0] >= left_nritems) | 2337 | if (path->slots[0] >= left_nritems) |
| 2314 | push_space += data_size; | 2338 | push_space += data_size; |
| @@ -2414,6 +2438,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
| 2414 | 2438 | ||
| 2415 | if (left_nritems) | 2439 | if (left_nritems) |
| 2416 | btrfs_mark_buffer_dirty(left); | 2440 | btrfs_mark_buffer_dirty(left); |
| 2441 | else | ||
| 2442 | clean_tree_block(trans, root, left); | ||
| 2443 | |||
| 2417 | btrfs_mark_buffer_dirty(right); | 2444 | btrfs_mark_buffer_dirty(right); |
| 2418 | 2445 | ||
| 2419 | btrfs_item_key(right, &disk_key, 0); | 2446 | btrfs_item_key(right, &disk_key, 0); |
| @@ -2447,10 +2474,14 @@ out_unlock: | |||
| 2447 | * | 2474 | * |
| 2448 | * returns 1 if the push failed because the other node didn't have enough | 2475 | * returns 1 if the push failed because the other node didn't have enough |
| 2449 | * room, 0 if everything worked out and < 0 if there were major errors. | 2476 | * room, 0 if everything worked out and < 0 if there were major errors. |
| 2477 | * | ||
| 2478 | * this will push starting from min_slot to the end of the leaf. It won't | ||
| 2479 | * push any slot lower than min_slot | ||
| 2450 | */ | 2480 | */ |
| 2451 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | 2481 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root |
| 2452 | *root, struct btrfs_path *path, int data_size, | 2482 | *root, struct btrfs_path *path, |
| 2453 | int empty) | 2483 | int min_data_size, int data_size, |
| 2484 | int empty, u32 min_slot) | ||
| 2454 | { | 2485 | { |
| 2455 | struct extent_buffer *left = path->nodes[0]; | 2486 | struct extent_buffer *left = path->nodes[0]; |
| 2456 | struct extent_buffer *right; | 2487 | struct extent_buffer *right; |
| @@ -2492,8 +2523,8 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2492 | if (left_nritems == 0) | 2523 | if (left_nritems == 0) |
| 2493 | goto out_unlock; | 2524 | goto out_unlock; |
| 2494 | 2525 | ||
| 2495 | return __push_leaf_right(trans, root, path, data_size, empty, | 2526 | return __push_leaf_right(trans, root, path, min_data_size, empty, |
| 2496 | right, free_space, left_nritems); | 2527 | right, free_space, left_nritems, min_slot); |
| 2497 | out_unlock: | 2528 | out_unlock: |
| 2498 | btrfs_tree_unlock(right); | 2529 | btrfs_tree_unlock(right); |
| 2499 | free_extent_buffer(right); | 2530 | free_extent_buffer(right); |
| @@ -2503,12 +2534,17 @@ out_unlock: | |||
| 2503 | /* | 2534 | /* |
| 2504 | * push some data in the path leaf to the left, trying to free up at | 2535 | * push some data in the path leaf to the left, trying to free up at |
| 2505 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2536 | * least data_size bytes. returns zero if the push worked, nonzero otherwise |
| 2537 | * | ||
| 2538 | * max_slot can put a limit on how far into the leaf we'll push items. The | ||
| 2539 | * item at 'max_slot' won't be touched. Use (u32)-1 to make us do all the | ||
| 2540 | * items | ||
| 2506 | */ | 2541 | */ |
| 2507 | static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | 2542 | static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, |
| 2508 | struct btrfs_root *root, | 2543 | struct btrfs_root *root, |
| 2509 | struct btrfs_path *path, int data_size, | 2544 | struct btrfs_path *path, int data_size, |
| 2510 | int empty, struct extent_buffer *left, | 2545 | int empty, struct extent_buffer *left, |
| 2511 | int free_space, int right_nritems) | 2546 | int free_space, u32 right_nritems, |
| 2547 | u32 max_slot) | ||
| 2512 | { | 2548 | { |
| 2513 | struct btrfs_disk_key disk_key; | 2549 | struct btrfs_disk_key disk_key; |
| 2514 | struct extent_buffer *right = path->nodes[0]; | 2550 | struct extent_buffer *right = path->nodes[0]; |
| @@ -2527,9 +2563,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
| 2527 | slot = path->slots[1]; | 2563 | slot = path->slots[1]; |
| 2528 | 2564 | ||
| 2529 | if (empty) | 2565 | if (empty) |
| 2530 | nr = right_nritems; | 2566 | nr = min(right_nritems, max_slot); |
| 2531 | else | 2567 | else |
| 2532 | nr = right_nritems - 1; | 2568 | nr = min(right_nritems - 1, max_slot); |
| 2533 | 2569 | ||
| 2534 | for (i = 0; i < nr; i++) { | 2570 | for (i = 0; i < nr; i++) { |
| 2535 | item = btrfs_item_nr(right, i); | 2571 | item = btrfs_item_nr(right, i); |
| @@ -2659,6 +2695,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
| 2659 | btrfs_mark_buffer_dirty(left); | 2695 | btrfs_mark_buffer_dirty(left); |
| 2660 | if (right_nritems) | 2696 | if (right_nritems) |
| 2661 | btrfs_mark_buffer_dirty(right); | 2697 | btrfs_mark_buffer_dirty(right); |
| 2698 | else | ||
| 2699 | clean_tree_block(trans, root, right); | ||
| 2662 | 2700 | ||
| 2663 | btrfs_item_key(right, &disk_key, 0); | 2701 | btrfs_item_key(right, &disk_key, 0); |
| 2664 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | 2702 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); |
| @@ -2668,8 +2706,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
| 2668 | /* then fixup the leaf pointer in the path */ | 2706 | /* then fixup the leaf pointer in the path */ |
| 2669 | if (path->slots[0] < push_items) { | 2707 | if (path->slots[0] < push_items) { |
| 2670 | path->slots[0] += old_left_nritems; | 2708 | path->slots[0] += old_left_nritems; |
| 2671 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
| 2672 | clean_tree_block(trans, root, path->nodes[0]); | ||
| 2673 | btrfs_tree_unlock(path->nodes[0]); | 2709 | btrfs_tree_unlock(path->nodes[0]); |
| 2674 | free_extent_buffer(path->nodes[0]); | 2710 | free_extent_buffer(path->nodes[0]); |
| 2675 | path->nodes[0] = left; | 2711 | path->nodes[0] = left; |
| @@ -2690,10 +2726,14 @@ out: | |||
| 2690 | /* | 2726 | /* |
| 2691 | * push some data in the path leaf to the left, trying to free up at | 2727 | * push some data in the path leaf to the left, trying to free up at |
| 2692 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2728 | * least data_size bytes. returns zero if the push worked, nonzero otherwise |
| 2729 | * | ||
| 2730 | * max_slot can put a limit on how far into the leaf we'll push items. The | ||
| 2731 | * item at 'max_slot' won't be touched. Use (u32)-1 to make us push all the | ||
| 2732 | * items | ||
| 2693 | */ | 2733 | */ |
| 2694 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | 2734 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root |
| 2695 | *root, struct btrfs_path *path, int data_size, | 2735 | *root, struct btrfs_path *path, int min_data_size, |
| 2696 | int empty) | 2736 | int data_size, int empty, u32 max_slot) |
| 2697 | { | 2737 | { |
| 2698 | struct extent_buffer *right = path->nodes[0]; | 2738 | struct extent_buffer *right = path->nodes[0]; |
| 2699 | struct extent_buffer *left; | 2739 | struct extent_buffer *left; |
| @@ -2739,8 +2779,9 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2739 | goto out; | 2779 | goto out; |
| 2740 | } | 2780 | } |
| 2741 | 2781 | ||
| 2742 | return __push_leaf_left(trans, root, path, data_size, | 2782 | return __push_leaf_left(trans, root, path, min_data_size, |
| 2743 | empty, left, free_space, right_nritems); | 2783 | empty, left, free_space, right_nritems, |
| 2784 | max_slot); | ||
| 2744 | out: | 2785 | out: |
| 2745 | btrfs_tree_unlock(left); | 2786 | btrfs_tree_unlock(left); |
| 2746 | free_extent_buffer(left); | 2787 | free_extent_buffer(left); |
| @@ -2833,6 +2874,64 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, | |||
| 2833 | } | 2874 | } |
| 2834 | 2875 | ||
| 2835 | /* | 2876 | /* |
| 2877 | * double splits happen when we need to insert a big item in the middle | ||
| 2878 | * of a leaf. A double split can leave us with 3 mostly empty leaves: | ||
| 2879 | * leaf: [ slots 0 - N] [ our target ] [ N + 1 - total in leaf ] | ||
| 2880 | * A B C | ||
| 2881 | * | ||
| 2882 | * We avoid this by trying to push the items on either side of our target | ||
| 2883 | * into the adjacent leaves. If all goes well we can avoid the double split | ||
| 2884 | * completely. | ||
| 2885 | */ | ||
| 2886 | static noinline int push_for_double_split(struct btrfs_trans_handle *trans, | ||
| 2887 | struct btrfs_root *root, | ||
| 2888 | struct btrfs_path *path, | ||
| 2889 | int data_size) | ||
| 2890 | { | ||
| 2891 | int ret; | ||
| 2892 | int progress = 0; | ||
| 2893 | int slot; | ||
| 2894 | u32 nritems; | ||
| 2895 | |||
| 2896 | slot = path->slots[0]; | ||
| 2897 | |||
| 2898 | /* | ||
| 2899 | * try to push all the items after our slot into the | ||
| 2900 | * right leaf | ||
| 2901 | */ | ||
| 2902 | ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot); | ||
| 2903 | if (ret < 0) | ||
| 2904 | return ret; | ||
| 2905 | |||
| 2906 | if (ret == 0) | ||
| 2907 | progress++; | ||
| 2908 | |||
| 2909 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
| 2910 | /* | ||
| 2911 | * our goal is to get our slot at the start or end of a leaf. If | ||
| 2912 | * we've done so we're done | ||
| 2913 | */ | ||
| 2914 | if (path->slots[0] == 0 || path->slots[0] == nritems) | ||
| 2915 | return 0; | ||
| 2916 | |||
| 2917 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) | ||
| 2918 | return 0; | ||
| 2919 | |||
| 2920 | /* try to push all the items before our slot into the next leaf */ | ||
| 2921 | slot = path->slots[0]; | ||
| 2922 | ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot); | ||
| 2923 | if (ret < 0) | ||
| 2924 | return ret; | ||
| 2925 | |||
| 2926 | if (ret == 0) | ||
| 2927 | progress++; | ||
| 2928 | |||
| 2929 | if (progress) | ||
| 2930 | return 0; | ||
| 2931 | return 1; | ||
| 2932 | } | ||
| 2933 | |||
| 2934 | /* | ||
| 2836 | * split the path's leaf in two, making sure there is at least data_size | 2935 | * split the path's leaf in two, making sure there is at least data_size |
| 2837 | * available for the resulting leaf level of the path. | 2936 | * available for the resulting leaf level of the path. |
| 2838 | * | 2937 | * |
| @@ -2854,6 +2953,7 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
| 2854 | int wret; | 2953 | int wret; |
| 2855 | int split; | 2954 | int split; |
| 2856 | int num_doubles = 0; | 2955 | int num_doubles = 0; |
| 2956 | int tried_avoid_double = 0; | ||
| 2857 | 2957 | ||
| 2858 | l = path->nodes[0]; | 2958 | l = path->nodes[0]; |
| 2859 | slot = path->slots[0]; | 2959 | slot = path->slots[0]; |
| @@ -2862,12 +2962,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
| 2862 | return -EOVERFLOW; | 2962 | return -EOVERFLOW; |
| 2863 | 2963 | ||
| 2864 | /* first try to make some room by pushing left and right */ | 2964 | /* first try to make some room by pushing left and right */ |
| 2865 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2965 | if (data_size) { |
| 2866 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2966 | wret = push_leaf_right(trans, root, path, data_size, |
| 2967 | data_size, 0, 0); | ||
| 2867 | if (wret < 0) | 2968 | if (wret < 0) |
| 2868 | return wret; | 2969 | return wret; |
| 2869 | if (wret) { | 2970 | if (wret) { |
| 2870 | wret = push_leaf_left(trans, root, path, data_size, 0); | 2971 | wret = push_leaf_left(trans, root, path, data_size, |
| 2972 | data_size, 0, (u32)-1); | ||
| 2871 | if (wret < 0) | 2973 | if (wret < 0) |
| 2872 | return wret; | 2974 | return wret; |
| 2873 | } | 2975 | } |
| @@ -2901,6 +3003,8 @@ again: | |||
| 2901 | if (mid != nritems && | 3003 | if (mid != nritems && |
| 2902 | leaf_space_used(l, mid, nritems - mid) + | 3004 | leaf_space_used(l, mid, nritems - mid) + |
| 2903 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | 3005 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { |
| 3006 | if (data_size && !tried_avoid_double) | ||
| 3007 | goto push_for_double; | ||
| 2904 | split = 2; | 3008 | split = 2; |
| 2905 | } | 3009 | } |
| 2906 | } | 3010 | } |
| @@ -2917,6 +3021,8 @@ again: | |||
| 2917 | if (mid != nritems && | 3021 | if (mid != nritems && |
| 2918 | leaf_space_used(l, mid, nritems - mid) + | 3022 | leaf_space_used(l, mid, nritems - mid) + |
| 2919 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { | 3023 | data_size > BTRFS_LEAF_DATA_SIZE(root)) { |
| 3024 | if (data_size && !tried_avoid_double) | ||
| 3025 | goto push_for_double; | ||
| 2920 | split = 2 ; | 3026 | split = 2 ; |
| 2921 | } | 3027 | } |
| 2922 | } | 3028 | } |
| @@ -2931,10 +3037,10 @@ again: | |||
| 2931 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 3037 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
| 2932 | root->root_key.objectid, | 3038 | root->root_key.objectid, |
| 2933 | &disk_key, 0, l->start, 0); | 3039 | &disk_key, 0, l->start, 0); |
| 2934 | if (IS_ERR(right)) { | 3040 | if (IS_ERR(right)) |
| 2935 | BUG_ON(1); | ||
| 2936 | return PTR_ERR(right); | 3041 | return PTR_ERR(right); |
| 2937 | } | 3042 | |
| 3043 | root_add_used(root, root->leafsize); | ||
| 2938 | 3044 | ||
| 2939 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 3045 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
| 2940 | btrfs_set_header_bytenr(right, right->start); | 3046 | btrfs_set_header_bytenr(right, right->start); |
| @@ -2997,6 +3103,13 @@ again: | |||
| 2997 | } | 3103 | } |
| 2998 | 3104 | ||
| 2999 | return ret; | 3105 | return ret; |
| 3106 | |||
| 3107 | push_for_double: | ||
| 3108 | push_for_double_split(trans, root, path, data_size); | ||
| 3109 | tried_avoid_double = 1; | ||
| 3110 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= data_size) | ||
| 3111 | return 0; | ||
| 3112 | goto again; | ||
| 3000 | } | 3113 | } |
| 3001 | 3114 | ||
| 3002 | static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | 3115 | static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, |
| @@ -3040,6 +3153,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
| 3040 | if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) | 3153 | if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) |
| 3041 | goto err; | 3154 | goto err; |
| 3042 | 3155 | ||
| 3156 | /* the leaf has changed, it now has room. return now */ | ||
| 3157 | if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len) | ||
| 3158 | goto err; | ||
| 3159 | |||
| 3043 | if (key.type == BTRFS_EXTENT_DATA_KEY) { | 3160 | if (key.type == BTRFS_EXTENT_DATA_KEY) { |
| 3044 | fi = btrfs_item_ptr(leaf, path->slots[0], | 3161 | fi = btrfs_item_ptr(leaf, path->slots[0], |
| 3045 | struct btrfs_file_extent_item); | 3162 | struct btrfs_file_extent_item); |
| @@ -3049,7 +3166,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
| 3049 | 3166 | ||
| 3050 | btrfs_set_path_blocking(path); | 3167 | btrfs_set_path_blocking(path); |
| 3051 | ret = split_leaf(trans, root, &key, path, ins_len, 1); | 3168 | ret = split_leaf(trans, root, &key, path, ins_len, 1); |
| 3052 | BUG_ON(ret); | 3169 | if (ret) |
| 3170 | goto err; | ||
| 3053 | 3171 | ||
| 3054 | path->keep_locks = 0; | 3172 | path->keep_locks = 0; |
| 3055 | btrfs_unlock_up_safe(path, 1); | 3173 | btrfs_unlock_up_safe(path, 1); |
| @@ -3791,9 +3909,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
| 3791 | */ | 3909 | */ |
| 3792 | btrfs_unlock_up_safe(path, 0); | 3910 | btrfs_unlock_up_safe(path, 0); |
| 3793 | 3911 | ||
| 3794 | ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, | 3912 | root_sub_used(root, leaf->len); |
| 3795 | 0, root->root_key.objectid, 0); | 3913 | |
| 3796 | return ret; | 3914 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
| 3915 | return 0; | ||
| 3797 | } | 3916 | } |
| 3798 | /* | 3917 | /* |
| 3799 | * delete the item at the leaf level in path. If that empties | 3918 | * delete the item at the leaf level in path. If that empties |
| @@ -3860,6 +3979,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 3860 | if (leaf == root->node) { | 3979 | if (leaf == root->node) { |
| 3861 | btrfs_set_header_level(leaf, 0); | 3980 | btrfs_set_header_level(leaf, 0); |
| 3862 | } else { | 3981 | } else { |
| 3982 | btrfs_set_path_blocking(path); | ||
| 3983 | clean_tree_block(trans, root, leaf); | ||
| 3863 | ret = btrfs_del_leaf(trans, root, path, leaf); | 3984 | ret = btrfs_del_leaf(trans, root, path, leaf); |
| 3864 | BUG_ON(ret); | 3985 | BUG_ON(ret); |
| 3865 | } | 3986 | } |
| @@ -3885,13 +4006,15 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 3885 | extent_buffer_get(leaf); | 4006 | extent_buffer_get(leaf); |
| 3886 | 4007 | ||
| 3887 | btrfs_set_path_blocking(path); | 4008 | btrfs_set_path_blocking(path); |
| 3888 | wret = push_leaf_left(trans, root, path, 1, 1); | 4009 | wret = push_leaf_left(trans, root, path, 1, 1, |
| 4010 | 1, (u32)-1); | ||
| 3889 | if (wret < 0 && wret != -ENOSPC) | 4011 | if (wret < 0 && wret != -ENOSPC) |
| 3890 | ret = wret; | 4012 | ret = wret; |
| 3891 | 4013 | ||
| 3892 | if (path->nodes[0] == leaf && | 4014 | if (path->nodes[0] == leaf && |
| 3893 | btrfs_header_nritems(leaf)) { | 4015 | btrfs_header_nritems(leaf)) { |
| 3894 | wret = push_leaf_right(trans, root, path, 1, 1); | 4016 | wret = push_leaf_right(trans, root, path, 1, |
| 4017 | 1, 1, 0); | ||
| 3895 | if (wret < 0 && wret != -ENOSPC) | 4018 | if (wret < 0 && wret != -ENOSPC) |
| 3896 | ret = wret; | 4019 | ret = wret; |
| 3897 | } | 4020 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0af2e3868573..29c20092847e 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -26,6 +26,7 @@ | |||
| 26 | #include <linux/completion.h> | 26 | #include <linux/completion.h> |
| 27 | #include <linux/backing-dev.h> | 27 | #include <linux/backing-dev.h> |
| 28 | #include <linux/wait.h> | 28 | #include <linux/wait.h> |
| 29 | #include <linux/slab.h> | ||
| 29 | #include <asm/kmap_types.h> | 30 | #include <asm/kmap_types.h> |
| 30 | #include "extent_io.h" | 31 | #include "extent_io.h" |
| 31 | #include "extent_map.h" | 32 | #include "extent_map.h" |
| @@ -33,6 +34,7 @@ | |||
| 33 | 34 | ||
| 34 | struct btrfs_trans_handle; | 35 | struct btrfs_trans_handle; |
| 35 | struct btrfs_transaction; | 36 | struct btrfs_transaction; |
| 37 | struct btrfs_pending_snapshot; | ||
| 36 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 38 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
| 37 | extern struct kmem_cache *btrfs_transaction_cachep; | 39 | extern struct kmem_cache *btrfs_transaction_cachep; |
| 38 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 40 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
| @@ -662,6 +664,7 @@ struct btrfs_csum_item { | |||
| 662 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | 664 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) |
| 663 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | 665 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) |
| 664 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | 666 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) |
| 667 | #define BTRFS_NR_RAID_TYPES 5 | ||
| 665 | 668 | ||
| 666 | struct btrfs_block_group_item { | 669 | struct btrfs_block_group_item { |
| 667 | __le64 used; | 670 | __le64 used; |
| @@ -673,42 +676,46 @@ struct btrfs_space_info { | |||
| 673 | u64 flags; | 676 | u64 flags; |
| 674 | 677 | ||
| 675 | u64 total_bytes; /* total bytes in the space */ | 678 | u64 total_bytes; /* total bytes in the space */ |
| 676 | u64 bytes_used; /* total bytes used on disk */ | 679 | u64 bytes_used; /* total bytes used, |
| 680 | this does't take mirrors into account */ | ||
| 677 | u64 bytes_pinned; /* total bytes pinned, will be freed when the | 681 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
| 678 | transaction finishes */ | 682 | transaction finishes */ |
| 679 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 683 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
| 680 | current allocations */ | 684 | current allocations */ |
| 681 | u64 bytes_readonly; /* total bytes that are read only */ | 685 | u64 bytes_readonly; /* total bytes that are read only */ |
| 682 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 686 | |
| 683 | u64 bytes_root; /* the number of bytes needed to commit a | ||
| 684 | transaction */ | ||
| 685 | u64 bytes_may_use; /* number of bytes that may be used for | 687 | u64 bytes_may_use; /* number of bytes that may be used for |
| 686 | delalloc/allocations */ | 688 | delalloc/allocations */ |
| 687 | u64 bytes_delalloc; /* number of bytes currently reserved for | 689 | u64 disk_used; /* total bytes used on disk */ |
| 688 | delayed allocation */ | ||
| 689 | 690 | ||
| 690 | int full; /* indicates that we cannot allocate any more | 691 | int full; /* indicates that we cannot allocate any more |
| 691 | chunks for this space */ | 692 | chunks for this space */ |
| 692 | int force_alloc; /* set if we need to force a chunk alloc for | 693 | int force_alloc; /* set if we need to force a chunk alloc for |
| 693 | this space */ | 694 | this space */ |
| 694 | int force_delalloc; /* make people start doing filemap_flush until | ||
| 695 | we're under a threshold */ | ||
| 696 | 695 | ||
| 697 | struct list_head list; | 696 | struct list_head list; |
| 698 | 697 | ||
| 699 | /* for controlling how we free up space for allocations */ | ||
| 700 | wait_queue_head_t allocate_wait; | ||
| 701 | wait_queue_head_t flush_wait; | ||
| 702 | int allocating_chunk; | ||
| 703 | int flushing; | ||
| 704 | |||
| 705 | /* for block groups in our same type */ | 698 | /* for block groups in our same type */ |
| 706 | struct list_head block_groups; | 699 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
| 707 | spinlock_t lock; | 700 | spinlock_t lock; |
| 708 | struct rw_semaphore groups_sem; | 701 | struct rw_semaphore groups_sem; |
| 709 | atomic_t caching_threads; | 702 | atomic_t caching_threads; |
| 710 | }; | 703 | }; |
| 711 | 704 | ||
| 705 | struct btrfs_block_rsv { | ||
| 706 | u64 size; | ||
| 707 | u64 reserved; | ||
| 708 | u64 freed[2]; | ||
| 709 | struct btrfs_space_info *space_info; | ||
| 710 | struct list_head list; | ||
| 711 | spinlock_t lock; | ||
| 712 | atomic_t usage; | ||
| 713 | unsigned int priority:8; | ||
| 714 | unsigned int durable:1; | ||
| 715 | unsigned int refill_used:1; | ||
| 716 | unsigned int full:1; | ||
| 717 | }; | ||
| 718 | |||
| 712 | /* | 719 | /* |
| 713 | * free clusters are used to claim free space in relatively large chunks, | 720 | * free clusters are used to claim free space in relatively large chunks, |
| 714 | * allowing us to do less seeky writes. They are used for all metadata | 721 | * allowing us to do less seeky writes. They are used for all metadata |
| @@ -759,6 +766,7 @@ struct btrfs_block_group_cache { | |||
| 759 | spinlock_t lock; | 766 | spinlock_t lock; |
| 760 | u64 pinned; | 767 | u64 pinned; |
| 761 | u64 reserved; | 768 | u64 reserved; |
| 769 | u64 reserved_pinned; | ||
| 762 | u64 bytes_super; | 770 | u64 bytes_super; |
| 763 | u64 flags; | 771 | u64 flags; |
| 764 | u64 sectorsize; | 772 | u64 sectorsize; |
| @@ -824,6 +832,22 @@ struct btrfs_fs_info { | |||
| 824 | /* logical->physical extent mapping */ | 832 | /* logical->physical extent mapping */ |
| 825 | struct btrfs_mapping_tree mapping_tree; | 833 | struct btrfs_mapping_tree mapping_tree; |
| 826 | 834 | ||
| 835 | /* block reservation for extent, checksum and root tree */ | ||
| 836 | struct btrfs_block_rsv global_block_rsv; | ||
| 837 | /* block reservation for delay allocation */ | ||
| 838 | struct btrfs_block_rsv delalloc_block_rsv; | ||
| 839 | /* block reservation for metadata operations */ | ||
| 840 | struct btrfs_block_rsv trans_block_rsv; | ||
| 841 | /* block reservation for chunk tree */ | ||
| 842 | struct btrfs_block_rsv chunk_block_rsv; | ||
| 843 | |||
| 844 | struct btrfs_block_rsv empty_block_rsv; | ||
| 845 | |||
| 846 | /* list of block reservations that cross multiple transactions */ | ||
| 847 | struct list_head durable_block_rsv_list; | ||
| 848 | |||
| 849 | struct mutex durable_block_rsv_mutex; | ||
| 850 | |||
| 827 | u64 generation; | 851 | u64 generation; |
| 828 | u64 last_trans_committed; | 852 | u64 last_trans_committed; |
| 829 | 853 | ||
| @@ -834,7 +858,6 @@ struct btrfs_fs_info { | |||
| 834 | u64 last_trans_log_full_commit; | 858 | u64 last_trans_log_full_commit; |
| 835 | u64 open_ioctl_trans; | 859 | u64 open_ioctl_trans; |
| 836 | unsigned long mount_opt; | 860 | unsigned long mount_opt; |
| 837 | u64 max_extent; | ||
| 838 | u64 max_inline; | 861 | u64 max_inline; |
| 839 | u64 alloc_start; | 862 | u64 alloc_start; |
| 840 | struct btrfs_transaction *running_transaction; | 863 | struct btrfs_transaction *running_transaction; |
| @@ -927,7 +950,6 @@ struct btrfs_fs_info { | |||
| 927 | struct btrfs_workers endio_meta_write_workers; | 950 | struct btrfs_workers endio_meta_write_workers; |
| 928 | struct btrfs_workers endio_write_workers; | 951 | struct btrfs_workers endio_write_workers; |
| 929 | struct btrfs_workers submit_workers; | 952 | struct btrfs_workers submit_workers; |
| 930 | struct btrfs_workers enospc_workers; | ||
| 931 | /* | 953 | /* |
| 932 | * fixup workers take dirty pages that didn't properly go through | 954 | * fixup workers take dirty pages that didn't properly go through |
| 933 | * the cow mechanism and make them safe to write. It happens | 955 | * the cow mechanism and make them safe to write. It happens |
| @@ -943,6 +965,7 @@ struct btrfs_fs_info { | |||
| 943 | int do_barriers; | 965 | int do_barriers; |
| 944 | int closing; | 966 | int closing; |
| 945 | int log_root_recovering; | 967 | int log_root_recovering; |
| 968 | int enospc_unlink; | ||
| 946 | 969 | ||
| 947 | u64 total_pinned; | 970 | u64 total_pinned; |
| 948 | 971 | ||
| @@ -1012,6 +1035,9 @@ struct btrfs_root { | |||
| 1012 | struct completion kobj_unregister; | 1035 | struct completion kobj_unregister; |
| 1013 | struct mutex objectid_mutex; | 1036 | struct mutex objectid_mutex; |
| 1014 | 1037 | ||
| 1038 | spinlock_t accounting_lock; | ||
| 1039 | struct btrfs_block_rsv *block_rsv; | ||
| 1040 | |||
| 1015 | struct mutex log_mutex; | 1041 | struct mutex log_mutex; |
| 1016 | wait_queue_head_t log_writer_wait; | 1042 | wait_queue_head_t log_writer_wait; |
| 1017 | wait_queue_head_t log_commit_wait[2]; | 1043 | wait_queue_head_t log_commit_wait[2]; |
| @@ -1043,7 +1069,6 @@ struct btrfs_root { | |||
| 1043 | int ref_cows; | 1069 | int ref_cows; |
| 1044 | int track_dirty; | 1070 | int track_dirty; |
| 1045 | int in_radix; | 1071 | int in_radix; |
| 1046 | int clean_orphans; | ||
| 1047 | 1072 | ||
| 1048 | u64 defrag_trans_start; | 1073 | u64 defrag_trans_start; |
| 1049 | struct btrfs_key defrag_progress; | 1074 | struct btrfs_key defrag_progress; |
| @@ -1057,8 +1082,11 @@ struct btrfs_root { | |||
| 1057 | 1082 | ||
| 1058 | struct list_head root_list; | 1083 | struct list_head root_list; |
| 1059 | 1084 | ||
| 1060 | spinlock_t list_lock; | 1085 | spinlock_t orphan_lock; |
| 1061 | struct list_head orphan_list; | 1086 | struct list_head orphan_list; |
| 1087 | struct btrfs_block_rsv *orphan_block_rsv; | ||
| 1088 | int orphan_item_inserted; | ||
| 1089 | int orphan_cleanup_state; | ||
| 1062 | 1090 | ||
| 1063 | spinlock_t inode_lock; | 1091 | spinlock_t inode_lock; |
| 1064 | /* red-black tree that keeps track of in-memory inodes */ | 1092 | /* red-black tree that keeps track of in-memory inodes */ |
| @@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
| 1965 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1993 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
| 1966 | struct btrfs_root *root, unsigned long count); | 1994 | struct btrfs_root *root, unsigned long count); |
| 1967 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1995 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
| 1996 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 1997 | struct btrfs_root *root, u64 bytenr, | ||
| 1998 | u64 num_bytes, u64 *refs, u64 *flags); | ||
| 1968 | int btrfs_pin_extent(struct btrfs_root *root, | 1999 | int btrfs_pin_extent(struct btrfs_root *root, |
| 1969 | u64 bytenr, u64 num, int reserved); | 2000 | u64 bytenr, u64 num, int reserved); |
| 1970 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 2001 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
| @@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 1984 | u64 parent, u64 root_objectid, | 2015 | u64 parent, u64 root_objectid, |
| 1985 | struct btrfs_disk_key *key, int level, | 2016 | struct btrfs_disk_key *key, int level, |
| 1986 | u64 hint, u64 empty_size); | 2017 | u64 hint, u64 empty_size); |
| 1987 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2018 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
| 1988 | struct btrfs_root *root, | 2019 | struct btrfs_root *root, |
| 1989 | u64 bytenr, u32 blocksize, | 2020 | struct extent_buffer *buf, |
| 1990 | u64 parent, u64 root_objectid, int level); | 2021 | u64 parent, int last_ref); |
| 1991 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2022 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
| 1992 | struct btrfs_root *root, | 2023 | struct btrfs_root *root, |
| 1993 | u64 bytenr, u32 blocksize, | 2024 | u64 bytenr, u32 blocksize, |
| @@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 2041 | u64 size); | 2072 | u64 size); |
| 2042 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2073 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
| 2043 | struct btrfs_root *root, u64 group_start); | 2074 | struct btrfs_root *root, u64 group_start); |
| 2044 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | ||
| 2045 | struct btrfs_block_group_cache *group); | ||
| 2046 | |||
| 2047 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2075 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
| 2048 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2076 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
| 2049 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2077 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 2050 | 2078 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | |
| 2051 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); | 2079 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
| 2052 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | 2080 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
| 2053 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | 2081 | struct btrfs_root *root, |
| 2054 | struct inode *inode, int num_items); | 2082 | int num_items, int *retries); |
| 2055 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | 2083 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
| 2056 | struct inode *inode, int num_items); | 2084 | struct btrfs_root *root); |
| 2057 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2085 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
| 2058 | u64 bytes); | 2086 | struct inode *inode); |
| 2059 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2087 | void btrfs_orphan_release_metadata(struct inode *inode); |
| 2060 | struct inode *inode, u64 bytes); | 2088 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, |
| 2061 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | 2089 | struct btrfs_pending_snapshot *pending); |
| 2062 | u64 bytes); | 2090 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); |
| 2063 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2091 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
| 2064 | u64 bytes); | 2092 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
| 2093 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | ||
| 2094 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | ||
| 2095 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | ||
| 2096 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
| 2097 | struct btrfs_block_rsv *rsv); | ||
| 2098 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
| 2099 | struct btrfs_block_rsv *rsv); | ||
| 2100 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
| 2101 | struct btrfs_root *root, | ||
| 2102 | struct btrfs_block_rsv *block_rsv, | ||
| 2103 | u64 num_bytes, int *retries); | ||
| 2104 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
| 2105 | struct btrfs_root *root, | ||
| 2106 | struct btrfs_block_rsv *block_rsv, | ||
| 2107 | u64 min_reserved, int min_factor); | ||
| 2108 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
| 2109 | struct btrfs_block_rsv *dst_rsv, | ||
| 2110 | u64 num_bytes); | ||
| 2111 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
| 2112 | struct btrfs_block_rsv *block_rsv, | ||
| 2113 | u64 num_bytes); | ||
| 2114 | int btrfs_set_block_group_ro(struct btrfs_root *root, | ||
| 2115 | struct btrfs_block_group_cache *cache); | ||
| 2116 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
| 2117 | struct btrfs_block_group_cache *cache); | ||
| 2065 | /* ctree.c */ | 2118 | /* ctree.c */ |
| 2066 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2119 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 2067 | int level, int *slot); | 2120 | int level, int *slot); |
| @@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
| 2152 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2205 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
| 2153 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2206 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
| 2154 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2207 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
| 2155 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); | 2208 | int btrfs_drop_snapshot(struct btrfs_root *root, |
| 2209 | struct btrfs_block_rsv *block_rsv, int update_ref); | ||
| 2156 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2210 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
| 2157 | struct btrfs_root *root, | 2211 | struct btrfs_root *root, |
| 2158 | struct extent_buffer *node, | 2212 | struct extent_buffer *node, |
| @@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 2245 | struct btrfs_root *root, | 2299 | struct btrfs_root *root, |
| 2246 | const char *name, int name_len, | 2300 | const char *name, int name_len, |
| 2247 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 2301 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
| 2302 | struct btrfs_inode_ref * | ||
| 2303 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
| 2304 | struct btrfs_root *root, | ||
| 2305 | struct btrfs_path *path, | ||
| 2306 | const char *name, int name_len, | ||
| 2307 | u64 inode_objectid, u64 ref_objectid, int mod); | ||
| 2248 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 2308 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
| 2249 | struct btrfs_root *root, | 2309 | struct btrfs_root *root, |
| 2250 | struct btrfs_path *path, u64 objectid); | 2310 | struct btrfs_path *path, u64 objectid); |
| @@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
| 2257 | struct btrfs_root *root, u64 bytenr, u64 len); | 2317 | struct btrfs_root *root, u64 bytenr, u64 len); |
| 2258 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 2318 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
| 2259 | struct bio *bio, u32 *dst); | 2319 | struct bio *bio, u32 *dst); |
| 2320 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
| 2321 | struct bio *bio, u64 logical_offset, u32 *dst); | ||
| 2260 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 2322 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
| 2261 | struct btrfs_root *root, | 2323 | struct btrfs_root *root, |
| 2262 | u64 objectid, u64 pos, | 2324 | u64 objectid, u64 pos, |
| @@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 2311 | u32 min_type); | 2373 | u32 min_type); |
| 2312 | 2374 | ||
| 2313 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2375 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
| 2376 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
| 2314 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2377 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 2315 | struct extent_state **cached_state); | 2378 | struct extent_state **cached_state); |
| 2316 | int btrfs_writepages(struct address_space *mapping, | 2379 | int btrfs_writepages(struct address_space *mapping, |
| @@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
| 2349 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2412 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2350 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2413 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2351 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2414 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
| 2415 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2416 | struct btrfs_pending_snapshot *pending, | ||
| 2417 | u64 *bytes_to_reserve); | ||
| 2418 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2419 | struct btrfs_pending_snapshot *pending); | ||
| 2420 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
| 2421 | struct btrfs_root *root); | ||
| 2352 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2422 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
| 2353 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2423 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
| 2354 | void btrfs_add_delayed_iput(struct inode *inode); | 2424 | void btrfs_add_delayed_iput(struct inode *inode); |
| 2355 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2425 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
| 2426 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | ||
| 2427 | u64 start, u64 num_bytes, u64 min_size, | ||
| 2428 | loff_t actual_len, u64 *alloc_hint); | ||
| 2356 | extern const struct dentry_operations btrfs_dentry_operations; | 2429 | extern const struct dentry_operations btrfs_dentry_operations; |
| 2357 | 2430 | ||
| 2358 | /* ioctl.c */ | 2431 | /* ioctl.c */ |
| @@ -2361,7 +2434,7 @@ void btrfs_update_iflags(struct inode *inode); | |||
| 2361 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); | 2434 | void btrfs_inherit_iflags(struct inode *inode, struct inode *dir); |
| 2362 | 2435 | ||
| 2363 | /* file.c */ | 2436 | /* file.c */ |
| 2364 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync); | 2437 | int btrfs_sync_file(struct file *file, int datasync); |
| 2365 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | 2438 | int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, |
| 2366 | int skip_pinned); | 2439 | int skip_pinned); |
| 2367 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); | 2440 | int btrfs_check_file(struct btrfs_root *root, struct inode *inode); |
| @@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 2409 | struct btrfs_root *root); | 2482 | struct btrfs_root *root); |
| 2410 | int btrfs_recover_relocation(struct btrfs_root *root); | 2483 | int btrfs_recover_relocation(struct btrfs_root *root); |
| 2411 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | 2484 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); |
| 2485 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
| 2486 | struct btrfs_root *root, struct extent_buffer *buf, | ||
| 2487 | struct extent_buffer *cow); | ||
| 2488 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2489 | struct btrfs_pending_snapshot *pending, | ||
| 2490 | u64 *bytes_to_reserve); | ||
| 2491 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2492 | struct btrfs_pending_snapshot *pending); | ||
| 2412 | #endif | 2493 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 84e6781413b1..e807b143b857 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include <linux/sort.h> | 21 | #include <linux/sort.h> |
| 21 | #include "ctree.h" | 22 | #include "ctree.h" |
| 22 | #include "delayed-ref.h" | 23 | #include "delayed-ref.h" |
| @@ -318,107 +319,6 @@ out: | |||
| 318 | } | 319 | } |
| 319 | 320 | ||
| 320 | /* | 321 | /* |
| 321 | * helper function to lookup reference count and flags of extent. | ||
| 322 | * | ||
| 323 | * the head node for delayed ref is used to store the sum of all the | ||
| 324 | * reference count modifications queued up in the rbtree. the head | ||
| 325 | * node may also store the extent flags to set. This way you can check | ||
| 326 | * to see what the reference count and extent flags would be if all of | ||
| 327 | * the delayed refs are not processed. | ||
| 328 | */ | ||
| 329 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 330 | struct btrfs_root *root, u64 bytenr, | ||
| 331 | u64 num_bytes, u64 *refs, u64 *flags) | ||
| 332 | { | ||
| 333 | struct btrfs_delayed_ref_node *ref; | ||
| 334 | struct btrfs_delayed_ref_head *head; | ||
| 335 | struct btrfs_delayed_ref_root *delayed_refs; | ||
| 336 | struct btrfs_path *path; | ||
| 337 | struct btrfs_extent_item *ei; | ||
| 338 | struct extent_buffer *leaf; | ||
| 339 | struct btrfs_key key; | ||
| 340 | u32 item_size; | ||
| 341 | u64 num_refs; | ||
| 342 | u64 extent_flags; | ||
| 343 | int ret; | ||
| 344 | |||
| 345 | path = btrfs_alloc_path(); | ||
| 346 | if (!path) | ||
| 347 | return -ENOMEM; | ||
| 348 | |||
| 349 | key.objectid = bytenr; | ||
| 350 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 351 | key.offset = num_bytes; | ||
| 352 | delayed_refs = &trans->transaction->delayed_refs; | ||
| 353 | again: | ||
| 354 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
| 355 | &key, path, 0, 0); | ||
| 356 | if (ret < 0) | ||
| 357 | goto out; | ||
| 358 | |||
| 359 | if (ret == 0) { | ||
| 360 | leaf = path->nodes[0]; | ||
| 361 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 362 | if (item_size >= sizeof(*ei)) { | ||
| 363 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 364 | struct btrfs_extent_item); | ||
| 365 | num_refs = btrfs_extent_refs(leaf, ei); | ||
| 366 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
| 367 | } else { | ||
| 368 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
| 369 | struct btrfs_extent_item_v0 *ei0; | ||
| 370 | BUG_ON(item_size != sizeof(*ei0)); | ||
| 371 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
| 372 | struct btrfs_extent_item_v0); | ||
| 373 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
| 374 | /* FIXME: this isn't correct for data */ | ||
| 375 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 376 | #else | ||
| 377 | BUG(); | ||
| 378 | #endif | ||
| 379 | } | ||
| 380 | BUG_ON(num_refs == 0); | ||
| 381 | } else { | ||
| 382 | num_refs = 0; | ||
| 383 | extent_flags = 0; | ||
| 384 | ret = 0; | ||
| 385 | } | ||
| 386 | |||
| 387 | spin_lock(&delayed_refs->lock); | ||
| 388 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | ||
| 389 | if (ref) { | ||
| 390 | head = btrfs_delayed_node_to_head(ref); | ||
| 391 | if (!mutex_trylock(&head->mutex)) { | ||
| 392 | atomic_inc(&ref->refs); | ||
| 393 | spin_unlock(&delayed_refs->lock); | ||
| 394 | |||
| 395 | btrfs_release_path(root->fs_info->extent_root, path); | ||
| 396 | |||
| 397 | mutex_lock(&head->mutex); | ||
| 398 | mutex_unlock(&head->mutex); | ||
| 399 | btrfs_put_delayed_ref(ref); | ||
| 400 | goto again; | ||
| 401 | } | ||
| 402 | if (head->extent_op && head->extent_op->update_flags) | ||
| 403 | extent_flags |= head->extent_op->flags_to_set; | ||
| 404 | else | ||
| 405 | BUG_ON(num_refs == 0); | ||
| 406 | |||
| 407 | num_refs += ref->ref_mod; | ||
| 408 | mutex_unlock(&head->mutex); | ||
| 409 | } | ||
| 410 | WARN_ON(num_refs == 0); | ||
| 411 | if (refs) | ||
| 412 | *refs = num_refs; | ||
| 413 | if (flags) | ||
| 414 | *flags = extent_flags; | ||
| 415 | out: | ||
| 416 | spin_unlock(&delayed_refs->lock); | ||
| 417 | btrfs_free_path(path); | ||
| 418 | return ret; | ||
| 419 | } | ||
| 420 | |||
| 421 | /* | ||
| 422 | * helper function to update an extent delayed ref in the | 322 | * helper function to update an extent delayed ref in the |
| 423 | * rbtree. existing and update must both have the same | 323 | * rbtree. existing and update must both have the same |
| 424 | * bytenr and parent | 324 | * bytenr and parent |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f6fc67ddad36..50e3cf92fbda 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
| @@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
| 167 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
| 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
| 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); |
| 170 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 171 | struct btrfs_root *root, u64 bytenr, | ||
| 172 | u64 num_bytes, u64 *refs, u64 *flags); | ||
| 173 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 170 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
| 174 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 171 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
| 175 | u64 parent, u64 orig_ref_root, u64 ref_root, | 172 | u64 parent, u64 orig_ref_root, u64 ref_root, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 11d0ad30e203..34f7c375567e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #include <linux/kthread.h> | 27 | #include <linux/kthread.h> |
| 28 | #include <linux/freezer.h> | 28 | #include <linux/freezer.h> |
| 29 | #include <linux/crc32c.h> | 29 | #include <linux/crc32c.h> |
| 30 | #include <linux/slab.h> | ||
| 30 | #include "compat.h" | 31 | #include "compat.h" |
| 31 | #include "ctree.h" | 32 | #include "ctree.h" |
| 32 | #include "disk-io.h" | 33 | #include "disk-io.h" |
| @@ -43,8 +44,6 @@ static struct extent_io_ops btree_extent_io_ops; | |||
| 43 | static void end_workqueue_fn(struct btrfs_work *work); | 44 | static void end_workqueue_fn(struct btrfs_work *work); |
| 44 | static void free_fs_root(struct btrfs_root *root); | 45 | static void free_fs_root(struct btrfs_root *root); |
| 45 | 46 | ||
| 46 | static atomic_t btrfs_bdi_num = ATOMIC_INIT(0); | ||
| 47 | |||
| 48 | /* | 47 | /* |
| 49 | * end_io_wq structs are used to do processing in task context when an IO is | 48 | * end_io_wq structs are used to do processing in task context when an IO is |
| 50 | * complete. This is used during reads to verify checksums, and it is used | 49 | * complete. This is used during reads to verify checksums, and it is used |
| @@ -75,6 +74,11 @@ struct async_submit_bio { | |||
| 75 | int rw; | 74 | int rw; |
| 76 | int mirror_num; | 75 | int mirror_num; |
| 77 | unsigned long bio_flags; | 76 | unsigned long bio_flags; |
| 77 | /* | ||
| 78 | * bio_offset is optional, can be used if the pages in the bio | ||
| 79 | * can't tell us where in the file the bio should go | ||
| 80 | */ | ||
| 81 | u64 bio_offset; | ||
| 78 | struct btrfs_work work; | 82 | struct btrfs_work work; |
| 79 | }; | 83 | }; |
| 80 | 84 | ||
| @@ -535,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work) | |||
| 535 | async = container_of(work, struct async_submit_bio, work); | 539 | async = container_of(work, struct async_submit_bio, work); |
| 536 | fs_info = BTRFS_I(async->inode)->root->fs_info; | 540 | fs_info = BTRFS_I(async->inode)->root->fs_info; |
| 537 | async->submit_bio_start(async->inode, async->rw, async->bio, | 541 | async->submit_bio_start(async->inode, async->rw, async->bio, |
| 538 | async->mirror_num, async->bio_flags); | 542 | async->mirror_num, async->bio_flags, |
| 543 | async->bio_offset); | ||
| 539 | } | 544 | } |
| 540 | 545 | ||
| 541 | static void run_one_async_done(struct btrfs_work *work) | 546 | static void run_one_async_done(struct btrfs_work *work) |
| @@ -557,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work) | |||
| 557 | wake_up(&fs_info->async_submit_wait); | 562 | wake_up(&fs_info->async_submit_wait); |
| 558 | 563 | ||
| 559 | async->submit_bio_done(async->inode, async->rw, async->bio, | 564 | async->submit_bio_done(async->inode, async->rw, async->bio, |
| 560 | async->mirror_num, async->bio_flags); | 565 | async->mirror_num, async->bio_flags, |
| 566 | async->bio_offset); | ||
| 561 | } | 567 | } |
| 562 | 568 | ||
| 563 | static void run_one_async_free(struct btrfs_work *work) | 569 | static void run_one_async_free(struct btrfs_work *work) |
| @@ -571,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work) | |||
| 571 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 577 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
| 572 | int rw, struct bio *bio, int mirror_num, | 578 | int rw, struct bio *bio, int mirror_num, |
| 573 | unsigned long bio_flags, | 579 | unsigned long bio_flags, |
| 580 | u64 bio_offset, | ||
| 574 | extent_submit_bio_hook_t *submit_bio_start, | 581 | extent_submit_bio_hook_t *submit_bio_start, |
| 575 | extent_submit_bio_hook_t *submit_bio_done) | 582 | extent_submit_bio_hook_t *submit_bio_done) |
| 576 | { | 583 | { |
| @@ -593,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 593 | 600 | ||
| 594 | async->work.flags = 0; | 601 | async->work.flags = 0; |
| 595 | async->bio_flags = bio_flags; | 602 | async->bio_flags = bio_flags; |
| 603 | async->bio_offset = bio_offset; | ||
| 596 | 604 | ||
| 597 | atomic_inc(&fs_info->nr_async_submits); | 605 | atomic_inc(&fs_info->nr_async_submits); |
| 598 | 606 | ||
| @@ -628,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio) | |||
| 628 | 636 | ||
| 629 | static int __btree_submit_bio_start(struct inode *inode, int rw, | 637 | static int __btree_submit_bio_start(struct inode *inode, int rw, |
| 630 | struct bio *bio, int mirror_num, | 638 | struct bio *bio, int mirror_num, |
| 631 | unsigned long bio_flags) | 639 | unsigned long bio_flags, |
| 640 | u64 bio_offset) | ||
| 632 | { | 641 | { |
| 633 | /* | 642 | /* |
| 634 | * when we're called for a write, we're already in the async | 643 | * when we're called for a write, we're already in the async |
| @@ -639,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, | |||
| 639 | } | 648 | } |
| 640 | 649 | ||
| 641 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 650 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
| 642 | int mirror_num, unsigned long bio_flags) | 651 | int mirror_num, unsigned long bio_flags, |
| 652 | u64 bio_offset) | ||
| 643 | { | 653 | { |
| 644 | /* | 654 | /* |
| 645 | * when we're called for a write, we're already in the async | 655 | * when we're called for a write, we're already in the async |
| @@ -649,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 649 | } | 659 | } |
| 650 | 660 | ||
| 651 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 661 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 652 | int mirror_num, unsigned long bio_flags) | 662 | int mirror_num, unsigned long bio_flags, |
| 663 | u64 bio_offset) | ||
| 653 | { | 664 | { |
| 654 | int ret; | 665 | int ret; |
| 655 | 666 | ||
| @@ -672,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 672 | */ | 683 | */ |
| 673 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 684 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 674 | inode, rw, bio, mirror_num, 0, | 685 | inode, rw, bio, mirror_num, 0, |
| 686 | bio_offset, | ||
| 675 | __btree_submit_bio_start, | 687 | __btree_submit_bio_start, |
| 676 | __btree_submit_bio_done); | 688 | __btree_submit_bio_done); |
| 677 | } | 689 | } |
| @@ -895,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 895 | root->ref_cows = 0; | 907 | root->ref_cows = 0; |
| 896 | root->track_dirty = 0; | 908 | root->track_dirty = 0; |
| 897 | root->in_radix = 0; | 909 | root->in_radix = 0; |
| 898 | root->clean_orphans = 0; | 910 | root->orphan_item_inserted = 0; |
| 911 | root->orphan_cleanup_state = 0; | ||
| 899 | 912 | ||
| 900 | root->fs_info = fs_info; | 913 | root->fs_info = fs_info; |
| 901 | root->objectid = objectid; | 914 | root->objectid = objectid; |
| @@ -904,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 904 | root->name = NULL; | 917 | root->name = NULL; |
| 905 | root->in_sysfs = 0; | 918 | root->in_sysfs = 0; |
| 906 | root->inode_tree = RB_ROOT; | 919 | root->inode_tree = RB_ROOT; |
| 920 | root->block_rsv = NULL; | ||
| 921 | root->orphan_block_rsv = NULL; | ||
| 907 | 922 | ||
| 908 | INIT_LIST_HEAD(&root->dirty_list); | 923 | INIT_LIST_HEAD(&root->dirty_list); |
| 909 | INIT_LIST_HEAD(&root->orphan_list); | 924 | INIT_LIST_HEAD(&root->orphan_list); |
| 910 | INIT_LIST_HEAD(&root->root_list); | 925 | INIT_LIST_HEAD(&root->root_list); |
| 911 | spin_lock_init(&root->node_lock); | 926 | spin_lock_init(&root->node_lock); |
| 912 | spin_lock_init(&root->list_lock); | 927 | spin_lock_init(&root->orphan_lock); |
| 913 | spin_lock_init(&root->inode_lock); | 928 | spin_lock_init(&root->inode_lock); |
| 929 | spin_lock_init(&root->accounting_lock); | ||
| 914 | mutex_init(&root->objectid_mutex); | 930 | mutex_init(&root->objectid_mutex); |
| 915 | mutex_init(&root->log_mutex); | 931 | mutex_init(&root->log_mutex); |
| 916 | init_waitqueue_head(&root->log_writer_wait); | 932 | init_waitqueue_head(&root->log_writer_wait); |
| @@ -969,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
| 969 | return 0; | 985 | return 0; |
| 970 | } | 986 | } |
| 971 | 987 | ||
| 972 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 973 | struct btrfs_fs_info *fs_info) | ||
| 974 | { | ||
| 975 | struct extent_buffer *eb; | ||
| 976 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
| 977 | u64 start = 0; | ||
| 978 | u64 end = 0; | ||
| 979 | int ret; | ||
| 980 | |||
| 981 | if (!log_root_tree) | ||
| 982 | return 0; | ||
| 983 | |||
| 984 | while (1) { | ||
| 985 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
| 986 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | ||
| 987 | if (ret) | ||
| 988 | break; | ||
| 989 | |||
| 990 | clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, | ||
| 991 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | ||
| 992 | } | ||
| 993 | eb = fs_info->log_root_tree->node; | ||
| 994 | |||
| 995 | WARN_ON(btrfs_header_level(eb) != 0); | ||
| 996 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
| 997 | |||
| 998 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
| 999 | eb->start, eb->len); | ||
| 1000 | BUG_ON(ret); | ||
| 1001 | |||
| 1002 | free_extent_buffer(eb); | ||
| 1003 | kfree(fs_info->log_root_tree); | ||
| 1004 | fs_info->log_root_tree = NULL; | ||
| 1005 | return 0; | ||
| 1006 | } | ||
| 1007 | |||
| 1008 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 988 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
| 1009 | struct btrfs_fs_info *fs_info) | 989 | struct btrfs_fs_info *fs_info) |
| 1010 | { | 990 | { |
| @@ -1192,19 +1172,23 @@ again: | |||
| 1192 | if (root) | 1172 | if (root) |
| 1193 | return root; | 1173 | return root; |
| 1194 | 1174 | ||
| 1195 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
| 1196 | if (ret == 0) | ||
| 1197 | ret = -ENOENT; | ||
| 1198 | if (ret < 0) | ||
| 1199 | return ERR_PTR(ret); | ||
| 1200 | |||
| 1201 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1175 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
| 1202 | if (IS_ERR(root)) | 1176 | if (IS_ERR(root)) |
| 1203 | return root; | 1177 | return root; |
| 1204 | 1178 | ||
| 1205 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
| 1206 | set_anon_super(&root->anon_super, NULL); | 1179 | set_anon_super(&root->anon_super, NULL); |
| 1207 | 1180 | ||
| 1181 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
| 1182 | ret = -ENOENT; | ||
| 1183 | goto fail; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
| 1187 | if (ret < 0) | ||
| 1188 | goto fail; | ||
| 1189 | if (ret == 0) | ||
| 1190 | root->orphan_item_inserted = 1; | ||
| 1191 | |||
| 1208 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1192 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
| 1209 | if (ret) | 1193 | if (ret) |
| 1210 | goto fail; | 1194 | goto fail; |
| @@ -1213,10 +1197,9 @@ again: | |||
| 1213 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1197 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
| 1214 | (unsigned long)root->root_key.objectid, | 1198 | (unsigned long)root->root_key.objectid, |
| 1215 | root); | 1199 | root); |
| 1216 | if (ret == 0) { | 1200 | if (ret == 0) |
| 1217 | root->in_radix = 1; | 1201 | root->in_radix = 1; |
| 1218 | root->clean_orphans = 1; | 1202 | |
| 1219 | } | ||
| 1220 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1203 | spin_unlock(&fs_info->fs_roots_radix_lock); |
| 1221 | radix_tree_preload_end(); | 1204 | radix_tree_preload_end(); |
| 1222 | if (ret) { | 1205 | if (ret) { |
| @@ -1374,19 +1357,11 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
| 1374 | { | 1357 | { |
| 1375 | int err; | 1358 | int err; |
| 1376 | 1359 | ||
| 1377 | bdi->name = "btrfs"; | ||
| 1378 | bdi->capabilities = BDI_CAP_MAP_COPY; | 1360 | bdi->capabilities = BDI_CAP_MAP_COPY; |
| 1379 | err = bdi_init(bdi); | 1361 | err = bdi_setup_and_register(bdi, "btrfs", BDI_CAP_MAP_COPY); |
| 1380 | if (err) | 1362 | if (err) |
| 1381 | return err; | 1363 | return err; |
| 1382 | 1364 | ||
| 1383 | err = bdi_register(bdi, NULL, "btrfs-%d", | ||
| 1384 | atomic_inc_return(&btrfs_bdi_num)); | ||
| 1385 | if (err) { | ||
| 1386 | bdi_destroy(bdi); | ||
| 1387 | return err; | ||
| 1388 | } | ||
| 1389 | |||
| 1390 | bdi->ra_pages = default_backing_dev_info.ra_pages; | 1365 | bdi->ra_pages = default_backing_dev_info.ra_pages; |
| 1391 | bdi->unplug_io_fn = btrfs_unplug_io_fn; | 1366 | bdi->unplug_io_fn = btrfs_unplug_io_fn; |
| 1392 | bdi->unplug_io_data = info; | 1367 | bdi->unplug_io_data = info; |
| @@ -1470,10 +1445,6 @@ static int cleaner_kthread(void *arg) | |||
| 1470 | struct btrfs_root *root = arg; | 1445 | struct btrfs_root *root = arg; |
| 1471 | 1446 | ||
| 1472 | do { | 1447 | do { |
| 1473 | smp_mb(); | ||
| 1474 | if (root->fs_info->closing) | ||
| 1475 | break; | ||
| 1476 | |||
| 1477 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1448 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
| 1478 | 1449 | ||
| 1479 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1450 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
| @@ -1486,11 +1457,9 @@ static int cleaner_kthread(void *arg) | |||
| 1486 | if (freezing(current)) { | 1457 | if (freezing(current)) { |
| 1487 | refrigerator(); | 1458 | refrigerator(); |
| 1488 | } else { | 1459 | } else { |
| 1489 | smp_mb(); | ||
| 1490 | if (root->fs_info->closing) | ||
| 1491 | break; | ||
| 1492 | set_current_state(TASK_INTERRUPTIBLE); | 1460 | set_current_state(TASK_INTERRUPTIBLE); |
| 1493 | schedule(); | 1461 | if (!kthread_should_stop()) |
| 1462 | schedule(); | ||
| 1494 | __set_current_state(TASK_RUNNING); | 1463 | __set_current_state(TASK_RUNNING); |
| 1495 | } | 1464 | } |
| 1496 | } while (!kthread_should_stop()); | 1465 | } while (!kthread_should_stop()); |
| @@ -1502,36 +1471,40 @@ static int transaction_kthread(void *arg) | |||
| 1502 | struct btrfs_root *root = arg; | 1471 | struct btrfs_root *root = arg; |
| 1503 | struct btrfs_trans_handle *trans; | 1472 | struct btrfs_trans_handle *trans; |
| 1504 | struct btrfs_transaction *cur; | 1473 | struct btrfs_transaction *cur; |
| 1474 | u64 transid; | ||
| 1505 | unsigned long now; | 1475 | unsigned long now; |
| 1506 | unsigned long delay; | 1476 | unsigned long delay; |
| 1507 | int ret; | 1477 | int ret; |
| 1508 | 1478 | ||
| 1509 | do { | 1479 | do { |
| 1510 | smp_mb(); | ||
| 1511 | if (root->fs_info->closing) | ||
| 1512 | break; | ||
| 1513 | |||
| 1514 | delay = HZ * 30; | 1480 | delay = HZ * 30; |
| 1515 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1481 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
| 1516 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1482 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
| 1517 | 1483 | ||
| 1518 | mutex_lock(&root->fs_info->trans_mutex); | 1484 | spin_lock(&root->fs_info->new_trans_lock); |
| 1519 | cur = root->fs_info->running_transaction; | 1485 | cur = root->fs_info->running_transaction; |
| 1520 | if (!cur) { | 1486 | if (!cur) { |
| 1521 | mutex_unlock(&root->fs_info->trans_mutex); | 1487 | spin_unlock(&root->fs_info->new_trans_lock); |
| 1522 | goto sleep; | 1488 | goto sleep; |
| 1523 | } | 1489 | } |
| 1524 | 1490 | ||
| 1525 | now = get_seconds(); | 1491 | now = get_seconds(); |
| 1526 | if (now < cur->start_time || now - cur->start_time < 30) { | 1492 | if (!cur->blocked && |
| 1527 | mutex_unlock(&root->fs_info->trans_mutex); | 1493 | (now < cur->start_time || now - cur->start_time < 30)) { |
| 1494 | spin_unlock(&root->fs_info->new_trans_lock); | ||
| 1528 | delay = HZ * 5; | 1495 | delay = HZ * 5; |
| 1529 | goto sleep; | 1496 | goto sleep; |
| 1530 | } | 1497 | } |
| 1531 | mutex_unlock(&root->fs_info->trans_mutex); | 1498 | transid = cur->transid; |
| 1532 | trans = btrfs_start_transaction(root, 1); | 1499 | spin_unlock(&root->fs_info->new_trans_lock); |
| 1533 | ret = btrfs_commit_transaction(trans, root); | ||
| 1534 | 1500 | ||
| 1501 | trans = btrfs_join_transaction(root, 1); | ||
| 1502 | if (transid == trans->transid) { | ||
| 1503 | ret = btrfs_commit_transaction(trans, root); | ||
| 1504 | BUG_ON(ret); | ||
| 1505 | } else { | ||
| 1506 | btrfs_end_transaction(trans, root); | ||
| 1507 | } | ||
| 1535 | sleep: | 1508 | sleep: |
| 1536 | wake_up_process(root->fs_info->cleaner_kthread); | 1509 | wake_up_process(root->fs_info->cleaner_kthread); |
| 1537 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1510 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
| @@ -1539,10 +1512,10 @@ sleep: | |||
| 1539 | if (freezing(current)) { | 1512 | if (freezing(current)) { |
| 1540 | refrigerator(); | 1513 | refrigerator(); |
| 1541 | } else { | 1514 | } else { |
| 1542 | if (root->fs_info->closing) | ||
| 1543 | break; | ||
| 1544 | set_current_state(TASK_INTERRUPTIBLE); | 1515 | set_current_state(TASK_INTERRUPTIBLE); |
| 1545 | schedule_timeout(delay); | 1516 | if (!kthread_should_stop() && |
| 1517 | !btrfs_transaction_blocked(root->fs_info)) | ||
| 1518 | schedule_timeout(delay); | ||
| 1546 | __set_current_state(TASK_RUNNING); | 1519 | __set_current_state(TASK_RUNNING); |
| 1547 | } | 1520 | } |
| 1548 | } while (!kthread_should_stop()); | 1521 | } while (!kthread_should_stop()); |
| @@ -1629,12 +1602,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1629 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1602 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
| 1630 | INIT_LIST_HEAD(&fs_info->space_info); | 1603 | INIT_LIST_HEAD(&fs_info->space_info); |
| 1631 | btrfs_mapping_init(&fs_info->mapping_tree); | 1604 | btrfs_mapping_init(&fs_info->mapping_tree); |
| 1605 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | ||
| 1606 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | ||
| 1607 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | ||
| 1608 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | ||
| 1609 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | ||
| 1610 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | ||
| 1611 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
| 1632 | atomic_set(&fs_info->nr_async_submits, 0); | 1612 | atomic_set(&fs_info->nr_async_submits, 0); |
| 1633 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1613 | atomic_set(&fs_info->async_delalloc_pages, 0); |
| 1634 | atomic_set(&fs_info->async_submit_draining, 0); | 1614 | atomic_set(&fs_info->async_submit_draining, 0); |
| 1635 | atomic_set(&fs_info->nr_async_bios, 0); | 1615 | atomic_set(&fs_info->nr_async_bios, 0); |
| 1636 | fs_info->sb = sb; | 1616 | fs_info->sb = sb; |
| 1637 | fs_info->max_extent = (u64)-1; | ||
| 1638 | fs_info->max_inline = 8192 * 1024; | 1617 | fs_info->max_inline = 8192 * 1024; |
| 1639 | fs_info->metadata_ratio = 0; | 1618 | fs_info->metadata_ratio = 0; |
| 1640 | 1619 | ||
| @@ -1769,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1769 | min_t(u64, fs_devices->num_devices, | 1748 | min_t(u64, fs_devices->num_devices, |
| 1770 | fs_info->thread_pool_size), | 1749 | fs_info->thread_pool_size), |
| 1771 | &fs_info->generic_worker); | 1750 | &fs_info->generic_worker); |
| 1772 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
| 1773 | fs_info->thread_pool_size, | ||
| 1774 | &fs_info->generic_worker); | ||
| 1775 | 1751 | ||
| 1776 | /* a higher idle thresh on the submit workers makes it much more | 1752 | /* a higher idle thresh on the submit workers makes it much more |
| 1777 | * likely that bios will be send down in a sane order to the | 1753 | * likely that bios will be send down in a sane order to the |
| @@ -1819,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1819 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1795 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
| 1820 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1796 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
| 1821 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1797 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
| 1822 | btrfs_start_workers(&fs_info->enospc_workers, 1); | ||
| 1823 | 1798 | ||
| 1824 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1799 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1825 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1800 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
| @@ -1922,17 +1897,22 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1922 | 1897 | ||
| 1923 | csum_root->track_dirty = 1; | 1898 | csum_root->track_dirty = 1; |
| 1924 | 1899 | ||
| 1925 | btrfs_read_block_groups(extent_root); | ||
| 1926 | |||
| 1927 | fs_info->generation = generation; | 1900 | fs_info->generation = generation; |
| 1928 | fs_info->last_trans_committed = generation; | 1901 | fs_info->last_trans_committed = generation; |
| 1929 | fs_info->data_alloc_profile = (u64)-1; | 1902 | fs_info->data_alloc_profile = (u64)-1; |
| 1930 | fs_info->metadata_alloc_profile = (u64)-1; | 1903 | fs_info->metadata_alloc_profile = (u64)-1; |
| 1931 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | 1904 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; |
| 1905 | |||
| 1906 | ret = btrfs_read_block_groups(extent_root); | ||
| 1907 | if (ret) { | ||
| 1908 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | ||
| 1909 | goto fail_block_groups; | ||
| 1910 | } | ||
| 1911 | |||
| 1932 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 1912 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
| 1933 | "btrfs-cleaner"); | 1913 | "btrfs-cleaner"); |
| 1934 | if (IS_ERR(fs_info->cleaner_kthread)) | 1914 | if (IS_ERR(fs_info->cleaner_kthread)) |
| 1935 | goto fail_csum_root; | 1915 | goto fail_block_groups; |
| 1936 | 1916 | ||
| 1937 | fs_info->transaction_kthread = kthread_run(transaction_kthread, | 1917 | fs_info->transaction_kthread = kthread_run(transaction_kthread, |
| 1938 | tree_root, | 1918 | tree_root, |
| @@ -1961,8 +1941,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1961 | btrfs_level_size(tree_root, | 1941 | btrfs_level_size(tree_root, |
| 1962 | btrfs_super_log_root_level(disk_super)); | 1942 | btrfs_super_log_root_level(disk_super)); |
| 1963 | 1943 | ||
| 1964 | log_tree_root = kzalloc(sizeof(struct btrfs_root), | 1944 | log_tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS); |
| 1965 | GFP_NOFS); | 1945 | if (!log_tree_root) { |
| 1946 | err = -ENOMEM; | ||
| 1947 | goto fail_trans_kthread; | ||
| 1948 | } | ||
| 1966 | 1949 | ||
| 1967 | __setup_root(nodesize, leafsize, sectorsize, stripesize, | 1950 | __setup_root(nodesize, leafsize, sectorsize, stripesize, |
| 1968 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); | 1951 | log_tree_root, fs_info, BTRFS_TREE_LOG_OBJECTID); |
| @@ -1983,6 +1966,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1983 | BUG_ON(ret); | 1966 | BUG_ON(ret); |
| 1984 | 1967 | ||
| 1985 | if (!(sb->s_flags & MS_RDONLY)) { | 1968 | if (!(sb->s_flags & MS_RDONLY)) { |
| 1969 | ret = btrfs_cleanup_fs_roots(fs_info); | ||
| 1970 | BUG_ON(ret); | ||
| 1971 | |||
| 1986 | ret = btrfs_recover_relocation(tree_root); | 1972 | ret = btrfs_recover_relocation(tree_root); |
| 1987 | if (ret < 0) { | 1973 | if (ret < 0) { |
| 1988 | printk(KERN_WARNING | 1974 | printk(KERN_WARNING |
| @@ -1999,6 +1985,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1999 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); | 1985 | fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); |
| 2000 | if (!fs_info->fs_root) | 1986 | if (!fs_info->fs_root) |
| 2001 | goto fail_trans_kthread; | 1987 | goto fail_trans_kthread; |
| 1988 | if (IS_ERR(fs_info->fs_root)) { | ||
| 1989 | err = PTR_ERR(fs_info->fs_root); | ||
| 1990 | goto fail_trans_kthread; | ||
| 1991 | } | ||
| 2002 | 1992 | ||
| 2003 | if (!(sb->s_flags & MS_RDONLY)) { | 1993 | if (!(sb->s_flags & MS_RDONLY)) { |
| 2004 | down_read(&fs_info->cleanup_work_sem); | 1994 | down_read(&fs_info->cleanup_work_sem); |
| @@ -2020,7 +2010,8 @@ fail_cleaner: | |||
| 2020 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); | 2010 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); |
| 2021 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2011 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
| 2022 | 2012 | ||
| 2023 | fail_csum_root: | 2013 | fail_block_groups: |
| 2014 | btrfs_free_block_groups(fs_info); | ||
| 2024 | free_extent_buffer(csum_root->node); | 2015 | free_extent_buffer(csum_root->node); |
| 2025 | free_extent_buffer(csum_root->commit_root); | 2016 | free_extent_buffer(csum_root->commit_root); |
| 2026 | fail_dev_root: | 2017 | fail_dev_root: |
| @@ -2045,7 +2036,6 @@ fail_sb_buffer: | |||
| 2045 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2036 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2046 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2037 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2047 | btrfs_stop_workers(&fs_info->submit_workers); | 2038 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2048 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2049 | fail_iput: | 2039 | fail_iput: |
| 2050 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2040 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
| 2051 | iput(fs_info->btree_inode); | 2041 | iput(fs_info->btree_inode); |
| @@ -2410,11 +2400,11 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
| 2410 | down_write(&root->fs_info->cleanup_work_sem); | 2400 | down_write(&root->fs_info->cleanup_work_sem); |
| 2411 | up_write(&root->fs_info->cleanup_work_sem); | 2401 | up_write(&root->fs_info->cleanup_work_sem); |
| 2412 | 2402 | ||
| 2413 | trans = btrfs_start_transaction(root, 1); | 2403 | trans = btrfs_join_transaction(root, 1); |
| 2414 | ret = btrfs_commit_transaction(trans, root); | 2404 | ret = btrfs_commit_transaction(trans, root); |
| 2415 | BUG_ON(ret); | 2405 | BUG_ON(ret); |
| 2416 | /* run commit again to drop the original snapshot */ | 2406 | /* run commit again to drop the original snapshot */ |
| 2417 | trans = btrfs_start_transaction(root, 1); | 2407 | trans = btrfs_join_transaction(root, 1); |
| 2418 | btrfs_commit_transaction(trans, root); | 2408 | btrfs_commit_transaction(trans, root); |
| 2419 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2409 | ret = btrfs_write_and_wait_transaction(NULL, root); |
| 2420 | BUG_ON(ret); | 2410 | BUG_ON(ret); |
| @@ -2431,15 +2421,15 @@ int close_ctree(struct btrfs_root *root) | |||
| 2431 | fs_info->closing = 1; | 2421 | fs_info->closing = 1; |
| 2432 | smp_mb(); | 2422 | smp_mb(); |
| 2433 | 2423 | ||
| 2434 | kthread_stop(root->fs_info->transaction_kthread); | ||
| 2435 | kthread_stop(root->fs_info->cleaner_kthread); | ||
| 2436 | |||
| 2437 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2424 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
| 2438 | ret = btrfs_commit_super(root); | 2425 | ret = btrfs_commit_super(root); |
| 2439 | if (ret) | 2426 | if (ret) |
| 2440 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2427 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
| 2441 | } | 2428 | } |
| 2442 | 2429 | ||
| 2430 | kthread_stop(root->fs_info->transaction_kthread); | ||
| 2431 | kthread_stop(root->fs_info->cleaner_kthread); | ||
| 2432 | |||
| 2443 | fs_info->closing = 2; | 2433 | fs_info->closing = 2; |
| 2444 | smp_mb(); | 2434 | smp_mb(); |
| 2445 | 2435 | ||
| @@ -2478,7 +2468,6 @@ int close_ctree(struct btrfs_root *root) | |||
| 2478 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2468 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2479 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2469 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2480 | btrfs_stop_workers(&fs_info->submit_workers); | 2470 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2481 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2482 | 2471 | ||
| 2483 | btrfs_close_devices(fs_info->fs_devices); | 2472 | btrfs_close_devices(fs_info->fs_devices); |
| 2484 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2473 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c958ecbc1916..88e825a0bf21 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
| 87 | int metadata); | 87 | int metadata); |
| 88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
| 89 | int rw, struct bio *bio, int mirror_num, | 89 | int rw, struct bio *bio, int mirror_num, |
| 90 | unsigned long bio_flags, | 90 | unsigned long bio_flags, u64 bio_offset, |
| 91 | extent_submit_bio_hook_t *submit_bio_start, | 91 | extent_submit_bio_hook_t *submit_bio_start, |
| 92 | extent_submit_bio_hook_t *submit_bio_done); | 92 | extent_submit_bio_hook_t *submit_bio_done); |
| 93 | 93 | ||
| @@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | |||
| 95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
| 96 | int btrfs_write_tree_block(struct extent_buffer *buf); | 96 | int btrfs_write_tree_block(struct extent_buffer *buf); |
| 97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
| 98 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 99 | struct btrfs_fs_info *fs_info); | ||
| 100 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 98 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, |
| 101 | struct btrfs_fs_info *fs_info); | 99 | struct btrfs_fs_info *fs_info); |
| 102 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 100 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1727b26fb194..32d094002a57 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -22,6 +22,7 @@ | |||
| 22 | #include <linux/sort.h> | 22 | #include <linux/sort.h> |
| 23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
| 24 | #include <linux/kthread.h> | 24 | #include <linux/kthread.h> |
| 25 | #include <linux/slab.h> | ||
| 25 | #include "compat.h" | 26 | #include "compat.h" |
| 26 | #include "hash.h" | 27 | #include "hash.h" |
| 27 | #include "ctree.h" | 28 | #include "ctree.h" |
| @@ -34,10 +35,9 @@ | |||
| 34 | 35 | ||
| 35 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 36 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
| 37 | u64 bytenr, u64 num_bytes, int alloc, | 38 | u64 bytenr, u64 num_bytes, int alloc); |
| 38 | int mark_free); | 39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, |
| 39 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 40 | u64 num_bytes, int reserve, int sinfo); |
| 40 | u64 num_bytes, int reserve); | ||
| 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 42 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
| 43 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -60,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 61 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
| 62 | u64 flags, int force); | 62 | u64 flags, int force); |
| 63 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
| 64 | struct btrfs_root *root, | ||
| 65 | struct btrfs_path *path, | ||
| 66 | u64 bytenr, u64 num_bytes, | ||
| 67 | int is_data, int reserved, | ||
| 68 | struct extent_buffer **must_clean); | ||
| 69 | static int find_next_key(struct btrfs_path *path, int level, | 63 | static int find_next_key(struct btrfs_path *path, int level, |
| 70 | struct btrfs_key *key); | 64 | struct btrfs_key *key); |
| 71 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 65 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| @@ -90,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | |||
| 90 | 84 | ||
| 91 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | 85 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
| 92 | { | 86 | { |
| 93 | if (atomic_dec_and_test(&cache->count)) | 87 | if (atomic_dec_and_test(&cache->count)) { |
| 88 | WARN_ON(cache->pinned > 0); | ||
| 89 | WARN_ON(cache->reserved > 0); | ||
| 90 | WARN_ON(cache->reserved_pinned > 0); | ||
| 94 | kfree(cache); | 91 | kfree(cache); |
| 92 | } | ||
| 95 | } | 93 | } |
| 96 | 94 | ||
| 97 | /* | 95 | /* |
| @@ -318,7 +316,7 @@ static int caching_kthread(void *data) | |||
| 318 | 316 | ||
| 319 | exclude_super_stripes(extent_root, block_group); | 317 | exclude_super_stripes(extent_root, block_group); |
| 320 | spin_lock(&block_group->space_info->lock); | 318 | spin_lock(&block_group->space_info->lock); |
| 321 | block_group->space_info->bytes_super += block_group->bytes_super; | 319 | block_group->space_info->bytes_readonly += block_group->bytes_super; |
| 322 | spin_unlock(&block_group->space_info->lock); | 320 | spin_unlock(&block_group->space_info->lock); |
| 323 | 321 | ||
| 324 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
| @@ -506,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
| 506 | struct list_head *head = &info->space_info; | 504 | struct list_head *head = &info->space_info; |
| 507 | struct btrfs_space_info *found; | 505 | struct btrfs_space_info *found; |
| 508 | 506 | ||
| 507 | flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | | ||
| 508 | BTRFS_BLOCK_GROUP_METADATA; | ||
| 509 | |||
| 509 | rcu_read_lock(); | 510 | rcu_read_lock(); |
| 510 | list_for_each_entry_rcu(found, head, list) { | 511 | list_for_each_entry_rcu(found, head, list) { |
| 511 | if (found->flags == flags) { | 512 | if (found->flags == flags) { |
| @@ -609,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 609 | } | 610 | } |
| 610 | 611 | ||
| 611 | /* | 612 | /* |
| 613 | * helper function to lookup reference count and flags of extent. | ||
| 614 | * | ||
| 615 | * the head node for delayed ref is used to store the sum of all the | ||
| 616 | * reference count modifications queued up in the rbtree. the head | ||
| 617 | * node may also store the extent flags to set. This way you can check | ||
| 618 | * to see what the reference count and extent flags would be if all of | ||
| 619 | * the delayed refs are not processed. | ||
| 620 | */ | ||
| 621 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 622 | struct btrfs_root *root, u64 bytenr, | ||
| 623 | u64 num_bytes, u64 *refs, u64 *flags) | ||
| 624 | { | ||
| 625 | struct btrfs_delayed_ref_head *head; | ||
| 626 | struct btrfs_delayed_ref_root *delayed_refs; | ||
| 627 | struct btrfs_path *path; | ||
| 628 | struct btrfs_extent_item *ei; | ||
| 629 | struct extent_buffer *leaf; | ||
| 630 | struct btrfs_key key; | ||
| 631 | u32 item_size; | ||
| 632 | u64 num_refs; | ||
| 633 | u64 extent_flags; | ||
| 634 | int ret; | ||
| 635 | |||
| 636 | path = btrfs_alloc_path(); | ||
| 637 | if (!path) | ||
| 638 | return -ENOMEM; | ||
| 639 | |||
| 640 | key.objectid = bytenr; | ||
| 641 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 642 | key.offset = num_bytes; | ||
| 643 | if (!trans) { | ||
| 644 | path->skip_locking = 1; | ||
| 645 | path->search_commit_root = 1; | ||
| 646 | } | ||
| 647 | again: | ||
| 648 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
| 649 | &key, path, 0, 0); | ||
| 650 | if (ret < 0) | ||
| 651 | goto out_free; | ||
| 652 | |||
| 653 | if (ret == 0) { | ||
| 654 | leaf = path->nodes[0]; | ||
| 655 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 656 | if (item_size >= sizeof(*ei)) { | ||
| 657 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 658 | struct btrfs_extent_item); | ||
| 659 | num_refs = btrfs_extent_refs(leaf, ei); | ||
| 660 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
| 661 | } else { | ||
| 662 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
| 663 | struct btrfs_extent_item_v0 *ei0; | ||
| 664 | BUG_ON(item_size != sizeof(*ei0)); | ||
| 665 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
| 666 | struct btrfs_extent_item_v0); | ||
| 667 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
| 668 | /* FIXME: this isn't correct for data */ | ||
| 669 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 670 | #else | ||
| 671 | BUG(); | ||
| 672 | #endif | ||
| 673 | } | ||
| 674 | BUG_ON(num_refs == 0); | ||
| 675 | } else { | ||
| 676 | num_refs = 0; | ||
| 677 | extent_flags = 0; | ||
| 678 | ret = 0; | ||
| 679 | } | ||
| 680 | |||
| 681 | if (!trans) | ||
| 682 | goto out; | ||
| 683 | |||
| 684 | delayed_refs = &trans->transaction->delayed_refs; | ||
| 685 | spin_lock(&delayed_refs->lock); | ||
| 686 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
| 687 | if (head) { | ||
| 688 | if (!mutex_trylock(&head->mutex)) { | ||
| 689 | atomic_inc(&head->node.refs); | ||
| 690 | spin_unlock(&delayed_refs->lock); | ||
| 691 | |||
| 692 | btrfs_release_path(root->fs_info->extent_root, path); | ||
| 693 | |||
| 694 | mutex_lock(&head->mutex); | ||
| 695 | mutex_unlock(&head->mutex); | ||
| 696 | btrfs_put_delayed_ref(&head->node); | ||
| 697 | goto again; | ||
| 698 | } | ||
| 699 | if (head->extent_op && head->extent_op->update_flags) | ||
| 700 | extent_flags |= head->extent_op->flags_to_set; | ||
| 701 | else | ||
| 702 | BUG_ON(num_refs == 0); | ||
| 703 | |||
| 704 | num_refs += head->node.ref_mod; | ||
| 705 | mutex_unlock(&head->mutex); | ||
| 706 | } | ||
| 707 | spin_unlock(&delayed_refs->lock); | ||
| 708 | out: | ||
| 709 | WARN_ON(num_refs == 0); | ||
| 710 | if (refs) | ||
| 711 | *refs = num_refs; | ||
| 712 | if (flags) | ||
| 713 | *flags = extent_flags; | ||
| 714 | out_free: | ||
| 715 | btrfs_free_path(path); | ||
| 716 | return ret; | ||
| 717 | } | ||
| 718 | |||
| 719 | /* | ||
| 612 | * Back reference rules. Back refs have three main goals: | 720 | * Back reference rules. Back refs have three main goals: |
| 613 | * | 721 | * |
| 614 | * 1) differentiate between all holders of references to an extent so that | 722 | * 1) differentiate between all holders of references to an extent so that |
| @@ -1588,7 +1696,7 @@ static void btrfs_issue_discard(struct block_device *bdev, | |||
| 1588 | u64 start, u64 len) | 1696 | u64 start, u64 len) |
| 1589 | { | 1697 | { |
| 1590 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1698 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, |
| 1591 | DISCARD_FL_BARRIER); | 1699 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); |
| 1592 | } | 1700 | } |
| 1593 | 1701 | ||
| 1594 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1702 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
| @@ -1870,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
| 1870 | return ret; | 1978 | return ret; |
| 1871 | } | 1979 | } |
| 1872 | 1980 | ||
| 1873 | |||
| 1874 | /* helper function to actually process a single delayed ref entry */ | 1981 | /* helper function to actually process a single delayed ref entry */ |
| 1875 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | 1982 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, |
| 1876 | struct btrfs_root *root, | 1983 | struct btrfs_root *root, |
| @@ -1890,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
| 1890 | BUG_ON(extent_op); | 1997 | BUG_ON(extent_op); |
| 1891 | head = btrfs_delayed_node_to_head(node); | 1998 | head = btrfs_delayed_node_to_head(node); |
| 1892 | if (insert_reserved) { | 1999 | if (insert_reserved) { |
| 1893 | int mark_free = 0; | 2000 | btrfs_pin_extent(root, node->bytenr, |
| 1894 | struct extent_buffer *must_clean = NULL; | 2001 | node->num_bytes, 1); |
| 1895 | |||
| 1896 | ret = pin_down_bytes(trans, root, NULL, | ||
| 1897 | node->bytenr, node->num_bytes, | ||
| 1898 | head->is_data, 1, &must_clean); | ||
| 1899 | if (ret > 0) | ||
| 1900 | mark_free = 1; | ||
| 1901 | |||
| 1902 | if (must_clean) { | ||
| 1903 | clean_tree_block(NULL, root, must_clean); | ||
| 1904 | btrfs_tree_unlock(must_clean); | ||
| 1905 | free_extent_buffer(must_clean); | ||
| 1906 | } | ||
| 1907 | if (head->is_data) { | 2002 | if (head->is_data) { |
| 1908 | ret = btrfs_del_csums(trans, root, | 2003 | ret = btrfs_del_csums(trans, root, |
| 1909 | node->bytenr, | 2004 | node->bytenr, |
| 1910 | node->num_bytes); | 2005 | node->num_bytes); |
| 1911 | BUG_ON(ret); | 2006 | BUG_ON(ret); |
| 1912 | } | 2007 | } |
| 1913 | if (mark_free) { | ||
| 1914 | ret = btrfs_free_reserved_extent(root, | ||
| 1915 | node->bytenr, | ||
| 1916 | node->num_bytes); | ||
| 1917 | BUG_ON(ret); | ||
| 1918 | } | ||
| 1919 | } | 2008 | } |
| 1920 | mutex_unlock(&head->mutex); | 2009 | mutex_unlock(&head->mutex); |
| 1921 | return 0; | 2010 | return 0; |
| @@ -2346,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | |||
| 2346 | ret = 0; | 2435 | ret = 0; |
| 2347 | out: | 2436 | out: |
| 2348 | btrfs_free_path(path); | 2437 | btrfs_free_path(path); |
| 2438 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
| 2439 | WARN_ON(ret > 0); | ||
| 2349 | return ret; | 2440 | return ret; |
| 2350 | } | 2441 | } |
| 2351 | 2442 | ||
| @@ -2659,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 2659 | struct btrfs_space_info **space_info) | 2750 | struct btrfs_space_info **space_info) |
| 2660 | { | 2751 | { |
| 2661 | struct btrfs_space_info *found; | 2752 | struct btrfs_space_info *found; |
| 2753 | int i; | ||
| 2754 | int factor; | ||
| 2755 | |||
| 2756 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 2757 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 2758 | factor = 2; | ||
| 2759 | else | ||
| 2760 | factor = 1; | ||
| 2662 | 2761 | ||
| 2663 | found = __find_space_info(info, flags); | 2762 | found = __find_space_info(info, flags); |
| 2664 | if (found) { | 2763 | if (found) { |
| 2665 | spin_lock(&found->lock); | 2764 | spin_lock(&found->lock); |
| 2666 | found->total_bytes += total_bytes; | 2765 | found->total_bytes += total_bytes; |
| 2667 | found->bytes_used += bytes_used; | 2766 | found->bytes_used += bytes_used; |
| 2767 | found->disk_used += bytes_used * factor; | ||
| 2668 | found->full = 0; | 2768 | found->full = 0; |
| 2669 | spin_unlock(&found->lock); | 2769 | spin_unlock(&found->lock); |
| 2670 | *space_info = found; | 2770 | *space_info = found; |
| @@ -2674,16 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 2674 | if (!found) | 2774 | if (!found) |
| 2675 | return -ENOMEM; | 2775 | return -ENOMEM; |
| 2676 | 2776 | ||
| 2677 | INIT_LIST_HEAD(&found->block_groups); | 2777 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
| 2778 | INIT_LIST_HEAD(&found->block_groups[i]); | ||
| 2678 | init_rwsem(&found->groups_sem); | 2779 | init_rwsem(&found->groups_sem); |
| 2679 | spin_lock_init(&found->lock); | 2780 | spin_lock_init(&found->lock); |
| 2680 | found->flags = flags; | 2781 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
| 2782 | BTRFS_BLOCK_GROUP_SYSTEM | | ||
| 2783 | BTRFS_BLOCK_GROUP_METADATA); | ||
| 2681 | found->total_bytes = total_bytes; | 2784 | found->total_bytes = total_bytes; |
| 2682 | found->bytes_used = bytes_used; | 2785 | found->bytes_used = bytes_used; |
| 2786 | found->disk_used = bytes_used * factor; | ||
| 2683 | found->bytes_pinned = 0; | 2787 | found->bytes_pinned = 0; |
| 2684 | found->bytes_reserved = 0; | 2788 | found->bytes_reserved = 0; |
| 2685 | found->bytes_readonly = 0; | 2789 | found->bytes_readonly = 0; |
| 2686 | found->bytes_delalloc = 0; | 2790 | found->bytes_may_use = 0; |
| 2687 | found->full = 0; | 2791 | found->full = 0; |
| 2688 | found->force_alloc = 0; | 2792 | found->force_alloc = 0; |
| 2689 | *space_info = found; | 2793 | *space_info = found; |
| @@ -2708,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
| 2708 | } | 2812 | } |
| 2709 | } | 2813 | } |
| 2710 | 2814 | ||
| 2711 | static void set_block_group_readonly(struct btrfs_block_group_cache *cache) | ||
| 2712 | { | ||
| 2713 | spin_lock(&cache->space_info->lock); | ||
| 2714 | spin_lock(&cache->lock); | ||
| 2715 | if (!cache->ro) { | ||
| 2716 | cache->space_info->bytes_readonly += cache->key.offset - | ||
| 2717 | btrfs_block_group_used(&cache->item); | ||
| 2718 | cache->ro = 1; | ||
| 2719 | } | ||
| 2720 | spin_unlock(&cache->lock); | ||
| 2721 | spin_unlock(&cache->space_info->lock); | ||
| 2722 | } | ||
| 2723 | |||
| 2724 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 2815 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
| 2725 | { | 2816 | { |
| 2726 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 2817 | u64 num_devices = root->fs_info->fs_devices->rw_devices; |
| @@ -2749,492 +2840,49 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
| 2749 | return flags; | 2840 | return flags; |
| 2750 | } | 2841 | } |
| 2751 | 2842 | ||
| 2752 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | 2843 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
| 2753 | { | 2844 | { |
| 2754 | struct btrfs_fs_info *info = root->fs_info; | 2845 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
| 2755 | u64 alloc_profile; | 2846 | flags |= root->fs_info->avail_data_alloc_bits & |
| 2756 | 2847 | root->fs_info->data_alloc_profile; | |
| 2757 | if (data) { | 2848 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
| 2758 | alloc_profile = info->avail_data_alloc_bits & | 2849 | flags |= root->fs_info->avail_system_alloc_bits & |
| 2759 | info->data_alloc_profile; | 2850 | root->fs_info->system_alloc_profile; |
| 2760 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | 2851 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) |
| 2761 | } else if (root == root->fs_info->chunk_root) { | 2852 | flags |= root->fs_info->avail_metadata_alloc_bits & |
| 2762 | alloc_profile = info->avail_system_alloc_bits & | 2853 | root->fs_info->metadata_alloc_profile; |
| 2763 | info->system_alloc_profile; | 2854 | return btrfs_reduce_alloc_profile(root, flags); |
| 2764 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
| 2765 | } else { | ||
| 2766 | alloc_profile = info->avail_metadata_alloc_bits & | ||
| 2767 | info->metadata_alloc_profile; | ||
| 2768 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
| 2769 | } | ||
| 2770 | |||
| 2771 | return btrfs_reduce_alloc_profile(root, data); | ||
| 2772 | } | 2855 | } |
| 2773 | 2856 | ||
| 2774 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | 2857 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
| 2775 | { | 2858 | { |
| 2776 | u64 alloc_target; | 2859 | u64 flags; |
| 2777 | |||
| 2778 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
| 2779 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
| 2780 | alloc_target); | ||
| 2781 | } | ||
| 2782 | |||
| 2783 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
| 2784 | { | ||
| 2785 | u64 num_bytes; | ||
| 2786 | int level; | ||
| 2787 | |||
| 2788 | level = BTRFS_MAX_LEVEL - 2; | ||
| 2789 | /* | ||
| 2790 | * NOTE: these calculations are absolutely the worst possible case. | ||
| 2791 | * This assumes that _every_ item we insert will require a new leaf, and | ||
| 2792 | * that the tree has grown to its maximum level size. | ||
| 2793 | */ | ||
| 2794 | |||
| 2795 | /* | ||
| 2796 | * for every item we insert we could insert both an extent item and a | ||
| 2797 | * extent ref item. Then for ever item we insert, we will need to cow | ||
| 2798 | * both the original leaf, plus the leaf to the left and right of it. | ||
| 2799 | * | ||
| 2800 | * Unless we are talking about the extent root, then we just want the | ||
| 2801 | * number of items * 2, since we just need the extent item plus its ref. | ||
| 2802 | */ | ||
| 2803 | if (root == root->fs_info->extent_root) | ||
| 2804 | num_bytes = num_items * 2; | ||
| 2805 | else | ||
| 2806 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
| 2807 | |||
| 2808 | /* | ||
| 2809 | * num_bytes is total number of leaves we could need times the leaf | ||
| 2810 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
| 2811 | * level, down to the leaf level. | ||
| 2812 | */ | ||
| 2813 | num_bytes = (num_bytes * root->leafsize) + | ||
| 2814 | (num_bytes * (level * 2)) * root->nodesize; | ||
| 2815 | |||
| 2816 | return num_bytes; | ||
| 2817 | } | ||
| 2818 | |||
| 2819 | /* | ||
| 2820 | * Unreserve metadata space for delalloc. If we have less reserved credits than | ||
| 2821 | * we have extents, this function does nothing. | ||
| 2822 | */ | ||
| 2823 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2824 | struct inode *inode, int num_items) | ||
| 2825 | { | ||
| 2826 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2827 | struct btrfs_space_info *meta_sinfo; | ||
| 2828 | u64 num_bytes; | ||
| 2829 | u64 alloc_target; | ||
| 2830 | bool bug = false; | ||
| 2831 | |||
| 2832 | /* get the space info for where the metadata will live */ | ||
| 2833 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 2834 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 2835 | |||
| 2836 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 2837 | num_items); | ||
| 2838 | |||
| 2839 | spin_lock(&meta_sinfo->lock); | ||
| 2840 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 2841 | if (BTRFS_I(inode)->reserved_extents <= | ||
| 2842 | BTRFS_I(inode)->outstanding_extents) { | ||
| 2843 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 2844 | spin_unlock(&meta_sinfo->lock); | ||
| 2845 | return 0; | ||
| 2846 | } | ||
| 2847 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 2848 | |||
| 2849 | BTRFS_I(inode)->reserved_extents--; | ||
| 2850 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
| 2851 | |||
| 2852 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
| 2853 | bug = true; | ||
| 2854 | meta_sinfo->bytes_delalloc = 0; | ||
| 2855 | } else { | ||
| 2856 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2857 | } | ||
| 2858 | spin_unlock(&meta_sinfo->lock); | ||
| 2859 | |||
| 2860 | BUG_ON(bug); | ||
| 2861 | |||
| 2862 | return 0; | ||
| 2863 | } | ||
| 2864 | |||
| 2865 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
| 2866 | { | ||
| 2867 | u64 thresh; | ||
| 2868 | |||
| 2869 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 2870 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 2871 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 2872 | meta_sinfo->bytes_may_use; | ||
| 2873 | 2860 | ||
| 2874 | thresh = meta_sinfo->total_bytes - thresh; | 2861 | if (data) |
| 2875 | thresh *= 80; | 2862 | flags = BTRFS_BLOCK_GROUP_DATA; |
| 2876 | do_div(thresh, 100); | 2863 | else if (root == root->fs_info->chunk_root) |
| 2877 | if (thresh <= meta_sinfo->bytes_delalloc) | 2864 | flags = BTRFS_BLOCK_GROUP_SYSTEM; |
| 2878 | meta_sinfo->force_delalloc = 1; | ||
| 2879 | else | 2865 | else |
| 2880 | meta_sinfo->force_delalloc = 0; | 2866 | flags = BTRFS_BLOCK_GROUP_METADATA; |
| 2881 | } | ||
| 2882 | |||
| 2883 | struct async_flush { | ||
| 2884 | struct btrfs_root *root; | ||
| 2885 | struct btrfs_space_info *info; | ||
| 2886 | struct btrfs_work work; | ||
| 2887 | }; | ||
| 2888 | |||
| 2889 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
| 2890 | { | ||
| 2891 | struct async_flush *async; | ||
| 2892 | struct btrfs_root *root; | ||
| 2893 | struct btrfs_space_info *info; | ||
| 2894 | |||
| 2895 | async = container_of(work, struct async_flush, work); | ||
| 2896 | root = async->root; | ||
| 2897 | info = async->info; | ||
| 2898 | |||
| 2899 | btrfs_start_delalloc_inodes(root, 0); | ||
| 2900 | wake_up(&info->flush_wait); | ||
| 2901 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 2902 | |||
| 2903 | spin_lock(&info->lock); | ||
| 2904 | info->flushing = 0; | ||
| 2905 | spin_unlock(&info->lock); | ||
| 2906 | wake_up(&info->flush_wait); | ||
| 2907 | |||
| 2908 | kfree(async); | ||
| 2909 | } | ||
| 2910 | |||
| 2911 | static void wait_on_flush(struct btrfs_space_info *info) | ||
| 2912 | { | ||
| 2913 | DEFINE_WAIT(wait); | ||
| 2914 | u64 used; | ||
| 2915 | |||
| 2916 | while (1) { | ||
| 2917 | prepare_to_wait(&info->flush_wait, &wait, | ||
| 2918 | TASK_UNINTERRUPTIBLE); | ||
| 2919 | spin_lock(&info->lock); | ||
| 2920 | if (!info->flushing) { | ||
| 2921 | spin_unlock(&info->lock); | ||
| 2922 | break; | ||
| 2923 | } | ||
| 2924 | |||
| 2925 | used = info->bytes_used + info->bytes_reserved + | ||
| 2926 | info->bytes_pinned + info->bytes_readonly + | ||
| 2927 | info->bytes_super + info->bytes_root + | ||
| 2928 | info->bytes_may_use + info->bytes_delalloc; | ||
| 2929 | if (used < info->total_bytes) { | ||
| 2930 | spin_unlock(&info->lock); | ||
| 2931 | break; | ||
| 2932 | } | ||
| 2933 | spin_unlock(&info->lock); | ||
| 2934 | schedule(); | ||
| 2935 | } | ||
| 2936 | finish_wait(&info->flush_wait, &wait); | ||
| 2937 | } | ||
| 2938 | |||
| 2939 | static void flush_delalloc(struct btrfs_root *root, | ||
| 2940 | struct btrfs_space_info *info) | ||
| 2941 | { | ||
| 2942 | struct async_flush *async; | ||
| 2943 | bool wait = false; | ||
| 2944 | |||
| 2945 | spin_lock(&info->lock); | ||
| 2946 | |||
| 2947 | if (!info->flushing) { | ||
| 2948 | info->flushing = 1; | ||
| 2949 | init_waitqueue_head(&info->flush_wait); | ||
| 2950 | } else { | ||
| 2951 | wait = true; | ||
| 2952 | } | ||
| 2953 | |||
| 2954 | spin_unlock(&info->lock); | ||
| 2955 | |||
| 2956 | if (wait) { | ||
| 2957 | wait_on_flush(info); | ||
| 2958 | return; | ||
| 2959 | } | ||
| 2960 | |||
| 2961 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
| 2962 | if (!async) | ||
| 2963 | goto flush; | ||
| 2964 | |||
| 2965 | async->root = root; | ||
| 2966 | async->info = info; | ||
| 2967 | async->work.func = flush_delalloc_async; | ||
| 2968 | |||
| 2969 | btrfs_queue_worker(&root->fs_info->enospc_workers, | ||
| 2970 | &async->work); | ||
| 2971 | wait_on_flush(info); | ||
| 2972 | return; | ||
| 2973 | |||
| 2974 | flush: | ||
| 2975 | btrfs_start_delalloc_inodes(root, 0); | ||
| 2976 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 2977 | |||
| 2978 | spin_lock(&info->lock); | ||
| 2979 | info->flushing = 0; | ||
| 2980 | spin_unlock(&info->lock); | ||
| 2981 | wake_up(&info->flush_wait); | ||
| 2982 | } | ||
| 2983 | |||
| 2984 | static int maybe_allocate_chunk(struct btrfs_root *root, | ||
| 2985 | struct btrfs_space_info *info) | ||
| 2986 | { | ||
| 2987 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
| 2988 | struct btrfs_trans_handle *trans; | ||
| 2989 | bool wait = false; | ||
| 2990 | int ret = 0; | ||
| 2991 | u64 min_metadata; | ||
| 2992 | u64 free_space; | ||
| 2993 | |||
| 2994 | free_space = btrfs_super_total_bytes(disk_super); | ||
| 2995 | /* | ||
| 2996 | * we allow the metadata to grow to a max of either 10gb or 5% of the | ||
| 2997 | * space in the volume. | ||
| 2998 | */ | ||
| 2999 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | ||
| 3000 | div64_u64(free_space * 5, 100)); | ||
| 3001 | if (info->total_bytes >= min_metadata) { | ||
| 3002 | spin_unlock(&info->lock); | ||
| 3003 | return 0; | ||
| 3004 | } | ||
| 3005 | |||
| 3006 | if (info->full) { | ||
| 3007 | spin_unlock(&info->lock); | ||
| 3008 | return 0; | ||
| 3009 | } | ||
| 3010 | |||
| 3011 | if (!info->allocating_chunk) { | ||
| 3012 | info->force_alloc = 1; | ||
| 3013 | info->allocating_chunk = 1; | ||
| 3014 | init_waitqueue_head(&info->allocate_wait); | ||
| 3015 | } else { | ||
| 3016 | wait = true; | ||
| 3017 | } | ||
| 3018 | |||
| 3019 | spin_unlock(&info->lock); | ||
| 3020 | |||
| 3021 | if (wait) { | ||
| 3022 | wait_event(info->allocate_wait, | ||
| 3023 | !info->allocating_chunk); | ||
| 3024 | return 1; | ||
| 3025 | } | ||
| 3026 | |||
| 3027 | trans = btrfs_start_transaction(root, 1); | ||
| 3028 | if (!trans) { | ||
| 3029 | ret = -ENOMEM; | ||
| 3030 | goto out; | ||
| 3031 | } | ||
| 3032 | |||
| 3033 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3034 | 4096 + 2 * 1024 * 1024, | ||
| 3035 | info->flags, 0); | ||
| 3036 | btrfs_end_transaction(trans, root); | ||
| 3037 | if (ret) | ||
| 3038 | goto out; | ||
| 3039 | out: | ||
| 3040 | spin_lock(&info->lock); | ||
| 3041 | info->allocating_chunk = 0; | ||
| 3042 | spin_unlock(&info->lock); | ||
| 3043 | wake_up(&info->allocate_wait); | ||
| 3044 | |||
| 3045 | if (ret) | ||
| 3046 | return 0; | ||
| 3047 | return 1; | ||
| 3048 | } | ||
| 3049 | |||
| 3050 | /* | ||
| 3051 | * Reserve metadata space for delalloc. | ||
| 3052 | */ | ||
| 3053 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 3054 | struct inode *inode, int num_items) | ||
| 3055 | { | ||
| 3056 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3057 | struct btrfs_space_info *meta_sinfo; | ||
| 3058 | u64 num_bytes; | ||
| 3059 | u64 used; | ||
| 3060 | u64 alloc_target; | ||
| 3061 | int flushed = 0; | ||
| 3062 | int force_delalloc; | ||
| 3063 | |||
| 3064 | /* get the space info for where the metadata will live */ | ||
| 3065 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3066 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3067 | |||
| 3068 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 3069 | num_items); | ||
| 3070 | again: | ||
| 3071 | spin_lock(&meta_sinfo->lock); | ||
| 3072 | |||
| 3073 | force_delalloc = meta_sinfo->force_delalloc; | ||
| 3074 | |||
| 3075 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3077 | |||
| 3078 | if (!flushed) | ||
| 3079 | meta_sinfo->bytes_delalloc += num_bytes; | ||
| 3080 | |||
| 3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3085 | |||
| 3086 | if (used > meta_sinfo->total_bytes) { | ||
| 3087 | flushed++; | ||
| 3088 | |||
| 3089 | if (flushed == 1) { | ||
| 3090 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3091 | goto again; | ||
| 3092 | flushed++; | ||
| 3093 | } else { | ||
| 3094 | spin_unlock(&meta_sinfo->lock); | ||
| 3095 | } | ||
| 3096 | |||
| 3097 | if (flushed == 2) { | ||
| 3098 | filemap_flush(inode->i_mapping); | ||
| 3099 | goto again; | ||
| 3100 | } else if (flushed == 3) { | ||
| 3101 | flush_delalloc(root, meta_sinfo); | ||
| 3102 | goto again; | ||
| 3103 | } | ||
| 3104 | spin_lock(&meta_sinfo->lock); | ||
| 3105 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 3106 | spin_unlock(&meta_sinfo->lock); | ||
| 3107 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
| 3108 | BTRFS_I(inode)->outstanding_extents, | ||
| 3109 | BTRFS_I(inode)->reserved_extents); | ||
| 3110 | dump_space_info(meta_sinfo, 0, 0); | ||
| 3111 | return -ENOSPC; | ||
| 3112 | } | ||
| 3113 | 2867 | ||
| 3114 | BTRFS_I(inode)->reserved_extents++; | 2868 | return get_alloc_profile(root, flags); |
| 3115 | check_force_delalloc(meta_sinfo); | ||
| 3116 | spin_unlock(&meta_sinfo->lock); | ||
| 3117 | |||
| 3118 | if (!flushed && force_delalloc) | ||
| 3119 | filemap_flush(inode->i_mapping); | ||
| 3120 | |||
| 3121 | return 0; | ||
| 3122 | } | 2869 | } |
| 3123 | 2870 | ||
| 3124 | /* | 2871 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) |
| 3125 | * unreserve num_items number of items worth of metadata space. This needs to | ||
| 3126 | * be paired with btrfs_reserve_metadata_space. | ||
| 3127 | * | ||
| 3128 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
| 3129 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
| 3130 | * oprations which will result in more used metadata, so we want to make sure we | ||
| 3131 | * can do that without issue. | ||
| 3132 | */ | ||
| 3133 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3134 | { | ||
| 3135 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3136 | struct btrfs_space_info *meta_sinfo; | ||
| 3137 | u64 num_bytes; | ||
| 3138 | u64 alloc_target; | ||
| 3139 | bool bug = false; | ||
| 3140 | |||
| 3141 | /* get the space info for where the metadata will live */ | ||
| 3142 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3143 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3144 | |||
| 3145 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3146 | |||
| 3147 | spin_lock(&meta_sinfo->lock); | ||
| 3148 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
| 3149 | bug = true; | ||
| 3150 | meta_sinfo->bytes_may_use = 0; | ||
| 3151 | } else { | ||
| 3152 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3153 | } | ||
| 3154 | spin_unlock(&meta_sinfo->lock); | ||
| 3155 | |||
| 3156 | BUG_ON(bug); | ||
| 3157 | |||
| 3158 | return 0; | ||
| 3159 | } | ||
| 3160 | |||
| 3161 | /* | ||
| 3162 | * Reserve some metadata space for use. We'll calculate the worste case number | ||
| 3163 | * of bytes that would be needed to modify num_items number of items. If we | ||
| 3164 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
| 3165 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
| 3166 | * items you reserved, since whatever metadata you needed should have already | ||
| 3167 | * been allocated. | ||
| 3168 | * | ||
| 3169 | * This will commit the transaction to make more space if we don't have enough | ||
| 3170 | * metadata space. THe only time we don't do this is if we're reserving space | ||
| 3171 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
| 3172 | * callers responsibility to handle it properly. | ||
| 3173 | */ | ||
| 3174 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3175 | { | 2872 | { |
| 3176 | struct btrfs_fs_info *info = root->fs_info; | 2873 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, |
| 3177 | struct btrfs_space_info *meta_sinfo; | 2874 | BTRFS_BLOCK_GROUP_DATA); |
| 3178 | u64 num_bytes; | ||
| 3179 | u64 used; | ||
| 3180 | u64 alloc_target; | ||
| 3181 | int retries = 0; | ||
| 3182 | |||
| 3183 | /* get the space info for where the metadata will live */ | ||
| 3184 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3185 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3186 | |||
| 3187 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3188 | again: | ||
| 3189 | spin_lock(&meta_sinfo->lock); | ||
| 3190 | |||
| 3191 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3192 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3193 | |||
| 3194 | if (!retries) | ||
| 3195 | meta_sinfo->bytes_may_use += num_bytes; | ||
| 3196 | |||
| 3197 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3198 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3199 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3200 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3201 | |||
| 3202 | if (used > meta_sinfo->total_bytes) { | ||
| 3203 | retries++; | ||
| 3204 | if (retries == 1) { | ||
| 3205 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3206 | goto again; | ||
| 3207 | retries++; | ||
| 3208 | } else { | ||
| 3209 | spin_unlock(&meta_sinfo->lock); | ||
| 3210 | } | ||
| 3211 | |||
| 3212 | if (retries == 2) { | ||
| 3213 | flush_delalloc(root, meta_sinfo); | ||
| 3214 | goto again; | ||
| 3215 | } | ||
| 3216 | spin_lock(&meta_sinfo->lock); | ||
| 3217 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3218 | spin_unlock(&meta_sinfo->lock); | ||
| 3219 | |||
| 3220 | dump_space_info(meta_sinfo, 0, 0); | ||
| 3221 | return -ENOSPC; | ||
| 3222 | } | ||
| 3223 | |||
| 3224 | check_force_delalloc(meta_sinfo); | ||
| 3225 | spin_unlock(&meta_sinfo->lock); | ||
| 3226 | |||
| 3227 | return 0; | ||
| 3228 | } | 2875 | } |
| 3229 | 2876 | ||
| 3230 | /* | 2877 | /* |
| 3231 | * This will check the space that the inode allocates from to make sure we have | 2878 | * This will check the space that the inode allocates from to make sure we have |
| 3232 | * enough space for bytes. | 2879 | * enough space for bytes. |
| 3233 | */ | 2880 | */ |
| 3234 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2881 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes) |
| 3235 | u64 bytes) | ||
| 3236 | { | 2882 | { |
| 3237 | struct btrfs_space_info *data_sinfo; | 2883 | struct btrfs_space_info *data_sinfo; |
| 2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 2885 | u64 used; | ||
| 3238 | int ret = 0, committed = 0; | 2886 | int ret = 0, committed = 0; |
| 3239 | 2887 | ||
| 3240 | /* make sure bytes are sectorsize aligned */ | 2888 | /* make sure bytes are sectorsize aligned */ |
| @@ -3247,10 +2895,11 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
| 3247 | again: | 2895 | again: |
| 3248 | /* make sure we have enough space to handle the data first */ | 2896 | /* make sure we have enough space to handle the data first */ |
| 3249 | spin_lock(&data_sinfo->lock); | 2897 | spin_lock(&data_sinfo->lock); |
| 3250 | if (data_sinfo->total_bytes - data_sinfo->bytes_used - | 2898 | used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + |
| 3251 | data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - | 2899 | data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + |
| 3252 | data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - | 2900 | data_sinfo->bytes_may_use; |
| 3253 | data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { | 2901 | |
| 2902 | if (used + bytes > data_sinfo->total_bytes) { | ||
| 3254 | struct btrfs_trans_handle *trans; | 2903 | struct btrfs_trans_handle *trans; |
| 3255 | 2904 | ||
| 3256 | /* | 2905 | /* |
| @@ -3264,15 +2913,15 @@ again: | |||
| 3264 | spin_unlock(&data_sinfo->lock); | 2913 | spin_unlock(&data_sinfo->lock); |
| 3265 | alloc: | 2914 | alloc: |
| 3266 | alloc_target = btrfs_get_alloc_profile(root, 1); | 2915 | alloc_target = btrfs_get_alloc_profile(root, 1); |
| 3267 | trans = btrfs_start_transaction(root, 1); | 2916 | trans = btrfs_join_transaction(root, 1); |
| 3268 | if (!trans) | 2917 | if (IS_ERR(trans)) |
| 3269 | return -ENOMEM; | 2918 | return PTR_ERR(trans); |
| 3270 | 2919 | ||
| 3271 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2920 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
| 3272 | bytes + 2 * 1024 * 1024, | 2921 | bytes + 2 * 1024 * 1024, |
| 3273 | alloc_target, 0); | 2922 | alloc_target, 0); |
| 3274 | btrfs_end_transaction(trans, root); | 2923 | btrfs_end_transaction(trans, root); |
| 3275 | if (ret) | 2924 | if (ret < 0) |
| 3276 | return ret; | 2925 | return ret; |
| 3277 | 2926 | ||
| 3278 | if (!data_sinfo) { | 2927 | if (!data_sinfo) { |
| @@ -3287,25 +2936,26 @@ alloc: | |||
| 3287 | if (!committed && !root->fs_info->open_ioctl_trans) { | 2936 | if (!committed && !root->fs_info->open_ioctl_trans) { |
| 3288 | committed = 1; | 2937 | committed = 1; |
| 3289 | trans = btrfs_join_transaction(root, 1); | 2938 | trans = btrfs_join_transaction(root, 1); |
| 3290 | if (!trans) | 2939 | if (IS_ERR(trans)) |
| 3291 | return -ENOMEM; | 2940 | return PTR_ERR(trans); |
| 3292 | ret = btrfs_commit_transaction(trans, root); | 2941 | ret = btrfs_commit_transaction(trans, root); |
| 3293 | if (ret) | 2942 | if (ret) |
| 3294 | return ret; | 2943 | return ret; |
| 3295 | goto again; | 2944 | goto again; |
| 3296 | } | 2945 | } |
| 3297 | 2946 | ||
| 3298 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 2947 | #if 0 /* I hope we never need this code again, just in case */ |
| 3299 | ", %llu bytes_used, %llu bytes_reserved, " | 2948 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " |
| 3300 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use " | 2949 | "%llu bytes_reserved, " "%llu bytes_pinned, " |
| 3301 | "%llu total\n", (unsigned long long)bytes, | 2950 | "%llu bytes_readonly, %llu may use %llu total\n", |
| 3302 | (unsigned long long)data_sinfo->bytes_delalloc, | 2951 | (unsigned long long)bytes, |
| 3303 | (unsigned long long)data_sinfo->bytes_used, | 2952 | (unsigned long long)data_sinfo->bytes_used, |
| 3304 | (unsigned long long)data_sinfo->bytes_reserved, | 2953 | (unsigned long long)data_sinfo->bytes_reserved, |
| 3305 | (unsigned long long)data_sinfo->bytes_pinned, | 2954 | (unsigned long long)data_sinfo->bytes_pinned, |
| 3306 | (unsigned long long)data_sinfo->bytes_readonly, | 2955 | (unsigned long long)data_sinfo->bytes_readonly, |
| 3307 | (unsigned long long)data_sinfo->bytes_may_use, | 2956 | (unsigned long long)data_sinfo->bytes_may_use, |
| 3308 | (unsigned long long)data_sinfo->total_bytes); | 2957 | (unsigned long long)data_sinfo->total_bytes); |
| 2958 | #endif | ||
| 3309 | return -ENOSPC; | 2959 | return -ENOSPC; |
| 3310 | } | 2960 | } |
| 3311 | data_sinfo->bytes_may_use += bytes; | 2961 | data_sinfo->bytes_may_use += bytes; |
| @@ -3316,12 +2966,13 @@ alloc: | |||
| 3316 | } | 2966 | } |
| 3317 | 2967 | ||
| 3318 | /* | 2968 | /* |
| 3319 | * if there was an error for whatever reason after calling | 2969 | * called when we are clearing an delalloc extent from the |
| 3320 | * btrfs_check_data_free_space, call this so we can cleanup the counters. | 2970 | * inode's io_tree or there was an error for whatever reason |
| 2971 | * after calling btrfs_check_data_free_space | ||
| 3321 | */ | 2972 | */ |
| 3322 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2973 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) |
| 3323 | struct inode *inode, u64 bytes) | ||
| 3324 | { | 2974 | { |
| 2975 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3325 | struct btrfs_space_info *data_sinfo; | 2976 | struct btrfs_space_info *data_sinfo; |
| 3326 | 2977 | ||
| 3327 | /* make sure bytes are sectorsize aligned */ | 2978 | /* make sure bytes are sectorsize aligned */ |
| @@ -3334,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root, | |||
| 3334 | spin_unlock(&data_sinfo->lock); | 2985 | spin_unlock(&data_sinfo->lock); |
| 3335 | } | 2986 | } |
| 3336 | 2987 | ||
| 3337 | /* called when we are adding a delalloc extent to the inode's io_tree */ | ||
| 3338 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
| 3339 | u64 bytes) | ||
| 3340 | { | ||
| 3341 | struct btrfs_space_info *data_sinfo; | ||
| 3342 | |||
| 3343 | /* get the space info for where this inode will be storing its data */ | ||
| 3344 | data_sinfo = BTRFS_I(inode)->space_info; | ||
| 3345 | |||
| 3346 | /* make sure we have enough space to handle the data first */ | ||
| 3347 | spin_lock(&data_sinfo->lock); | ||
| 3348 | data_sinfo->bytes_delalloc += bytes; | ||
| 3349 | |||
| 3350 | /* | ||
| 3351 | * we are adding a delalloc extent without calling | ||
| 3352 | * btrfs_check_data_free_space first. This happens on a weird | ||
| 3353 | * writepage condition, but shouldn't hurt our accounting | ||
| 3354 | */ | ||
| 3355 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | ||
| 3356 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | ||
| 3357 | BTRFS_I(inode)->reserved_bytes = 0; | ||
| 3358 | } else { | ||
| 3359 | data_sinfo->bytes_may_use -= bytes; | ||
| 3360 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
| 3361 | } | ||
| 3362 | |||
| 3363 | spin_unlock(&data_sinfo->lock); | ||
| 3364 | } | ||
| 3365 | |||
| 3366 | /* called when we are clearing an delalloc extent from the inode's io_tree */ | ||
| 3367 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
| 3368 | u64 bytes) | ||
| 3369 | { | ||
| 3370 | struct btrfs_space_info *info; | ||
| 3371 | |||
| 3372 | info = BTRFS_I(inode)->space_info; | ||
| 3373 | |||
| 3374 | spin_lock(&info->lock); | ||
| 3375 | info->bytes_delalloc -= bytes; | ||
| 3376 | spin_unlock(&info->lock); | ||
| 3377 | } | ||
| 3378 | |||
| 3379 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 2988 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
| 3380 | { | 2989 | { |
| 3381 | struct list_head *head = &info->space_info; | 2990 | struct list_head *head = &info->space_info; |
| @@ -3389,13 +2998,28 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
| 3389 | rcu_read_unlock(); | 2998 | rcu_read_unlock(); |
| 3390 | } | 2999 | } |
| 3391 | 3000 | ||
| 3001 | static int should_alloc_chunk(struct btrfs_space_info *sinfo, | ||
| 3002 | u64 alloc_bytes) | ||
| 3003 | { | ||
| 3004 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | ||
| 3005 | |||
| 3006 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
| 3007 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
| 3008 | return 0; | ||
| 3009 | |||
| 3010 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
| 3011 | alloc_bytes < div_factor(num_bytes, 8)) | ||
| 3012 | return 0; | ||
| 3013 | |||
| 3014 | return 1; | ||
| 3015 | } | ||
| 3016 | |||
| 3392 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3017 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 3393 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3018 | struct btrfs_root *extent_root, u64 alloc_bytes, |
| 3394 | u64 flags, int force) | 3019 | u64 flags, int force) |
| 3395 | { | 3020 | { |
| 3396 | struct btrfs_space_info *space_info; | 3021 | struct btrfs_space_info *space_info; |
| 3397 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3022 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
| 3398 | u64 thresh; | ||
| 3399 | int ret = 0; | 3023 | int ret = 0; |
| 3400 | 3024 | ||
| 3401 | mutex_lock(&fs_info->chunk_mutex); | 3025 | mutex_lock(&fs_info->chunk_mutex); |
| @@ -3418,11 +3042,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3418 | goto out; | 3042 | goto out; |
| 3419 | } | 3043 | } |
| 3420 | 3044 | ||
| 3421 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3045 | if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { |
| 3422 | thresh = div_factor(thresh, 8); | ||
| 3423 | if (!force && | ||
| 3424 | (space_info->bytes_used + space_info->bytes_pinned + | ||
| 3425 | space_info->bytes_reserved + alloc_bytes) < thresh) { | ||
| 3426 | spin_unlock(&space_info->lock); | 3046 | spin_unlock(&space_info->lock); |
| 3427 | goto out; | 3047 | goto out; |
| 3428 | } | 3048 | } |
| @@ -3444,6 +3064,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3444 | spin_lock(&space_info->lock); | 3064 | spin_lock(&space_info->lock); |
| 3445 | if (ret) | 3065 | if (ret) |
| 3446 | space_info->full = 1; | 3066 | space_info->full = 1; |
| 3067 | else | ||
| 3068 | ret = 1; | ||
| 3447 | space_info->force_alloc = 0; | 3069 | space_info->force_alloc = 0; |
| 3448 | spin_unlock(&space_info->lock); | 3070 | spin_unlock(&space_info->lock); |
| 3449 | out: | 3071 | out: |
| @@ -3451,13 +3073,713 @@ out: | |||
| 3451 | return ret; | 3073 | return ret; |
| 3452 | } | 3074 | } |
| 3453 | 3075 | ||
| 3076 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | ||
| 3077 | struct btrfs_root *root, | ||
| 3078 | struct btrfs_space_info *sinfo, u64 num_bytes) | ||
| 3079 | { | ||
| 3080 | int ret; | ||
| 3081 | int end_trans = 0; | ||
| 3082 | |||
| 3083 | if (sinfo->full) | ||
| 3084 | return 0; | ||
| 3085 | |||
| 3086 | spin_lock(&sinfo->lock); | ||
| 3087 | ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); | ||
| 3088 | spin_unlock(&sinfo->lock); | ||
| 3089 | if (!ret) | ||
| 3090 | return 0; | ||
| 3091 | |||
| 3092 | if (!trans) { | ||
| 3093 | trans = btrfs_join_transaction(root, 1); | ||
| 3094 | BUG_ON(IS_ERR(trans)); | ||
| 3095 | end_trans = 1; | ||
| 3096 | } | ||
| 3097 | |||
| 3098 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3099 | num_bytes + 2 * 1024 * 1024, | ||
| 3100 | get_alloc_profile(root, sinfo->flags), 0); | ||
| 3101 | |||
| 3102 | if (end_trans) | ||
| 3103 | btrfs_end_transaction(trans, root); | ||
| 3104 | |||
| 3105 | return ret == 1 ? 1 : 0; | ||
| 3106 | } | ||
| 3107 | |||
| 3108 | /* | ||
| 3109 | * shrink metadata reservation for delalloc | ||
| 3110 | */ | ||
| 3111 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
| 3112 | struct btrfs_root *root, u64 to_reclaim) | ||
| 3113 | { | ||
| 3114 | struct btrfs_block_rsv *block_rsv; | ||
| 3115 | u64 reserved; | ||
| 3116 | u64 max_reclaim; | ||
| 3117 | u64 reclaimed = 0; | ||
| 3118 | int pause = 1; | ||
| 3119 | int ret; | ||
| 3120 | |||
| 3121 | block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 3122 | spin_lock(&block_rsv->lock); | ||
| 3123 | reserved = block_rsv->reserved; | ||
| 3124 | spin_unlock(&block_rsv->lock); | ||
| 3125 | |||
| 3126 | if (reserved == 0) | ||
| 3127 | return 0; | ||
| 3128 | |||
| 3129 | max_reclaim = min(reserved, to_reclaim); | ||
| 3130 | |||
| 3131 | while (1) { | ||
| 3132 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | ||
| 3133 | if (!ret) { | ||
| 3134 | __set_current_state(TASK_INTERRUPTIBLE); | ||
| 3135 | schedule_timeout(pause); | ||
| 3136 | pause <<= 1; | ||
| 3137 | if (pause > HZ / 10) | ||
| 3138 | pause = HZ / 10; | ||
| 3139 | } else { | ||
| 3140 | pause = 1; | ||
| 3141 | } | ||
| 3142 | |||
| 3143 | spin_lock(&block_rsv->lock); | ||
| 3144 | if (reserved > block_rsv->reserved) | ||
| 3145 | reclaimed = reserved - block_rsv->reserved; | ||
| 3146 | reserved = block_rsv->reserved; | ||
| 3147 | spin_unlock(&block_rsv->lock); | ||
| 3148 | |||
| 3149 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
| 3150 | break; | ||
| 3151 | |||
| 3152 | if (trans && trans->transaction->blocked) | ||
| 3153 | return -EAGAIN; | ||
| 3154 | } | ||
| 3155 | return reclaimed >= to_reclaim; | ||
| 3156 | } | ||
| 3157 | |||
| 3158 | static int should_retry_reserve(struct btrfs_trans_handle *trans, | ||
| 3159 | struct btrfs_root *root, | ||
| 3160 | struct btrfs_block_rsv *block_rsv, | ||
| 3161 | u64 num_bytes, int *retries) | ||
| 3162 | { | ||
| 3163 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
| 3164 | int ret; | ||
| 3165 | |||
| 3166 | if ((*retries) > 2) | ||
| 3167 | return -ENOSPC; | ||
| 3168 | |||
| 3169 | ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); | ||
| 3170 | if (ret) | ||
| 3171 | return 1; | ||
| 3172 | |||
| 3173 | if (trans && trans->transaction->in_commit) | ||
| 3174 | return -ENOSPC; | ||
| 3175 | |||
| 3176 | ret = shrink_delalloc(trans, root, num_bytes); | ||
| 3177 | if (ret) | ||
| 3178 | return ret; | ||
| 3179 | |||
| 3180 | spin_lock(&space_info->lock); | ||
| 3181 | if (space_info->bytes_pinned < num_bytes) | ||
| 3182 | ret = 1; | ||
| 3183 | spin_unlock(&space_info->lock); | ||
| 3184 | if (ret) | ||
| 3185 | return -ENOSPC; | ||
| 3186 | |||
| 3187 | (*retries)++; | ||
| 3188 | |||
| 3189 | if (trans) | ||
| 3190 | return -EAGAIN; | ||
| 3191 | |||
| 3192 | trans = btrfs_join_transaction(root, 1); | ||
| 3193 | BUG_ON(IS_ERR(trans)); | ||
| 3194 | ret = btrfs_commit_transaction(trans, root); | ||
| 3195 | BUG_ON(ret); | ||
| 3196 | |||
| 3197 | return 1; | ||
| 3198 | } | ||
| 3199 | |||
| 3200 | static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3201 | u64 num_bytes) | ||
| 3202 | { | ||
| 3203 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
| 3204 | u64 unused; | ||
| 3205 | int ret = -ENOSPC; | ||
| 3206 | |||
| 3207 | spin_lock(&space_info->lock); | ||
| 3208 | unused = space_info->bytes_used + space_info->bytes_reserved + | ||
| 3209 | space_info->bytes_pinned + space_info->bytes_readonly; | ||
| 3210 | |||
| 3211 | if (unused < space_info->total_bytes) | ||
| 3212 | unused = space_info->total_bytes - unused; | ||
| 3213 | else | ||
| 3214 | unused = 0; | ||
| 3215 | |||
| 3216 | if (unused >= num_bytes) { | ||
| 3217 | if (block_rsv->priority >= 10) { | ||
| 3218 | space_info->bytes_reserved += num_bytes; | ||
| 3219 | ret = 0; | ||
| 3220 | } else { | ||
| 3221 | if ((unused + block_rsv->reserved) * | ||
| 3222 | block_rsv->priority >= | ||
| 3223 | (num_bytes + block_rsv->reserved) * 10) { | ||
| 3224 | space_info->bytes_reserved += num_bytes; | ||
| 3225 | ret = 0; | ||
| 3226 | } | ||
| 3227 | } | ||
| 3228 | } | ||
| 3229 | spin_unlock(&space_info->lock); | ||
| 3230 | |||
| 3231 | return ret; | ||
| 3232 | } | ||
| 3233 | |||
| 3234 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, | ||
| 3235 | struct btrfs_root *root) | ||
| 3236 | { | ||
| 3237 | struct btrfs_block_rsv *block_rsv; | ||
| 3238 | if (root->ref_cows) | ||
| 3239 | block_rsv = trans->block_rsv; | ||
| 3240 | else | ||
| 3241 | block_rsv = root->block_rsv; | ||
| 3242 | |||
| 3243 | if (!block_rsv) | ||
| 3244 | block_rsv = &root->fs_info->empty_block_rsv; | ||
| 3245 | |||
| 3246 | return block_rsv; | ||
| 3247 | } | ||
| 3248 | |||
| 3249 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3250 | u64 num_bytes) | ||
| 3251 | { | ||
| 3252 | int ret = -ENOSPC; | ||
| 3253 | spin_lock(&block_rsv->lock); | ||
| 3254 | if (block_rsv->reserved >= num_bytes) { | ||
| 3255 | block_rsv->reserved -= num_bytes; | ||
| 3256 | if (block_rsv->reserved < block_rsv->size) | ||
| 3257 | block_rsv->full = 0; | ||
| 3258 | ret = 0; | ||
| 3259 | } | ||
| 3260 | spin_unlock(&block_rsv->lock); | ||
| 3261 | return ret; | ||
| 3262 | } | ||
| 3263 | |||
| 3264 | static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3265 | u64 num_bytes, int update_size) | ||
| 3266 | { | ||
| 3267 | spin_lock(&block_rsv->lock); | ||
| 3268 | block_rsv->reserved += num_bytes; | ||
| 3269 | if (update_size) | ||
| 3270 | block_rsv->size += num_bytes; | ||
| 3271 | else if (block_rsv->reserved >= block_rsv->size) | ||
| 3272 | block_rsv->full = 1; | ||
| 3273 | spin_unlock(&block_rsv->lock); | ||
| 3274 | } | ||
| 3275 | |||
| 3276 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3277 | struct btrfs_block_rsv *dest, u64 num_bytes) | ||
| 3278 | { | ||
| 3279 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
| 3280 | |||
| 3281 | spin_lock(&block_rsv->lock); | ||
| 3282 | if (num_bytes == (u64)-1) | ||
| 3283 | num_bytes = block_rsv->size; | ||
| 3284 | block_rsv->size -= num_bytes; | ||
| 3285 | if (block_rsv->reserved >= block_rsv->size) { | ||
| 3286 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
| 3287 | block_rsv->reserved = block_rsv->size; | ||
| 3288 | block_rsv->full = 1; | ||
| 3289 | } else { | ||
| 3290 | num_bytes = 0; | ||
| 3291 | } | ||
| 3292 | spin_unlock(&block_rsv->lock); | ||
| 3293 | |||
| 3294 | if (num_bytes > 0) { | ||
| 3295 | if (dest) { | ||
| 3296 | block_rsv_add_bytes(dest, num_bytes, 0); | ||
| 3297 | } else { | ||
| 3298 | spin_lock(&space_info->lock); | ||
| 3299 | space_info->bytes_reserved -= num_bytes; | ||
| 3300 | spin_unlock(&space_info->lock); | ||
| 3301 | } | ||
| 3302 | } | ||
| 3303 | } | ||
| 3304 | |||
| 3305 | static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | ||
| 3306 | struct btrfs_block_rsv *dst, u64 num_bytes) | ||
| 3307 | { | ||
| 3308 | int ret; | ||
| 3309 | |||
| 3310 | ret = block_rsv_use_bytes(src, num_bytes); | ||
| 3311 | if (ret) | ||
| 3312 | return ret; | ||
| 3313 | |||
| 3314 | block_rsv_add_bytes(dst, num_bytes, 1); | ||
| 3315 | return 0; | ||
| 3316 | } | ||
| 3317 | |||
| 3318 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | ||
| 3319 | { | ||
| 3320 | memset(rsv, 0, sizeof(*rsv)); | ||
| 3321 | spin_lock_init(&rsv->lock); | ||
| 3322 | atomic_set(&rsv->usage, 1); | ||
| 3323 | rsv->priority = 6; | ||
| 3324 | INIT_LIST_HEAD(&rsv->list); | ||
| 3325 | } | ||
| 3326 | |||
| 3327 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | ||
| 3328 | { | ||
| 3329 | struct btrfs_block_rsv *block_rsv; | ||
| 3330 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 3331 | u64 alloc_target; | ||
| 3332 | |||
| 3333 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | ||
| 3334 | if (!block_rsv) | ||
| 3335 | return NULL; | ||
| 3336 | |||
| 3337 | btrfs_init_block_rsv(block_rsv); | ||
| 3338 | |||
| 3339 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3340 | block_rsv->space_info = __find_space_info(fs_info, | ||
| 3341 | BTRFS_BLOCK_GROUP_METADATA); | ||
| 3342 | |||
| 3343 | return block_rsv; | ||
| 3344 | } | ||
| 3345 | |||
| 3346 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
| 3347 | struct btrfs_block_rsv *rsv) | ||
| 3348 | { | ||
| 3349 | if (rsv && atomic_dec_and_test(&rsv->usage)) { | ||
| 3350 | btrfs_block_rsv_release(root, rsv, (u64)-1); | ||
| 3351 | if (!rsv->durable) | ||
| 3352 | kfree(rsv); | ||
| 3353 | } | ||
| 3354 | } | ||
| 3355 | |||
| 3356 | /* | ||
| 3357 | * make the block_rsv struct be able to capture freed space. | ||
| 3358 | * the captured space will re-add to the the block_rsv struct | ||
| 3359 | * after transaction commit | ||
| 3360 | */ | ||
| 3361 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
| 3362 | struct btrfs_block_rsv *block_rsv) | ||
| 3363 | { | ||
| 3364 | block_rsv->durable = 1; | ||
| 3365 | mutex_lock(&fs_info->durable_block_rsv_mutex); | ||
| 3366 | list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); | ||
| 3367 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | ||
| 3368 | } | ||
| 3369 | |||
| 3370 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
| 3371 | struct btrfs_root *root, | ||
| 3372 | struct btrfs_block_rsv *block_rsv, | ||
| 3373 | u64 num_bytes, int *retries) | ||
| 3374 | { | ||
| 3375 | int ret; | ||
| 3376 | |||
| 3377 | if (num_bytes == 0) | ||
| 3378 | return 0; | ||
| 3379 | again: | ||
| 3380 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
| 3381 | if (!ret) { | ||
| 3382 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
| 3383 | return 0; | ||
| 3384 | } | ||
| 3385 | |||
| 3386 | ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries); | ||
| 3387 | if (ret > 0) | ||
| 3388 | goto again; | ||
| 3389 | |||
| 3390 | return ret; | ||
| 3391 | } | ||
| 3392 | |||
| 3393 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
| 3394 | struct btrfs_root *root, | ||
| 3395 | struct btrfs_block_rsv *block_rsv, | ||
| 3396 | u64 min_reserved, int min_factor) | ||
| 3397 | { | ||
| 3398 | u64 num_bytes = 0; | ||
| 3399 | int commit_trans = 0; | ||
| 3400 | int ret = -ENOSPC; | ||
| 3401 | |||
| 3402 | if (!block_rsv) | ||
| 3403 | return 0; | ||
| 3404 | |||
| 3405 | spin_lock(&block_rsv->lock); | ||
| 3406 | if (min_factor > 0) | ||
| 3407 | num_bytes = div_factor(block_rsv->size, min_factor); | ||
| 3408 | if (min_reserved > num_bytes) | ||
| 3409 | num_bytes = min_reserved; | ||
| 3410 | |||
| 3411 | if (block_rsv->reserved >= num_bytes) { | ||
| 3412 | ret = 0; | ||
| 3413 | } else { | ||
| 3414 | num_bytes -= block_rsv->reserved; | ||
| 3415 | if (block_rsv->durable && | ||
| 3416 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
| 3417 | commit_trans = 1; | ||
| 3418 | } | ||
| 3419 | spin_unlock(&block_rsv->lock); | ||
| 3420 | if (!ret) | ||
| 3421 | return 0; | ||
| 3422 | |||
| 3423 | if (block_rsv->refill_used) { | ||
| 3424 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
| 3425 | if (!ret) { | ||
| 3426 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
| 3427 | return 0; | ||
| 3428 | } | ||
| 3429 | } | ||
| 3430 | |||
| 3431 | if (commit_trans) { | ||
| 3432 | if (trans) | ||
| 3433 | return -EAGAIN; | ||
| 3434 | |||
| 3435 | trans = btrfs_join_transaction(root, 1); | ||
| 3436 | BUG_ON(IS_ERR(trans)); | ||
| 3437 | ret = btrfs_commit_transaction(trans, root); | ||
| 3438 | return 0; | ||
| 3439 | } | ||
| 3440 | |||
| 3441 | WARN_ON(1); | ||
| 3442 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
| 3443 | block_rsv->size, block_rsv->reserved, | ||
| 3444 | block_rsv->freed[0], block_rsv->freed[1]); | ||
| 3445 | |||
| 3446 | return -ENOSPC; | ||
| 3447 | } | ||
| 3448 | |||
/*
 * Move @num_bytes of reservation from @src_rsv to @dst_rsv.
 * Thin public wrapper around block_rsv_migrate_bytes(); returns its
 * result directly.
 */
int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv,
			    struct btrfs_block_rsv *dst_rsv,
			    u64 num_bytes)
{
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
| 3455 | |||
| 3456 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
| 3457 | struct btrfs_block_rsv *block_rsv, | ||
| 3458 | u64 num_bytes) | ||
| 3459 | { | ||
| 3460 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
| 3461 | if (global_rsv->full || global_rsv == block_rsv || | ||
| 3462 | block_rsv->space_info != global_rsv->space_info) | ||
| 3463 | global_rsv = NULL; | ||
| 3464 | block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); | ||
| 3465 | } | ||
| 3466 | |||
| 3467 | /* | ||
| 3468 | * helper to calculate size of global block reservation. | ||
| 3469 | * the desired value is sum of space used by extent tree, | ||
| 3470 | * checksum tree and root tree | ||
| 3471 | */ | ||
| 3472 | static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | ||
| 3473 | { | ||
| 3474 | struct btrfs_space_info *sinfo; | ||
| 3475 | u64 num_bytes; | ||
| 3476 | u64 meta_used; | ||
| 3477 | u64 data_used; | ||
| 3478 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
| 3479 | #if 0 | ||
| 3480 | /* | ||
| 3481 | * per tree used space accounting can be inaccuracy, so we | ||
| 3482 | * can't rely on it. | ||
| 3483 | */ | ||
| 3484 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
| 3485 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
| 3486 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
| 3487 | |||
| 3488 | spin_lock(&fs_info->csum_root->accounting_lock); | ||
| 3489 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); | ||
| 3490 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
| 3491 | |||
| 3492 | spin_lock(&fs_info->tree_root->accounting_lock); | ||
| 3493 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); | ||
| 3494 | spin_unlock(&fs_info->tree_root->accounting_lock); | ||
| 3495 | #endif | ||
| 3496 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
| 3497 | spin_lock(&sinfo->lock); | ||
| 3498 | data_used = sinfo->bytes_used; | ||
| 3499 | spin_unlock(&sinfo->lock); | ||
| 3500 | |||
| 3501 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
| 3502 | spin_lock(&sinfo->lock); | ||
| 3503 | meta_used = sinfo->bytes_used; | ||
| 3504 | spin_unlock(&sinfo->lock); | ||
| 3505 | |||
| 3506 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * | ||
| 3507 | csum_size * 2; | ||
| 3508 | num_bytes += div64_u64(data_used + meta_used, 50); | ||
| 3509 | |||
| 3510 | if (num_bytes * 3 > meta_used) | ||
| 3511 | num_bytes = div64_u64(meta_used, 3); | ||
| 3512 | |||
| 3513 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); | ||
| 3514 | } | ||
| 3515 | |||
| 3516 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
| 3517 | { | ||
| 3518 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | ||
| 3519 | struct btrfs_space_info *sinfo = block_rsv->space_info; | ||
| 3520 | u64 num_bytes; | ||
| 3521 | |||
| 3522 | num_bytes = calc_global_metadata_size(fs_info); | ||
| 3523 | |||
| 3524 | spin_lock(&block_rsv->lock); | ||
| 3525 | spin_lock(&sinfo->lock); | ||
| 3526 | |||
| 3527 | block_rsv->size = num_bytes; | ||
| 3528 | |||
| 3529 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | ||
| 3530 | sinfo->bytes_reserved + sinfo->bytes_readonly; | ||
| 3531 | |||
| 3532 | if (sinfo->total_bytes > num_bytes) { | ||
| 3533 | num_bytes = sinfo->total_bytes - num_bytes; | ||
| 3534 | block_rsv->reserved += num_bytes; | ||
| 3535 | sinfo->bytes_reserved += num_bytes; | ||
| 3536 | } | ||
| 3537 | |||
| 3538 | if (block_rsv->reserved >= block_rsv->size) { | ||
| 3539 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
| 3540 | sinfo->bytes_reserved -= num_bytes; | ||
| 3541 | block_rsv->reserved = block_rsv->size; | ||
| 3542 | block_rsv->full = 1; | ||
| 3543 | } | ||
| 3544 | #if 0 | ||
| 3545 | printk(KERN_INFO"global block rsv size %llu reserved %llu\n", | ||
| 3546 | block_rsv->size, block_rsv->reserved); | ||
| 3547 | #endif | ||
| 3548 | spin_unlock(&sinfo->lock); | ||
| 3549 | spin_unlock(&block_rsv->lock); | ||
| 3550 | } | ||
| 3551 | |||
| 3552 | static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
| 3553 | { | ||
| 3554 | struct btrfs_space_info *space_info; | ||
| 3555 | |||
| 3556 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | ||
| 3557 | fs_info->chunk_block_rsv.space_info = space_info; | ||
| 3558 | fs_info->chunk_block_rsv.priority = 10; | ||
| 3559 | |||
| 3560 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
| 3561 | fs_info->global_block_rsv.space_info = space_info; | ||
| 3562 | fs_info->global_block_rsv.priority = 10; | ||
| 3563 | fs_info->global_block_rsv.refill_used = 1; | ||
| 3564 | fs_info->delalloc_block_rsv.space_info = space_info; | ||
| 3565 | fs_info->trans_block_rsv.space_info = space_info; | ||
| 3566 | fs_info->empty_block_rsv.space_info = space_info; | ||
| 3567 | fs_info->empty_block_rsv.priority = 10; | ||
| 3568 | |||
| 3569 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; | ||
| 3570 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; | ||
| 3571 | fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; | ||
| 3572 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | ||
| 3573 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | ||
| 3574 | |||
| 3575 | btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); | ||
| 3576 | |||
| 3577 | btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); | ||
| 3578 | |||
| 3579 | update_global_block_rsv(fs_info); | ||
| 3580 | } | ||
| 3581 | |||
/*
 * Drop everything the global block rsv still holds back to its
 * space_info; the other fs-wide rsvs are expected to be empty by now.
 */
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	/* (u64)-1 releases the entire remaining reservation */
	block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
}
| 3592 | |||
/*
 * Worst-case metadata bytes for modifying @num_items tree items:
 * one leaf plus one node per upper level, per item. The factor of 3
 * presumably covers COW/splitting along the path — TODO confirm.
 */
static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
{
	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
		3 * num_items;
}
| 3598 | |||
| 3599 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
| 3600 | struct btrfs_root *root, | ||
| 3601 | int num_items, int *retries) | ||
| 3602 | { | ||
| 3603 | u64 num_bytes; | ||
| 3604 | int ret; | ||
| 3605 | |||
| 3606 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
| 3607 | return 0; | ||
| 3608 | |||
| 3609 | num_bytes = calc_trans_metadata_size(root, num_items); | ||
| 3610 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
| 3611 | num_bytes, retries); | ||
| 3612 | if (!ret) { | ||
| 3613 | trans->bytes_reserved += num_bytes; | ||
| 3614 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 3615 | } | ||
| 3616 | return ret; | ||
| 3617 | } | ||
| 3618 | |||
/*
 * Return the metadata bytes reserved by btrfs_trans_reserve_metadata()
 * when the transaction ends. Only the fs-wide trans rsv may be
 * attached here (BUG otherwise).
 */
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	if (!trans->bytes_reserved)
		return;

	BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
	btrfs_block_rsv_release(root, trans->block_rsv,
				trans->bytes_reserved);
	trans->bytes_reserved = 0;
}
| 3630 | |||
/*
 * Migrate enough metadata reservation from the transaction's rsv into
 * the root's orphan rsv to process one orphan inode. Returns the
 * result of block_rsv_migrate_bytes(); released again by
 * btrfs_orphan_release_metadata() with the same 4-item size.
 */
int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
				  struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
	struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv;

	/*
	 * one for deleting orphan item, one for updating inode and
	 * two for calling btrfs_truncate_inode_items.
	 *
	 * btrfs_truncate_inode_items is a delete operation, it frees
	 * more space than it uses in most cases. So two units of
	 * metadata space should be enough for calling it many times.
	 * If all of the metadata space is used, we can commit
	 * transaction and use space it freed.
	 */
	u64 num_bytes = calc_trans_metadata_size(root, 4);
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
| 3651 | |||
/*
 * Undo btrfs_orphan_reserve_metadata(): release the 4-item reservation
 * from the root's orphan rsv. The size here must stay in sync with the
 * reserve side.
 */
void btrfs_orphan_release_metadata(struct inode *inode)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 num_bytes = calc_trans_metadata_size(root, 4);
	btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes);
}
| 3658 | |||
/*
 * Migrate the metadata reservation a pending snapshot needs from the
 * transaction's rsv into the snapshot's own rsv. Returns the result
 * of block_rsv_migrate_bytes().
 */
int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans,
				struct btrfs_pending_snapshot *pending)
{
	struct btrfs_root *root = pending->root;
	struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root);
	struct btrfs_block_rsv *dst_rsv = &pending->block_rsv;
	/*
	 * two for root back/forward refs, two for directory entries
	 * and one for root of the snapshot.
	 */
	u64 num_bytes = calc_trans_metadata_size(root, 5);
	dst_rsv->space_info = src_rsv->space_info;
	return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes);
}
| 3673 | |||
| 3674 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | ||
| 3675 | { | ||
| 3676 | return num_bytes >>= 3; | ||
| 3677 | } | ||
| 3678 | |||
/*
 * Reserve metadata space for @num_bytes of delalloc data on @inode:
 * space for any not-yet-reserved outstanding extents plus csum space.
 * Retries via should_retry_reserve() on failure. Returns 0 on success
 * or a negative error.
 */
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve;
	int nr_extents;
	int retries = 0;
	int ret;

	/*
	 * throttle while a commit is running.
	 * NOTE(review): no task state is set before schedule_timeout(),
	 * so this may return immediately without sleeping — confirm.
	 */
	if (btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);

	num_bytes = ALIGN(num_bytes, root->sectorsize);
again:
	spin_lock(&BTRFS_I(inode)->accounting_lock);
	/* +1 accounts for the extent this reservation is about to add */
	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
		nr_extents -= BTRFS_I(inode)->reserved_extents;
		to_reserve = calc_trans_metadata_size(root, nr_extents);
	} else {
		nr_extents = 0;
		to_reserve = 0;
	}

	to_reserve += calc_csum_metadata_size(inode, num_bytes);
	/*
	 * NOTE(review): reserve_metadata_bytes() is called while holding
	 * accounting_lock (a spinlock); confirm it cannot sleep.
	 */
	ret = reserve_metadata_bytes(block_rsv, to_reserve);
	if (ret) {
		spin_unlock(&BTRFS_I(inode)->accounting_lock);
		ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
					   &retries);
		if (ret > 0)
			goto again;
		return ret;
	}

	BTRFS_I(inode)->reserved_extents += nr_extents;
	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
	spin_unlock(&BTRFS_I(inode)->accounting_lock);

	block_rsv_add_bytes(block_rsv, to_reserve, 1);

	/* start flushing delalloc once the rsv grows past 512MB */
	if (block_rsv->size > 512 * 1024 * 1024)
		shrink_delalloc(NULL, root, to_reserve);

	return 0;
}
| 3725 | |||
/*
 * Release the metadata reservation made by
 * btrfs_delalloc_reserve_metadata() for @num_bytes of data: drops one
 * outstanding extent, returns csum space plus space for any extents
 * that are no longer outstanding.
 */
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 to_free;
	int nr_extents;

	num_bytes = ALIGN(num_bytes, root->sectorsize);
	atomic_dec(&BTRFS_I(inode)->outstanding_extents);

	spin_lock(&BTRFS_I(inode)->accounting_lock);
	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
	/* free the reservation for extents we over-reserved */
	if (nr_extents < BTRFS_I(inode)->reserved_extents) {
		nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
		BTRFS_I(inode)->reserved_extents -= nr_extents;
	} else {
		nr_extents = 0;
	}
	spin_unlock(&BTRFS_I(inode)->accounting_lock);

	to_free = calc_csum_metadata_size(inode, num_bytes);
	if (nr_extents > 0)
		to_free += calc_trans_metadata_size(root, nr_extents);

	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
				to_free);
}
| 3752 | |||
| 3753 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | ||
| 3754 | { | ||
| 3755 | int ret; | ||
| 3756 | |||
| 3757 | ret = btrfs_check_data_free_space(inode, num_bytes); | ||
| 3758 | if (ret) | ||
| 3759 | return ret; | ||
| 3760 | |||
| 3761 | ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); | ||
| 3762 | if (ret) { | ||
| 3763 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
| 3764 | return ret; | ||
| 3765 | } | ||
| 3766 | |||
| 3767 | return 0; | ||
| 3768 | } | ||
| 3769 | |||
/*
 * Counterpart of btrfs_delalloc_reserve_space(): release both the
 * metadata and the data reservations for @num_bytes.
 */
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
	btrfs_delalloc_release_metadata(inode, num_bytes);
	btrfs_free_reserved_data_space(inode, num_bytes);
}
| 3775 | |||
| 3454 | static int update_block_group(struct btrfs_trans_handle *trans, | 3776 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 3455 | struct btrfs_root *root, | 3777 | struct btrfs_root *root, |
| 3456 | u64 bytenr, u64 num_bytes, int alloc, | 3778 | u64 bytenr, u64 num_bytes, int alloc) |
| 3457 | int mark_free) | ||
| 3458 | { | 3779 | { |
| 3459 | struct btrfs_block_group_cache *cache; | 3780 | struct btrfs_block_group_cache *cache; |
| 3460 | struct btrfs_fs_info *info = root->fs_info; | 3781 | struct btrfs_fs_info *info = root->fs_info; |
| 3782 | int factor; | ||
| 3461 | u64 total = num_bytes; | 3783 | u64 total = num_bytes; |
| 3462 | u64 old_val; | 3784 | u64 old_val; |
| 3463 | u64 byte_in_group; | 3785 | u64 byte_in_group; |
| @@ -3476,6 +3798,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 3476 | cache = btrfs_lookup_block_group(info, bytenr); | 3798 | cache = btrfs_lookup_block_group(info, bytenr); |
| 3477 | if (!cache) | 3799 | if (!cache) |
| 3478 | return -1; | 3800 | return -1; |
| 3801 | if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3802 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3803 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 3804 | factor = 2; | ||
| 3805 | else | ||
| 3806 | factor = 1; | ||
| 3479 | byte_in_group = bytenr - cache->key.objectid; | 3807 | byte_in_group = bytenr - cache->key.objectid; |
| 3480 | WARN_ON(byte_in_group > cache->key.offset); | 3808 | WARN_ON(byte_in_group > cache->key.offset); |
| 3481 | 3809 | ||
| @@ -3488,31 +3816,24 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 3488 | old_val += num_bytes; | 3816 | old_val += num_bytes; |
| 3489 | btrfs_set_block_group_used(&cache->item, old_val); | 3817 | btrfs_set_block_group_used(&cache->item, old_val); |
| 3490 | cache->reserved -= num_bytes; | 3818 | cache->reserved -= num_bytes; |
| 3491 | cache->space_info->bytes_used += num_bytes; | ||
| 3492 | cache->space_info->bytes_reserved -= num_bytes; | 3819 | cache->space_info->bytes_reserved -= num_bytes; |
| 3493 | if (cache->ro) | 3820 | cache->space_info->bytes_used += num_bytes; |
| 3494 | cache->space_info->bytes_readonly -= num_bytes; | 3821 | cache->space_info->disk_used += num_bytes * factor; |
| 3495 | spin_unlock(&cache->lock); | 3822 | spin_unlock(&cache->lock); |
| 3496 | spin_unlock(&cache->space_info->lock); | 3823 | spin_unlock(&cache->space_info->lock); |
| 3497 | } else { | 3824 | } else { |
| 3498 | old_val -= num_bytes; | 3825 | old_val -= num_bytes; |
| 3499 | cache->space_info->bytes_used -= num_bytes; | ||
| 3500 | if (cache->ro) | ||
| 3501 | cache->space_info->bytes_readonly += num_bytes; | ||
| 3502 | btrfs_set_block_group_used(&cache->item, old_val); | 3826 | btrfs_set_block_group_used(&cache->item, old_val); |
| 3827 | cache->pinned += num_bytes; | ||
| 3828 | cache->space_info->bytes_pinned += num_bytes; | ||
| 3829 | cache->space_info->bytes_used -= num_bytes; | ||
| 3830 | cache->space_info->disk_used -= num_bytes * factor; | ||
| 3503 | spin_unlock(&cache->lock); | 3831 | spin_unlock(&cache->lock); |
| 3504 | spin_unlock(&cache->space_info->lock); | 3832 | spin_unlock(&cache->space_info->lock); |
| 3505 | if (mark_free) { | ||
| 3506 | int ret; | ||
| 3507 | |||
| 3508 | ret = btrfs_discard_extent(root, bytenr, | ||
| 3509 | num_bytes); | ||
| 3510 | WARN_ON(ret); | ||
| 3511 | 3833 | ||
| 3512 | ret = btrfs_add_free_space(cache, bytenr, | 3834 | set_extent_dirty(info->pinned_extents, |
| 3513 | num_bytes); | 3835 | bytenr, bytenr + num_bytes - 1, |
| 3514 | WARN_ON(ret); | 3836 | GFP_NOFS | __GFP_NOFAIL); |
| 3515 | } | ||
| 3516 | } | 3837 | } |
| 3517 | btrfs_put_block_group(cache); | 3838 | btrfs_put_block_group(cache); |
| 3518 | total -= num_bytes; | 3839 | total -= num_bytes; |
| @@ -3536,18 +3857,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
| 3536 | return bytenr; | 3857 | return bytenr; |
| 3537 | } | 3858 | } |
| 3538 | 3859 | ||
| 3539 | /* | 3860 | static int pin_down_extent(struct btrfs_root *root, |
| 3540 | * this function must be called within transaction | 3861 | struct btrfs_block_group_cache *cache, |
| 3541 | */ | 3862 | u64 bytenr, u64 num_bytes, int reserved) |
| 3542 | int btrfs_pin_extent(struct btrfs_root *root, | ||
| 3543 | u64 bytenr, u64 num_bytes, int reserved) | ||
| 3544 | { | 3863 | { |
| 3545 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 3546 | struct btrfs_block_group_cache *cache; | ||
| 3547 | |||
| 3548 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 3549 | BUG_ON(!cache); | ||
| 3550 | |||
| 3551 | spin_lock(&cache->space_info->lock); | 3864 | spin_lock(&cache->space_info->lock); |
| 3552 | spin_lock(&cache->lock); | 3865 | spin_lock(&cache->lock); |
| 3553 | cache->pinned += num_bytes; | 3866 | cache->pinned += num_bytes; |
| @@ -3559,28 +3872,68 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
| 3559 | spin_unlock(&cache->lock); | 3872 | spin_unlock(&cache->lock); |
| 3560 | spin_unlock(&cache->space_info->lock); | 3873 | spin_unlock(&cache->space_info->lock); |
| 3561 | 3874 | ||
| 3562 | btrfs_put_block_group(cache); | 3875 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
| 3876 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | ||
| 3877 | return 0; | ||
| 3878 | } | ||
| 3563 | 3879 | ||
| 3564 | set_extent_dirty(fs_info->pinned_extents, | 3880 | /* |
| 3565 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); | 3881 | * this function must be called within transaction |
| 3882 | */ | ||
| 3883 | int btrfs_pin_extent(struct btrfs_root *root, | ||
| 3884 | u64 bytenr, u64 num_bytes, int reserved) | ||
| 3885 | { | ||
| 3886 | struct btrfs_block_group_cache *cache; | ||
| 3887 | |||
| 3888 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
| 3889 | BUG_ON(!cache); | ||
| 3890 | |||
| 3891 | pin_down_extent(root, cache, bytenr, num_bytes, reserved); | ||
| 3892 | |||
| 3893 | btrfs_put_block_group(cache); | ||
| 3566 | return 0; | 3894 | return 0; |
| 3567 | } | 3895 | } |
| 3568 | 3896 | ||
| 3569 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 3897 | /* |
| 3570 | u64 num_bytes, int reserve) | 3898 | * update size of reserved extents. this function may return -EAGAIN |
| 3899 | * if 'reserve' is true or 'sinfo' is false. | ||
| 3900 | */ | ||
| 3901 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
| 3902 | u64 num_bytes, int reserve, int sinfo) | ||
| 3571 | { | 3903 | { |
| 3572 | spin_lock(&cache->space_info->lock); | 3904 | int ret = 0; |
| 3573 | spin_lock(&cache->lock); | 3905 | if (sinfo) { |
| 3574 | if (reserve) { | 3906 | struct btrfs_space_info *space_info = cache->space_info; |
| 3575 | cache->reserved += num_bytes; | 3907 | spin_lock(&space_info->lock); |
| 3576 | cache->space_info->bytes_reserved += num_bytes; | 3908 | spin_lock(&cache->lock); |
| 3909 | if (reserve) { | ||
| 3910 | if (cache->ro) { | ||
| 3911 | ret = -EAGAIN; | ||
| 3912 | } else { | ||
| 3913 | cache->reserved += num_bytes; | ||
| 3914 | space_info->bytes_reserved += num_bytes; | ||
| 3915 | } | ||
| 3916 | } else { | ||
| 3917 | if (cache->ro) | ||
| 3918 | space_info->bytes_readonly += num_bytes; | ||
| 3919 | cache->reserved -= num_bytes; | ||
| 3920 | space_info->bytes_reserved -= num_bytes; | ||
| 3921 | } | ||
| 3922 | spin_unlock(&cache->lock); | ||
| 3923 | spin_unlock(&space_info->lock); | ||
| 3577 | } else { | 3924 | } else { |
| 3578 | cache->reserved -= num_bytes; | 3925 | spin_lock(&cache->lock); |
| 3579 | cache->space_info->bytes_reserved -= num_bytes; | 3926 | if (cache->ro) { |
| 3927 | ret = -EAGAIN; | ||
| 3928 | } else { | ||
| 3929 | if (reserve) | ||
| 3930 | cache->reserved += num_bytes; | ||
| 3931 | else | ||
| 3932 | cache->reserved -= num_bytes; | ||
| 3933 | } | ||
| 3934 | spin_unlock(&cache->lock); | ||
| 3580 | } | 3935 | } |
| 3581 | spin_unlock(&cache->lock); | 3936 | return ret; |
| 3582 | spin_unlock(&cache->space_info->lock); | ||
| 3583 | return 0; | ||
| 3584 | } | 3937 | } |
| 3585 | 3938 | ||
| 3586 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 3939 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
| @@ -3611,6 +3964,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3611 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 3964 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
| 3612 | 3965 | ||
| 3613 | up_write(&fs_info->extent_commit_sem); | 3966 | up_write(&fs_info->extent_commit_sem); |
| 3967 | |||
| 3968 | update_global_block_rsv(fs_info); | ||
| 3614 | return 0; | 3969 | return 0; |
| 3615 | } | 3970 | } |
| 3616 | 3971 | ||
| @@ -3637,14 +3992,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 3637 | btrfs_add_free_space(cache, start, len); | 3992 | btrfs_add_free_space(cache, start, len); |
| 3638 | } | 3993 | } |
| 3639 | 3994 | ||
| 3995 | start += len; | ||
| 3996 | |||
| 3640 | spin_lock(&cache->space_info->lock); | 3997 | spin_lock(&cache->space_info->lock); |
| 3641 | spin_lock(&cache->lock); | 3998 | spin_lock(&cache->lock); |
| 3642 | cache->pinned -= len; | 3999 | cache->pinned -= len; |
| 3643 | cache->space_info->bytes_pinned -= len; | 4000 | cache->space_info->bytes_pinned -= len; |
| 4001 | if (cache->ro) { | ||
| 4002 | cache->space_info->bytes_readonly += len; | ||
| 4003 | } else if (cache->reserved_pinned > 0) { | ||
| 4004 | len = min(len, cache->reserved_pinned); | ||
| 4005 | cache->reserved_pinned -= len; | ||
| 4006 | cache->space_info->bytes_reserved += len; | ||
| 4007 | } | ||
| 3644 | spin_unlock(&cache->lock); | 4008 | spin_unlock(&cache->lock); |
| 3645 | spin_unlock(&cache->space_info->lock); | 4009 | spin_unlock(&cache->space_info->lock); |
| 3646 | |||
| 3647 | start += len; | ||
| 3648 | } | 4010 | } |
| 3649 | 4011 | ||
| 3650 | if (cache) | 4012 | if (cache) |
| @@ -3657,8 +4019,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3657 | { | 4019 | { |
| 3658 | struct btrfs_fs_info *fs_info = root->fs_info; | 4020 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3659 | struct extent_io_tree *unpin; | 4021 | struct extent_io_tree *unpin; |
| 4022 | struct btrfs_block_rsv *block_rsv; | ||
| 4023 | struct btrfs_block_rsv *next_rsv; | ||
| 3660 | u64 start; | 4024 | u64 start; |
| 3661 | u64 end; | 4025 | u64 end; |
| 4026 | int idx; | ||
| 3662 | int ret; | 4027 | int ret; |
| 3663 | 4028 | ||
| 3664 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4029 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
| @@ -3679,59 +4044,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3679 | cond_resched(); | 4044 | cond_resched(); |
| 3680 | } | 4045 | } |
| 3681 | 4046 | ||
| 3682 | return ret; | 4047 | mutex_lock(&fs_info->durable_block_rsv_mutex); |
| 3683 | } | 4048 | list_for_each_entry_safe(block_rsv, next_rsv, |
| 3684 | 4049 | &fs_info->durable_block_rsv_list, list) { | |
| 3685 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
| 3686 | struct btrfs_root *root, | ||
| 3687 | struct btrfs_path *path, | ||
| 3688 | u64 bytenr, u64 num_bytes, | ||
| 3689 | int is_data, int reserved, | ||
| 3690 | struct extent_buffer **must_clean) | ||
| 3691 | { | ||
| 3692 | int err = 0; | ||
| 3693 | struct extent_buffer *buf; | ||
| 3694 | |||
| 3695 | if (is_data) | ||
| 3696 | goto pinit; | ||
| 3697 | |||
| 3698 | /* | ||
| 3699 | * discard is sloooow, and so triggering discards on | ||
| 3700 | * individual btree blocks isn't a good plan. Just | ||
| 3701 | * pin everything in discard mode. | ||
| 3702 | */ | ||
| 3703 | if (btrfs_test_opt(root, DISCARD)) | ||
| 3704 | goto pinit; | ||
| 3705 | 4050 | ||
| 3706 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | 4051 | idx = trans->transid & 0x1; |
| 3707 | if (!buf) | 4052 | if (block_rsv->freed[idx] > 0) { |
| 3708 | goto pinit; | 4053 | block_rsv_add_bytes(block_rsv, |
| 4054 | block_rsv->freed[idx], 0); | ||
| 4055 | block_rsv->freed[idx] = 0; | ||
| 4056 | } | ||
| 4057 | if (atomic_read(&block_rsv->usage) == 0) { | ||
| 4058 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); | ||
| 3709 | 4059 | ||
| 3710 | /* we can reuse a block if it hasn't been written | 4060 | if (block_rsv->freed[0] == 0 && |
| 3711 | * and it is from this transaction. We can't | 4061 | block_rsv->freed[1] == 0) { |
| 3712 | * reuse anything from the tree log root because | 4062 | list_del_init(&block_rsv->list); |
| 3713 | * it has tiny sub-transactions. | 4063 | kfree(block_rsv); |
| 3714 | */ | 4064 | } |
| 3715 | if (btrfs_buffer_uptodate(buf, 0) && | 4065 | } else { |
| 3716 | btrfs_try_tree_lock(buf)) { | 4066 | btrfs_block_rsv_release(root, block_rsv, 0); |
| 3717 | u64 header_owner = btrfs_header_owner(buf); | ||
| 3718 | u64 header_transid = btrfs_header_generation(buf); | ||
| 3719 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | ||
| 3720 | header_transid == trans->transid && | ||
| 3721 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
| 3722 | *must_clean = buf; | ||
| 3723 | return 1; | ||
| 3724 | } | 4067 | } |
| 3725 | btrfs_tree_unlock(buf); | ||
| 3726 | } | 4068 | } |
| 3727 | free_extent_buffer(buf); | 4069 | mutex_unlock(&fs_info->durable_block_rsv_mutex); |
| 3728 | pinit: | ||
| 3729 | if (path) | ||
| 3730 | btrfs_set_path_blocking(path); | ||
| 3731 | /* unlocks the pinned mutex */ | ||
| 3732 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); | ||
| 3733 | 4070 | ||
| 3734 | BUG_ON(err < 0); | ||
| 3735 | return 0; | 4071 | return 0; |
| 3736 | } | 4072 | } |
| 3737 | 4073 | ||
| @@ -3892,9 +4228,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3892 | BUG_ON(ret); | 4228 | BUG_ON(ret); |
| 3893 | } | 4229 | } |
| 3894 | } else { | 4230 | } else { |
| 3895 | int mark_free = 0; | ||
| 3896 | struct extent_buffer *must_clean = NULL; | ||
| 3897 | |||
| 3898 | if (found_extent) { | 4231 | if (found_extent) { |
| 3899 | BUG_ON(is_data && refs_to_drop != | 4232 | BUG_ON(is_data && refs_to_drop != |
| 3900 | extent_data_ref_count(root, path, iref)); | 4233 | extent_data_ref_count(root, path, iref)); |
| @@ -3907,31 +4240,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3907 | } | 4240 | } |
| 3908 | } | 4241 | } |
| 3909 | 4242 | ||
| 3910 | ret = pin_down_bytes(trans, root, path, bytenr, | ||
| 3911 | num_bytes, is_data, 0, &must_clean); | ||
| 3912 | if (ret > 0) | ||
| 3913 | mark_free = 1; | ||
| 3914 | BUG_ON(ret < 0); | ||
| 3915 | /* | ||
| 3916 | * it is going to be very rare for someone to be waiting | ||
| 3917 | * on the block we're freeing. del_items might need to | ||
| 3918 | * schedule, so rather than get fancy, just force it | ||
| 3919 | * to blocking here | ||
| 3920 | */ | ||
| 3921 | if (must_clean) | ||
| 3922 | btrfs_set_lock_blocking(must_clean); | ||
| 3923 | |||
| 3924 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4243 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
| 3925 | num_to_del); | 4244 | num_to_del); |
| 3926 | BUG_ON(ret); | 4245 | BUG_ON(ret); |
| 3927 | btrfs_release_path(extent_root, path); | 4246 | btrfs_release_path(extent_root, path); |
| 3928 | 4247 | ||
| 3929 | if (must_clean) { | ||
| 3930 | clean_tree_block(NULL, root, must_clean); | ||
| 3931 | btrfs_tree_unlock(must_clean); | ||
| 3932 | free_extent_buffer(must_clean); | ||
| 3933 | } | ||
| 3934 | |||
| 3935 | if (is_data) { | 4248 | if (is_data) { |
| 3936 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4249 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
| 3937 | BUG_ON(ret); | 4250 | BUG_ON(ret); |
| @@ -3941,8 +4254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3941 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | 4254 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); |
| 3942 | } | 4255 | } |
| 3943 | 4256 | ||
| 3944 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 4257 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
| 3945 | mark_free); | ||
| 3946 | BUG_ON(ret); | 4258 | BUG_ON(ret); |
| 3947 | } | 4259 | } |
| 3948 | btrfs_free_path(path); | 4260 | btrfs_free_path(path); |
| @@ -3950,7 +4262,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3950 | } | 4262 | } |
| 3951 | 4263 | ||
| 3952 | /* | 4264 | /* |
| 3953 | * when we free an extent, it is possible (and likely) that we free the last | 4265 | * when we free an block, it is possible (and likely) that we free the last |
| 3954 | * delayed ref for that extent as well. This searches the delayed ref tree for | 4266 | * delayed ref for that extent as well. This searches the delayed ref tree for |
| 3955 | * a given extent, and if there are no other delayed refs to be processed, it | 4267 | * a given extent, and if there are no other delayed refs to be processed, it |
| 3956 | * removes it from the tree. | 4268 | * removes it from the tree. |
| @@ -3962,7 +4274,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
| 3962 | struct btrfs_delayed_ref_root *delayed_refs; | 4274 | struct btrfs_delayed_ref_root *delayed_refs; |
| 3963 | struct btrfs_delayed_ref_node *ref; | 4275 | struct btrfs_delayed_ref_node *ref; |
| 3964 | struct rb_node *node; | 4276 | struct rb_node *node; |
| 3965 | int ret; | 4277 | int ret = 0; |
| 3966 | 4278 | ||
| 3967 | delayed_refs = &trans->transaction->delayed_refs; | 4279 | delayed_refs = &trans->transaction->delayed_refs; |
| 3968 | spin_lock(&delayed_refs->lock); | 4280 | spin_lock(&delayed_refs->lock); |
| @@ -4014,17 +4326,100 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
| 4014 | list_del_init(&head->cluster); | 4326 | list_del_init(&head->cluster); |
| 4015 | spin_unlock(&delayed_refs->lock); | 4327 | spin_unlock(&delayed_refs->lock); |
| 4016 | 4328 | ||
| 4017 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | 4329 | BUG_ON(head->extent_op); |
| 4018 | &head->node, head->extent_op, | 4330 | if (head->must_insert_reserved) |
| 4019 | head->must_insert_reserved); | 4331 | ret = 1; |
| 4020 | BUG_ON(ret); | 4332 | |
| 4333 | mutex_unlock(&head->mutex); | ||
| 4021 | btrfs_put_delayed_ref(&head->node); | 4334 | btrfs_put_delayed_ref(&head->node); |
| 4022 | return 0; | 4335 | return ret; |
| 4023 | out: | 4336 | out: |
| 4024 | spin_unlock(&delayed_refs->lock); | 4337 | spin_unlock(&delayed_refs->lock); |
| 4025 | return 0; | 4338 | return 0; |
| 4026 | } | 4339 | } |
| 4027 | 4340 | ||
| 4341 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
| 4342 | struct btrfs_root *root, | ||
| 4343 | struct extent_buffer *buf, | ||
| 4344 | u64 parent, int last_ref) | ||
| 4345 | { | ||
| 4346 | struct btrfs_block_rsv *block_rsv; | ||
| 4347 | struct btrfs_block_group_cache *cache = NULL; | ||
| 4348 | int ret; | ||
| 4349 | |||
| 4350 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 4351 | ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, | ||
| 4352 | parent, root->root_key.objectid, | ||
| 4353 | btrfs_header_level(buf), | ||
| 4354 | BTRFS_DROP_DELAYED_REF, NULL); | ||
| 4355 | BUG_ON(ret); | ||
| 4356 | } | ||
| 4357 | |||
| 4358 | if (!last_ref) | ||
| 4359 | return; | ||
| 4360 | |||
| 4361 | block_rsv = get_block_rsv(trans, root); | ||
| 4362 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
| 4363 | if (block_rsv->space_info != cache->space_info) | ||
| 4364 | goto out; | ||
| 4365 | |||
| 4366 | if (btrfs_header_generation(buf) == trans->transid) { | ||
| 4367 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 4368 | ret = check_ref_cleanup(trans, root, buf->start); | ||
| 4369 | if (!ret) | ||
| 4370 | goto pin; | ||
| 4371 | } | ||
| 4372 | |||
| 4373 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
| 4374 | pin_down_extent(root, cache, buf->start, buf->len, 1); | ||
| 4375 | goto pin; | ||
| 4376 | } | ||
| 4377 | |||
| 4378 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | ||
| 4379 | |||
| 4380 | btrfs_add_free_space(cache, buf->start, buf->len); | ||
| 4381 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | ||
| 4382 | if (ret == -EAGAIN) { | ||
| 4383 | /* block group became read-only */ | ||
| 4384 | update_reserved_bytes(cache, buf->len, 0, 1); | ||
| 4385 | goto out; | ||
| 4386 | } | ||
| 4387 | |||
| 4388 | ret = 1; | ||
| 4389 | spin_lock(&block_rsv->lock); | ||
| 4390 | if (block_rsv->reserved < block_rsv->size) { | ||
| 4391 | block_rsv->reserved += buf->len; | ||
| 4392 | ret = 0; | ||
| 4393 | } | ||
| 4394 | spin_unlock(&block_rsv->lock); | ||
| 4395 | |||
| 4396 | if (ret) { | ||
| 4397 | spin_lock(&cache->space_info->lock); | ||
| 4398 | cache->space_info->bytes_reserved -= buf->len; | ||
| 4399 | spin_unlock(&cache->space_info->lock); | ||
| 4400 | } | ||
| 4401 | goto out; | ||
| 4402 | } | ||
| 4403 | pin: | ||
| 4404 | if (block_rsv->durable && !cache->ro) { | ||
| 4405 | ret = 0; | ||
| 4406 | spin_lock(&cache->lock); | ||
| 4407 | if (!cache->ro) { | ||
| 4408 | cache->reserved_pinned += buf->len; | ||
| 4409 | ret = 1; | ||
| 4410 | } | ||
| 4411 | spin_unlock(&cache->lock); | ||
| 4412 | |||
| 4413 | if (ret) { | ||
| 4414 | spin_lock(&block_rsv->lock); | ||
| 4415 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
| 4416 | spin_unlock(&block_rsv->lock); | ||
| 4417 | } | ||
| 4418 | } | ||
| 4419 | out: | ||
| 4420 | btrfs_put_block_group(cache); | ||
| 4421 | } | ||
| 4422 | |||
| 4028 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 4423 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 4029 | struct btrfs_root *root, | 4424 | struct btrfs_root *root, |
| 4030 | u64 bytenr, u64 num_bytes, u64 parent, | 4425 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -4046,8 +4441,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 4046 | parent, root_objectid, (int)owner, | 4441 | parent, root_objectid, (int)owner, |
| 4047 | BTRFS_DROP_DELAYED_REF, NULL); | 4442 | BTRFS_DROP_DELAYED_REF, NULL); |
| 4048 | BUG_ON(ret); | 4443 | BUG_ON(ret); |
| 4049 | ret = check_ref_cleanup(trans, root, bytenr); | ||
| 4050 | BUG_ON(ret); | ||
| 4051 | } else { | 4444 | } else { |
| 4052 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 4445 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, |
| 4053 | parent, root_objectid, owner, | 4446 | parent, root_objectid, owner, |
| @@ -4057,21 +4450,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 4057 | return ret; | 4450 | return ret; |
| 4058 | } | 4451 | } |
| 4059 | 4452 | ||
| 4060 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
| 4061 | struct btrfs_root *root, | ||
| 4062 | u64 bytenr, u32 blocksize, | ||
| 4063 | u64 parent, u64 root_objectid, int level) | ||
| 4064 | { | ||
| 4065 | u64 used; | ||
| 4066 | spin_lock(&root->node_lock); | ||
| 4067 | used = btrfs_root_used(&root->root_item) - blocksize; | ||
| 4068 | btrfs_set_root_used(&root->root_item, used); | ||
| 4069 | spin_unlock(&root->node_lock); | ||
| 4070 | |||
| 4071 | return btrfs_free_extent(trans, root, bytenr, blocksize, | ||
| 4072 | parent, root_objectid, level, 0); | ||
| 4073 | } | ||
| 4074 | |||
| 4075 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 4453 | static u64 stripe_align(struct btrfs_root *root, u64 val) |
| 4076 | { | 4454 | { |
| 4077 | u64 mask = ((u64)root->stripesize - 1); | 4455 | u64 mask = ((u64)root->stripesize - 1); |
| @@ -4124,6 +4502,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
| 4124 | return 0; | 4502 | return 0; |
| 4125 | } | 4503 | } |
| 4126 | 4504 | ||
| 4505 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | ||
| 4506 | { | ||
| 4507 | int index; | ||
| 4508 | if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) | ||
| 4509 | index = 0; | ||
| 4510 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) | ||
| 4511 | index = 1; | ||
| 4512 | else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) | ||
| 4513 | index = 2; | ||
| 4514 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) | ||
| 4515 | index = 3; | ||
| 4516 | else | ||
| 4517 | index = 4; | ||
| 4518 | return index; | ||
| 4519 | } | ||
| 4520 | |||
| 4127 | enum btrfs_loop_type { | 4521 | enum btrfs_loop_type { |
| 4128 | LOOP_FIND_IDEAL = 0, | 4522 | LOOP_FIND_IDEAL = 0, |
| 4129 | LOOP_CACHING_NOWAIT = 1, | 4523 | LOOP_CACHING_NOWAIT = 1, |
| @@ -4145,7 +4539,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 4145 | u64 num_bytes, u64 empty_size, | 4539 | u64 num_bytes, u64 empty_size, |
| 4146 | u64 search_start, u64 search_end, | 4540 | u64 search_start, u64 search_end, |
| 4147 | u64 hint_byte, struct btrfs_key *ins, | 4541 | u64 hint_byte, struct btrfs_key *ins, |
| 4148 | u64 exclude_start, u64 exclude_nr, | ||
| 4149 | int data) | 4542 | int data) |
| 4150 | { | 4543 | { |
| 4151 | int ret = 0; | 4544 | int ret = 0; |
| @@ -4158,6 +4551,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 4158 | struct btrfs_space_info *space_info; | 4551 | struct btrfs_space_info *space_info; |
| 4159 | int last_ptr_loop = 0; | 4552 | int last_ptr_loop = 0; |
| 4160 | int loop = 0; | 4553 | int loop = 0; |
| 4554 | int index = 0; | ||
| 4161 | bool found_uncached_bg = false; | 4555 | bool found_uncached_bg = false; |
| 4162 | bool failed_cluster_refill = false; | 4556 | bool failed_cluster_refill = false; |
| 4163 | bool failed_alloc = false; | 4557 | bool failed_alloc = false; |
| @@ -4170,6 +4564,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 4170 | ins->offset = 0; | 4564 | ins->offset = 0; |
| 4171 | 4565 | ||
| 4172 | space_info = __find_space_info(root->fs_info, data); | 4566 | space_info = __find_space_info(root->fs_info, data); |
| 4567 | if (!space_info) { | ||
| 4568 | printk(KERN_ERR "No space info for %d\n", data); | ||
| 4569 | return -ENOSPC; | ||
| 4570 | } | ||
| 4173 | 4571 | ||
| 4174 | if (orig_root->ref_cows || empty_size) | 4572 | if (orig_root->ref_cows || empty_size) |
| 4175 | allowed_chunk_alloc = 1; | 4573 | allowed_chunk_alloc = 1; |
| @@ -4223,6 +4621,7 @@ ideal_cache: | |||
| 4223 | btrfs_put_block_group(block_group); | 4621 | btrfs_put_block_group(block_group); |
| 4224 | up_read(&space_info->groups_sem); | 4622 | up_read(&space_info->groups_sem); |
| 4225 | } else { | 4623 | } else { |
| 4624 | index = get_block_group_index(block_group); | ||
| 4226 | goto have_block_group; | 4625 | goto have_block_group; |
| 4227 | } | 4626 | } |
| 4228 | } else if (block_group) { | 4627 | } else if (block_group) { |
| @@ -4231,7 +4630,8 @@ ideal_cache: | |||
| 4231 | } | 4630 | } |
| 4232 | search: | 4631 | search: |
| 4233 | down_read(&space_info->groups_sem); | 4632 | down_read(&space_info->groups_sem); |
| 4234 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4633 | list_for_each_entry(block_group, &space_info->block_groups[index], |
| 4634 | list) { | ||
| 4235 | u64 offset; | 4635 | u64 offset; |
| 4236 | int cached; | 4636 | int cached; |
| 4237 | 4637 | ||
| @@ -4422,23 +4822,22 @@ checks: | |||
| 4422 | goto loop; | 4822 | goto loop; |
| 4423 | } | 4823 | } |
| 4424 | 4824 | ||
| 4425 | if (exclude_nr > 0 && | 4825 | ins->objectid = search_start; |
| 4426 | (search_start + num_bytes > exclude_start && | 4826 | ins->offset = num_bytes; |
| 4427 | search_start < exclude_start + exclude_nr)) { | ||
| 4428 | search_start = exclude_start + exclude_nr; | ||
| 4429 | 4827 | ||
| 4828 | if (offset < search_start) | ||
| 4829 | btrfs_add_free_space(block_group, offset, | ||
| 4830 | search_start - offset); | ||
| 4831 | BUG_ON(offset > search_start); | ||
| 4832 | |||
| 4833 | ret = update_reserved_bytes(block_group, num_bytes, 1, | ||
| 4834 | (data & BTRFS_BLOCK_GROUP_DATA)); | ||
| 4835 | if (ret == -EAGAIN) { | ||
| 4430 | btrfs_add_free_space(block_group, offset, num_bytes); | 4836 | btrfs_add_free_space(block_group, offset, num_bytes); |
| 4431 | /* | ||
| 4432 | * if search_start is still in this block group | ||
| 4433 | * then we just re-search this block group | ||
| 4434 | */ | ||
| 4435 | if (search_start >= block_group->key.objectid && | ||
| 4436 | search_start < (block_group->key.objectid + | ||
| 4437 | block_group->key.offset)) | ||
| 4438 | goto have_block_group; | ||
| 4439 | goto loop; | 4837 | goto loop; |
| 4440 | } | 4838 | } |
| 4441 | 4839 | ||
| 4840 | /* we are all good, lets return */ | ||
| 4442 | ins->objectid = search_start; | 4841 | ins->objectid = search_start; |
| 4443 | ins->offset = num_bytes; | 4842 | ins->offset = num_bytes; |
| 4444 | 4843 | ||
| @@ -4446,18 +4845,18 @@ checks: | |||
| 4446 | btrfs_add_free_space(block_group, offset, | 4845 | btrfs_add_free_space(block_group, offset, |
| 4447 | search_start - offset); | 4846 | search_start - offset); |
| 4448 | BUG_ON(offset > search_start); | 4847 | BUG_ON(offset > search_start); |
| 4449 | |||
| 4450 | update_reserved_extents(block_group, num_bytes, 1); | ||
| 4451 | |||
| 4452 | /* we are all good, lets return */ | ||
| 4453 | break; | 4848 | break; |
| 4454 | loop: | 4849 | loop: |
| 4455 | failed_cluster_refill = false; | 4850 | failed_cluster_refill = false; |
| 4456 | failed_alloc = false; | 4851 | failed_alloc = false; |
| 4852 | BUG_ON(index != get_block_group_index(block_group)); | ||
| 4457 | btrfs_put_block_group(block_group); | 4853 | btrfs_put_block_group(block_group); |
| 4458 | } | 4854 | } |
| 4459 | up_read(&space_info->groups_sem); | 4855 | up_read(&space_info->groups_sem); |
| 4460 | 4856 | ||
| 4857 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | ||
| 4858 | goto search; | ||
| 4859 | |||
| 4461 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for | 4860 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
| 4462 | * for them to make caching progress. Also | 4861 | * for them to make caching progress. Also |
| 4463 | * determine the best possible bg to cache | 4862 | * determine the best possible bg to cache |
| @@ -4471,6 +4870,7 @@ loop: | |||
| 4471 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4870 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
| 4472 | (found_uncached_bg || empty_size || empty_cluster || | 4871 | (found_uncached_bg || empty_size || empty_cluster || |
| 4473 | allowed_chunk_alloc)) { | 4872 | allowed_chunk_alloc)) { |
| 4873 | index = 0; | ||
| 4474 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 4874 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
| 4475 | found_uncached_bg = false; | 4875 | found_uncached_bg = false; |
| 4476 | loop++; | 4876 | loop++; |
| @@ -4553,31 +4953,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
| 4553 | int dump_block_groups) | 4953 | int dump_block_groups) |
| 4554 | { | 4954 | { |
| 4555 | struct btrfs_block_group_cache *cache; | 4955 | struct btrfs_block_group_cache *cache; |
| 4956 | int index = 0; | ||
| 4556 | 4957 | ||
| 4557 | spin_lock(&info->lock); | 4958 | spin_lock(&info->lock); |
| 4558 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4959 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
| 4559 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4960 | (unsigned long long)(info->total_bytes - info->bytes_used - |
| 4560 | info->bytes_pinned - info->bytes_reserved - | 4961 | info->bytes_pinned - info->bytes_reserved - |
| 4561 | info->bytes_super), | 4962 | info->bytes_readonly), |
| 4562 | (info->full) ? "" : "not "); | 4963 | (info->full) ? "" : "not "); |
| 4563 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4964 | printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " |
| 4564 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" | 4965 | "reserved=%llu, may_use=%llu, readonly=%llu\n", |
| 4565 | "\n", | ||
| 4566 | (unsigned long long)info->total_bytes, | 4966 | (unsigned long long)info->total_bytes, |
| 4967 | (unsigned long long)info->bytes_used, | ||
| 4567 | (unsigned long long)info->bytes_pinned, | 4968 | (unsigned long long)info->bytes_pinned, |
| 4568 | (unsigned long long)info->bytes_delalloc, | 4969 | (unsigned long long)info->bytes_reserved, |
| 4569 | (unsigned long long)info->bytes_may_use, | 4970 | (unsigned long long)info->bytes_may_use, |
| 4570 | (unsigned long long)info->bytes_used, | 4971 | (unsigned long long)info->bytes_readonly); |
| 4571 | (unsigned long long)info->bytes_root, | ||
| 4572 | (unsigned long long)info->bytes_super, | ||
| 4573 | (unsigned long long)info->bytes_reserved); | ||
| 4574 | spin_unlock(&info->lock); | 4972 | spin_unlock(&info->lock); |
| 4575 | 4973 | ||
| 4576 | if (!dump_block_groups) | 4974 | if (!dump_block_groups) |
| 4577 | return; | 4975 | return; |
| 4578 | 4976 | ||
| 4579 | down_read(&info->groups_sem); | 4977 | down_read(&info->groups_sem); |
| 4580 | list_for_each_entry(cache, &info->block_groups, list) { | 4978 | again: |
| 4979 | list_for_each_entry(cache, &info->block_groups[index], list) { | ||
| 4581 | spin_lock(&cache->lock); | 4980 | spin_lock(&cache->lock); |
| 4582 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 4981 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
| 4583 | "%llu pinned %llu reserved\n", | 4982 | "%llu pinned %llu reserved\n", |
| @@ -4589,6 +4988,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
| 4589 | btrfs_dump_free_space(cache, bytes); | 4988 | btrfs_dump_free_space(cache, bytes); |
| 4590 | spin_unlock(&cache->lock); | 4989 | spin_unlock(&cache->lock); |
| 4591 | } | 4990 | } |
| 4991 | if (++index < BTRFS_NR_RAID_TYPES) | ||
| 4992 | goto again; | ||
| 4592 | up_read(&info->groups_sem); | 4993 | up_read(&info->groups_sem); |
| 4593 | } | 4994 | } |
| 4594 | 4995 | ||
| @@ -4614,9 +5015,8 @@ again: | |||
| 4614 | 5015 | ||
| 4615 | WARN_ON(num_bytes < root->sectorsize); | 5016 | WARN_ON(num_bytes < root->sectorsize); |
| 4616 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5017 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
| 4617 | search_start, search_end, hint_byte, ins, | 5018 | search_start, search_end, hint_byte, |
| 4618 | trans->alloc_exclude_start, | 5019 | ins, data); |
| 4619 | trans->alloc_exclude_nr, data); | ||
| 4620 | 5020 | ||
| 4621 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 5021 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { |
| 4622 | num_bytes = num_bytes >> 1; | 5022 | num_bytes = num_bytes >> 1; |
| @@ -4654,7 +5054,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 4654 | ret = btrfs_discard_extent(root, start, len); | 5054 | ret = btrfs_discard_extent(root, start, len); |
| 4655 | 5055 | ||
| 4656 | btrfs_add_free_space(cache, start, len); | 5056 | btrfs_add_free_space(cache, start, len); |
| 4657 | update_reserved_extents(cache, len, 0); | 5057 | update_reserved_bytes(cache, len, 0, 1); |
| 4658 | btrfs_put_block_group(cache); | 5058 | btrfs_put_block_group(cache); |
| 4659 | 5059 | ||
| 4660 | return ret; | 5060 | return ret; |
| @@ -4717,8 +5117,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 4717 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5117 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 4718 | btrfs_free_path(path); | 5118 | btrfs_free_path(path); |
| 4719 | 5119 | ||
| 4720 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5120 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
| 4721 | 1, 0); | ||
| 4722 | if (ret) { | 5121 | if (ret) { |
| 4723 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5122 | printk(KERN_ERR "btrfs update block group failed for %llu " |
| 4724 | "%llu\n", (unsigned long long)ins->objectid, | 5123 | "%llu\n", (unsigned long long)ins->objectid, |
| @@ -4778,8 +5177,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 4778 | btrfs_mark_buffer_dirty(leaf); | 5177 | btrfs_mark_buffer_dirty(leaf); |
| 4779 | btrfs_free_path(path); | 5178 | btrfs_free_path(path); |
| 4780 | 5179 | ||
| 4781 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5180 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
| 4782 | 1, 0); | ||
| 4783 | if (ret) { | 5181 | if (ret) { |
| 4784 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5182 | printk(KERN_ERR "btrfs update block group failed for %llu " |
| 4785 | "%llu\n", (unsigned long long)ins->objectid, | 5183 | "%llu\n", (unsigned long long)ins->objectid, |
| @@ -4855,73 +5253,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 4855 | put_caching_control(caching_ctl); | 5253 | put_caching_control(caching_ctl); |
| 4856 | } | 5254 | } |
| 4857 | 5255 | ||
| 4858 | update_reserved_extents(block_group, ins->offset, 1); | 5256 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); |
| 5257 | BUG_ON(ret); | ||
| 4859 | btrfs_put_block_group(block_group); | 5258 | btrfs_put_block_group(block_group); |
| 4860 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5259 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
| 4861 | 0, owner, offset, ins, 1); | 5260 | 0, owner, offset, ins, 1); |
| 4862 | return ret; | 5261 | return ret; |
| 4863 | } | 5262 | } |
| 4864 | 5263 | ||
| 4865 | /* | ||
| 4866 | * finds a free extent and does all the dirty work required for allocation | ||
| 4867 | * returns the key for the extent through ins, and a tree buffer for | ||
| 4868 | * the first block of the extent through buf. | ||
| 4869 | * | ||
| 4870 | * returns 0 if everything worked, non-zero otherwise. | ||
| 4871 | */ | ||
| 4872 | static int alloc_tree_block(struct btrfs_trans_handle *trans, | ||
| 4873 | struct btrfs_root *root, | ||
| 4874 | u64 num_bytes, u64 parent, u64 root_objectid, | ||
| 4875 | struct btrfs_disk_key *key, int level, | ||
| 4876 | u64 empty_size, u64 hint_byte, u64 search_end, | ||
| 4877 | struct btrfs_key *ins) | ||
| 4878 | { | ||
| 4879 | int ret; | ||
| 4880 | u64 flags = 0; | ||
| 4881 | |||
| 4882 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | ||
| 4883 | empty_size, hint_byte, search_end, | ||
| 4884 | ins, 0); | ||
| 4885 | if (ret) | ||
| 4886 | return ret; | ||
| 4887 | |||
| 4888 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
| 4889 | if (parent == 0) | ||
| 4890 | parent = ins->objectid; | ||
| 4891 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 4892 | } else | ||
| 4893 | BUG_ON(parent > 0); | ||
| 4894 | |||
| 4895 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 4896 | struct btrfs_delayed_extent_op *extent_op; | ||
| 4897 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
| 4898 | BUG_ON(!extent_op); | ||
| 4899 | if (key) | ||
| 4900 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
| 4901 | else | ||
| 4902 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
| 4903 | extent_op->flags_to_set = flags; | ||
| 4904 | extent_op->update_key = 1; | ||
| 4905 | extent_op->update_flags = 1; | ||
| 4906 | extent_op->is_data = 0; | ||
| 4907 | |||
| 4908 | ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, | ||
| 4909 | ins->offset, parent, root_objectid, | ||
| 4910 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
| 4911 | extent_op); | ||
| 4912 | BUG_ON(ret); | ||
| 4913 | } | ||
| 4914 | |||
| 4915 | if (root_objectid == root->root_key.objectid) { | ||
| 4916 | u64 used; | ||
| 4917 | spin_lock(&root->node_lock); | ||
| 4918 | used = btrfs_root_used(&root->root_item) + num_bytes; | ||
| 4919 | btrfs_set_root_used(&root->root_item, used); | ||
| 4920 | spin_unlock(&root->node_lock); | ||
| 4921 | } | ||
| 4922 | return ret; | ||
| 4923 | } | ||
| 4924 | |||
| 4925 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 5264 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
| 4926 | struct btrfs_root *root, | 5265 | struct btrfs_root *root, |
| 4927 | u64 bytenr, u32 blocksize, | 5266 | u64 bytenr, u32 blocksize, |
| @@ -4960,8 +5299,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
| 4960 | return buf; | 5299 | return buf; |
| 4961 | } | 5300 | } |
| 4962 | 5301 | ||
| 5302 | static struct btrfs_block_rsv * | ||
| 5303 | use_block_rsv(struct btrfs_trans_handle *trans, | ||
| 5304 | struct btrfs_root *root, u32 blocksize) | ||
| 5305 | { | ||
| 5306 | struct btrfs_block_rsv *block_rsv; | ||
| 5307 | int ret; | ||
| 5308 | |||
| 5309 | block_rsv = get_block_rsv(trans, root); | ||
| 5310 | |||
| 5311 | if (block_rsv->size == 0) { | ||
| 5312 | ret = reserve_metadata_bytes(block_rsv, blocksize); | ||
| 5313 | if (ret) | ||
| 5314 | return ERR_PTR(ret); | ||
| 5315 | return block_rsv; | ||
| 5316 | } | ||
| 5317 | |||
| 5318 | ret = block_rsv_use_bytes(block_rsv, blocksize); | ||
| 5319 | if (!ret) | ||
| 5320 | return block_rsv; | ||
| 5321 | |||
| 5322 | WARN_ON(1); | ||
| 5323 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
| 5324 | block_rsv->size, block_rsv->reserved, | ||
| 5325 | block_rsv->freed[0], block_rsv->freed[1]); | ||
| 5326 | |||
| 5327 | return ERR_PTR(-ENOSPC); | ||
| 5328 | } | ||
| 5329 | |||
| 5330 | static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
| 5331 | { | ||
| 5332 | block_rsv_add_bytes(block_rsv, blocksize, 0); | ||
| 5333 | block_rsv_release_bytes(block_rsv, NULL, 0); | ||
| 5334 | } | ||
| 5335 | |||
| 4963 | /* | 5336 | /* |
| 4964 | * helper function to allocate a block for a given tree | 5337 | * finds a free extent and does all the dirty work required for allocation |
| 5338 | * returns the key for the extent through ins, and a tree buffer for | ||
| 5339 | * the first block of the extent through buf. | ||
| 5340 | * | ||
| 4965 | * returns the tree buffer or NULL. | 5341 | * returns the tree buffer or NULL. |
| 4966 | */ | 5342 | */ |
| 4967 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 5343 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
| @@ -4971,18 +5347,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 4971 | u64 hint, u64 empty_size) | 5347 | u64 hint, u64 empty_size) |
| 4972 | { | 5348 | { |
| 4973 | struct btrfs_key ins; | 5349 | struct btrfs_key ins; |
| 4974 | int ret; | 5350 | struct btrfs_block_rsv *block_rsv; |
| 4975 | struct extent_buffer *buf; | 5351 | struct extent_buffer *buf; |
| 5352 | u64 flags = 0; | ||
| 5353 | int ret; | ||
| 4976 | 5354 | ||
| 4977 | ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, | 5355 | |
| 4978 | key, level, empty_size, hint, (u64)-1, &ins); | 5356 | block_rsv = use_block_rsv(trans, root, blocksize); |
| 5357 | if (IS_ERR(block_rsv)) | ||
| 5358 | return ERR_CAST(block_rsv); | ||
| 5359 | |||
| 5360 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, | ||
| 5361 | empty_size, hint, (u64)-1, &ins, 0); | ||
| 4979 | if (ret) { | 5362 | if (ret) { |
| 4980 | BUG_ON(ret > 0); | 5363 | unuse_block_rsv(block_rsv, blocksize); |
| 4981 | return ERR_PTR(ret); | 5364 | return ERR_PTR(ret); |
| 4982 | } | 5365 | } |
| 4983 | 5366 | ||
| 4984 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, | 5367 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
| 4985 | blocksize, level); | 5368 | blocksize, level); |
| 5369 | BUG_ON(IS_ERR(buf)); | ||
| 5370 | |||
| 5371 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
| 5372 | if (parent == 0) | ||
| 5373 | parent = ins.objectid; | ||
| 5374 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 5375 | } else | ||
| 5376 | BUG_ON(parent > 0); | ||
| 5377 | |||
| 5378 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 5379 | struct btrfs_delayed_extent_op *extent_op; | ||
| 5380 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
| 5381 | BUG_ON(!extent_op); | ||
| 5382 | if (key) | ||
| 5383 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
| 5384 | else | ||
| 5385 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
| 5386 | extent_op->flags_to_set = flags; | ||
| 5387 | extent_op->update_key = 1; | ||
| 5388 | extent_op->update_flags = 1; | ||
| 5389 | extent_op->is_data = 0; | ||
| 5390 | |||
| 5391 | ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, | ||
| 5392 | ins.offset, parent, root_objectid, | ||
| 5393 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
| 5394 | extent_op); | ||
| 5395 | BUG_ON(ret); | ||
| 5396 | } | ||
| 4986 | return buf; | 5397 | return buf; |
| 4987 | } | 5398 | } |
| 4988 | 5399 | ||
| @@ -5205,6 +5616,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
| 5205 | next = btrfs_find_tree_block(root, bytenr, blocksize); | 5616 | next = btrfs_find_tree_block(root, bytenr, blocksize); |
| 5206 | if (!next) { | 5617 | if (!next) { |
| 5207 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 5618 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
| 5619 | if (!next) | ||
| 5620 | return -ENOMEM; | ||
| 5208 | reada = 1; | 5621 | reada = 1; |
| 5209 | } | 5622 | } |
| 5210 | btrfs_tree_lock(next); | 5623 | btrfs_tree_lock(next); |
| @@ -5305,7 +5718,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 5305 | struct btrfs_path *path, | 5718 | struct btrfs_path *path, |
| 5306 | struct walk_control *wc) | 5719 | struct walk_control *wc) |
| 5307 | { | 5720 | { |
| 5308 | int ret = 0; | 5721 | int ret; |
| 5309 | int level = wc->level; | 5722 | int level = wc->level; |
| 5310 | struct extent_buffer *eb = path->nodes[level]; | 5723 | struct extent_buffer *eb = path->nodes[level]; |
| 5311 | u64 parent = 0; | 5724 | u64 parent = 0; |
| @@ -5383,13 +5796,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 5383 | btrfs_header_owner(path->nodes[level + 1])); | 5796 | btrfs_header_owner(path->nodes[level + 1])); |
| 5384 | } | 5797 | } |
| 5385 | 5798 | ||
| 5386 | ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, | 5799 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
| 5387 | root->root_key.objectid, level, 0); | ||
| 5388 | BUG_ON(ret); | ||
| 5389 | out: | 5800 | out: |
| 5390 | wc->refs[level] = 0; | 5801 | wc->refs[level] = 0; |
| 5391 | wc->flags[level] = 0; | 5802 | wc->flags[level] = 0; |
| 5392 | return ret; | 5803 | return 0; |
| 5393 | } | 5804 | } |
| 5394 | 5805 | ||
| 5395 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | 5806 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, |
| @@ -5417,7 +5828,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
| 5417 | if (ret > 0) { | 5828 | if (ret > 0) { |
| 5418 | path->slots[level]++; | 5829 | path->slots[level]++; |
| 5419 | continue; | 5830 | continue; |
| 5420 | } | 5831 | } else if (ret < 0) |
| 5832 | return ret; | ||
| 5421 | level = wc->level; | 5833 | level = wc->level; |
| 5422 | } | 5834 | } |
| 5423 | return 0; | 5835 | return 0; |
| @@ -5466,7 +5878,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
| 5466 | * also make sure backrefs for the shared block and all lower level | 5878 | * also make sure backrefs for the shared block and all lower level |
| 5467 | * blocks are properly updated. | 5879 | * blocks are properly updated. |
| 5468 | */ | 5880 | */ |
| 5469 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | 5881 | int btrfs_drop_snapshot(struct btrfs_root *root, |
| 5882 | struct btrfs_block_rsv *block_rsv, int update_ref) | ||
| 5470 | { | 5883 | { |
| 5471 | struct btrfs_path *path; | 5884 | struct btrfs_path *path; |
| 5472 | struct btrfs_trans_handle *trans; | 5885 | struct btrfs_trans_handle *trans; |
| @@ -5484,7 +5897,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5484 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 5897 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
| 5485 | BUG_ON(!wc); | 5898 | BUG_ON(!wc); |
| 5486 | 5899 | ||
| 5487 | trans = btrfs_start_transaction(tree_root, 1); | 5900 | trans = btrfs_start_transaction(tree_root, 0); |
| 5901 | if (block_rsv) | ||
| 5902 | trans->block_rsv = block_rsv; | ||
| 5488 | 5903 | ||
| 5489 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 5904 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
| 5490 | level = btrfs_header_level(root->node); | 5905 | level = btrfs_header_level(root->node); |
| @@ -5572,22 +5987,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5572 | } | 5987 | } |
| 5573 | 5988 | ||
| 5574 | BUG_ON(wc->level == 0); | 5989 | BUG_ON(wc->level == 0); |
| 5575 | if (trans->transaction->in_commit || | 5990 | if (btrfs_should_end_transaction(trans, tree_root)) { |
| 5576 | trans->transaction->delayed_refs.flushing) { | ||
| 5577 | ret = btrfs_update_root(trans, tree_root, | 5991 | ret = btrfs_update_root(trans, tree_root, |
| 5578 | &root->root_key, | 5992 | &root->root_key, |
| 5579 | root_item); | 5993 | root_item); |
| 5580 | BUG_ON(ret); | 5994 | BUG_ON(ret); |
| 5581 | 5995 | ||
| 5582 | btrfs_end_transaction(trans, tree_root); | 5996 | btrfs_end_transaction_throttle(trans, tree_root); |
| 5583 | trans = btrfs_start_transaction(tree_root, 1); | 5997 | trans = btrfs_start_transaction(tree_root, 0); |
| 5584 | } else { | 5998 | if (block_rsv) |
| 5585 | unsigned long update; | 5999 | trans->block_rsv = block_rsv; |
| 5586 | update = trans->delayed_ref_updates; | ||
| 5587 | trans->delayed_ref_updates = 0; | ||
| 5588 | if (update) | ||
| 5589 | btrfs_run_delayed_refs(trans, tree_root, | ||
| 5590 | update); | ||
| 5591 | } | 6000 | } |
| 5592 | } | 6001 | } |
| 5593 | btrfs_release_path(root, path); | 6002 | btrfs_release_path(root, path); |
| @@ -5615,7 +6024,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5615 | kfree(root); | 6024 | kfree(root); |
| 5616 | } | 6025 | } |
| 5617 | out: | 6026 | out: |
| 5618 | btrfs_end_transaction(trans, tree_root); | 6027 | btrfs_end_transaction_throttle(trans, tree_root); |
| 5619 | kfree(wc); | 6028 | kfree(wc); |
| 5620 | btrfs_free_path(path); | 6029 | btrfs_free_path(path); |
| 5621 | return err; | 6030 | return err; |
| @@ -7211,48 +7620,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
| 7211 | return flags; | 7620 | return flags; |
| 7212 | } | 7621 | } |
| 7213 | 7622 | ||
| 7214 | static int __alloc_chunk_for_shrink(struct btrfs_root *root, | 7623 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) |
| 7215 | struct btrfs_block_group_cache *shrink_block_group, | ||
| 7216 | int force) | ||
| 7217 | { | 7624 | { |
| 7218 | struct btrfs_trans_handle *trans; | 7625 | struct btrfs_space_info *sinfo = cache->space_info; |
| 7219 | u64 new_alloc_flags; | 7626 | u64 num_bytes; |
| 7220 | u64 calc; | 7627 | int ret = -ENOSPC; |
| 7221 | 7628 | ||
| 7222 | spin_lock(&shrink_block_group->lock); | 7629 | if (cache->ro) |
| 7223 | if (btrfs_block_group_used(&shrink_block_group->item) + | 7630 | return 0; |
| 7224 | shrink_block_group->reserved > 0) { | ||
| 7225 | spin_unlock(&shrink_block_group->lock); | ||
| 7226 | 7631 | ||
| 7227 | trans = btrfs_start_transaction(root, 1); | 7632 | spin_lock(&sinfo->lock); |
| 7228 | spin_lock(&shrink_block_group->lock); | 7633 | spin_lock(&cache->lock); |
| 7634 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
| 7635 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
| 7636 | |||
| 7637 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | ||
| 7638 | sinfo->bytes_may_use + sinfo->bytes_readonly + | ||
| 7639 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | ||
| 7640 | sinfo->bytes_readonly += num_bytes; | ||
| 7641 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
| 7642 | cache->reserved_pinned = 0; | ||
| 7643 | cache->ro = 1; | ||
| 7644 | ret = 0; | ||
| 7645 | } | ||
| 7646 | spin_unlock(&cache->lock); | ||
| 7647 | spin_unlock(&sinfo->lock); | ||
| 7648 | return ret; | ||
| 7649 | } | ||
| 7229 | 7650 | ||
| 7230 | new_alloc_flags = update_block_group_flags(root, | 7651 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
| 7231 | shrink_block_group->flags); | 7652 | struct btrfs_block_group_cache *cache) |
| 7232 | if (new_alloc_flags != shrink_block_group->flags) { | ||
| 7233 | calc = | ||
| 7234 | btrfs_block_group_used(&shrink_block_group->item); | ||
| 7235 | } else { | ||
| 7236 | calc = shrink_block_group->key.offset; | ||
| 7237 | } | ||
| 7238 | spin_unlock(&shrink_block_group->lock); | ||
| 7239 | 7653 | ||
| 7240 | do_chunk_alloc(trans, root->fs_info->extent_root, | 7654 | { |
| 7241 | calc + 2 * 1024 * 1024, new_alloc_flags, force); | 7655 | struct btrfs_trans_handle *trans; |
| 7656 | u64 alloc_flags; | ||
| 7657 | int ret; | ||
| 7242 | 7658 | ||
| 7243 | btrfs_end_transaction(trans, root); | 7659 | BUG_ON(cache->ro); |
| 7244 | } else | ||
| 7245 | spin_unlock(&shrink_block_group->lock); | ||
| 7246 | return 0; | ||
| 7247 | } | ||
| 7248 | 7660 | ||
| 7661 | trans = btrfs_join_transaction(root, 1); | ||
| 7662 | BUG_ON(IS_ERR(trans)); | ||
| 7249 | 7663 | ||
| 7250 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | 7664 | alloc_flags = update_block_group_flags(root, cache->flags); |
| 7251 | struct btrfs_block_group_cache *group) | 7665 | if (alloc_flags != cache->flags) |
| 7666 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
| 7667 | |||
| 7668 | ret = set_block_group_ro(cache); | ||
| 7669 | if (!ret) | ||
| 7670 | goto out; | ||
| 7671 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | ||
| 7672 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
| 7673 | if (ret < 0) | ||
| 7674 | goto out; | ||
| 7675 | ret = set_block_group_ro(cache); | ||
| 7676 | out: | ||
| 7677 | btrfs_end_transaction(trans, root); | ||
| 7678 | return ret; | ||
| 7679 | } | ||
| 7252 | 7680 | ||
| 7681 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
| 7682 | struct btrfs_block_group_cache *cache) | ||
| 7253 | { | 7683 | { |
| 7254 | __alloc_chunk_for_shrink(root, group, 1); | 7684 | struct btrfs_space_info *sinfo = cache->space_info; |
| 7255 | set_block_group_readonly(group); | 7685 | u64 num_bytes; |
| 7686 | |||
| 7687 | BUG_ON(!cache->ro); | ||
| 7688 | |||
| 7689 | spin_lock(&sinfo->lock); | ||
| 7690 | spin_lock(&cache->lock); | ||
| 7691 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
| 7692 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
| 7693 | sinfo->bytes_readonly -= num_bytes; | ||
| 7694 | cache->ro = 0; | ||
| 7695 | spin_unlock(&cache->lock); | ||
| 7696 | spin_unlock(&sinfo->lock); | ||
| 7256 | return 0; | 7697 | return 0; |
| 7257 | } | 7698 | } |
| 7258 | 7699 | ||
| @@ -7369,7 +7810,6 @@ static int find_first_block_group(struct btrfs_root *root, | |||
| 7369 | } | 7810 | } |
| 7370 | path->slots[0]++; | 7811 | path->slots[0]++; |
| 7371 | } | 7812 | } |
| 7372 | ret = -ENOENT; | ||
| 7373 | out: | 7813 | out: |
| 7374 | return ret; | 7814 | return ret; |
| 7375 | } | 7815 | } |
| @@ -7420,17 +7860,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 7420 | */ | 7860 | */ |
| 7421 | synchronize_rcu(); | 7861 | synchronize_rcu(); |
| 7422 | 7862 | ||
| 7863 | release_global_block_rsv(info); | ||
| 7864 | |||
| 7423 | while(!list_empty(&info->space_info)) { | 7865 | while(!list_empty(&info->space_info)) { |
| 7424 | space_info = list_entry(info->space_info.next, | 7866 | space_info = list_entry(info->space_info.next, |
| 7425 | struct btrfs_space_info, | 7867 | struct btrfs_space_info, |
| 7426 | list); | 7868 | list); |
| 7427 | 7869 | if (space_info->bytes_pinned > 0 || | |
| 7870 | space_info->bytes_reserved > 0) { | ||
| 7871 | WARN_ON(1); | ||
| 7872 | dump_space_info(space_info, 0, 0); | ||
| 7873 | } | ||
| 7428 | list_del(&space_info->list); | 7874 | list_del(&space_info->list); |
| 7429 | kfree(space_info); | 7875 | kfree(space_info); |
| 7430 | } | 7876 | } |
| 7431 | return 0; | 7877 | return 0; |
| 7432 | } | 7878 | } |
| 7433 | 7879 | ||
| 7880 | static void __link_block_group(struct btrfs_space_info *space_info, | ||
| 7881 | struct btrfs_block_group_cache *cache) | ||
| 7882 | { | ||
| 7883 | int index = get_block_group_index(cache); | ||
| 7884 | |||
| 7885 | down_write(&space_info->groups_sem); | ||
| 7886 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
| 7887 | up_write(&space_info->groups_sem); | ||
| 7888 | } | ||
| 7889 | |||
| 7434 | int btrfs_read_block_groups(struct btrfs_root *root) | 7890 | int btrfs_read_block_groups(struct btrfs_root *root) |
| 7435 | { | 7891 | { |
| 7436 | struct btrfs_path *path; | 7892 | struct btrfs_path *path; |
| @@ -7452,10 +7908,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7452 | 7908 | ||
| 7453 | while (1) { | 7909 | while (1) { |
| 7454 | ret = find_first_block_group(root, path, &key); | 7910 | ret = find_first_block_group(root, path, &key); |
| 7455 | if (ret > 0) { | 7911 | if (ret > 0) |
| 7456 | ret = 0; | 7912 | break; |
| 7457 | goto error; | ||
| 7458 | } | ||
| 7459 | if (ret != 0) | 7913 | if (ret != 0) |
| 7460 | goto error; | 7914 | goto error; |
| 7461 | 7915 | ||
| @@ -7464,7 +7918,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7464 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 7918 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
| 7465 | if (!cache) { | 7919 | if (!cache) { |
| 7466 | ret = -ENOMEM; | 7920 | ret = -ENOMEM; |
| 7467 | break; | 7921 | goto error; |
| 7468 | } | 7922 | } |
| 7469 | 7923 | ||
| 7470 | atomic_set(&cache->count, 1); | 7924 | atomic_set(&cache->count, 1); |
| @@ -7521,20 +7975,36 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7521 | BUG_ON(ret); | 7975 | BUG_ON(ret); |
| 7522 | cache->space_info = space_info; | 7976 | cache->space_info = space_info; |
| 7523 | spin_lock(&cache->space_info->lock); | 7977 | spin_lock(&cache->space_info->lock); |
| 7524 | cache->space_info->bytes_super += cache->bytes_super; | 7978 | cache->space_info->bytes_readonly += cache->bytes_super; |
| 7525 | spin_unlock(&cache->space_info->lock); | 7979 | spin_unlock(&cache->space_info->lock); |
| 7526 | 7980 | ||
| 7527 | down_write(&space_info->groups_sem); | 7981 | __link_block_group(space_info, cache); |
| 7528 | list_add_tail(&cache->list, &space_info->block_groups); | ||
| 7529 | up_write(&space_info->groups_sem); | ||
| 7530 | 7982 | ||
| 7531 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7983 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
| 7532 | BUG_ON(ret); | 7984 | BUG_ON(ret); |
| 7533 | 7985 | ||
| 7534 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7986 | set_avail_alloc_bits(root->fs_info, cache->flags); |
| 7535 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7987 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
| 7536 | set_block_group_readonly(cache); | 7988 | set_block_group_ro(cache); |
| 7537 | } | 7989 | } |
| 7990 | |||
| 7991 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | ||
| 7992 | if (!(get_alloc_profile(root, space_info->flags) & | ||
| 7993 | (BTRFS_BLOCK_GROUP_RAID10 | | ||
| 7994 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 7995 | BTRFS_BLOCK_GROUP_DUP))) | ||
| 7996 | continue; | ||
| 7997 | /* | ||
| 7998 | * avoid allocating from un-mirrored block group if there are | ||
| 7999 | * mirrored block groups. | ||
| 8000 | */ | ||
| 8001 | list_for_each_entry(cache, &space_info->block_groups[3], list) | ||
| 8002 | set_block_group_ro(cache); | ||
| 8003 | list_for_each_entry(cache, &space_info->block_groups[4], list) | ||
| 8004 | set_block_group_ro(cache); | ||
| 8005 | } | ||
| 8006 | |||
| 8007 | init_global_block_rsv(info); | ||
| 7538 | ret = 0; | 8008 | ret = 0; |
| 7539 | error: | 8009 | error: |
| 7540 | btrfs_free_path(path); | 8010 | btrfs_free_path(path); |
| @@ -7595,12 +8065,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7595 | BUG_ON(ret); | 8065 | BUG_ON(ret); |
| 7596 | 8066 | ||
| 7597 | spin_lock(&cache->space_info->lock); | 8067 | spin_lock(&cache->space_info->lock); |
| 7598 | cache->space_info->bytes_super += cache->bytes_super; | 8068 | cache->space_info->bytes_readonly += cache->bytes_super; |
| 7599 | spin_unlock(&cache->space_info->lock); | 8069 | spin_unlock(&cache->space_info->lock); |
| 7600 | 8070 | ||
| 7601 | down_write(&cache->space_info->groups_sem); | 8071 | __link_block_group(cache->space_info, cache); |
| 7602 | list_add_tail(&cache->list, &cache->space_info->block_groups); | ||
| 7603 | up_write(&cache->space_info->groups_sem); | ||
| 7604 | 8072 | ||
| 7605 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 8073 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
| 7606 | BUG_ON(ret); | 8074 | BUG_ON(ret); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index c99121ac5d6b..d74e6af9b53a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -2,7 +2,6 @@ | |||
| 2 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
| 3 | #include <linux/bio.h> | 3 | #include <linux/bio.h> |
| 4 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
| 5 | #include <linux/gfp.h> | ||
| 6 | #include <linux/pagemap.h> | 5 | #include <linux/pagemap.h> |
| 7 | #include <linux/page-flags.h> | 6 | #include <linux/page-flags.h> |
| 8 | #include <linux/module.h> | 7 | #include <linux/module.h> |
| @@ -136,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
| 136 | return state; | 135 | return state; |
| 137 | } | 136 | } |
| 138 | 137 | ||
| 139 | static void free_extent_state(struct extent_state *state) | 138 | void free_extent_state(struct extent_state *state) |
| 140 | { | 139 | { |
| 141 | if (!state) | 140 | if (!state) |
| 142 | return; | 141 | return; |
| @@ -336,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 336 | } | 335 | } |
| 337 | 336 | ||
| 338 | static int set_state_cb(struct extent_io_tree *tree, | 337 | static int set_state_cb(struct extent_io_tree *tree, |
| 339 | struct extent_state *state, | 338 | struct extent_state *state, int *bits) |
| 340 | unsigned long bits) | ||
| 341 | { | 339 | { |
| 342 | if (tree->ops && tree->ops->set_bit_hook) { | 340 | if (tree->ops && tree->ops->set_bit_hook) { |
| 343 | return tree->ops->set_bit_hook(tree->mapping->host, | 341 | return tree->ops->set_bit_hook(tree->mapping->host, |
| 344 | state->start, state->end, | 342 | state, bits); |
| 345 | state->state, bits); | ||
| 346 | } | 343 | } |
| 347 | 344 | ||
| 348 | return 0; | 345 | return 0; |
| 349 | } | 346 | } |
| 350 | 347 | ||
| 351 | static void clear_state_cb(struct extent_io_tree *tree, | 348 | static void clear_state_cb(struct extent_io_tree *tree, |
| 352 | struct extent_state *state, | 349 | struct extent_state *state, int *bits) |
| 353 | unsigned long bits) | ||
| 354 | { | 350 | { |
| 355 | if (tree->ops && tree->ops->clear_bit_hook) | 351 | if (tree->ops && tree->ops->clear_bit_hook) |
| 356 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 352 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
| @@ -368,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
| 368 | */ | 364 | */ |
| 369 | static int insert_state(struct extent_io_tree *tree, | 365 | static int insert_state(struct extent_io_tree *tree, |
| 370 | struct extent_state *state, u64 start, u64 end, | 366 | struct extent_state *state, u64 start, u64 end, |
| 371 | int bits) | 367 | int *bits) |
| 372 | { | 368 | { |
| 373 | struct rb_node *node; | 369 | struct rb_node *node; |
| 370 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
| 374 | int ret; | 371 | int ret; |
| 375 | 372 | ||
| 376 | if (end < start) { | 373 | if (end < start) { |
| @@ -385,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 385 | if (ret) | 382 | if (ret) |
| 386 | return ret; | 383 | return ret; |
| 387 | 384 | ||
| 388 | if (bits & EXTENT_DIRTY) | 385 | if (bits_to_set & EXTENT_DIRTY) |
| 389 | tree->dirty_bytes += end - start + 1; | 386 | tree->dirty_bytes += end - start + 1; |
| 390 | state->state |= bits; | 387 | state->state |= bits_to_set; |
| 391 | node = tree_insert(&tree->state, end, &state->rb_node); | 388 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 392 | if (node) { | 389 | if (node) { |
| 393 | struct extent_state *found; | 390 | struct extent_state *found; |
| @@ -457,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 457 | * struct is freed and removed from the tree | 454 | * struct is freed and removed from the tree |
| 458 | */ | 455 | */ |
| 459 | static int clear_state_bit(struct extent_io_tree *tree, | 456 | static int clear_state_bit(struct extent_io_tree *tree, |
| 460 | struct extent_state *state, int bits, int wake, | 457 | struct extent_state *state, |
| 461 | int delete) | 458 | int *bits, int wake) |
| 462 | { | 459 | { |
| 463 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; | 460 | int bits_to_clear = *bits & ~EXTENT_CTLBITS; |
| 464 | int ret = state->state & bits_to_clear; | 461 | int ret = state->state & bits_to_clear; |
| 465 | 462 | ||
| 466 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 463 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
| 467 | u64 range = state->end - state->start + 1; | 464 | u64 range = state->end - state->start + 1; |
| 468 | WARN_ON(range > tree->dirty_bytes); | 465 | WARN_ON(range > tree->dirty_bytes); |
| 469 | tree->dirty_bytes -= range; | 466 | tree->dirty_bytes -= range; |
| @@ -472,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 472 | state->state &= ~bits_to_clear; | 469 | state->state &= ~bits_to_clear; |
| 473 | if (wake) | 470 | if (wake) |
| 474 | wake_up(&state->wq); | 471 | wake_up(&state->wq); |
| 475 | if (delete || state->state == 0) { | 472 | if (state->state == 0) { |
| 476 | if (state->tree) { | 473 | if (state->tree) { |
| 477 | clear_state_cb(tree, state, state->state); | ||
| 478 | rb_erase(&state->rb_node, &tree->state); | 474 | rb_erase(&state->rb_node, &tree->state); |
| 479 | state->tree = NULL; | 475 | state->tree = NULL; |
| 480 | free_extent_state(state); | 476 | free_extent_state(state); |
| @@ -515,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 515 | int set = 0; | 511 | int set = 0; |
| 516 | int clear = 0; | 512 | int clear = 0; |
| 517 | 513 | ||
| 514 | if (delete) | ||
| 515 | bits |= ~EXTENT_CTLBITS; | ||
| 516 | bits |= EXTENT_FIRST_DELALLOC; | ||
| 517 | |||
| 518 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 518 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
| 519 | clear = 1; | 519 | clear = 1; |
| 520 | again: | 520 | again: |
| @@ -581,8 +581,7 @@ hit_next: | |||
| 581 | if (err) | 581 | if (err) |
| 582 | goto out; | 582 | goto out; |
| 583 | if (state->end <= end) { | 583 | if (state->end <= end) { |
| 584 | set |= clear_state_bit(tree, state, bits, wake, | 584 | set |= clear_state_bit(tree, state, &bits, wake); |
| 585 | delete); | ||
| 586 | if (last_end == (u64)-1) | 585 | if (last_end == (u64)-1) |
| 587 | goto out; | 586 | goto out; |
| 588 | start = last_end + 1; | 587 | start = last_end + 1; |
| @@ -603,7 +602,7 @@ hit_next: | |||
| 603 | if (wake) | 602 | if (wake) |
| 604 | wake_up(&state->wq); | 603 | wake_up(&state->wq); |
| 605 | 604 | ||
| 606 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); | 605 | set |= clear_state_bit(tree, prealloc, &bits, wake); |
| 607 | 606 | ||
| 608 | prealloc = NULL; | 607 | prealloc = NULL; |
| 609 | goto out; | 608 | goto out; |
| @@ -614,7 +613,7 @@ hit_next: | |||
| 614 | else | 613 | else |
| 615 | next_node = NULL; | 614 | next_node = NULL; |
| 616 | 615 | ||
| 617 | set |= clear_state_bit(tree, state, bits, wake, delete); | 616 | set |= clear_state_bit(tree, state, &bits, wake); |
| 618 | if (last_end == (u64)-1) | 617 | if (last_end == (u64)-1) |
| 619 | goto out; | 618 | goto out; |
| 620 | start = last_end + 1; | 619 | start = last_end + 1; |
| @@ -707,19 +706,19 @@ out: | |||
| 707 | 706 | ||
| 708 | static int set_state_bits(struct extent_io_tree *tree, | 707 | static int set_state_bits(struct extent_io_tree *tree, |
| 709 | struct extent_state *state, | 708 | struct extent_state *state, |
| 710 | int bits) | 709 | int *bits) |
| 711 | { | 710 | { |
| 712 | int ret; | 711 | int ret; |
| 712 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
| 713 | 713 | ||
| 714 | ret = set_state_cb(tree, state, bits); | 714 | ret = set_state_cb(tree, state, bits); |
| 715 | if (ret) | 715 | if (ret) |
| 716 | return ret; | 716 | return ret; |
| 717 | 717 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | |
| 718 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | ||
| 719 | u64 range = state->end - state->start + 1; | 718 | u64 range = state->end - state->start + 1; |
| 720 | tree->dirty_bytes += range; | 719 | tree->dirty_bytes += range; |
| 721 | } | 720 | } |
| 722 | state->state |= bits; | 721 | state->state |= bits_to_set; |
| 723 | 722 | ||
| 724 | return 0; | 723 | return 0; |
| 725 | } | 724 | } |
| @@ -746,10 +745,9 @@ static void cache_state(struct extent_state *state, | |||
| 746 | * [start, end] is inclusive This takes the tree lock. | 745 | * [start, end] is inclusive This takes the tree lock. |
| 747 | */ | 746 | */ |
| 748 | 747 | ||
| 749 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 748 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 750 | int bits, int exclusive_bits, u64 *failed_start, | 749 | int bits, int exclusive_bits, u64 *failed_start, |
| 751 | struct extent_state **cached_state, | 750 | struct extent_state **cached_state, gfp_t mask) |
| 752 | gfp_t mask) | ||
| 753 | { | 751 | { |
| 754 | struct extent_state *state; | 752 | struct extent_state *state; |
| 755 | struct extent_state *prealloc = NULL; | 753 | struct extent_state *prealloc = NULL; |
| @@ -758,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 758 | u64 last_start; | 756 | u64 last_start; |
| 759 | u64 last_end; | 757 | u64 last_end; |
| 760 | 758 | ||
| 759 | bits |= EXTENT_FIRST_DELALLOC; | ||
| 761 | again: | 760 | again: |
| 762 | if (!prealloc && (mask & __GFP_WAIT)) { | 761 | if (!prealloc && (mask & __GFP_WAIT)) { |
| 763 | prealloc = alloc_extent_state(mask); | 762 | prealloc = alloc_extent_state(mask); |
| @@ -779,7 +778,7 @@ again: | |||
| 779 | */ | 778 | */ |
| 780 | node = tree_search(tree, start); | 779 | node = tree_search(tree, start); |
| 781 | if (!node) { | 780 | if (!node) { |
| 782 | err = insert_state(tree, prealloc, start, end, bits); | 781 | err = insert_state(tree, prealloc, start, end, &bits); |
| 783 | prealloc = NULL; | 782 | prealloc = NULL; |
| 784 | BUG_ON(err == -EEXIST); | 783 | BUG_ON(err == -EEXIST); |
| 785 | goto out; | 784 | goto out; |
| @@ -803,7 +802,7 @@ hit_next: | |||
| 803 | goto out; | 802 | goto out; |
| 804 | } | 803 | } |
| 805 | 804 | ||
| 806 | err = set_state_bits(tree, state, bits); | 805 | err = set_state_bits(tree, state, &bits); |
| 807 | if (err) | 806 | if (err) |
| 808 | goto out; | 807 | goto out; |
| 809 | 808 | ||
| @@ -853,7 +852,7 @@ hit_next: | |||
| 853 | if (err) | 852 | if (err) |
| 854 | goto out; | 853 | goto out; |
| 855 | if (state->end <= end) { | 854 | if (state->end <= end) { |
| 856 | err = set_state_bits(tree, state, bits); | 855 | err = set_state_bits(tree, state, &bits); |
| 857 | if (err) | 856 | if (err) |
| 858 | goto out; | 857 | goto out; |
| 859 | cache_state(state, cached_state); | 858 | cache_state(state, cached_state); |
| @@ -878,7 +877,7 @@ hit_next: | |||
| 878 | else | 877 | else |
| 879 | this_end = last_start - 1; | 878 | this_end = last_start - 1; |
| 880 | err = insert_state(tree, prealloc, start, this_end, | 879 | err = insert_state(tree, prealloc, start, this_end, |
| 881 | bits); | 880 | &bits); |
| 882 | BUG_ON(err == -EEXIST); | 881 | BUG_ON(err == -EEXIST); |
| 883 | if (err) { | 882 | if (err) { |
| 884 | prealloc = NULL; | 883 | prealloc = NULL; |
| @@ -904,7 +903,7 @@ hit_next: | |||
| 904 | err = split_state(tree, state, prealloc, end + 1); | 903 | err = split_state(tree, state, prealloc, end + 1); |
| 905 | BUG_ON(err == -EEXIST); | 904 | BUG_ON(err == -EEXIST); |
| 906 | 905 | ||
| 907 | err = set_state_bits(tree, prealloc, bits); | 906 | err = set_state_bits(tree, prealloc, &bits); |
| 908 | if (err) { | 907 | if (err) { |
| 909 | prealloc = NULL; | 908 | prealloc = NULL; |
| 910 | goto out; | 909 | goto out; |
| @@ -967,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 967 | { | 966 | { |
| 968 | return clear_extent_bit(tree, start, end, | 967 | return clear_extent_bit(tree, start, end, |
| 969 | EXTENT_DIRTY | EXTENT_DELALLOC | | 968 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 970 | EXTENT_DO_ACCOUNTING, 0, 0, | 969 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); |
| 971 | NULL, mask); | ||
| 972 | } | 970 | } |
| 973 | 971 | ||
| 974 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -1436,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1436 | if (op & EXTENT_CLEAR_DELALLOC) | 1434 | if (op & EXTENT_CLEAR_DELALLOC) |
| 1437 | clear_bits |= EXTENT_DELALLOC; | 1435 | clear_bits |= EXTENT_DELALLOC; |
| 1438 | 1436 | ||
| 1439 | if (op & EXTENT_CLEAR_ACCOUNTING) | ||
| 1440 | clear_bits |= EXTENT_DO_ACCOUNTING; | ||
| 1441 | |||
| 1442 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | 1437 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
| 1443 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 1438 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
| 1444 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | 1439 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | |
| @@ -1917,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
| 1917 | 1912 | ||
| 1918 | if (tree->ops && tree->ops->submit_bio_hook) | 1913 | if (tree->ops && tree->ops->submit_bio_hook) |
| 1919 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1914 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
| 1920 | mirror_num, bio_flags); | 1915 | mirror_num, bio_flags, start); |
| 1921 | else | 1916 | else |
| 1922 | submit_bio(rw, bio); | 1917 | submit_bio(rw, bio); |
| 1923 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 1918 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
| @@ -2021,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2021 | sector_t sector; | 2016 | sector_t sector; |
| 2022 | struct extent_map *em; | 2017 | struct extent_map *em; |
| 2023 | struct block_device *bdev; | 2018 | struct block_device *bdev; |
| 2019 | struct btrfs_ordered_extent *ordered; | ||
| 2024 | int ret; | 2020 | int ret; |
| 2025 | int nr = 0; | 2021 | int nr = 0; |
| 2026 | size_t page_offset = 0; | 2022 | size_t page_offset = 0; |
| @@ -2032,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2032 | set_page_extent_mapped(page); | 2028 | set_page_extent_mapped(page); |
| 2033 | 2029 | ||
| 2034 | end = page_end; | 2030 | end = page_end; |
| 2035 | lock_extent(tree, start, end, GFP_NOFS); | 2031 | while (1) { |
| 2032 | lock_extent(tree, start, end, GFP_NOFS); | ||
| 2033 | ordered = btrfs_lookup_ordered_extent(inode, start); | ||
| 2034 | if (!ordered) | ||
| 2035 | break; | ||
| 2036 | unlock_extent(tree, start, end, GFP_NOFS); | ||
| 2037 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 2038 | btrfs_put_ordered_extent(ordered); | ||
| 2039 | } | ||
| 2036 | 2040 | ||
| 2037 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | 2041 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { |
| 2038 | char *userpage; | 2042 | char *userpage; |
| @@ -2590,7 +2594,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
| 2590 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 2594 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
| 2591 | }; | 2595 | }; |
| 2592 | struct writeback_control wbc_writepages = { | 2596 | struct writeback_control wbc_writepages = { |
| 2593 | .bdi = wbc->bdi, | ||
| 2594 | .sync_mode = wbc->sync_mode, | 2597 | .sync_mode = wbc->sync_mode, |
| 2595 | .older_than_this = NULL, | 2598 | .older_than_this = NULL, |
| 2596 | .nr_to_write = 64, | 2599 | .nr_to_write = 64, |
| @@ -2624,7 +2627,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
| 2624 | .sync_io = mode == WB_SYNC_ALL, | 2627 | .sync_io = mode == WB_SYNC_ALL, |
| 2625 | }; | 2628 | }; |
| 2626 | struct writeback_control wbc_writepages = { | 2629 | struct writeback_control wbc_writepages = { |
| 2627 | .bdi = inode->i_mapping->backing_dev_info, | ||
| 2628 | .sync_mode = mode, | 2630 | .sync_mode = mode, |
| 2629 | .older_than_this = NULL, | 2631 | .older_than_this = NULL, |
| 2630 | .nr_to_write = nr_pages * 2, | 2632 | .nr_to_write = nr_pages * 2, |
| @@ -2679,33 +2681,20 @@ int extent_readpages(struct extent_io_tree *tree, | |||
| 2679 | { | 2681 | { |
| 2680 | struct bio *bio = NULL; | 2682 | struct bio *bio = NULL; |
| 2681 | unsigned page_idx; | 2683 | unsigned page_idx; |
| 2682 | struct pagevec pvec; | ||
| 2683 | unsigned long bio_flags = 0; | 2684 | unsigned long bio_flags = 0; |
| 2684 | 2685 | ||
| 2685 | pagevec_init(&pvec, 0); | ||
| 2686 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { | 2686 | for (page_idx = 0; page_idx < nr_pages; page_idx++) { |
| 2687 | struct page *page = list_entry(pages->prev, struct page, lru); | 2687 | struct page *page = list_entry(pages->prev, struct page, lru); |
| 2688 | 2688 | ||
| 2689 | prefetchw(&page->flags); | 2689 | prefetchw(&page->flags); |
| 2690 | list_del(&page->lru); | 2690 | list_del(&page->lru); |
| 2691 | /* | 2691 | if (!add_to_page_cache_lru(page, mapping, |
| 2692 | * what we want to do here is call add_to_page_cache_lru, | ||
| 2693 | * but that isn't exported, so we reproduce it here | ||
| 2694 | */ | ||
| 2695 | if (!add_to_page_cache(page, mapping, | ||
| 2696 | page->index, GFP_KERNEL)) { | 2692 | page->index, GFP_KERNEL)) { |
| 2697 | |||
| 2698 | /* open coding of lru_cache_add, also not exported */ | ||
| 2699 | page_cache_get(page); | ||
| 2700 | if (!pagevec_add(&pvec, page)) | ||
| 2701 | __pagevec_lru_add_file(&pvec); | ||
| 2702 | __extent_read_full_page(tree, page, get_extent, | 2693 | __extent_read_full_page(tree, page, get_extent, |
| 2703 | &bio, 0, &bio_flags); | 2694 | &bio, 0, &bio_flags); |
| 2704 | } | 2695 | } |
| 2705 | page_cache_release(page); | 2696 | page_cache_release(page); |
| 2706 | } | 2697 | } |
| 2707 | if (pagevec_count(&pvec)) | ||
| 2708 | __pagevec_lru_add_file(&pvec); | ||
| 2709 | BUG_ON(!list_empty(pages)); | 2698 | BUG_ON(!list_empty(pages)); |
| 2710 | if (bio) | 2699 | if (bio) |
| 2711 | submit_one_bio(READ, bio, 0, bio_flags); | 2700 | submit_one_bio(READ, bio, 0, bio_flags); |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bbab4813646f..5691c7b590da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -16,7 +16,9 @@ | |||
| 16 | #define EXTENT_BOUNDARY (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
| 17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
| 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
| 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | ||
| 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | ||
| 20 | 22 | ||
| 21 | /* flags for bio submission */ | 23 | /* flags for bio submission */ |
| 22 | #define EXTENT_BIO_COMPRESSED 1 | 24 | #define EXTENT_BIO_COMPRESSED 1 |
| @@ -47,7 +49,7 @@ struct extent_state; | |||
| 47 | 49 | ||
| 48 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 50 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
| 49 | struct bio *bio, int mirror_num, | 51 | struct bio *bio, int mirror_num, |
| 50 | unsigned long bio_flags); | 52 | unsigned long bio_flags, u64 bio_offset); |
| 51 | struct extent_io_ops { | 53 | struct extent_io_ops { |
| 52 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, | 54 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
| 53 | u64 start, u64 end, int *page_started, | 55 | u64 start, u64 end, int *page_started, |
| @@ -69,10 +71,10 @@ struct extent_io_ops { | |||
| 69 | struct extent_state *state); | 71 | struct extent_state *state); |
| 70 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 72 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
| 71 | struct extent_state *state, int uptodate); | 73 | struct extent_state *state, int uptodate); |
| 72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
| 73 | unsigned long old, unsigned long bits); | 75 | int *bits); |
| 74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 76 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
| 75 | unsigned long bits); | 77 | int *bits); |
| 76 | int (*merge_extent_hook)(struct inode *inode, | 78 | int (*merge_extent_hook)(struct inode *inode, |
| 77 | struct extent_state *new, | 79 | struct extent_state *new, |
| 78 | struct extent_state *other); | 80 | struct extent_state *other); |
| @@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
| 176 | u64 *start, u64 search_end, | 178 | u64 *start, u64 search_end, |
| 177 | u64 max_bytes, unsigned long bits); | 179 | u64 max_bytes, unsigned long bits); |
| 178 | 180 | ||
| 181 | void free_extent_state(struct extent_state *state); | ||
| 179 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 180 | int bits, int filled, struct extent_state *cached_state); | 183 | int bits, int filled, struct extent_state *cached_state); |
| 181 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 185 | gfp_t mask); | 188 | gfp_t mask); |
| 186 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 187 | int bits, gfp_t mask); | 190 | int bits, gfp_t mask); |
| 191 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 192 | int bits, int exclusive_bits, u64 *failed_start, | ||
| 193 | struct extent_state **cached_state, gfp_t mask); | ||
| 188 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 189 | gfp_t mask); | 195 | gfp_t mask); |
| 190 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 28d87ba60ce8..454ca52d6451 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
| @@ -1,5 +1,4 @@ | |||
| 1 | #include <linux/err.h> | 1 | #include <linux/err.h> |
| 2 | #include <linux/gfp.h> | ||
| 3 | #include <linux/slab.h> | 2 | #include <linux/slab.h> |
| 4 | #include <linux/module.h> | 3 | #include <linux/module.h> |
| 5 | #include <linux/spinlock.h> | 4 | #include <linux/spinlock.h> |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 9b99886562d0..a562a250ae77 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/bio.h> | 19 | #include <linux/bio.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include <linux/pagemap.h> | 21 | #include <linux/pagemap.h> |
| 21 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
| 22 | #include "ctree.h" | 23 | #include "ctree.h" |
| @@ -148,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
| 148 | } | 149 | } |
| 149 | 150 | ||
| 150 | 151 | ||
| 151 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 152 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
| 152 | struct bio *bio, u32 *dst) | 153 | struct inode *inode, struct bio *bio, |
| 154 | u64 logical_offset, u32 *dst, int dio) | ||
| 153 | { | 155 | { |
| 154 | u32 sum; | 156 | u32 sum; |
| 155 | struct bio_vec *bvec = bio->bi_io_vec; | 157 | struct bio_vec *bvec = bio->bi_io_vec; |
| 156 | int bio_index = 0; | 158 | int bio_index = 0; |
| 157 | u64 offset; | 159 | u64 offset = 0; |
| 158 | u64 item_start_offset = 0; | 160 | u64 item_start_offset = 0; |
| 159 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
| 160 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
| @@ -173,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
| 173 | WARN_ON(bio->bi_vcnt <= 0); | 175 | WARN_ON(bio->bi_vcnt <= 0); |
| 174 | 176 | ||
| 175 | disk_bytenr = (u64)bio->bi_sector << 9; | 177 | disk_bytenr = (u64)bio->bi_sector << 9; |
| 178 | if (dio) | ||
| 179 | offset = logical_offset; | ||
| 176 | while (bio_index < bio->bi_vcnt) { | 180 | while (bio_index < bio->bi_vcnt) { |
| 177 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 181 | if (!dio) |
| 182 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
| 178 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); | 183 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); |
| 179 | if (ret == 0) | 184 | if (ret == 0) |
| 180 | goto found; | 185 | goto found; |
| @@ -237,6 +242,7 @@ found: | |||
| 237 | else | 242 | else |
| 238 | set_state_private(io_tree, offset, sum); | 243 | set_state_private(io_tree, offset, sum); |
| 239 | disk_bytenr += bvec->bv_len; | 244 | disk_bytenr += bvec->bv_len; |
| 245 | offset += bvec->bv_len; | ||
| 240 | bio_index++; | 246 | bio_index++; |
| 241 | bvec++; | 247 | bvec++; |
| 242 | } | 248 | } |
| @@ -244,6 +250,18 @@ found: | |||
| 244 | return 0; | 250 | return 0; |
| 245 | } | 251 | } |
| 246 | 252 | ||
| 253 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
| 254 | struct bio *bio, u32 *dst) | ||
| 255 | { | ||
| 256 | return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); | ||
| 257 | } | ||
| 258 | |||
| 259 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
| 260 | struct bio *bio, u64 offset, u32 *dst) | ||
| 261 | { | ||
| 262 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | ||
| 263 | } | ||
| 264 | |||
| 247 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
| 248 | struct list_head *list) | 266 | struct list_head *list) |
| 249 | { | 267 | { |
| @@ -656,6 +674,9 @@ again: | |||
| 656 | goto found; | 674 | goto found; |
| 657 | } | 675 | } |
| 658 | ret = PTR_ERR(item); | 676 | ret = PTR_ERR(item); |
| 677 | if (ret != -EFBIG && ret != -ENOENT) | ||
| 678 | goto fail_unlock; | ||
| 679 | |||
| 659 | if (ret == -EFBIG) { | 680 | if (ret == -EFBIG) { |
| 660 | u32 item_size; | 681 | u32 item_size; |
| 661 | /* we found one, but it isn't big enough yet */ | 682 | /* we found one, but it isn't big enough yet */ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index ee3323c7fc1c..e354c33df082 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
| 29 | #include <linux/statfs.h> | 29 | #include <linux/statfs.h> |
| 30 | #include <linux/compat.h> | 30 | #include <linux/compat.h> |
| 31 | #include <linux/slab.h> | ||
| 31 | #include "ctree.h" | 32 | #include "ctree.h" |
| 32 | #include "disk-io.h" | 33 | #include "disk-io.h" |
| 33 | #include "transaction.h" | 34 | #include "transaction.h" |
| @@ -45,32 +46,42 @@ | |||
| 45 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
| 46 | int write_bytes, | 47 | int write_bytes, |
| 47 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
| 48 | const char __user *buf) | 49 | struct iov_iter *i) |
| 49 | { | 50 | { |
| 50 | long page_fault = 0; | 51 | size_t copied; |
| 51 | int i; | 52 | int pg = 0; |
| 52 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
| 53 | 54 | ||
| 54 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 55 | while (write_bytes > 0) { |
| 55 | size_t count = min_t(size_t, | 56 | size_t count = min_t(size_t, |
| 56 | PAGE_CACHE_SIZE - offset, write_bytes); | 57 | PAGE_CACHE_SIZE - offset, write_bytes); |
| 57 | struct page *page = prepared_pages[i]; | 58 | struct page *page = prepared_pages[pg]; |
| 58 | fault_in_pages_readable(buf, count); | 59 | again: |
| 60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | ||
| 61 | return -EFAULT; | ||
| 59 | 62 | ||
| 60 | /* Copy data from userspace to the current page */ | 63 | /* Copy data from userspace to the current page */ |
| 61 | kmap(page); | 64 | copied = iov_iter_copy_from_user(page, i, offset, count); |
| 62 | page_fault = __copy_from_user(page_address(page) + offset, | 65 | |
| 63 | buf, count); | ||
| 64 | /* Flush processor's dcache for this page */ | 66 | /* Flush processor's dcache for this page */ |
| 65 | flush_dcache_page(page); | 67 | flush_dcache_page(page); |
| 66 | kunmap(page); | 68 | iov_iter_advance(i, copied); |
| 67 | buf += count; | 69 | write_bytes -= copied; |
| 68 | write_bytes -= count; | ||
| 69 | 70 | ||
| 70 | if (page_fault) | 71 | if (unlikely(copied == 0)) { |
| 71 | break; | 72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, |
| 73 | iov_iter_single_seg_count(i)); | ||
| 74 | goto again; | ||
| 75 | } | ||
| 76 | |||
| 77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
| 78 | offset += copied; | ||
| 79 | } else { | ||
| 80 | pg++; | ||
| 81 | offset = 0; | ||
| 82 | } | ||
| 72 | } | 83 | } |
| 73 | return page_fault ? -EFAULT : 0; | 84 | return 0; |
| 74 | } | 85 | } |
| 75 | 86 | ||
| 76 | /* | 87 | /* |
| @@ -125,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 125 | end_of_last_block = start_pos + num_bytes - 1; | 136 | end_of_last_block = start_pos + num_bytes - 1; |
| 126 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 137 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
| 127 | NULL); | 138 | NULL); |
| 128 | if (err) | 139 | BUG_ON(err); |
| 129 | return err; | ||
| 130 | 140 | ||
| 131 | for (i = 0; i < num_pages; i++) { | 141 | for (i = 0; i < num_pages; i++) { |
| 132 | struct page *p = pages[i]; | 142 | struct page *p = pages[i]; |
| @@ -141,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 141 | * at this time. | 151 | * at this time. |
| 142 | */ | 152 | */ |
| 143 | } | 153 | } |
| 144 | return err; | 154 | return 0; |
| 145 | } | 155 | } |
| 146 | 156 | ||
| 147 | /* | 157 | /* |
| @@ -822,45 +832,46 @@ again: | |||
| 822 | return 0; | 832 | return 0; |
| 823 | } | 833 | } |
| 824 | 834 | ||
| 825 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
| 826 | size_t count, loff_t *ppos) | 836 | const struct iovec *iov, |
| 837 | unsigned long nr_segs, loff_t pos) | ||
| 827 | { | 838 | { |
| 828 | loff_t pos; | 839 | struct file *file = iocb->ki_filp; |
| 840 | struct inode *inode = fdentry(file)->d_inode; | ||
| 841 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 842 | struct page *pinned[2]; | ||
| 843 | struct page **pages = NULL; | ||
| 844 | struct iov_iter i; | ||
| 845 | loff_t *ppos = &iocb->ki_pos; | ||
| 829 | loff_t start_pos; | 846 | loff_t start_pos; |
| 830 | ssize_t num_written = 0; | 847 | ssize_t num_written = 0; |
| 831 | ssize_t err = 0; | 848 | ssize_t err = 0; |
| 849 | size_t count; | ||
| 850 | size_t ocount; | ||
| 832 | int ret = 0; | 851 | int ret = 0; |
| 833 | struct inode *inode = fdentry(file)->d_inode; | ||
| 834 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 835 | struct page **pages = NULL; | ||
| 836 | int nrptrs; | 852 | int nrptrs; |
| 837 | struct page *pinned[2]; | ||
| 838 | unsigned long first_index; | 853 | unsigned long first_index; |
| 839 | unsigned long last_index; | 854 | unsigned long last_index; |
| 840 | int will_write; | 855 | int will_write; |
| 856 | int buffered = 0; | ||
| 841 | 857 | ||
| 842 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
| 843 | (file->f_flags & O_DIRECT)); | 859 | (file->f_flags & O_DIRECT)); |
| 844 | 860 | ||
| 845 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
| 846 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
| 847 | pinned[0] = NULL; | 861 | pinned[0] = NULL; |
| 848 | pinned[1] = NULL; | 862 | pinned[1] = NULL; |
| 849 | 863 | ||
| 850 | pos = *ppos; | ||
| 851 | start_pos = pos; | 864 | start_pos = pos; |
| 852 | 865 | ||
| 853 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
| 854 | 867 | ||
| 855 | /* do the reserve before the mutex lock in case we have to do some | ||
| 856 | * flushing. We wouldn't deadlock, but this is more polite. | ||
| 857 | */ | ||
| 858 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 859 | if (err) | ||
| 860 | goto out_nolock; | ||
| 861 | |||
| 862 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
| 863 | 869 | ||
| 870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
| 871 | if (err) | ||
| 872 | goto out; | ||
| 873 | count = ocount; | ||
| 874 | |||
| 864 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 865 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 866 | if (err) | 877 | if (err) |
| @@ -874,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 874 | goto out; | 885 | goto out; |
| 875 | 886 | ||
| 876 | file_update_time(file); | 887 | file_update_time(file); |
| 888 | BTRFS_I(inode)->sequence++; | ||
| 889 | |||
| 890 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
| 891 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
| 892 | pos, ppos, count, | ||
| 893 | ocount); | ||
| 894 | /* | ||
| 895 | * the generic O_DIRECT will update in-memory i_size after the | ||
| 896 | * DIOs are done. But our endio handlers that update the on | ||
| 897 | * disk i_size never update past the in memory i_size. So we | ||
| 898 | * need one more update here to catch any additions to the | ||
| 899 | * file | ||
| 900 | */ | ||
| 901 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
| 902 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
| 903 | mark_inode_dirty(inode); | ||
| 904 | } | ||
| 877 | 905 | ||
| 906 | if (num_written < 0) { | ||
| 907 | ret = num_written; | ||
| 908 | num_written = 0; | ||
| 909 | goto out; | ||
| 910 | } else if (num_written == count) { | ||
| 911 | /* pick up pos changes done by the generic code */ | ||
| 912 | pos = *ppos; | ||
| 913 | goto out; | ||
| 914 | } | ||
| 915 | /* | ||
| 916 | * We are going to do buffered for the rest of the range, so we | ||
| 917 | * need to make sure to invalidate the buffered pages when we're | ||
| 918 | * done. | ||
| 919 | */ | ||
| 920 | buffered = 1; | ||
| 921 | pos += num_written; | ||
| 922 | } | ||
| 923 | |||
| 924 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
| 925 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
| 926 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
| 927 | (sizeof(struct page *))); | ||
| 878 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 928 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
| 879 | 929 | ||
| 880 | /* generic_write_checks can change our pos */ | 930 | /* generic_write_checks can change our pos */ |
| 881 | start_pos = pos; | 931 | start_pos = pos; |
| 882 | 932 | ||
| 883 | BTRFS_I(inode)->sequence++; | ||
| 884 | first_index = pos >> PAGE_CACHE_SHIFT; | 933 | first_index = pos >> PAGE_CACHE_SHIFT; |
| 885 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 934 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
| 886 | 935 | ||
| 887 | /* | 936 | /* |
| 888 | * there are lots of better ways to do this, but this code | 937 | * there are lots of better ways to do this, but this code |
| @@ -899,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 899 | unlock_page(pinned[0]); | 948 | unlock_page(pinned[0]); |
| 900 | } | 949 | } |
| 901 | } | 950 | } |
| 902 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 951 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
| 903 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 952 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
| 904 | if (!PageUptodate(pinned[1])) { | 953 | if (!PageUptodate(pinned[1])) { |
| 905 | ret = btrfs_readpage(NULL, pinned[1]); | 954 | ret = btrfs_readpage(NULL, pinned[1]); |
| @@ -910,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 910 | } | 959 | } |
| 911 | } | 960 | } |
| 912 | 961 | ||
| 913 | while (count > 0) { | 962 | while (iov_iter_count(&i) > 0) { |
| 914 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 963 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
| 915 | size_t write_bytes = min(count, nrptrs * | 964 | size_t write_bytes = min(iov_iter_count(&i), |
| 916 | (size_t)PAGE_CACHE_SIZE - | 965 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
| 917 | offset); | 966 | offset); |
| 918 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 967 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
| 919 | PAGE_CACHE_SHIFT; | 968 | PAGE_CACHE_SHIFT; |
| @@ -921,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 921 | WARN_ON(num_pages > nrptrs); | 970 | WARN_ON(num_pages > nrptrs); |
| 922 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 971 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
| 923 | 972 | ||
| 924 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); |
| 925 | if (ret) | 974 | if (ret) |
| 926 | goto out; | 975 | goto out; |
| 927 | 976 | ||
| @@ -929,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 929 | pos, first_index, last_index, | 978 | pos, first_index, last_index, |
| 930 | write_bytes); | 979 | write_bytes); |
| 931 | if (ret) { | 980 | if (ret) { |
| 932 | btrfs_free_reserved_data_space(root, inode, | 981 | btrfs_delalloc_release_space(inode, write_bytes); |
| 933 | write_bytes); | ||
| 934 | goto out; | 982 | goto out; |
| 935 | } | 983 | } |
| 936 | 984 | ||
| 937 | ret = btrfs_copy_from_user(pos, num_pages, | 985 | ret = btrfs_copy_from_user(pos, num_pages, |
| 938 | write_bytes, pages, buf); | 986 | write_bytes, pages, &i); |
| 939 | if (ret) { | 987 | if (ret == 0) { |
| 940 | btrfs_free_reserved_data_space(root, inode, | 988 | dirty_and_release_pages(NULL, root, file, pages, |
| 941 | write_bytes); | 989 | num_pages, pos, write_bytes); |
| 942 | btrfs_drop_pages(pages, num_pages); | ||
| 943 | goto out; | ||
| 944 | } | 990 | } |
| 945 | 991 | ||
| 946 | ret = dirty_and_release_pages(NULL, root, file, pages, | ||
| 947 | num_pages, pos, write_bytes); | ||
| 948 | btrfs_drop_pages(pages, num_pages); | 992 | btrfs_drop_pages(pages, num_pages); |
| 949 | if (ret) { | 993 | if (ret) { |
| 950 | btrfs_free_reserved_data_space(root, inode, | 994 | btrfs_delalloc_release_space(inode, write_bytes); |
| 951 | write_bytes); | ||
| 952 | goto out; | 995 | goto out; |
| 953 | } | 996 | } |
| 954 | 997 | ||
| @@ -964,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 964 | btrfs_throttle(root); | 1007 | btrfs_throttle(root); |
| 965 | } | 1008 | } |
| 966 | 1009 | ||
| 967 | buf += write_bytes; | ||
| 968 | count -= write_bytes; | ||
| 969 | pos += write_bytes; | 1010 | pos += write_bytes; |
| 970 | num_written += write_bytes; | 1011 | num_written += write_bytes; |
| 971 | 1012 | ||
| @@ -975,9 +1016,7 @@ out: | |||
| 975 | mutex_unlock(&inode->i_mutex); | 1016 | mutex_unlock(&inode->i_mutex); |
| 976 | if (ret) | 1017 | if (ret) |
| 977 | err = ret; | 1018 | err = ret; |
| 978 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 979 | 1019 | ||
| 980 | out_nolock: | ||
| 981 | kfree(pages); | 1020 | kfree(pages); |
| 982 | if (pinned[0]) | 1021 | if (pinned[0]) |
| 983 | page_cache_release(pinned[0]); | 1022 | page_cache_release(pinned[0]); |
| @@ -1007,7 +1046,7 @@ out_nolock: | |||
| 1007 | num_written = err; | 1046 | num_written = err; |
| 1008 | 1047 | ||
| 1009 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
| 1010 | trans = btrfs_start_transaction(root, 1); | 1049 | trans = btrfs_start_transaction(root, 0); |
| 1011 | ret = btrfs_log_dentry_safe(trans, root, | 1050 | ret = btrfs_log_dentry_safe(trans, root, |
| 1012 | file->f_dentry); | 1051 | file->f_dentry); |
| 1013 | if (ret == 0) { | 1052 | if (ret == 0) { |
| @@ -1022,7 +1061,7 @@ out_nolock: | |||
| 1022 | btrfs_end_transaction(trans, root); | 1061 | btrfs_end_transaction(trans, root); |
| 1023 | } | 1062 | } |
| 1024 | } | 1063 | } |
| 1025 | if (file->f_flags & O_DIRECT) { | 1064 | if (file->f_flags & O_DIRECT && buffered) { |
| 1026 | invalidate_mapping_pages(inode->i_mapping, | 1065 | invalidate_mapping_pages(inode->i_mapping, |
| 1027 | start_pos >> PAGE_CACHE_SHIFT, | 1066 | start_pos >> PAGE_CACHE_SHIFT, |
| 1028 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
| @@ -1062,8 +1101,9 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
| 1062 | * important optimization for directories because holding the mutex prevents | 1101 | * important optimization for directories because holding the mutex prevents |
| 1063 | * new operations on the dir while we write to disk. | 1102 | * new operations on the dir while we write to disk. |
| 1064 | */ | 1103 | */ |
| 1065 | int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | 1104 | int btrfs_sync_file(struct file *file, int datasync) |
| 1066 | { | 1105 | { |
| 1106 | struct dentry *dentry = file->f_path.dentry; | ||
| 1067 | struct inode *inode = dentry->d_inode; | 1107 | struct inode *inode = dentry->d_inode; |
| 1068 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1108 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1069 | int ret = 0; | 1109 | int ret = 0; |
| @@ -1100,12 +1140,12 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 1100 | /* | 1140 | /* |
| 1101 | * ok we haven't committed the transaction yet, lets do a commit | 1141 | * ok we haven't committed the transaction yet, lets do a commit |
| 1102 | */ | 1142 | */ |
| 1103 | if (file && file->private_data) | 1143 | if (file->private_data) |
| 1104 | btrfs_ioctl_trans_end(file); | 1144 | btrfs_ioctl_trans_end(file); |
| 1105 | 1145 | ||
| 1106 | trans = btrfs_start_transaction(root, 1); | 1146 | trans = btrfs_start_transaction(root, 0); |
| 1107 | if (!trans) { | 1147 | if (IS_ERR(trans)) { |
| 1108 | ret = -ENOMEM; | 1148 | ret = PTR_ERR(trans); |
| 1109 | goto out; | 1149 | goto out; |
| 1110 | } | 1150 | } |
| 1111 | 1151 | ||
| @@ -1150,17 +1190,25 @@ static const struct vm_operations_struct btrfs_file_vm_ops = { | |||
| 1150 | 1190 | ||
| 1151 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) | 1191 | static int btrfs_file_mmap(struct file *filp, struct vm_area_struct *vma) |
| 1152 | { | 1192 | { |
| 1153 | vma->vm_ops = &btrfs_file_vm_ops; | 1193 | struct address_space *mapping = filp->f_mapping; |
| 1194 | |||
| 1195 | if (!mapping->a_ops->readpage) | ||
| 1196 | return -ENOEXEC; | ||
| 1197 | |||
| 1154 | file_accessed(filp); | 1198 | file_accessed(filp); |
| 1199 | vma->vm_ops = &btrfs_file_vm_ops; | ||
| 1200 | vma->vm_flags |= VM_CAN_NONLINEAR; | ||
| 1201 | |||
| 1155 | return 0; | 1202 | return 0; |
| 1156 | } | 1203 | } |
| 1157 | 1204 | ||
| 1158 | const struct file_operations btrfs_file_operations = { | 1205 | const struct file_operations btrfs_file_operations = { |
| 1159 | .llseek = generic_file_llseek, | 1206 | .llseek = generic_file_llseek, |
| 1160 | .read = do_sync_read, | 1207 | .read = do_sync_read, |
| 1208 | .write = do_sync_write, | ||
| 1161 | .aio_read = generic_file_aio_read, | 1209 | .aio_read = generic_file_aio_read, |
| 1162 | .splice_read = generic_file_splice_read, | 1210 | .splice_read = generic_file_splice_read, |
| 1163 | .write = btrfs_file_write, | 1211 | .aio_write = btrfs_file_aio_write, |
| 1164 | .mmap = btrfs_file_mmap, | 1212 | .mmap = btrfs_file_mmap, |
| 1165 | .open = generic_file_open, | 1213 | .open = generic_file_open, |
| 1166 | .release = btrfs_release_file, | 1214 | .release = btrfs_release_file, |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index dd831ed31eea..f488fac04d99 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | 18 | ||
| 19 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
| 20 | #include <linux/sched.h> | 20 | #include <linux/sched.h> |
| 21 | #include <linux/slab.h> | ||
| 21 | #include <linux/math64.h> | 22 | #include <linux/math64.h> |
| 22 | #include "ctree.h" | 23 | #include "ctree.h" |
| 23 | #include "free-space-cache.h" | 24 | #include "free-space-cache.h" |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72ce3c173d6a..64f1150bb48d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
| @@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
| 49 | return 0; | 49 | return 0; |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | struct btrfs_inode_ref * | ||
| 53 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
| 54 | struct btrfs_root *root, | ||
| 55 | struct btrfs_path *path, | ||
| 56 | const char *name, int name_len, | ||
| 57 | u64 inode_objectid, u64 ref_objectid, int mod) | ||
| 58 | { | ||
| 59 | struct btrfs_key key; | ||
| 60 | struct btrfs_inode_ref *ref; | ||
| 61 | int ins_len = mod < 0 ? -1 : 0; | ||
| 62 | int cow = mod != 0; | ||
| 63 | int ret; | ||
| 64 | |||
| 65 | key.objectid = inode_objectid; | ||
| 66 | key.type = BTRFS_INODE_REF_KEY; | ||
| 67 | key.offset = ref_objectid; | ||
| 68 | |||
| 69 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
| 70 | if (ret < 0) | ||
| 71 | return ERR_PTR(ret); | ||
| 72 | if (ret > 0) | ||
| 73 | return NULL; | ||
| 74 | if (!find_name_in_backref(path, name, name_len, &ref)) | ||
| 75 | return NULL; | ||
| 76 | return ref; | ||
| 77 | } | ||
| 78 | |||
| 52 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 79 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, |
| 53 | struct btrfs_root *root, | 80 | struct btrfs_root *root, |
| 54 | const char *name, int name_len, | 81 | const char *name, int name_len, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 02bb099845fd..1bff92ad4744 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <linux/xattr.h> | 36 | #include <linux/xattr.h> |
| 37 | #include <linux/posix_acl.h> | 37 | #include <linux/posix_acl.h> |
| 38 | #include <linux/falloc.h> | 38 | #include <linux/falloc.h> |
| 39 | #include <linux/slab.h> | ||
| 39 | #include "compat.h" | 40 | #include "compat.h" |
| 40 | #include "ctree.h" | 41 | #include "ctree.h" |
| 41 | #include "disk-io.h" | 42 | #include "disk-io.h" |
| @@ -251,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 251 | inline_len, compressed_size, | 252 | inline_len, compressed_size, |
| 252 | compressed_pages); | 253 | compressed_pages); |
| 253 | BUG_ON(ret); | 254 | BUG_ON(ret); |
| 255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | ||
| 254 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
| 255 | return 0; | 257 | return 0; |
| 256 | } | 258 | } |
| @@ -413,6 +415,7 @@ again: | |||
| 413 | trans = btrfs_join_transaction(root, 1); | 415 | trans = btrfs_join_transaction(root, 1); |
| 414 | BUG_ON(!trans); | 416 | BUG_ON(!trans); |
| 415 | btrfs_set_trans_block_group(trans, inode); | 417 | btrfs_set_trans_block_group(trans, inode); |
| 418 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 416 | 419 | ||
| 417 | /* lets try to make an inline extent */ | 420 | /* lets try to make an inline extent */ |
| 418 | if (ret || total_in < (actual_end - start)) { | 421 | if (ret || total_in < (actual_end - start)) { |
| @@ -438,7 +441,6 @@ again: | |||
| 438 | start, end, NULL, | 441 | start, end, NULL, |
| 439 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 442 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
| 440 | EXTENT_CLEAR_DELALLOC | | 443 | EXTENT_CLEAR_DELALLOC | |
| 441 | EXTENT_CLEAR_ACCOUNTING | | ||
| 442 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | 444 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); |
| 443 | 445 | ||
| 444 | btrfs_end_transaction(trans, root); | 446 | btrfs_end_transaction(trans, root); |
| @@ -696,6 +698,38 @@ retry: | |||
| 696 | return 0; | 698 | return 0; |
| 697 | } | 699 | } |
| 698 | 700 | ||
| 701 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | ||
| 702 | u64 num_bytes) | ||
| 703 | { | ||
| 704 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 705 | struct extent_map *em; | ||
| 706 | u64 alloc_hint = 0; | ||
| 707 | |||
| 708 | read_lock(&em_tree->lock); | ||
| 709 | em = search_extent_mapping(em_tree, start, num_bytes); | ||
| 710 | if (em) { | ||
| 711 | /* | ||
| 712 | * if block start isn't an actual block number then find the | ||
| 713 | * first block in this inode and use that as a hint. If that | ||
| 714 | * block is also bogus then just don't worry about it. | ||
| 715 | */ | ||
| 716 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
| 717 | free_extent_map(em); | ||
| 718 | em = search_extent_mapping(em_tree, 0, 0); | ||
| 719 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
| 720 | alloc_hint = em->block_start; | ||
| 721 | if (em) | ||
| 722 | free_extent_map(em); | ||
| 723 | } else { | ||
| 724 | alloc_hint = em->block_start; | ||
| 725 | free_extent_map(em); | ||
| 726 | } | ||
| 727 | } | ||
| 728 | read_unlock(&em_tree->lock); | ||
| 729 | |||
| 730 | return alloc_hint; | ||
| 731 | } | ||
| 732 | |||
| 699 | /* | 733 | /* |
| 700 | * when extent_io.c finds a delayed allocation range in the file, | 734 | * when extent_io.c finds a delayed allocation range in the file, |
| 701 | * the call backs end up in this code. The basic idea is to | 735 | * the call backs end up in this code. The basic idea is to |
| @@ -733,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 733 | trans = btrfs_join_transaction(root, 1); | 767 | trans = btrfs_join_transaction(root, 1); |
| 734 | BUG_ON(!trans); | 768 | BUG_ON(!trans); |
| 735 | btrfs_set_trans_block_group(trans, inode); | 769 | btrfs_set_trans_block_group(trans, inode); |
| 770 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 736 | 771 | ||
| 737 | actual_end = min_t(u64, isize, end + 1); | 772 | actual_end = min_t(u64, isize, end + 1); |
| 738 | 773 | ||
| @@ -752,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 752 | EXTENT_CLEAR_UNLOCK_PAGE | | 787 | EXTENT_CLEAR_UNLOCK_PAGE | |
| 753 | EXTENT_CLEAR_UNLOCK | | 788 | EXTENT_CLEAR_UNLOCK | |
| 754 | EXTENT_CLEAR_DELALLOC | | 789 | EXTENT_CLEAR_DELALLOC | |
| 755 | EXTENT_CLEAR_ACCOUNTING | | ||
| 756 | EXTENT_CLEAR_DIRTY | | 790 | EXTENT_CLEAR_DIRTY | |
| 757 | EXTENT_SET_WRITEBACK | | 791 | EXTENT_SET_WRITEBACK | |
| 758 | EXTENT_END_WRITEBACK); | 792 | EXTENT_END_WRITEBACK); |
| @@ -768,35 +802,13 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 768 | BUG_ON(disk_num_bytes > | 802 | BUG_ON(disk_num_bytes > |
| 769 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 803 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
| 770 | 804 | ||
| 771 | 805 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | |
| 772 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 773 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
| 774 | start, num_bytes); | ||
| 775 | if (em) { | ||
| 776 | /* | ||
| 777 | * if block start isn't an actual block number then find the | ||
| 778 | * first block in this inode and use that as a hint. If that | ||
| 779 | * block is also bogus then just don't worry about it. | ||
| 780 | */ | ||
| 781 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
| 782 | free_extent_map(em); | ||
| 783 | em = search_extent_mapping(em_tree, 0, 0); | ||
| 784 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
| 785 | alloc_hint = em->block_start; | ||
| 786 | if (em) | ||
| 787 | free_extent_map(em); | ||
| 788 | } else { | ||
| 789 | alloc_hint = em->block_start; | ||
| 790 | free_extent_map(em); | ||
| 791 | } | ||
| 792 | } | ||
| 793 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 794 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 806 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
| 795 | 807 | ||
| 796 | while (disk_num_bytes > 0) { | 808 | while (disk_num_bytes > 0) { |
| 797 | unsigned long op; | 809 | unsigned long op; |
| 798 | 810 | ||
| 799 | cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); | 811 | cur_alloc_size = disk_num_bytes; |
| 800 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, | 812 | ret = btrfs_reserve_extent(trans, root, cur_alloc_size, |
| 801 | root->sectorsize, 0, alloc_hint, | 813 | root->sectorsize, 0, alloc_hint, |
| 802 | (u64)-1, &ins, 1); | 814 | (u64)-1, &ins, 1); |
| @@ -1173,6 +1185,13 @@ out_check: | |||
| 1173 | num_bytes, num_bytes, type); | 1185 | num_bytes, num_bytes, type); |
| 1174 | BUG_ON(ret); | 1186 | BUG_ON(ret); |
| 1175 | 1187 | ||
| 1188 | if (root->root_key.objectid == | ||
| 1189 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
| 1190 | ret = btrfs_reloc_clone_csums(inode, cur_offset, | ||
| 1191 | num_bytes); | ||
| 1192 | BUG_ON(ret); | ||
| 1193 | } | ||
| 1194 | |||
| 1176 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1195 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 1177 | cur_offset, cur_offset + num_bytes - 1, | 1196 | cur_offset, cur_offset + num_bytes - 1, |
| 1178 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | | 1197 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
| @@ -1225,36 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1225 | } | 1244 | } |
| 1226 | 1245 | ||
| 1227 | static int btrfs_split_extent_hook(struct inode *inode, | 1246 | static int btrfs_split_extent_hook(struct inode *inode, |
| 1228 | struct extent_state *orig, u64 split) | 1247 | struct extent_state *orig, u64 split) |
| 1229 | { | 1248 | { |
| 1230 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1249 | /* not delalloc, ignore it */ |
| 1231 | u64 size; | ||
| 1232 | |||
| 1233 | if (!(orig->state & EXTENT_DELALLOC)) | 1250 | if (!(orig->state & EXTENT_DELALLOC)) |
| 1234 | return 0; | 1251 | return 0; |
| 1235 | 1252 | ||
| 1236 | size = orig->end - orig->start + 1; | 1253 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
| 1237 | if (size > root->fs_info->max_extent) { | ||
| 1238 | u64 num_extents; | ||
| 1239 | u64 new_size; | ||
| 1240 | |||
| 1241 | new_size = orig->end - split + 1; | ||
| 1242 | num_extents = div64_u64(size + root->fs_info->max_extent - 1, | ||
| 1243 | root->fs_info->max_extent); | ||
| 1244 | |||
| 1245 | /* | ||
| 1246 | * if we break a large extent up then leave oustanding_extents | ||
| 1247 | * be, since we've already accounted for the large extent. | ||
| 1248 | */ | ||
| 1249 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1250 | root->fs_info->max_extent) < num_extents) | ||
| 1251 | return 0; | ||
| 1252 | } | ||
| 1253 | |||
| 1254 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1255 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1256 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1257 | |||
| 1258 | return 0; | 1254 | return 0; |
| 1259 | } | 1255 | } |
| 1260 | 1256 | ||
| @@ -1268,42 +1264,11 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
| 1268 | struct extent_state *new, | 1264 | struct extent_state *new, |
| 1269 | struct extent_state *other) | 1265 | struct extent_state *other) |
| 1270 | { | 1266 | { |
| 1271 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 1272 | u64 new_size, old_size; | ||
| 1273 | u64 num_extents; | ||
| 1274 | |||
| 1275 | /* not delalloc, ignore it */ | 1267 | /* not delalloc, ignore it */ |
| 1276 | if (!(other->state & EXTENT_DELALLOC)) | 1268 | if (!(other->state & EXTENT_DELALLOC)) |
| 1277 | return 0; | 1269 | return 0; |
| 1278 | 1270 | ||
| 1279 | old_size = other->end - other->start + 1; | 1271 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
| 1280 | if (new->start < other->start) | ||
| 1281 | new_size = other->end - new->start + 1; | ||
| 1282 | else | ||
| 1283 | new_size = new->end - other->start + 1; | ||
| 1284 | |||
| 1285 | /* we're not bigger than the max, unreserve the space and go */ | ||
| 1286 | if (new_size <= root->fs_info->max_extent) { | ||
| 1287 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1288 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1289 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1290 | return 0; | ||
| 1291 | } | ||
| 1292 | |||
| 1293 | /* | ||
| 1294 | * If we grew by another max_extent, just return, we want to keep that | ||
| 1295 | * reserved amount. | ||
| 1296 | */ | ||
| 1297 | num_extents = div64_u64(old_size + root->fs_info->max_extent - 1, | ||
| 1298 | root->fs_info->max_extent); | ||
| 1299 | if (div64_u64(new_size + root->fs_info->max_extent - 1, | ||
| 1300 | root->fs_info->max_extent) > num_extents) | ||
| 1301 | return 0; | ||
| 1302 | |||
| 1303 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1304 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1305 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1306 | |||
| 1307 | return 0; | 1272 | return 0; |
| 1308 | } | 1273 | } |
| 1309 | 1274 | ||
| @@ -1312,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
| 1312 | * bytes in this file, and to maintain the list of inodes that | 1277 | * bytes in this file, and to maintain the list of inodes that |
| 1313 | * have pending delalloc work to be done. | 1278 | * have pending delalloc work to be done. |
| 1314 | */ | 1279 | */ |
| 1315 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1280 | static int btrfs_set_bit_hook(struct inode *inode, |
| 1316 | unsigned long old, unsigned long bits) | 1281 | struct extent_state *state, int *bits) |
| 1317 | { | 1282 | { |
| 1318 | 1283 | ||
| 1319 | /* | 1284 | /* |
| @@ -1321,16 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1321 | * but in this case, we are only testeing for the DELALLOC | 1286 | * but in this case, we are only testeing for the DELALLOC |
| 1322 | * bit, which is only set or cleared with irqs on | 1287 | * bit, which is only set or cleared with irqs on |
| 1323 | */ | 1288 | */ |
| 1324 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1289 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
| 1325 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1290 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1291 | u64 len = state->end + 1 - state->start; | ||
| 1292 | |||
| 1293 | if (*bits & EXTENT_FIRST_DELALLOC) | ||
| 1294 | *bits &= ~EXTENT_FIRST_DELALLOC; | ||
| 1295 | else | ||
| 1296 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
| 1326 | 1297 | ||
| 1327 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 1328 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1329 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1330 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | ||
| 1331 | spin_lock(&root->fs_info->delalloc_lock); | 1298 | spin_lock(&root->fs_info->delalloc_lock); |
| 1332 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1299 | BTRFS_I(inode)->delalloc_bytes += len; |
| 1333 | root->fs_info->delalloc_bytes += end - start + 1; | 1300 | root->fs_info->delalloc_bytes += len; |
| 1334 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1301 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| 1335 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1302 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, |
| 1336 | &root->fs_info->delalloc_inodes); | 1303 | &root->fs_info->delalloc_inodes); |
| @@ -1344,44 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1344 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1311 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
| 1345 | */ | 1312 | */ |
| 1346 | static int btrfs_clear_bit_hook(struct inode *inode, | 1313 | static int btrfs_clear_bit_hook(struct inode *inode, |
| 1347 | struct extent_state *state, unsigned long bits) | 1314 | struct extent_state *state, int *bits) |
| 1348 | { | 1315 | { |
| 1349 | /* | 1316 | /* |
| 1350 | * set_bit and clear bit hooks normally require _irqsave/restore | 1317 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1351 | * but in this case, we are only testeing for the DELALLOC | 1318 | * but in this case, we are only testeing for the DELALLOC |
| 1352 | * bit, which is only set or cleared with irqs on | 1319 | * bit, which is only set or cleared with irqs on |
| 1353 | */ | 1320 | */ |
| 1354 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1321 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
| 1355 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1322 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1323 | u64 len = state->end + 1 - state->start; | ||
| 1356 | 1324 | ||
| 1357 | if (bits & EXTENT_DO_ACCOUNTING) { | 1325 | if (*bits & EXTENT_FIRST_DELALLOC) |
| 1358 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1326 | *bits &= ~EXTENT_FIRST_DELALLOC; |
| 1359 | BTRFS_I(inode)->outstanding_extents--; | 1327 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) |
| 1360 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1328 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
| 1361 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | 1329 | |
| 1362 | } | 1330 | if (*bits & EXTENT_DO_ACCOUNTING) |
| 1331 | btrfs_delalloc_release_metadata(inode, len); | ||
| 1332 | |||
| 1333 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
| 1334 | btrfs_free_reserved_data_space(inode, len); | ||
| 1363 | 1335 | ||
| 1364 | spin_lock(&root->fs_info->delalloc_lock); | 1336 | spin_lock(&root->fs_info->delalloc_lock); |
| 1365 | if (state->end - state->start + 1 > | 1337 | root->fs_info->delalloc_bytes -= len; |
| 1366 | root->fs_info->delalloc_bytes) { | 1338 | BTRFS_I(inode)->delalloc_bytes -= len; |
| 1367 | printk(KERN_INFO "btrfs warning: delalloc account " | 1339 | |
| 1368 | "%llu %llu\n", | ||
| 1369 | (unsigned long long) | ||
| 1370 | state->end - state->start + 1, | ||
| 1371 | (unsigned long long) | ||
| 1372 | root->fs_info->delalloc_bytes); | ||
| 1373 | btrfs_delalloc_free_space(root, inode, (u64)-1); | ||
| 1374 | root->fs_info->delalloc_bytes = 0; | ||
| 1375 | BTRFS_I(inode)->delalloc_bytes = 0; | ||
| 1376 | } else { | ||
| 1377 | btrfs_delalloc_free_space(root, inode, | ||
| 1378 | state->end - | ||
| 1379 | state->start + 1); | ||
| 1380 | root->fs_info->delalloc_bytes -= state->end - | ||
| 1381 | state->start + 1; | ||
| 1382 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
| 1383 | state->start + 1; | ||
| 1384 | } | ||
| 1385 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
| 1386 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| 1387 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1342 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); |
| @@ -1430,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
| 1430 | */ | 1385 | */ |
| 1431 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, | 1386 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, |
| 1432 | struct bio *bio, int mirror_num, | 1387 | struct bio *bio, int mirror_num, |
| 1433 | unsigned long bio_flags) | 1388 | unsigned long bio_flags, |
| 1389 | u64 bio_offset) | ||
| 1434 | { | 1390 | { |
| 1435 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1391 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1436 | int ret = 0; | 1392 | int ret = 0; |
| @@ -1449,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, | |||
| 1449 | * are inserted into the btree | 1405 | * are inserted into the btree |
| 1450 | */ | 1406 | */ |
| 1451 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 1407 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
| 1452 | int mirror_num, unsigned long bio_flags) | 1408 | int mirror_num, unsigned long bio_flags, |
| 1409 | u64 bio_offset) | ||
| 1453 | { | 1410 | { |
| 1454 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1411 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1455 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1412 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); |
| @@ -1460,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 1460 | * on write, or reading the csums from the tree before a read | 1417 | * on write, or reading the csums from the tree before a read |
| 1461 | */ | 1418 | */ |
| 1462 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 1419 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 1463 | int mirror_num, unsigned long bio_flags) | 1420 | int mirror_num, unsigned long bio_flags, |
| 1421 | u64 bio_offset) | ||
| 1464 | { | 1422 | { |
| 1465 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1423 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1466 | int ret = 0; | 1424 | int ret = 0; |
| @@ -1485,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 1485 | /* we're doing a write, do the async checksumming */ | 1443 | /* we're doing a write, do the async checksumming */ |
| 1486 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1444 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 1487 | inode, rw, bio, mirror_num, | 1445 | inode, rw, bio, mirror_num, |
| 1488 | bio_flags, __btrfs_submit_bio_start, | 1446 | bio_flags, bio_offset, |
| 1447 | __btrfs_submit_bio_start, | ||
| 1489 | __btrfs_submit_bio_done); | 1448 | __btrfs_submit_bio_done); |
| 1490 | } | 1449 | } |
| 1491 | 1450 | ||
| @@ -1566,6 +1525,7 @@ again: | |||
| 1566 | goto again; | 1525 | goto again; |
| 1567 | } | 1526 | } |
| 1568 | 1527 | ||
| 1528 | BUG(); | ||
| 1569 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); | 1529 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); |
| 1570 | ClearPageChecked(page); | 1530 | ClearPageChecked(page); |
| 1571 | out: | 1531 | out: |
| @@ -1696,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1696 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1656 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) |
| 1697 | { | 1657 | { |
| 1698 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1658 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1699 | struct btrfs_trans_handle *trans; | 1659 | struct btrfs_trans_handle *trans = NULL; |
| 1700 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1660 | struct btrfs_ordered_extent *ordered_extent = NULL; |
| 1701 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1661 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 1702 | struct extent_state *cached_state = NULL; | 1662 | struct extent_state *cached_state = NULL; |
| @@ -1714,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1714 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1674 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1715 | if (!ret) { | 1675 | if (!ret) { |
| 1716 | trans = btrfs_join_transaction(root, 1); | 1676 | trans = btrfs_join_transaction(root, 1); |
| 1677 | btrfs_set_trans_block_group(trans, inode); | ||
| 1678 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1717 | ret = btrfs_update_inode(trans, root, inode); | 1679 | ret = btrfs_update_inode(trans, root, inode); |
| 1718 | BUG_ON(ret); | 1680 | BUG_ON(ret); |
| 1719 | btrfs_end_transaction(trans, root); | ||
| 1720 | } | 1681 | } |
| 1721 | goto out; | 1682 | goto out; |
| 1722 | } | 1683 | } |
| @@ -1726,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1726 | 0, &cached_state, GFP_NOFS); | 1687 | 0, &cached_state, GFP_NOFS); |
| 1727 | 1688 | ||
| 1728 | trans = btrfs_join_transaction(root, 1); | 1689 | trans = btrfs_join_transaction(root, 1); |
| 1690 | btrfs_set_trans_block_group(trans, inode); | ||
| 1691 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1729 | 1692 | ||
| 1730 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1693 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
| 1731 | compressed = 1; | 1694 | compressed = 1; |
| @@ -1757,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1757 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1720 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
| 1758 | &ordered_extent->list); | 1721 | &ordered_extent->list); |
| 1759 | 1722 | ||
| 1760 | /* this also removes the ordered extent from the tree */ | ||
| 1761 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1723 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1762 | ret = btrfs_update_inode(trans, root, inode); | 1724 | ret = btrfs_update_inode(trans, root, inode); |
| 1763 | BUG_ON(ret); | 1725 | BUG_ON(ret); |
| 1764 | btrfs_end_transaction(trans, root); | ||
| 1765 | out: | 1726 | out: |
| 1727 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | ||
| 1728 | if (trans) | ||
| 1729 | btrfs_end_transaction(trans, root); | ||
| 1766 | /* once for us */ | 1730 | /* once for us */ |
| 1767 | btrfs_put_ordered_extent(ordered_extent); | 1731 | btrfs_put_ordered_extent(ordered_extent); |
| 1768 | /* once for the tree */ | 1732 | /* once for the tree */ |
| @@ -1884,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 1884 | 1848 | ||
| 1885 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1849 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
| 1886 | failrec->last_mirror, | 1850 | failrec->last_mirror, |
| 1887 | failrec->bio_flags); | 1851 | failrec->bio_flags, 0); |
| 1888 | return 0; | 1852 | return 0; |
| 1889 | } | 1853 | } |
| 1890 | 1854 | ||
| @@ -2039,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
| 2039 | } | 2003 | } |
| 2040 | 2004 | ||
| 2041 | /* | 2005 | /* |
| 2006 | * calculate extra metadata reservation when snapshotting a subvolume | ||
| 2007 | * contains orphan files. | ||
| 2008 | */ | ||
| 2009 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2010 | struct btrfs_pending_snapshot *pending, | ||
| 2011 | u64 *bytes_to_reserve) | ||
| 2012 | { | ||
| 2013 | struct btrfs_root *root; | ||
| 2014 | struct btrfs_block_rsv *block_rsv; | ||
| 2015 | u64 num_bytes; | ||
| 2016 | int index; | ||
| 2017 | |||
| 2018 | root = pending->root; | ||
| 2019 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
| 2020 | return; | ||
| 2021 | |||
| 2022 | block_rsv = root->orphan_block_rsv; | ||
| 2023 | |||
| 2024 | /* orphan block reservation for the snapshot */ | ||
| 2025 | num_bytes = block_rsv->size; | ||
| 2026 | |||
| 2027 | /* | ||
| 2028 | * after the snapshot is created, COWing tree blocks may use more | ||
| 2029 | * space than it frees. So we should make sure there is enough | ||
| 2030 | * reserved space. | ||
| 2031 | */ | ||
| 2032 | index = trans->transid & 0x1; | ||
| 2033 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
| 2034 | num_bytes += block_rsv->size - | ||
| 2035 | (block_rsv->reserved + block_rsv->freed[index]); | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | *bytes_to_reserve += num_bytes; | ||
| 2039 | } | ||
| 2040 | |||
| 2041 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2042 | struct btrfs_pending_snapshot *pending) | ||
| 2043 | { | ||
| 2044 | struct btrfs_root *root = pending->root; | ||
| 2045 | struct btrfs_root *snap = pending->snap; | ||
| 2046 | struct btrfs_block_rsv *block_rsv; | ||
| 2047 | u64 num_bytes; | ||
| 2048 | int index; | ||
| 2049 | int ret; | ||
| 2050 | |||
| 2051 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
| 2052 | return; | ||
| 2053 | |||
| 2054 | /* refill source subvolume's orphan block reservation */ | ||
| 2055 | block_rsv = root->orphan_block_rsv; | ||
| 2056 | index = trans->transid & 0x1; | ||
| 2057 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
| 2058 | num_bytes = block_rsv->size - | ||
| 2059 | (block_rsv->reserved + block_rsv->freed[index]); | ||
| 2060 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 2061 | root->orphan_block_rsv, | ||
| 2062 | num_bytes); | ||
| 2063 | BUG_ON(ret); | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | /* setup orphan block reservation for the snapshot */ | ||
| 2067 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
| 2068 | BUG_ON(!block_rsv); | ||
| 2069 | |||
| 2070 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
| 2071 | snap->orphan_block_rsv = block_rsv; | ||
| 2072 | |||
| 2073 | num_bytes = root->orphan_block_rsv->size; | ||
| 2074 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 2075 | block_rsv, num_bytes); | ||
| 2076 | BUG_ON(ret); | ||
| 2077 | |||
| 2078 | #if 0 | ||
| 2079 | /* insert orphan item for the snapshot */ | ||
| 2080 | WARN_ON(!root->orphan_item_inserted); | ||
| 2081 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
| 2082 | snap->root_key.objectid); | ||
| 2083 | BUG_ON(ret); | ||
| 2084 | snap->orphan_item_inserted = 1; | ||
| 2085 | #endif | ||
| 2086 | } | ||
| 2087 | |||
| 2088 | enum btrfs_orphan_cleanup_state { | ||
| 2089 | ORPHAN_CLEANUP_STARTED = 1, | ||
| 2090 | ORPHAN_CLEANUP_DONE = 2, | ||
| 2091 | }; | ||
| 2092 | |||
| 2093 | /* | ||
| 2094 | * This is called in transaction commmit time. If there are no orphan | ||
| 2095 | * files in the subvolume, it removes orphan item and frees block_rsv | ||
| 2096 | * structure. | ||
| 2097 | */ | ||
| 2098 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
| 2099 | struct btrfs_root *root) | ||
| 2100 | { | ||
| 2101 | int ret; | ||
| 2102 | |||
| 2103 | if (!list_empty(&root->orphan_list) || | ||
| 2104 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | ||
| 2105 | return; | ||
| 2106 | |||
| 2107 | if (root->orphan_item_inserted && | ||
| 2108 | btrfs_root_refs(&root->root_item) > 0) { | ||
| 2109 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | ||
| 2110 | root->root_key.objectid); | ||
| 2111 | BUG_ON(ret); | ||
| 2112 | root->orphan_item_inserted = 0; | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | if (root->orphan_block_rsv) { | ||
| 2116 | WARN_ON(root->orphan_block_rsv->size > 0); | ||
| 2117 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | ||
| 2118 | root->orphan_block_rsv = NULL; | ||
| 2119 | } | ||
| 2120 | } | ||
| 2121 | |||
| 2122 | /* | ||
| 2042 | * This creates an orphan entry for the given inode in case something goes | 2123 | * This creates an orphan entry for the given inode in case something goes |
| 2043 | * wrong in the middle of an unlink/truncate. | 2124 | * wrong in the middle of an unlink/truncate. |
| 2125 | * | ||
| 2126 | * NOTE: caller of this function should reserve 5 units of metadata for | ||
| 2127 | * this function. | ||
| 2044 | */ | 2128 | */ |
| 2045 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | 2129 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) |
| 2046 | { | 2130 | { |
| 2047 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2131 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2048 | int ret = 0; | 2132 | struct btrfs_block_rsv *block_rsv = NULL; |
| 2133 | int reserve = 0; | ||
| 2134 | int insert = 0; | ||
| 2135 | int ret; | ||
| 2049 | 2136 | ||
| 2050 | spin_lock(&root->list_lock); | 2137 | if (!root->orphan_block_rsv) { |
| 2138 | block_rsv = btrfs_alloc_block_rsv(root); | ||
| 2139 | BUG_ON(!block_rsv); | ||
| 2140 | } | ||
| 2051 | 2141 | ||
| 2052 | /* already on the orphan list, we're good */ | 2142 | spin_lock(&root->orphan_lock); |
| 2053 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2143 | if (!root->orphan_block_rsv) { |
| 2054 | spin_unlock(&root->list_lock); | 2144 | root->orphan_block_rsv = block_rsv; |
| 2055 | return 0; | 2145 | } else if (block_rsv) { |
| 2146 | btrfs_free_block_rsv(root, block_rsv); | ||
| 2147 | block_rsv = NULL; | ||
| 2056 | } | 2148 | } |
| 2057 | 2149 | ||
| 2058 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2150 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { |
| 2151 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
| 2152 | #if 0 | ||
| 2153 | /* | ||
| 2154 | * For proper ENOSPC handling, we should do orphan | ||
| 2155 | * cleanup when mounting. But this introduces backward | ||
| 2156 | * compatibility issue. | ||
| 2157 | */ | ||
| 2158 | if (!xchg(&root->orphan_item_inserted, 1)) | ||
| 2159 | insert = 2; | ||
| 2160 | else | ||
| 2161 | insert = 1; | ||
| 2162 | #endif | ||
| 2163 | insert = 1; | ||
| 2164 | } else { | ||
| 2165 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
| 2166 | } | ||
| 2059 | 2167 | ||
| 2060 | spin_unlock(&root->list_lock); | 2168 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
| 2169 | BTRFS_I(inode)->orphan_meta_reserved = 1; | ||
| 2170 | reserve = 1; | ||
| 2171 | } | ||
| 2172 | spin_unlock(&root->orphan_lock); | ||
| 2061 | 2173 | ||
| 2062 | /* | 2174 | if (block_rsv) |
| 2063 | * insert an orphan item to track this unlinked/truncated file | 2175 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); |
| 2064 | */ | ||
| 2065 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
| 2066 | 2176 | ||
| 2067 | return ret; | 2177 | /* grab metadata reservation from transaction handle */ |
| 2178 | if (reserve) { | ||
| 2179 | ret = btrfs_orphan_reserve_metadata(trans, inode); | ||
| 2180 | BUG_ON(ret); | ||
| 2181 | } | ||
| 2182 | |||
| 2183 | /* insert an orphan item to track this unlinked/truncated file */ | ||
| 2184 | if (insert >= 1) { | ||
| 2185 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
| 2186 | BUG_ON(ret); | ||
| 2187 | } | ||
| 2188 | |||
| 2189 | /* insert an orphan item to track subvolume contains orphan files */ | ||
| 2190 | if (insert >= 2) { | ||
| 2191 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
| 2192 | root->root_key.objectid); | ||
| 2193 | BUG_ON(ret); | ||
| 2194 | } | ||
| 2195 | return 0; | ||
| 2068 | } | 2196 | } |
| 2069 | 2197 | ||
| 2070 | /* | 2198 | /* |
| @@ -2074,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2074 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | 2202 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) |
| 2075 | { | 2203 | { |
| 2076 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2204 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2205 | int delete_item = 0; | ||
| 2206 | int release_rsv = 0; | ||
| 2077 | int ret = 0; | 2207 | int ret = 0; |
| 2078 | 2208 | ||
| 2079 | spin_lock(&root->list_lock); | 2209 | spin_lock(&root->orphan_lock); |
| 2080 | 2210 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | |
| 2081 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2211 | list_del_init(&BTRFS_I(inode)->i_orphan); |
| 2082 | spin_unlock(&root->list_lock); | 2212 | delete_item = 1; |
| 2083 | return 0; | ||
| 2084 | } | 2213 | } |
| 2085 | 2214 | ||
| 2086 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2215 | if (BTRFS_I(inode)->orphan_meta_reserved) { |
| 2087 | if (!trans) { | 2216 | BTRFS_I(inode)->orphan_meta_reserved = 0; |
| 2088 | spin_unlock(&root->list_lock); | 2217 | release_rsv = 1; |
| 2089 | return 0; | ||
| 2090 | } | 2218 | } |
| 2219 | spin_unlock(&root->orphan_lock); | ||
| 2091 | 2220 | ||
| 2092 | spin_unlock(&root->list_lock); | 2221 | if (trans && delete_item) { |
| 2222 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | ||
| 2223 | BUG_ON(ret); | ||
| 2224 | } | ||
| 2093 | 2225 | ||
| 2094 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | 2226 | if (release_rsv) |
| 2227 | btrfs_orphan_release_metadata(inode); | ||
| 2095 | 2228 | ||
| 2096 | return ret; | 2229 | return 0; |
| 2097 | } | 2230 | } |
| 2098 | 2231 | ||
| 2099 | /* | 2232 | /* |
| @@ -2110,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2110 | struct inode *inode; | 2243 | struct inode *inode; |
| 2111 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2244 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
| 2112 | 2245 | ||
| 2113 | if (!xchg(&root->clean_orphans, 0)) | 2246 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
| 2114 | return; | 2247 | return; |
| 2115 | 2248 | ||
| 2116 | path = btrfs_alloc_path(); | 2249 | path = btrfs_alloc_path(); |
| @@ -2163,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2163 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2296 | found_key.type = BTRFS_INODE_ITEM_KEY; |
| 2164 | found_key.offset = 0; | 2297 | found_key.offset = 0; |
| 2165 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2298 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
| 2166 | if (IS_ERR(inode)) | 2299 | BUG_ON(IS_ERR(inode)); |
| 2167 | break; | ||
| 2168 | 2300 | ||
| 2169 | /* | 2301 | /* |
| 2170 | * add this inode to the orphan list so btrfs_orphan_del does | 2302 | * add this inode to the orphan list so btrfs_orphan_del does |
| 2171 | * the proper thing when we hit it | 2303 | * the proper thing when we hit it |
| 2172 | */ | 2304 | */ |
| 2173 | spin_lock(&root->list_lock); | 2305 | spin_lock(&root->orphan_lock); |
| 2174 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2306 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); |
| 2175 | spin_unlock(&root->list_lock); | 2307 | spin_unlock(&root->orphan_lock); |
| 2176 | 2308 | ||
| 2177 | /* | 2309 | /* |
| 2178 | * if this is a bad inode, means we actually succeeded in | 2310 | * if this is a bad inode, means we actually succeeded in |
| @@ -2181,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2181 | * do a destroy_inode | 2313 | * do a destroy_inode |
| 2182 | */ | 2314 | */ |
| 2183 | if (is_bad_inode(inode)) { | 2315 | if (is_bad_inode(inode)) { |
| 2184 | trans = btrfs_start_transaction(root, 1); | 2316 | trans = btrfs_start_transaction(root, 0); |
| 2185 | btrfs_orphan_del(trans, inode); | 2317 | btrfs_orphan_del(trans, inode); |
| 2186 | btrfs_end_transaction(trans, root); | 2318 | btrfs_end_transaction(trans, root); |
| 2187 | iput(inode); | 2319 | iput(inode); |
| @@ -2199,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2199 | /* this will do delete_inode and everything for us */ | 2331 | /* this will do delete_inode and everything for us */ |
| 2200 | iput(inode); | 2332 | iput(inode); |
| 2201 | } | 2333 | } |
| 2334 | btrfs_free_path(path); | ||
| 2335 | |||
| 2336 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | ||
| 2337 | |||
| 2338 | if (root->orphan_block_rsv) | ||
| 2339 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | ||
| 2340 | (u64)-1); | ||
| 2341 | |||
| 2342 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | ||
| 2343 | trans = btrfs_join_transaction(root, 1); | ||
| 2344 | btrfs_end_transaction(trans, root); | ||
| 2345 | } | ||
| 2202 | 2346 | ||
| 2203 | if (nr_unlink) | 2347 | if (nr_unlink) |
| 2204 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2348 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
| 2205 | if (nr_truncate) | 2349 | if (nr_truncate) |
| 2206 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2350 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
| 2207 | |||
| 2208 | btrfs_free_path(path); | ||
| 2209 | } | 2351 | } |
| 2210 | 2352 | ||
| 2211 | /* | 2353 | /* |
| @@ -2524,44 +2666,217 @@ out: | |||
| 2524 | return ret; | 2666 | return ret; |
| 2525 | } | 2667 | } |
| 2526 | 2668 | ||
| 2527 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 2669 | /* helper to check if there is any shared block in the path */ |
| 2670 | static int check_path_shared(struct btrfs_root *root, | ||
| 2671 | struct btrfs_path *path) | ||
| 2672 | { | ||
| 2673 | struct extent_buffer *eb; | ||
| 2674 | int level; | ||
| 2675 | int ret; | ||
| 2676 | u64 refs = 1; | ||
| 2677 | |||
| 2678 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
| 2679 | if (!path->nodes[level]) | ||
| 2680 | break; | ||
| 2681 | eb = path->nodes[level]; | ||
| 2682 | if (!btrfs_block_can_be_shared(root, eb)) | ||
| 2683 | continue; | ||
| 2684 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, | ||
| 2685 | &refs, NULL); | ||
| 2686 | if (refs > 1) | ||
| 2687 | return 1; | ||
| 2688 | } | ||
| 2689 | return 0; | ||
| 2690 | } | ||
| 2691 | |||
| 2692 | /* | ||
| 2693 | * helper to start transaction for unlink and rmdir. | ||
| 2694 | * | ||
| 2695 | * unlink and rmdir are special in btrfs, they do not always free space. | ||
| 2696 | * so in enospc case, we should make sure they will free space before | ||
| 2697 | * allowing them to use the global metadata reservation. | ||
| 2698 | */ | ||
| 2699 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | ||
| 2700 | struct dentry *dentry) | ||
| 2528 | { | 2701 | { |
| 2529 | struct btrfs_root *root; | ||
| 2530 | struct btrfs_trans_handle *trans; | 2702 | struct btrfs_trans_handle *trans; |
| 2703 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2704 | struct btrfs_path *path; | ||
| 2705 | struct btrfs_inode_ref *ref; | ||
| 2706 | struct btrfs_dir_item *di; | ||
| 2531 | struct inode *inode = dentry->d_inode; | 2707 | struct inode *inode = dentry->d_inode; |
| 2708 | u64 index; | ||
| 2709 | int check_link = 1; | ||
| 2710 | int err = -ENOSPC; | ||
| 2532 | int ret; | 2711 | int ret; |
| 2533 | unsigned long nr = 0; | ||
| 2534 | 2712 | ||
| 2535 | root = BTRFS_I(dir)->root; | 2713 | trans = btrfs_start_transaction(root, 10); |
| 2714 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | ||
| 2715 | return trans; | ||
| 2536 | 2716 | ||
| 2537 | /* | 2717 | if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
| 2538 | * 5 items for unlink inode | 2718 | return ERR_PTR(-ENOSPC); |
| 2539 | * 1 for orphan | 2719 | |
| 2540 | */ | 2720 | /* check if there is someone else holds reference */ |
| 2541 | ret = btrfs_reserve_metadata_space(root, 6); | 2721 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) |
| 2542 | if (ret) | 2722 | return ERR_PTR(-ENOSPC); |
| 2543 | return ret; | ||
| 2544 | 2723 | ||
| 2545 | trans = btrfs_start_transaction(root, 1); | 2724 | if (atomic_read(&inode->i_count) > 2) |
| 2725 | return ERR_PTR(-ENOSPC); | ||
| 2726 | |||
| 2727 | if (xchg(&root->fs_info->enospc_unlink, 1)) | ||
| 2728 | return ERR_PTR(-ENOSPC); | ||
| 2729 | |||
| 2730 | path = btrfs_alloc_path(); | ||
| 2731 | if (!path) { | ||
| 2732 | root->fs_info->enospc_unlink = 0; | ||
| 2733 | return ERR_PTR(-ENOMEM); | ||
| 2734 | } | ||
| 2735 | |||
| 2736 | trans = btrfs_start_transaction(root, 0); | ||
| 2546 | if (IS_ERR(trans)) { | 2737 | if (IS_ERR(trans)) { |
| 2547 | btrfs_unreserve_metadata_space(root, 6); | 2738 | btrfs_free_path(path); |
| 2548 | return PTR_ERR(trans); | 2739 | root->fs_info->enospc_unlink = 0; |
| 2740 | return trans; | ||
| 2549 | } | 2741 | } |
| 2550 | 2742 | ||
| 2743 | path->skip_locking = 1; | ||
| 2744 | path->search_commit_root = 1; | ||
| 2745 | |||
| 2746 | ret = btrfs_lookup_inode(trans, root, path, | ||
| 2747 | &BTRFS_I(dir)->location, 0); | ||
| 2748 | if (ret < 0) { | ||
| 2749 | err = ret; | ||
| 2750 | goto out; | ||
| 2751 | } | ||
| 2752 | if (ret == 0) { | ||
| 2753 | if (check_path_shared(root, path)) | ||
| 2754 | goto out; | ||
| 2755 | } else { | ||
| 2756 | check_link = 0; | ||
| 2757 | } | ||
| 2758 | btrfs_release_path(root, path); | ||
| 2759 | |||
| 2760 | ret = btrfs_lookup_inode(trans, root, path, | ||
| 2761 | &BTRFS_I(inode)->location, 0); | ||
| 2762 | if (ret < 0) { | ||
| 2763 | err = ret; | ||
| 2764 | goto out; | ||
| 2765 | } | ||
| 2766 | if (ret == 0) { | ||
| 2767 | if (check_path_shared(root, path)) | ||
| 2768 | goto out; | ||
| 2769 | } else { | ||
| 2770 | check_link = 0; | ||
| 2771 | } | ||
| 2772 | btrfs_release_path(root, path); | ||
| 2773 | |||
| 2774 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
| 2775 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
| 2776 | inode->i_ino, (u64)-1, 0); | ||
| 2777 | if (ret < 0) { | ||
| 2778 | err = ret; | ||
| 2779 | goto out; | ||
| 2780 | } | ||
| 2781 | BUG_ON(ret == 0); | ||
| 2782 | if (check_path_shared(root, path)) | ||
| 2783 | goto out; | ||
| 2784 | btrfs_release_path(root, path); | ||
| 2785 | } | ||
| 2786 | |||
| 2787 | if (!check_link) { | ||
| 2788 | err = 0; | ||
| 2789 | goto out; | ||
| 2790 | } | ||
| 2791 | |||
| 2792 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
| 2793 | dentry->d_name.name, dentry->d_name.len, 0); | ||
| 2794 | if (IS_ERR(di)) { | ||
| 2795 | err = PTR_ERR(di); | ||
| 2796 | goto out; | ||
| 2797 | } | ||
| 2798 | if (di) { | ||
| 2799 | if (check_path_shared(root, path)) | ||
| 2800 | goto out; | ||
| 2801 | } else { | ||
| 2802 | err = 0; | ||
| 2803 | goto out; | ||
| 2804 | } | ||
| 2805 | btrfs_release_path(root, path); | ||
| 2806 | |||
| 2807 | ref = btrfs_lookup_inode_ref(trans, root, path, | ||
| 2808 | dentry->d_name.name, dentry->d_name.len, | ||
| 2809 | inode->i_ino, dir->i_ino, 0); | ||
| 2810 | if (IS_ERR(ref)) { | ||
| 2811 | err = PTR_ERR(ref); | ||
| 2812 | goto out; | ||
| 2813 | } | ||
| 2814 | BUG_ON(!ref); | ||
| 2815 | if (check_path_shared(root, path)) | ||
| 2816 | goto out; | ||
| 2817 | index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
| 2818 | btrfs_release_path(root, path); | ||
| 2819 | |||
| 2820 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, | ||
| 2821 | dentry->d_name.name, dentry->d_name.len, 0); | ||
| 2822 | if (IS_ERR(di)) { | ||
| 2823 | err = PTR_ERR(di); | ||
| 2824 | goto out; | ||
| 2825 | } | ||
| 2826 | BUG_ON(ret == -ENOENT); | ||
| 2827 | if (check_path_shared(root, path)) | ||
| 2828 | goto out; | ||
| 2829 | |||
| 2830 | err = 0; | ||
| 2831 | out: | ||
| 2832 | btrfs_free_path(path); | ||
| 2833 | if (err) { | ||
| 2834 | btrfs_end_transaction(trans, root); | ||
| 2835 | root->fs_info->enospc_unlink = 0; | ||
| 2836 | return ERR_PTR(err); | ||
| 2837 | } | ||
| 2838 | |||
| 2839 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
| 2840 | return trans; | ||
| 2841 | } | ||
| 2842 | |||
| 2843 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
| 2844 | struct btrfs_root *root) | ||
| 2845 | { | ||
| 2846 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | ||
| 2847 | BUG_ON(!root->fs_info->enospc_unlink); | ||
| 2848 | root->fs_info->enospc_unlink = 0; | ||
| 2849 | } | ||
| 2850 | btrfs_end_transaction_throttle(trans, root); | ||
| 2851 | } | ||
| 2852 | |||
| 2853 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | ||
| 2854 | { | ||
| 2855 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2856 | struct btrfs_trans_handle *trans; | ||
| 2857 | struct inode *inode = dentry->d_inode; | ||
| 2858 | int ret; | ||
| 2859 | unsigned long nr = 0; | ||
| 2860 | |||
| 2861 | trans = __unlink_start_trans(dir, dentry); | ||
| 2862 | if (IS_ERR(trans)) | ||
| 2863 | return PTR_ERR(trans); | ||
| 2864 | |||
| 2551 | btrfs_set_trans_block_group(trans, dir); | 2865 | btrfs_set_trans_block_group(trans, dir); |
| 2552 | 2866 | ||
| 2553 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); | 2867 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); |
| 2554 | 2868 | ||
| 2555 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2869 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
| 2556 | dentry->d_name.name, dentry->d_name.len); | 2870 | dentry->d_name.name, dentry->d_name.len); |
| 2871 | BUG_ON(ret); | ||
| 2557 | 2872 | ||
| 2558 | if (inode->i_nlink == 0) | 2873 | if (inode->i_nlink == 0) { |
| 2559 | ret = btrfs_orphan_add(trans, inode); | 2874 | ret = btrfs_orphan_add(trans, inode); |
| 2875 | BUG_ON(ret); | ||
| 2876 | } | ||
| 2560 | 2877 | ||
| 2561 | nr = trans->blocks_used; | 2878 | nr = trans->blocks_used; |
| 2562 | 2879 | __unlink_end_trans(trans, root); | |
| 2563 | btrfs_end_transaction_throttle(trans, root); | ||
| 2564 | btrfs_unreserve_metadata_space(root, 6); | ||
| 2565 | btrfs_btree_balance_dirty(root, nr); | 2880 | btrfs_btree_balance_dirty(root, nr); |
| 2566 | return ret; | 2881 | return ret; |
| 2567 | } | 2882 | } |
| @@ -2633,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2633 | { | 2948 | { |
| 2634 | struct inode *inode = dentry->d_inode; | 2949 | struct inode *inode = dentry->d_inode; |
| 2635 | int err = 0; | 2950 | int err = 0; |
| 2636 | int ret; | ||
| 2637 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2951 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 2638 | struct btrfs_trans_handle *trans; | 2952 | struct btrfs_trans_handle *trans; |
| 2639 | unsigned long nr = 0; | 2953 | unsigned long nr = 0; |
| @@ -2642,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2642 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 2956 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 2643 | return -ENOTEMPTY; | 2957 | return -ENOTEMPTY; |
| 2644 | 2958 | ||
| 2645 | ret = btrfs_reserve_metadata_space(root, 5); | 2959 | trans = __unlink_start_trans(dir, dentry); |
| 2646 | if (ret) | 2960 | if (IS_ERR(trans)) |
| 2647 | return ret; | ||
| 2648 | |||
| 2649 | trans = btrfs_start_transaction(root, 1); | ||
| 2650 | if (IS_ERR(trans)) { | ||
| 2651 | btrfs_unreserve_metadata_space(root, 5); | ||
| 2652 | return PTR_ERR(trans); | 2961 | return PTR_ERR(trans); |
| 2653 | } | ||
| 2654 | 2962 | ||
| 2655 | btrfs_set_trans_block_group(trans, dir); | 2963 | btrfs_set_trans_block_group(trans, dir); |
| 2656 | 2964 | ||
| @@ -2673,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2673 | btrfs_i_size_write(inode, 0); | 2981 | btrfs_i_size_write(inode, 0); |
| 2674 | out: | 2982 | out: |
| 2675 | nr = trans->blocks_used; | 2983 | nr = trans->blocks_used; |
| 2676 | ret = btrfs_end_transaction_throttle(trans, root); | 2984 | __unlink_end_trans(trans, root); |
| 2677 | btrfs_unreserve_metadata_space(root, 5); | ||
| 2678 | btrfs_btree_balance_dirty(root, nr); | 2985 | btrfs_btree_balance_dirty(root, nr); |
| 2679 | 2986 | ||
| 2680 | if (ret && !err) | ||
| 2681 | err = ret; | ||
| 2682 | return err; | 2987 | return err; |
| 2683 | } | 2988 | } |
| 2684 | 2989 | ||
| @@ -3075,6 +3380,7 @@ out: | |||
| 3075 | if (pending_del_nr) { | 3380 | if (pending_del_nr) { |
| 3076 | ret = btrfs_del_items(trans, root, path, pending_del_slot, | 3381 | ret = btrfs_del_items(trans, root, path, pending_del_slot, |
| 3077 | pending_del_nr); | 3382 | pending_del_nr); |
| 3383 | BUG_ON(ret); | ||
| 3078 | } | 3384 | } |
| 3079 | btrfs_free_path(path); | 3385 | btrfs_free_path(path); |
| 3080 | return err; | 3386 | return err; |
| @@ -3102,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3102 | 3408 | ||
| 3103 | if ((offset & (blocksize - 1)) == 0) | 3409 | if ((offset & (blocksize - 1)) == 0) |
| 3104 | goto out; | 3410 | goto out; |
| 3105 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 3411 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 3106 | if (ret) | ||
| 3107 | goto out; | ||
| 3108 | |||
| 3109 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 3110 | if (ret) | 3412 | if (ret) |
| 3111 | goto out; | 3413 | goto out; |
| 3112 | 3414 | ||
| @@ -3114,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3114 | again: | 3416 | again: |
| 3115 | page = grab_cache_page(mapping, index); | 3417 | page = grab_cache_page(mapping, index); |
| 3116 | if (!page) { | 3418 | if (!page) { |
| 3117 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3419 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 3118 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 3119 | goto out; | 3420 | goto out; |
| 3120 | } | 3421 | } |
| 3121 | 3422 | ||
| @@ -3178,8 +3479,7 @@ again: | |||
| 3178 | 3479 | ||
| 3179 | out_unlock: | 3480 | out_unlock: |
| 3180 | if (ret) | 3481 | if (ret) |
| 3181 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3482 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 3182 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 3183 | unlock_page(page); | 3483 | unlock_page(page); |
| 3184 | page_cache_release(page); | 3484 | page_cache_release(page); |
| 3185 | out: | 3485 | out: |
| @@ -3191,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3191 | struct btrfs_trans_handle *trans; | 3491 | struct btrfs_trans_handle *trans; |
| 3192 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3492 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3193 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3493 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 3194 | struct extent_map *em; | 3494 | struct extent_map *em = NULL; |
| 3195 | struct extent_state *cached_state = NULL; | 3495 | struct extent_state *cached_state = NULL; |
| 3196 | u64 mask = root->sectorsize - 1; | 3496 | u64 mask = root->sectorsize - 1; |
| 3197 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3497 | u64 hole_start = (inode->i_size + mask) & ~mask; |
| @@ -3229,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3229 | u64 hint_byte = 0; | 3529 | u64 hint_byte = 0; |
| 3230 | hole_size = last_byte - cur_offset; | 3530 | hole_size = last_byte - cur_offset; |
| 3231 | 3531 | ||
| 3232 | err = btrfs_reserve_metadata_space(root, 2); | 3532 | trans = btrfs_start_transaction(root, 2); |
| 3233 | if (err) | 3533 | if (IS_ERR(trans)) { |
| 3534 | err = PTR_ERR(trans); | ||
| 3234 | break; | 3535 | break; |
| 3235 | 3536 | } | |
| 3236 | trans = btrfs_start_transaction(root, 1); | ||
| 3237 | btrfs_set_trans_block_group(trans, inode); | 3537 | btrfs_set_trans_block_group(trans, inode); |
| 3238 | 3538 | ||
| 3239 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3539 | err = btrfs_drop_extents(trans, inode, cur_offset, |
| @@ -3251,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3251 | last_byte - 1, 0); | 3551 | last_byte - 1, 0); |
| 3252 | 3552 | ||
| 3253 | btrfs_end_transaction(trans, root); | 3553 | btrfs_end_transaction(trans, root); |
| 3254 | btrfs_unreserve_metadata_space(root, 2); | ||
| 3255 | } | 3554 | } |
| 3256 | free_extent_map(em); | 3555 | free_extent_map(em); |
| 3556 | em = NULL; | ||
| 3257 | cur_offset = last_byte; | 3557 | cur_offset = last_byte; |
| 3258 | if (cur_offset >= block_end) | 3558 | if (cur_offset >= block_end) |
| 3259 | break; | 3559 | break; |
| 3260 | } | 3560 | } |
| 3261 | 3561 | ||
| 3562 | free_extent_map(em); | ||
| 3262 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, | 3563 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, |
| 3263 | GFP_NOFS); | 3564 | GFP_NOFS); |
| 3264 | return err; | 3565 | return err; |
| @@ -3285,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
| 3285 | } | 3586 | } |
| 3286 | } | 3587 | } |
| 3287 | 3588 | ||
| 3288 | ret = btrfs_reserve_metadata_space(root, 1); | 3589 | trans = btrfs_start_transaction(root, 5); |
| 3289 | if (ret) | 3590 | if (IS_ERR(trans)) |
| 3290 | return ret; | 3591 | return PTR_ERR(trans); |
| 3291 | 3592 | ||
| 3292 | trans = btrfs_start_transaction(root, 1); | ||
| 3293 | btrfs_set_trans_block_group(trans, inode); | 3593 | btrfs_set_trans_block_group(trans, inode); |
| 3294 | 3594 | ||
| 3295 | ret = btrfs_orphan_add(trans, inode); | 3595 | ret = btrfs_orphan_add(trans, inode); |
| @@ -3297,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
| 3297 | 3597 | ||
| 3298 | nr = trans->blocks_used; | 3598 | nr = trans->blocks_used; |
| 3299 | btrfs_end_transaction(trans, root); | 3599 | btrfs_end_transaction(trans, root); |
| 3300 | btrfs_unreserve_metadata_space(root, 1); | ||
| 3301 | btrfs_btree_balance_dirty(root, nr); | 3600 | btrfs_btree_balance_dirty(root, nr); |
| 3302 | 3601 | ||
| 3303 | if (attr->ia_size > inode->i_size) { | 3602 | if (attr->ia_size > inode->i_size) { |
| @@ -3310,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
| 3310 | i_size_write(inode, attr->ia_size); | 3609 | i_size_write(inode, attr->ia_size); |
| 3311 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3610 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
| 3312 | 3611 | ||
| 3313 | trans = btrfs_start_transaction(root, 1); | 3612 | trans = btrfs_start_transaction(root, 0); |
| 3613 | BUG_ON(IS_ERR(trans)); | ||
| 3314 | btrfs_set_trans_block_group(trans, inode); | 3614 | btrfs_set_trans_block_group(trans, inode); |
| 3615 | trans->block_rsv = root->orphan_block_rsv; | ||
| 3616 | BUG_ON(!trans->block_rsv); | ||
| 3315 | 3617 | ||
| 3316 | ret = btrfs_update_inode(trans, root, inode); | 3618 | ret = btrfs_update_inode(trans, root, inode); |
| 3317 | BUG_ON(ret); | 3619 | BUG_ON(ret); |
| @@ -3391,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode) | |||
| 3391 | btrfs_i_size_write(inode, 0); | 3693 | btrfs_i_size_write(inode, 0); |
| 3392 | 3694 | ||
| 3393 | while (1) { | 3695 | while (1) { |
| 3394 | trans = btrfs_start_transaction(root, 1); | 3696 | trans = btrfs_start_transaction(root, 0); |
| 3697 | BUG_ON(IS_ERR(trans)); | ||
| 3395 | btrfs_set_trans_block_group(trans, inode); | 3698 | btrfs_set_trans_block_group(trans, inode); |
| 3396 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3699 | trans->block_rsv = root->orphan_block_rsv; |
| 3397 | 3700 | ||
| 3701 | ret = btrfs_block_rsv_check(trans, root, | ||
| 3702 | root->orphan_block_rsv, 0, 5); | ||
| 3703 | if (ret) { | ||
| 3704 | BUG_ON(ret != -EAGAIN); | ||
| 3705 | ret = btrfs_commit_transaction(trans, root); | ||
| 3706 | BUG_ON(ret); | ||
| 3707 | continue; | ||
| 3708 | } | ||
| 3709 | |||
| 3710 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | ||
| 3398 | if (ret != -EAGAIN) | 3711 | if (ret != -EAGAIN) |
| 3399 | break; | 3712 | break; |
| 3400 | 3713 | ||
| @@ -3402,6 +3715,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
| 3402 | btrfs_end_transaction(trans, root); | 3715 | btrfs_end_transaction(trans, root); |
| 3403 | trans = NULL; | 3716 | trans = NULL; |
| 3404 | btrfs_btree_balance_dirty(root, nr); | 3717 | btrfs_btree_balance_dirty(root, nr); |
| 3718 | |||
| 3405 | } | 3719 | } |
| 3406 | 3720 | ||
| 3407 | if (ret == 0) { | 3721 | if (ret == 0) { |
| @@ -3642,40 +3956,10 @@ again: | |||
| 3642 | return 0; | 3956 | return 0; |
| 3643 | } | 3957 | } |
| 3644 | 3958 | ||
| 3645 | static noinline void init_btrfs_i(struct inode *inode) | ||
| 3646 | { | ||
| 3647 | struct btrfs_inode *bi = BTRFS_I(inode); | ||
| 3648 | |||
| 3649 | bi->generation = 0; | ||
| 3650 | bi->sequence = 0; | ||
| 3651 | bi->last_trans = 0; | ||
| 3652 | bi->last_sub_trans = 0; | ||
| 3653 | bi->logged_trans = 0; | ||
| 3654 | bi->delalloc_bytes = 0; | ||
| 3655 | bi->reserved_bytes = 0; | ||
| 3656 | bi->disk_i_size = 0; | ||
| 3657 | bi->flags = 0; | ||
| 3658 | bi->index_cnt = (u64)-1; | ||
| 3659 | bi->last_unlink_trans = 0; | ||
| 3660 | bi->ordered_data_close = 0; | ||
| 3661 | bi->force_compress = 0; | ||
| 3662 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | ||
| 3663 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | ||
| 3664 | inode->i_mapping, GFP_NOFS); | ||
| 3665 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | ||
| 3666 | inode->i_mapping, GFP_NOFS); | ||
| 3667 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | ||
| 3668 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
| 3669 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
| 3670 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
| 3671 | mutex_init(&BTRFS_I(inode)->log_mutex); | ||
| 3672 | } | ||
| 3673 | |||
| 3674 | static int btrfs_init_locked_inode(struct inode *inode, void *p) | 3959 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
| 3675 | { | 3960 | { |
| 3676 | struct btrfs_iget_args *args = p; | 3961 | struct btrfs_iget_args *args = p; |
| 3677 | inode->i_ino = args->ino; | 3962 | inode->i_ino = args->ino; |
| 3678 | init_btrfs_i(inode); | ||
| 3679 | BTRFS_I(inode)->root = args->root; | 3963 | BTRFS_I(inode)->root = args->root; |
| 3680 | btrfs_set_inode_space_info(args->root, inode); | 3964 | btrfs_set_inode_space_info(args->root, inode); |
| 3681 | return 0; | 3965 | return 0; |
| @@ -3738,8 +4022,6 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
| 3738 | if (!inode) | 4022 | if (!inode) |
| 3739 | return ERR_PTR(-ENOMEM); | 4023 | return ERR_PTR(-ENOMEM); |
| 3740 | 4024 | ||
| 3741 | init_btrfs_i(inode); | ||
| 3742 | |||
| 3743 | BTRFS_I(inode)->root = root; | 4025 | BTRFS_I(inode)->root = root; |
| 3744 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4026 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
| 3745 | BTRFS_I(inode)->dummy_inode = 1; | 4027 | BTRFS_I(inode)->dummy_inode = 1; |
| @@ -3996,7 +4278,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 3996 | struct btrfs_trans_handle *trans; | 4278 | struct btrfs_trans_handle *trans; |
| 3997 | int ret = 0; | 4279 | int ret = 0; |
| 3998 | 4280 | ||
| 3999 | if (root->fs_info->btree_inode == inode) | 4281 | if (BTRFS_I(inode)->dummy_inode) |
| 4000 | return 0; | 4282 | return 0; |
| 4001 | 4283 | ||
| 4002 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4284 | if (wbc->sync_mode == WB_SYNC_ALL) { |
| @@ -4017,10 +4299,38 @@ void btrfs_dirty_inode(struct inode *inode) | |||
| 4017 | { | 4299 | { |
| 4018 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4300 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 4019 | struct btrfs_trans_handle *trans; | 4301 | struct btrfs_trans_handle *trans; |
| 4302 | int ret; | ||
| 4303 | |||
| 4304 | if (BTRFS_I(inode)->dummy_inode) | ||
| 4305 | return; | ||
| 4020 | 4306 | ||
| 4021 | trans = btrfs_join_transaction(root, 1); | 4307 | trans = btrfs_join_transaction(root, 1); |
| 4022 | btrfs_set_trans_block_group(trans, inode); | 4308 | btrfs_set_trans_block_group(trans, inode); |
| 4023 | btrfs_update_inode(trans, root, inode); | 4309 | |
| 4310 | ret = btrfs_update_inode(trans, root, inode); | ||
| 4311 | if (ret && ret == -ENOSPC) { | ||
| 4312 | /* whoops, lets try again with the full transaction */ | ||
| 4313 | btrfs_end_transaction(trans, root); | ||
| 4314 | trans = btrfs_start_transaction(root, 1); | ||
| 4315 | if (IS_ERR(trans)) { | ||
| 4316 | if (printk_ratelimit()) { | ||
| 4317 | printk(KERN_ERR "btrfs: fail to " | ||
| 4318 | "dirty inode %lu error %ld\n", | ||
| 4319 | inode->i_ino, PTR_ERR(trans)); | ||
| 4320 | } | ||
| 4321 | return; | ||
| 4322 | } | ||
| 4323 | btrfs_set_trans_block_group(trans, inode); | ||
| 4324 | |||
| 4325 | ret = btrfs_update_inode(trans, root, inode); | ||
| 4326 | if (ret) { | ||
| 4327 | if (printk_ratelimit()) { | ||
| 4328 | printk(KERN_ERR "btrfs: fail to " | ||
| 4329 | "dirty inode %lu error %d\n", | ||
| 4330 | inode->i_ino, ret); | ||
| 4331 | } | ||
| 4332 | } | ||
| 4333 | } | ||
| 4024 | btrfs_end_transaction(trans, root); | 4334 | btrfs_end_transaction(trans, root); |
| 4025 | } | 4335 | } |
| 4026 | 4336 | ||
| @@ -4138,7 +4448,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4138 | * btrfs_get_inode_index_count has an explanation for the magic | 4448 | * btrfs_get_inode_index_count has an explanation for the magic |
| 4139 | * number | 4449 | * number |
| 4140 | */ | 4450 | */ |
| 4141 | init_btrfs_i(inode); | ||
| 4142 | BTRFS_I(inode)->index_cnt = 2; | 4451 | BTRFS_I(inode)->index_cnt = 2; |
| 4143 | BTRFS_I(inode)->root = root; | 4452 | BTRFS_I(inode)->root = root; |
| 4144 | BTRFS_I(inode)->generation = trans->transid; | 4453 | BTRFS_I(inode)->generation = trans->transid; |
| @@ -4167,16 +4476,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4167 | if (ret != 0) | 4476 | if (ret != 0) |
| 4168 | goto fail; | 4477 | goto fail; |
| 4169 | 4478 | ||
| 4170 | inode->i_uid = current_fsuid(); | 4479 | inode_init_owner(inode, dir, mode); |
| 4171 | |||
| 4172 | if (dir && (dir->i_mode & S_ISGID)) { | ||
| 4173 | inode->i_gid = dir->i_gid; | ||
| 4174 | if (S_ISDIR(mode)) | ||
| 4175 | mode |= S_ISGID; | ||
| 4176 | } else | ||
| 4177 | inode->i_gid = current_fsgid(); | ||
| 4178 | |||
| 4179 | inode->i_mode = mode; | ||
| 4180 | inode->i_ino = objectid; | 4480 | inode->i_ino = objectid; |
| 4181 | inode_set_bytes(inode, 0); | 4481 | inode_set_bytes(inode, 0); |
| 4182 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 4482 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
| @@ -4302,26 +4602,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4302 | if (!new_valid_dev(rdev)) | 4602 | if (!new_valid_dev(rdev)) |
| 4303 | return -EINVAL; | 4603 | return -EINVAL; |
| 4304 | 4604 | ||
| 4605 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 4606 | if (err) | ||
| 4607 | return err; | ||
| 4608 | |||
| 4305 | /* | 4609 | /* |
| 4306 | * 2 for inode item and ref | 4610 | * 2 for inode item and ref |
| 4307 | * 2 for dir items | 4611 | * 2 for dir items |
| 4308 | * 1 for xattr if selinux is on | 4612 | * 1 for xattr if selinux is on |
| 4309 | */ | 4613 | */ |
| 4310 | err = btrfs_reserve_metadata_space(root, 5); | 4614 | trans = btrfs_start_transaction(root, 5); |
| 4311 | if (err) | 4615 | if (IS_ERR(trans)) |
| 4312 | return err; | 4616 | return PTR_ERR(trans); |
| 4313 | 4617 | ||
| 4314 | trans = btrfs_start_transaction(root, 1); | ||
| 4315 | if (!trans) | ||
| 4316 | goto fail; | ||
| 4317 | btrfs_set_trans_block_group(trans, dir); | 4618 | btrfs_set_trans_block_group(trans, dir); |
| 4318 | 4619 | ||
| 4319 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 4320 | if (err) { | ||
| 4321 | err = -ENOSPC; | ||
| 4322 | goto out_unlock; | ||
| 4323 | } | ||
| 4324 | |||
| 4325 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4620 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 4326 | dentry->d_name.len, | 4621 | dentry->d_name.len, |
| 4327 | dentry->d_parent->d_inode->i_ino, objectid, | 4622 | dentry->d_parent->d_inode->i_ino, objectid, |
| @@ -4350,13 +4645,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4350 | out_unlock: | 4645 | out_unlock: |
| 4351 | nr = trans->blocks_used; | 4646 | nr = trans->blocks_used; |
| 4352 | btrfs_end_transaction_throttle(trans, root); | 4647 | btrfs_end_transaction_throttle(trans, root); |
| 4353 | fail: | 4648 | btrfs_btree_balance_dirty(root, nr); |
| 4354 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4355 | if (drop_inode) { | 4649 | if (drop_inode) { |
| 4356 | inode_dec_link_count(inode); | 4650 | inode_dec_link_count(inode); |
| 4357 | iput(inode); | 4651 | iput(inode); |
| 4358 | } | 4652 | } |
| 4359 | btrfs_btree_balance_dirty(root, nr); | ||
| 4360 | return err; | 4653 | return err; |
| 4361 | } | 4654 | } |
| 4362 | 4655 | ||
| @@ -4366,32 +4659,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4366 | struct btrfs_trans_handle *trans; | 4659 | struct btrfs_trans_handle *trans; |
| 4367 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4660 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 4368 | struct inode *inode = NULL; | 4661 | struct inode *inode = NULL; |
| 4369 | int err; | ||
| 4370 | int drop_inode = 0; | 4662 | int drop_inode = 0; |
| 4663 | int err; | ||
| 4371 | unsigned long nr = 0; | 4664 | unsigned long nr = 0; |
| 4372 | u64 objectid; | 4665 | u64 objectid; |
| 4373 | u64 index = 0; | 4666 | u64 index = 0; |
| 4374 | 4667 | ||
| 4668 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 4669 | if (err) | ||
| 4670 | return err; | ||
| 4375 | /* | 4671 | /* |
| 4376 | * 2 for inode item and ref | 4672 | * 2 for inode item and ref |
| 4377 | * 2 for dir items | 4673 | * 2 for dir items |
| 4378 | * 1 for xattr if selinux is on | 4674 | * 1 for xattr if selinux is on |
| 4379 | */ | 4675 | */ |
| 4380 | err = btrfs_reserve_metadata_space(root, 5); | 4676 | trans = btrfs_start_transaction(root, 5); |
| 4381 | if (err) | 4677 | if (IS_ERR(trans)) |
| 4382 | return err; | 4678 | return PTR_ERR(trans); |
| 4383 | 4679 | ||
| 4384 | trans = btrfs_start_transaction(root, 1); | ||
| 4385 | if (!trans) | ||
| 4386 | goto fail; | ||
| 4387 | btrfs_set_trans_block_group(trans, dir); | 4680 | btrfs_set_trans_block_group(trans, dir); |
| 4388 | 4681 | ||
| 4389 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 4390 | if (err) { | ||
| 4391 | err = -ENOSPC; | ||
| 4392 | goto out_unlock; | ||
| 4393 | } | ||
| 4394 | |||
| 4395 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4682 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 4396 | dentry->d_name.len, | 4683 | dentry->d_name.len, |
| 4397 | dentry->d_parent->d_inode->i_ino, | 4684 | dentry->d_parent->d_inode->i_ino, |
| @@ -4423,8 +4710,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4423 | out_unlock: | 4710 | out_unlock: |
| 4424 | nr = trans->blocks_used; | 4711 | nr = trans->blocks_used; |
| 4425 | btrfs_end_transaction_throttle(trans, root); | 4712 | btrfs_end_transaction_throttle(trans, root); |
| 4426 | fail: | ||
| 4427 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4428 | if (drop_inode) { | 4713 | if (drop_inode) { |
| 4429 | inode_dec_link_count(inode); | 4714 | inode_dec_link_count(inode); |
| 4430 | iput(inode); | 4715 | iput(inode); |
| @@ -4451,21 +4736,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4451 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4736 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
| 4452 | return -EPERM; | 4737 | return -EPERM; |
| 4453 | 4738 | ||
| 4454 | /* | ||
| 4455 | * 1 item for inode ref | ||
| 4456 | * 2 items for dir items | ||
| 4457 | */ | ||
| 4458 | err = btrfs_reserve_metadata_space(root, 3); | ||
| 4459 | if (err) | ||
| 4460 | return err; | ||
| 4461 | |||
| 4462 | btrfs_inc_nlink(inode); | 4739 | btrfs_inc_nlink(inode); |
| 4463 | 4740 | ||
| 4464 | err = btrfs_set_inode_index(dir, &index); | 4741 | err = btrfs_set_inode_index(dir, &index); |
| 4465 | if (err) | 4742 | if (err) |
| 4466 | goto fail; | 4743 | goto fail; |
| 4467 | 4744 | ||
| 4468 | trans = btrfs_start_transaction(root, 1); | 4745 | /* |
| 4746 | * 1 item for inode ref | ||
| 4747 | * 2 items for dir items | ||
| 4748 | */ | ||
| 4749 | trans = btrfs_start_transaction(root, 3); | ||
| 4750 | if (IS_ERR(trans)) { | ||
| 4751 | err = PTR_ERR(trans); | ||
| 4752 | goto fail; | ||
| 4753 | } | ||
| 4469 | 4754 | ||
| 4470 | btrfs_set_trans_block_group(trans, dir); | 4755 | btrfs_set_trans_block_group(trans, dir); |
| 4471 | atomic_inc(&inode->i_count); | 4756 | atomic_inc(&inode->i_count); |
| @@ -4484,7 +4769,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4484 | nr = trans->blocks_used; | 4769 | nr = trans->blocks_used; |
| 4485 | btrfs_end_transaction_throttle(trans, root); | 4770 | btrfs_end_transaction_throttle(trans, root); |
| 4486 | fail: | 4771 | fail: |
| 4487 | btrfs_unreserve_metadata_space(root, 3); | ||
| 4488 | if (drop_inode) { | 4772 | if (drop_inode) { |
| 4489 | inode_dec_link_count(inode); | 4773 | inode_dec_link_count(inode); |
| 4490 | iput(inode); | 4774 | iput(inode); |
| @@ -4504,28 +4788,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 4504 | u64 index = 0; | 4788 | u64 index = 0; |
| 4505 | unsigned long nr = 1; | 4789 | unsigned long nr = 1; |
| 4506 | 4790 | ||
| 4791 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 4792 | if (err) | ||
| 4793 | return err; | ||
| 4794 | |||
| 4507 | /* | 4795 | /* |
| 4508 | * 2 items for inode and ref | 4796 | * 2 items for inode and ref |
| 4509 | * 2 items for dir items | 4797 | * 2 items for dir items |
| 4510 | * 1 for xattr if selinux is on | 4798 | * 1 for xattr if selinux is on |
| 4511 | */ | 4799 | */ |
| 4512 | err = btrfs_reserve_metadata_space(root, 5); | 4800 | trans = btrfs_start_transaction(root, 5); |
| 4513 | if (err) | 4801 | if (IS_ERR(trans)) |
| 4514 | return err; | 4802 | return PTR_ERR(trans); |
| 4515 | |||
| 4516 | trans = btrfs_start_transaction(root, 1); | ||
| 4517 | if (!trans) { | ||
| 4518 | err = -ENOMEM; | ||
| 4519 | goto out_unlock; | ||
| 4520 | } | ||
| 4521 | btrfs_set_trans_block_group(trans, dir); | 4803 | btrfs_set_trans_block_group(trans, dir); |
| 4522 | 4804 | ||
| 4523 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 4524 | if (err) { | ||
| 4525 | err = -ENOSPC; | ||
| 4526 | goto out_fail; | ||
| 4527 | } | ||
| 4528 | |||
| 4529 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4805 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 4530 | dentry->d_name.len, | 4806 | dentry->d_name.len, |
| 4531 | dentry->d_parent->d_inode->i_ino, objectid, | 4807 | dentry->d_parent->d_inode->i_ino, objectid, |
| @@ -4565,9 +4841,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 4565 | out_fail: | 4841 | out_fail: |
| 4566 | nr = trans->blocks_used; | 4842 | nr = trans->blocks_used; |
| 4567 | btrfs_end_transaction_throttle(trans, root); | 4843 | btrfs_end_transaction_throttle(trans, root); |
| 4568 | |||
| 4569 | out_unlock: | ||
| 4570 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4571 | if (drop_on_err) | 4844 | if (drop_on_err) |
| 4572 | iput(inode); | 4845 | iput(inode); |
| 4573 | btrfs_btree_balance_dirty(root, nr); | 4846 | btrfs_btree_balance_dirty(root, nr); |
| @@ -4825,6 +5098,7 @@ again: | |||
| 4825 | } | 5098 | } |
| 4826 | flush_dcache_page(page); | 5099 | flush_dcache_page(page); |
| 4827 | } else if (create && PageUptodate(page)) { | 5100 | } else if (create && PageUptodate(page)) { |
| 5101 | WARN_ON(1); | ||
| 4828 | if (!trans) { | 5102 | if (!trans) { |
| 4829 | kunmap(page); | 5103 | kunmap(page); |
| 4830 | free_extent_map(em); | 5104 | free_extent_map(em); |
| @@ -4921,11 +5195,651 @@ out: | |||
| 4921 | return em; | 5195 | return em; |
| 4922 | } | 5196 | } |
| 4923 | 5197 | ||
| 5198 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | ||
| 5199 | u64 start, u64 len) | ||
| 5200 | { | ||
| 5201 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5202 | struct btrfs_trans_handle *trans; | ||
| 5203 | struct extent_map *em; | ||
| 5204 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 5205 | struct btrfs_key ins; | ||
| 5206 | u64 alloc_hint; | ||
| 5207 | int ret; | ||
| 5208 | |||
| 5209 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
| 5210 | |||
| 5211 | trans = btrfs_join_transaction(root, 0); | ||
| 5212 | if (!trans) | ||
| 5213 | return ERR_PTR(-ENOMEM); | ||
| 5214 | |||
| 5215 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 5216 | |||
| 5217 | alloc_hint = get_extent_allocation_hint(inode, start, len); | ||
| 5218 | ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, | ||
| 5219 | alloc_hint, (u64)-1, &ins, 1); | ||
| 5220 | if (ret) { | ||
| 5221 | em = ERR_PTR(ret); | ||
| 5222 | goto out; | ||
| 5223 | } | ||
| 5224 | |||
| 5225 | em = alloc_extent_map(GFP_NOFS); | ||
| 5226 | if (!em) { | ||
| 5227 | em = ERR_PTR(-ENOMEM); | ||
| 5228 | goto out; | ||
| 5229 | } | ||
| 5230 | |||
| 5231 | em->start = start; | ||
| 5232 | em->orig_start = em->start; | ||
| 5233 | em->len = ins.offset; | ||
| 5234 | |||
| 5235 | em->block_start = ins.objectid; | ||
| 5236 | em->block_len = ins.offset; | ||
| 5237 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 5238 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 5239 | |||
| 5240 | while (1) { | ||
| 5241 | write_lock(&em_tree->lock); | ||
| 5242 | ret = add_extent_mapping(em_tree, em); | ||
| 5243 | write_unlock(&em_tree->lock); | ||
| 5244 | if (ret != -EEXIST) | ||
| 5245 | break; | ||
| 5246 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
| 5247 | } | ||
| 5248 | |||
| 5249 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
| 5250 | ins.offset, ins.offset, 0); | ||
| 5251 | if (ret) { | ||
| 5252 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
| 5253 | em = ERR_PTR(ret); | ||
| 5254 | } | ||
| 5255 | out: | ||
| 5256 | btrfs_end_transaction(trans, root); | ||
| 5257 | return em; | ||
| 5258 | } | ||
| 5259 | |||
| 5260 | /* | ||
| 5261 | * returns 1 when the nocow is safe, < 1 on error, 0 if the | ||
| 5262 | * block must be cow'd | ||
| 5263 | */ | ||
| 5264 | static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | ||
| 5265 | struct inode *inode, u64 offset, u64 len) | ||
| 5266 | { | ||
| 5267 | struct btrfs_path *path; | ||
| 5268 | int ret; | ||
| 5269 | struct extent_buffer *leaf; | ||
| 5270 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5271 | struct btrfs_file_extent_item *fi; | ||
| 5272 | struct btrfs_key key; | ||
| 5273 | u64 disk_bytenr; | ||
| 5274 | u64 backref_offset; | ||
| 5275 | u64 extent_end; | ||
| 5276 | u64 num_bytes; | ||
| 5277 | int slot; | ||
| 5278 | int found_type; | ||
| 5279 | |||
| 5280 | path = btrfs_alloc_path(); | ||
| 5281 | if (!path) | ||
| 5282 | return -ENOMEM; | ||
| 5283 | |||
| 5284 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | ||
| 5285 | offset, 0); | ||
| 5286 | if (ret < 0) | ||
| 5287 | goto out; | ||
| 5288 | |||
| 5289 | slot = path->slots[0]; | ||
| 5290 | if (ret == 1) { | ||
| 5291 | if (slot == 0) { | ||
| 5292 | /* can't find the item, must cow */ | ||
| 5293 | ret = 0; | ||
| 5294 | goto out; | ||
| 5295 | } | ||
| 5296 | slot--; | ||
| 5297 | } | ||
| 5298 | ret = 0; | ||
| 5299 | leaf = path->nodes[0]; | ||
| 5300 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
| 5301 | if (key.objectid != inode->i_ino || | ||
| 5302 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
| 5303 | /* not our file or wrong item type, must cow */ | ||
| 5304 | goto out; | ||
| 5305 | } | ||
| 5306 | |||
| 5307 | if (key.offset > offset) { | ||
| 5308 | /* Wrong offset, must cow */ | ||
| 5309 | goto out; | ||
| 5310 | } | ||
| 5311 | |||
| 5312 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
| 5313 | found_type = btrfs_file_extent_type(leaf, fi); | ||
| 5314 | if (found_type != BTRFS_FILE_EXTENT_REG && | ||
| 5315 | found_type != BTRFS_FILE_EXTENT_PREALLOC) { | ||
| 5316 | /* not a regular extent, must cow */ | ||
| 5317 | goto out; | ||
| 5318 | } | ||
| 5319 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 5320 | backref_offset = btrfs_file_extent_offset(leaf, fi); | ||
| 5321 | |||
| 5322 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | ||
| 5323 | if (extent_end < offset + len) { | ||
| 5324 | /* extent doesn't include our full range, must cow */ | ||
| 5325 | goto out; | ||
| 5326 | } | ||
| 5327 | |||
| 5328 | if (btrfs_extent_readonly(root, disk_bytenr)) | ||
| 5329 | goto out; | ||
| 5330 | |||
| 5331 | /* | ||
| 5332 | * look for other files referencing this extent, if we | ||
| 5333 | * find any we must cow | ||
| 5334 | */ | ||
| 5335 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | ||
| 5336 | key.offset - backref_offset, disk_bytenr)) | ||
| 5337 | goto out; | ||
| 5338 | |||
| 5339 | /* | ||
| 5340 | * adjust disk_bytenr and num_bytes to cover just the bytes | ||
| 5341 | * in this extent we are about to write. If there | ||
| 5342 | * are any csums in that range we have to cow in order | ||
| 5343 | * to keep the csums correct | ||
| 5344 | */ | ||
| 5345 | disk_bytenr += backref_offset; | ||
| 5346 | disk_bytenr += offset - key.offset; | ||
| 5347 | num_bytes = min(offset + len, extent_end) - offset; | ||
| 5348 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | ||
| 5349 | goto out; | ||
| 5350 | /* | ||
| 5351 | * all of the above have passed, it is safe to overwrite this extent | ||
| 5352 | * without cow | ||
| 5353 | */ | ||
| 5354 | ret = 1; | ||
| 5355 | out: | ||
| 5356 | btrfs_free_path(path); | ||
| 5357 | return ret; | ||
| 5358 | } | ||
| 5359 | |||
| 5360 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | ||
| 5361 | struct buffer_head *bh_result, int create) | ||
| 5362 | { | ||
| 5363 | struct extent_map *em; | ||
| 5364 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5365 | u64 start = iblock << inode->i_blkbits; | ||
| 5366 | u64 len = bh_result->b_size; | ||
| 5367 | struct btrfs_trans_handle *trans; | ||
| 5368 | |||
| 5369 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
| 5370 | if (IS_ERR(em)) | ||
| 5371 | return PTR_ERR(em); | ||
| 5372 | |||
| 5373 | /* | ||
| 5374 | * Ok for INLINE and COMPRESSED extents we need to fallback on buffered | ||
| 5375 | * io. INLINE is special, and we could probably kludge it in here, but | ||
| 5376 | * it's still buffered so for safety lets just fall back to the generic | ||
| 5377 | * buffered path. | ||
| 5378 | * | ||
| 5379 | * For COMPRESSED we _have_ to read the entire extent in so we can | ||
| 5380 | * decompress it, so there will be buffering required no matter what we | ||
| 5381 | * do, so go ahead and fallback to buffered. | ||
| 5382 | * | ||
| 5383 | * We return -ENOTBLK because thats what makes DIO go ahead and go back | ||
| 5384 | * to buffered IO. Don't blame me, this is the price we pay for using | ||
| 5385 | * the generic code. | ||
| 5386 | */ | ||
| 5387 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || | ||
| 5388 | em->block_start == EXTENT_MAP_INLINE) { | ||
| 5389 | free_extent_map(em); | ||
| 5390 | return -ENOTBLK; | ||
| 5391 | } | ||
| 5392 | |||
| 5393 | /* Just a good old fashioned hole, return */ | ||
| 5394 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | ||
| 5395 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
| 5396 | free_extent_map(em); | ||
| 5397 | /* DIO will do one hole at a time, so just unlock a sector */ | ||
| 5398 | unlock_extent(&BTRFS_I(inode)->io_tree, start, | ||
| 5399 | start + root->sectorsize - 1, GFP_NOFS); | ||
| 5400 | return 0; | ||
| 5401 | } | ||
| 5402 | |||
| 5403 | /* | ||
| 5404 | * We don't allocate a new extent in the following cases | ||
| 5405 | * | ||
| 5406 | * 1) The inode is marked as NODATACOW. In this case we'll just use the | ||
| 5407 | * existing extent. | ||
| 5408 | * 2) The extent is marked as PREALLOC. We're good to go here and can | ||
| 5409 | * just use the extent. | ||
| 5410 | * | ||
| 5411 | */ | ||
| 5412 | if (!create) { | ||
| 5413 | len = em->len - (start - em->start); | ||
| 5414 | goto map; | ||
| 5415 | } | ||
| 5416 | |||
| 5417 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || | ||
| 5418 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
| 5419 | em->block_start != EXTENT_MAP_HOLE)) { | ||
| 5420 | int type; | ||
| 5421 | int ret; | ||
| 5422 | u64 block_start; | ||
| 5423 | |||
| 5424 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
| 5425 | type = BTRFS_ORDERED_PREALLOC; | ||
| 5426 | else | ||
| 5427 | type = BTRFS_ORDERED_NOCOW; | ||
| 5428 | len = min(len, em->len - (start - em->start)); | ||
| 5429 | block_start = em->block_start + (start - em->start); | ||
| 5430 | |||
| 5431 | /* | ||
| 5432 | * we're not going to log anything, but we do need | ||
| 5433 | * to make sure the current transaction stays open | ||
| 5434 | * while we look for nocow cross refs | ||
| 5435 | */ | ||
| 5436 | trans = btrfs_join_transaction(root, 0); | ||
| 5437 | if (!trans) | ||
| 5438 | goto must_cow; | ||
| 5439 | |||
| 5440 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | ||
| 5441 | ret = btrfs_add_ordered_extent_dio(inode, start, | ||
| 5442 | block_start, len, len, type); | ||
| 5443 | btrfs_end_transaction(trans, root); | ||
| 5444 | if (ret) { | ||
| 5445 | free_extent_map(em); | ||
| 5446 | return ret; | ||
| 5447 | } | ||
| 5448 | goto unlock; | ||
| 5449 | } | ||
| 5450 | btrfs_end_transaction(trans, root); | ||
| 5451 | } | ||
| 5452 | must_cow: | ||
| 5453 | /* | ||
| 5454 | * this will cow the extent, reset the len in case we changed | ||
| 5455 | * it above | ||
| 5456 | */ | ||
| 5457 | len = bh_result->b_size; | ||
| 5458 | free_extent_map(em); | ||
| 5459 | em = btrfs_new_extent_direct(inode, start, len); | ||
| 5460 | if (IS_ERR(em)) | ||
| 5461 | return PTR_ERR(em); | ||
| 5462 | len = min(len, em->len - (start - em->start)); | ||
| 5463 | unlock: | ||
| 5464 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, | ||
| 5465 | EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, | ||
| 5466 | 0, NULL, GFP_NOFS); | ||
| 5467 | map: | ||
| 5468 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | ||
| 5469 | inode->i_blkbits; | ||
| 5470 | bh_result->b_size = len; | ||
| 5471 | bh_result->b_bdev = em->bdev; | ||
| 5472 | set_buffer_mapped(bh_result); | ||
| 5473 | if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
| 5474 | set_buffer_new(bh_result); | ||
| 5475 | |||
| 5476 | free_extent_map(em); | ||
| 5477 | |||
| 5478 | return 0; | ||
| 5479 | } | ||
| 5480 | |||
| 5481 | struct btrfs_dio_private { | ||
| 5482 | struct inode *inode; | ||
| 5483 | u64 logical_offset; | ||
| 5484 | u64 disk_bytenr; | ||
| 5485 | u64 bytes; | ||
| 5486 | u32 *csums; | ||
| 5487 | void *private; | ||
| 5488 | }; | ||
| 5489 | |||
| 5490 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
| 5491 | { | ||
| 5492 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
| 5493 | struct bio_vec *bvec = bio->bi_io_vec; | ||
| 5494 | struct btrfs_dio_private *dip = bio->bi_private; | ||
| 5495 | struct inode *inode = dip->inode; | ||
| 5496 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5497 | u64 start; | ||
| 5498 | u32 *private = dip->csums; | ||
| 5499 | |||
| 5500 | start = dip->logical_offset; | ||
| 5501 | do { | ||
| 5502 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | ||
| 5503 | struct page *page = bvec->bv_page; | ||
| 5504 | char *kaddr; | ||
| 5505 | u32 csum = ~(u32)0; | ||
| 5506 | unsigned long flags; | ||
| 5507 | |||
| 5508 | local_irq_save(flags); | ||
| 5509 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
| 5510 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, | ||
| 5511 | csum, bvec->bv_len); | ||
| 5512 | btrfs_csum_final(csum, (char *)&csum); | ||
| 5513 | kunmap_atomic(kaddr, KM_IRQ0); | ||
| 5514 | local_irq_restore(flags); | ||
| 5515 | |||
| 5516 | flush_dcache_page(bvec->bv_page); | ||
| 5517 | if (csum != *private) { | ||
| 5518 | printk(KERN_ERR "btrfs csum failed ino %lu off" | ||
| 5519 | " %llu csum %u private %u\n", | ||
| 5520 | inode->i_ino, (unsigned long long)start, | ||
| 5521 | csum, *private); | ||
| 5522 | err = -EIO; | ||
| 5523 | } | ||
| 5524 | } | ||
| 5525 | |||
| 5526 | start += bvec->bv_len; | ||
| 5527 | private++; | ||
| 5528 | bvec++; | ||
| 5529 | } while (bvec <= bvec_end); | ||
| 5530 | |||
| 5531 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | ||
| 5532 | dip->logical_offset + dip->bytes - 1, GFP_NOFS); | ||
| 5533 | bio->bi_private = dip->private; | ||
| 5534 | |||
| 5535 | kfree(dip->csums); | ||
| 5536 | kfree(dip); | ||
| 5537 | dio_end_io(bio, err); | ||
| 5538 | } | ||
| 5539 | |||
| 5540 | static void btrfs_endio_direct_write(struct bio *bio, int err) | ||
| 5541 | { | ||
| 5542 | struct btrfs_dio_private *dip = bio->bi_private; | ||
| 5543 | struct inode *inode = dip->inode; | ||
| 5544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5545 | struct btrfs_trans_handle *trans; | ||
| 5546 | struct btrfs_ordered_extent *ordered = NULL; | ||
| 5547 | struct extent_state *cached_state = NULL; | ||
| 5548 | int ret; | ||
| 5549 | |||
| 5550 | if (err) | ||
| 5551 | goto out_done; | ||
| 5552 | |||
| 5553 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | ||
| 5554 | dip->logical_offset, dip->bytes); | ||
| 5555 | if (!ret) | ||
| 5556 | goto out_done; | ||
| 5557 | |||
| 5558 | BUG_ON(!ordered); | ||
| 5559 | |||
| 5560 | trans = btrfs_join_transaction(root, 1); | ||
| 5561 | if (!trans) { | ||
| 5562 | err = -ENOMEM; | ||
| 5563 | goto out; | ||
| 5564 | } | ||
| 5565 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 5566 | |||
| 5567 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
| 5568 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
| 5569 | if (!ret) | ||
| 5570 | ret = btrfs_update_inode(trans, root, inode); | ||
| 5571 | err = ret; | ||
| 5572 | goto out; | ||
| 5573 | } | ||
| 5574 | |||
| 5575 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
| 5576 | ordered->file_offset + ordered->len - 1, 0, | ||
| 5577 | &cached_state, GFP_NOFS); | ||
| 5578 | |||
| 5579 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
| 5580 | ret = btrfs_mark_extent_written(trans, inode, | ||
| 5581 | ordered->file_offset, | ||
| 5582 | ordered->file_offset + | ||
| 5583 | ordered->len); | ||
| 5584 | if (ret) { | ||
| 5585 | err = ret; | ||
| 5586 | goto out_unlock; | ||
| 5587 | } | ||
| 5588 | } else { | ||
| 5589 | ret = insert_reserved_file_extent(trans, inode, | ||
| 5590 | ordered->file_offset, | ||
| 5591 | ordered->start, | ||
| 5592 | ordered->disk_len, | ||
| 5593 | ordered->len, | ||
| 5594 | ordered->len, | ||
| 5595 | 0, 0, 0, | ||
| 5596 | BTRFS_FILE_EXTENT_REG); | ||
| 5597 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 5598 | ordered->file_offset, ordered->len); | ||
| 5599 | if (ret) { | ||
| 5600 | err = ret; | ||
| 5601 | WARN_ON(1); | ||
| 5602 | goto out_unlock; | ||
| 5603 | } | ||
| 5604 | } | ||
| 5605 | |||
| 5606 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
| 5607 | btrfs_ordered_update_i_size(inode, 0, ordered); | ||
| 5608 | btrfs_update_inode(trans, root, inode); | ||
| 5609 | out_unlock: | ||
| 5610 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
| 5611 | ordered->file_offset + ordered->len - 1, | ||
| 5612 | &cached_state, GFP_NOFS); | ||
| 5613 | out: | ||
| 5614 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
| 5615 | btrfs_end_transaction(trans, root); | ||
| 5616 | btrfs_put_ordered_extent(ordered); | ||
| 5617 | btrfs_put_ordered_extent(ordered); | ||
| 5618 | out_done: | ||
| 5619 | bio->bi_private = dip->private; | ||
| 5620 | |||
| 5621 | kfree(dip->csums); | ||
| 5622 | kfree(dip); | ||
| 5623 | dio_end_io(bio, err); | ||
| 5624 | } | ||
| 5625 | |||
| 5626 | static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | ||
| 5627 | struct bio *bio, int mirror_num, | ||
| 5628 | unsigned long bio_flags, u64 offset) | ||
| 5629 | { | ||
| 5630 | int ret; | ||
| 5631 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5632 | ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); | ||
| 5633 | BUG_ON(ret); | ||
| 5634 | return 0; | ||
| 5635 | } | ||
| 5636 | |||
| 5637 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | ||
| 5638 | loff_t file_offset) | ||
| 5639 | { | ||
| 5640 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5641 | struct btrfs_dio_private *dip; | ||
| 5642 | struct bio_vec *bvec = bio->bi_io_vec; | ||
| 5643 | u64 start; | ||
| 5644 | int skip_sum; | ||
| 5645 | int write = rw & (1 << BIO_RW); | ||
| 5646 | int ret = 0; | ||
| 5647 | |||
| 5648 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
| 5649 | |||
| 5650 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | ||
| 5651 | if (!dip) { | ||
| 5652 | ret = -ENOMEM; | ||
| 5653 | goto free_ordered; | ||
| 5654 | } | ||
| 5655 | dip->csums = NULL; | ||
| 5656 | |||
| 5657 | if (!skip_sum) { | ||
| 5658 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
| 5659 | if (!dip->csums) { | ||
| 5660 | ret = -ENOMEM; | ||
| 5661 | goto free_ordered; | ||
| 5662 | } | ||
| 5663 | } | ||
| 5664 | |||
| 5665 | dip->private = bio->bi_private; | ||
| 5666 | dip->inode = inode; | ||
| 5667 | dip->logical_offset = file_offset; | ||
| 5668 | |||
| 5669 | start = dip->logical_offset; | ||
| 5670 | dip->bytes = 0; | ||
| 5671 | do { | ||
| 5672 | dip->bytes += bvec->bv_len; | ||
| 5673 | bvec++; | ||
| 5674 | } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); | ||
| 5675 | |||
| 5676 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | ||
| 5677 | bio->bi_private = dip; | ||
| 5678 | |||
| 5679 | if (write) | ||
| 5680 | bio->bi_end_io = btrfs_endio_direct_write; | ||
| 5681 | else | ||
| 5682 | bio->bi_end_io = btrfs_endio_direct_read; | ||
| 5683 | |||
| 5684 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
| 5685 | if (ret) | ||
| 5686 | goto out_err; | ||
| 5687 | |||
| 5688 | if (write && !skip_sum) { | ||
| 5689 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 5690 | inode, rw, bio, 0, 0, | ||
| 5691 | dip->logical_offset, | ||
| 5692 | __btrfs_submit_bio_start_direct_io, | ||
| 5693 | __btrfs_submit_bio_done); | ||
| 5694 | if (ret) | ||
| 5695 | goto out_err; | ||
| 5696 | return; | ||
| 5697 | } else if (!skip_sum) | ||
| 5698 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
| 5699 | dip->logical_offset, dip->csums); | ||
| 5700 | |||
| 5701 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
| 5702 | if (ret) | ||
| 5703 | goto out_err; | ||
| 5704 | return; | ||
| 5705 | out_err: | ||
| 5706 | kfree(dip->csums); | ||
| 5707 | kfree(dip); | ||
| 5708 | free_ordered: | ||
| 5709 | /* | ||
| 5710 | * If this is a write, we need to clean up the reserved space and kill | ||
| 5711 | * the ordered extent. | ||
| 5712 | */ | ||
| 5713 | if (write) { | ||
| 5714 | struct btrfs_ordered_extent *ordered; | ||
| 5715 | ordered = btrfs_lookup_ordered_extent(inode, | ||
| 5716 | dip->logical_offset); | ||
| 5717 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | ||
| 5718 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | ||
| 5719 | btrfs_free_reserved_extent(root, ordered->start, | ||
| 5720 | ordered->disk_len); | ||
| 5721 | btrfs_put_ordered_extent(ordered); | ||
| 5722 | btrfs_put_ordered_extent(ordered); | ||
| 5723 | } | ||
| 5724 | bio_endio(bio, ret); | ||
| 5725 | } | ||
| 5726 | |||
| 5727 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | ||
| 5728 | const struct iovec *iov, loff_t offset, | ||
| 5729 | unsigned long nr_segs) | ||
| 5730 | { | ||
| 5731 | int seg; | ||
| 5732 | size_t size; | ||
| 5733 | unsigned long addr; | ||
| 5734 | unsigned blocksize_mask = root->sectorsize - 1; | ||
| 5735 | ssize_t retval = -EINVAL; | ||
| 5736 | loff_t end = offset; | ||
| 5737 | |||
| 5738 | if (offset & blocksize_mask) | ||
| 5739 | goto out; | ||
| 5740 | |||
| 5741 | /* Check the memory alignment. Blocks cannot straddle pages */ | ||
| 5742 | for (seg = 0; seg < nr_segs; seg++) { | ||
| 5743 | addr = (unsigned long)iov[seg].iov_base; | ||
| 5744 | size = iov[seg].iov_len; | ||
| 5745 | end += size; | ||
| 5746 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
| 5747 | goto out; | ||
| 5748 | } | ||
| 5749 | retval = 0; | ||
| 5750 | out: | ||
| 5751 | return retval; | ||
| 5752 | } | ||
| 4924 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 5753 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
| 4925 | const struct iovec *iov, loff_t offset, | 5754 | const struct iovec *iov, loff_t offset, |
| 4926 | unsigned long nr_segs) | 5755 | unsigned long nr_segs) |
| 4927 | { | 5756 | { |
| 4928 | return -EINVAL; | 5757 | struct file *file = iocb->ki_filp; |
| 5758 | struct inode *inode = file->f_mapping->host; | ||
| 5759 | struct btrfs_ordered_extent *ordered; | ||
| 5760 | struct extent_state *cached_state = NULL; | ||
| 5761 | u64 lockstart, lockend; | ||
| 5762 | ssize_t ret; | ||
| 5763 | int writing = rw & WRITE; | ||
| 5764 | int write_bits = 0; | ||
| 5765 | size_t count = iov_length(iov, nr_segs); | ||
| 5766 | |||
| 5767 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | ||
| 5768 | offset, nr_segs)) { | ||
| 5769 | return 0; | ||
| 5770 | } | ||
| 5771 | |||
| 5772 | lockstart = offset; | ||
| 5773 | lockend = offset + count - 1; | ||
| 5774 | |||
| 5775 | if (writing) { | ||
| 5776 | ret = btrfs_delalloc_reserve_space(inode, count); | ||
| 5777 | if (ret) | ||
| 5778 | goto out; | ||
| 5779 | } | ||
| 5780 | |||
| 5781 | while (1) { | ||
| 5782 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 5783 | 0, &cached_state, GFP_NOFS); | ||
| 5784 | /* | ||
| 5785 | * We're concerned with the entire range that we're going to be | ||
| 5786 | * doing DIO to, so we need to make sure theres no ordered | ||
| 5787 | * extents in this range. | ||
| 5788 | */ | ||
| 5789 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
| 5790 | lockend - lockstart + 1); | ||
| 5791 | if (!ordered) | ||
| 5792 | break; | ||
| 5793 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 5794 | &cached_state, GFP_NOFS); | ||
| 5795 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 5796 | btrfs_put_ordered_extent(ordered); | ||
| 5797 | cond_resched(); | ||
| 5798 | } | ||
| 5799 | |||
| 5800 | /* | ||
| 5801 | * we don't use btrfs_set_extent_delalloc because we don't want | ||
| 5802 | * the dirty or uptodate bits | ||
| 5803 | */ | ||
| 5804 | if (writing) { | ||
| 5805 | write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; | ||
| 5806 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
| 5807 | EXTENT_DELALLOC, 0, NULL, &cached_state, | ||
| 5808 | GFP_NOFS); | ||
| 5809 | if (ret) { | ||
| 5810 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
| 5811 | lockend, EXTENT_LOCKED | write_bits, | ||
| 5812 | 1, 0, &cached_state, GFP_NOFS); | ||
| 5813 | goto out; | ||
| 5814 | } | ||
| 5815 | } | ||
| 5816 | |||
| 5817 | free_extent_state(cached_state); | ||
| 5818 | cached_state = NULL; | ||
| 5819 | |||
| 5820 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
| 5821 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | ||
| 5822 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | ||
| 5823 | btrfs_submit_direct, 0); | ||
| 5824 | |||
| 5825 | if (ret < 0 && ret != -EIOCBQUEUED) { | ||
| 5826 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, | ||
| 5827 | offset + iov_length(iov, nr_segs) - 1, | ||
| 5828 | EXTENT_LOCKED | write_bits, 1, 0, | ||
| 5829 | &cached_state, GFP_NOFS); | ||
| 5830 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | ||
| 5831 | /* | ||
| 5832 | * We're falling back to buffered, unlock the section we didn't | ||
| 5833 | * do IO on. | ||
| 5834 | */ | ||
| 5835 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, | ||
| 5836 | offset + iov_length(iov, nr_segs) - 1, | ||
| 5837 | EXTENT_LOCKED | write_bits, 1, 0, | ||
| 5838 | &cached_state, GFP_NOFS); | ||
| 5839 | } | ||
| 5840 | out: | ||
| 5841 | free_extent_state(cached_state); | ||
| 5842 | return ret; | ||
| 4929 | } | 5843 | } |
| 4930 | 5844 | ||
| 4931 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 5845 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| @@ -5089,7 +6003,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 5089 | u64 page_start; | 6003 | u64 page_start; |
| 5090 | u64 page_end; | 6004 | u64 page_end; |
| 5091 | 6005 | ||
| 5092 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 6006 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 5093 | if (ret) { | 6007 | if (ret) { |
| 5094 | if (ret == -ENOMEM) | 6008 | if (ret == -ENOMEM) |
| 5095 | ret = VM_FAULT_OOM; | 6009 | ret = VM_FAULT_OOM; |
| @@ -5098,13 +6012,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 5098 | goto out; | 6012 | goto out; |
| 5099 | } | 6013 | } |
| 5100 | 6014 | ||
| 5101 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 5102 | if (ret) { | ||
| 5103 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 5104 | ret = VM_FAULT_SIGBUS; | ||
| 5105 | goto out; | ||
| 5106 | } | ||
| 5107 | |||
| 5108 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 6015 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
| 5109 | again: | 6016 | again: |
| 5110 | lock_page(page); | 6017 | lock_page(page); |
| @@ -5114,7 +6021,6 @@ again: | |||
| 5114 | 6021 | ||
| 5115 | if ((page->mapping != inode->i_mapping) || | 6022 | if ((page->mapping != inode->i_mapping) || |
| 5116 | (page_start >= size)) { | 6023 | (page_start >= size)) { |
| 5117 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 5118 | /* page got truncated out from underneath us */ | 6024 | /* page got truncated out from underneath us */ |
| 5119 | goto out_unlock; | 6025 | goto out_unlock; |
| 5120 | } | 6026 | } |
| @@ -5155,7 +6061,6 @@ again: | |||
| 5155 | unlock_extent_cached(io_tree, page_start, page_end, | 6061 | unlock_extent_cached(io_tree, page_start, page_end, |
| 5156 | &cached_state, GFP_NOFS); | 6062 | &cached_state, GFP_NOFS); |
| 5157 | ret = VM_FAULT_SIGBUS; | 6063 | ret = VM_FAULT_SIGBUS; |
| 5158 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 5159 | goto out_unlock; | 6064 | goto out_unlock; |
| 5160 | } | 6065 | } |
| 5161 | ret = 0; | 6066 | ret = 0; |
| @@ -5182,10 +6087,10 @@ again: | |||
| 5182 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6087 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
| 5183 | 6088 | ||
| 5184 | out_unlock: | 6089 | out_unlock: |
| 5185 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 5186 | if (!ret) | 6090 | if (!ret) |
| 5187 | return VM_FAULT_LOCKED; | 6091 | return VM_FAULT_LOCKED; |
| 5188 | unlock_page(page); | 6092 | unlock_page(page); |
| 6093 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
| 5189 | out: | 6094 | out: |
| 5190 | return ret; | 6095 | return ret; |
| 5191 | } | 6096 | } |
| @@ -5210,8 +6115,10 @@ static void btrfs_truncate(struct inode *inode) | |||
| 5210 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6115 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
| 5211 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6116 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
| 5212 | 6117 | ||
| 5213 | trans = btrfs_start_transaction(root, 1); | 6118 | trans = btrfs_start_transaction(root, 0); |
| 6119 | BUG_ON(IS_ERR(trans)); | ||
| 5214 | btrfs_set_trans_block_group(trans, inode); | 6120 | btrfs_set_trans_block_group(trans, inode); |
| 6121 | trans->block_rsv = root->orphan_block_rsv; | ||
| 5215 | 6122 | ||
| 5216 | /* | 6123 | /* |
| 5217 | * setattr is responsible for setting the ordered_data_close flag, | 6124 | * setattr is responsible for setting the ordered_data_close flag, |
| @@ -5234,6 +6141,23 @@ static void btrfs_truncate(struct inode *inode) | |||
| 5234 | btrfs_add_ordered_operation(trans, root, inode); | 6141 | btrfs_add_ordered_operation(trans, root, inode); |
| 5235 | 6142 | ||
| 5236 | while (1) { | 6143 | while (1) { |
| 6144 | if (!trans) { | ||
| 6145 | trans = btrfs_start_transaction(root, 0); | ||
| 6146 | BUG_ON(IS_ERR(trans)); | ||
| 6147 | btrfs_set_trans_block_group(trans, inode); | ||
| 6148 | trans->block_rsv = root->orphan_block_rsv; | ||
| 6149 | } | ||
| 6150 | |||
| 6151 | ret = btrfs_block_rsv_check(trans, root, | ||
| 6152 | root->orphan_block_rsv, 0, 5); | ||
| 6153 | if (ret) { | ||
| 6154 | BUG_ON(ret != -EAGAIN); | ||
| 6155 | ret = btrfs_commit_transaction(trans, root); | ||
| 6156 | BUG_ON(ret); | ||
| 6157 | trans = NULL; | ||
| 6158 | continue; | ||
| 6159 | } | ||
| 6160 | |||
| 5237 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6161 | ret = btrfs_truncate_inode_items(trans, root, inode, |
| 5238 | inode->i_size, | 6162 | inode->i_size, |
| 5239 | BTRFS_EXTENT_DATA_KEY); | 6163 | BTRFS_EXTENT_DATA_KEY); |
| @@ -5245,10 +6169,8 @@ static void btrfs_truncate(struct inode *inode) | |||
| 5245 | 6169 | ||
| 5246 | nr = trans->blocks_used; | 6170 | nr = trans->blocks_used; |
| 5247 | btrfs_end_transaction(trans, root); | 6171 | btrfs_end_transaction(trans, root); |
| 6172 | trans = NULL; | ||
| 5248 | btrfs_btree_balance_dirty(root, nr); | 6173 | btrfs_btree_balance_dirty(root, nr); |
| 5249 | |||
| 5250 | trans = btrfs_start_transaction(root, 1); | ||
| 5251 | btrfs_set_trans_block_group(trans, inode); | ||
| 5252 | } | 6174 | } |
| 5253 | 6175 | ||
| 5254 | if (ret == 0 && inode->i_nlink > 0) { | 6176 | if (ret == 0 && inode->i_nlink > 0) { |
| @@ -5309,21 +6231,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
| 5309 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6231 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
| 5310 | { | 6232 | { |
| 5311 | struct btrfs_inode *ei; | 6233 | struct btrfs_inode *ei; |
| 6234 | struct inode *inode; | ||
| 5312 | 6235 | ||
| 5313 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); | 6236 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); |
| 5314 | if (!ei) | 6237 | if (!ei) |
| 5315 | return NULL; | 6238 | return NULL; |
| 6239 | |||
| 6240 | ei->root = NULL; | ||
| 6241 | ei->space_info = NULL; | ||
| 6242 | ei->generation = 0; | ||
| 6243 | ei->sequence = 0; | ||
| 5316 | ei->last_trans = 0; | 6244 | ei->last_trans = 0; |
| 5317 | ei->last_sub_trans = 0; | 6245 | ei->last_sub_trans = 0; |
| 5318 | ei->logged_trans = 0; | 6246 | ei->logged_trans = 0; |
| 5319 | ei->outstanding_extents = 0; | 6247 | ei->delalloc_bytes = 0; |
| 5320 | ei->reserved_extents = 0; | 6248 | ei->reserved_bytes = 0; |
| 5321 | ei->root = NULL; | 6249 | ei->disk_i_size = 0; |
| 6250 | ei->flags = 0; | ||
| 6251 | ei->index_cnt = (u64)-1; | ||
| 6252 | ei->last_unlink_trans = 0; | ||
| 6253 | |||
| 5322 | spin_lock_init(&ei->accounting_lock); | 6254 | spin_lock_init(&ei->accounting_lock); |
| 6255 | atomic_set(&ei->outstanding_extents, 0); | ||
| 6256 | ei->reserved_extents = 0; | ||
| 6257 | |||
| 6258 | ei->ordered_data_close = 0; | ||
| 6259 | ei->orphan_meta_reserved = 0; | ||
| 6260 | ei->dummy_inode = 0; | ||
| 6261 | ei->force_compress = 0; | ||
| 6262 | |||
| 6263 | inode = &ei->vfs_inode; | ||
| 6264 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | ||
| 6265 | extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); | ||
| 6266 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); | ||
| 6267 | mutex_init(&ei->log_mutex); | ||
| 5323 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| 5324 | INIT_LIST_HEAD(&ei->i_orphan); | 6269 | INIT_LIST_HEAD(&ei->i_orphan); |
| 6270 | INIT_LIST_HEAD(&ei->delalloc_inodes); | ||
| 5325 | INIT_LIST_HEAD(&ei->ordered_operations); | 6271 | INIT_LIST_HEAD(&ei->ordered_operations); |
| 5326 | return &ei->vfs_inode; | 6272 | RB_CLEAR_NODE(&ei->rb_node); |
| 6273 | |||
| 6274 | return inode; | ||
| 5327 | } | 6275 | } |
| 5328 | 6276 | ||
| 5329 | void btrfs_destroy_inode(struct inode *inode) | 6277 | void btrfs_destroy_inode(struct inode *inode) |
| @@ -5333,6 +6281,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 5333 | 6281 | ||
| 5334 | WARN_ON(!list_empty(&inode->i_dentry)); | 6282 | WARN_ON(!list_empty(&inode->i_dentry)); |
| 5335 | WARN_ON(inode->i_data.nrpages); | 6283 | WARN_ON(inode->i_data.nrpages); |
| 6284 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | ||
| 6285 | WARN_ON(BTRFS_I(inode)->reserved_extents); | ||
| 5336 | 6286 | ||
| 5337 | /* | 6287 | /* |
| 5338 | * This can happen where we create an inode, but somebody else also | 6288 | * This can happen where we create an inode, but somebody else also |
| @@ -5353,13 +6303,13 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 5353 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6303 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 5354 | } | 6304 | } |
| 5355 | 6305 | ||
| 5356 | spin_lock(&root->list_lock); | 6306 | spin_lock(&root->orphan_lock); |
| 5357 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6307 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
| 5358 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", | 6308 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", |
| 5359 | inode->i_ino); | 6309 | inode->i_ino); |
| 5360 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6310 | list_del_init(&BTRFS_I(inode)->i_orphan); |
| 5361 | } | 6311 | } |
| 5362 | spin_unlock(&root->list_lock); | 6312 | spin_unlock(&root->orphan_lock); |
| 5363 | 6313 | ||
| 5364 | while (1) { | 6314 | while (1) { |
| 5365 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6315 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
| @@ -5384,7 +6334,6 @@ free: | |||
| 5384 | void btrfs_drop_inode(struct inode *inode) | 6334 | void btrfs_drop_inode(struct inode *inode) |
| 5385 | { | 6335 | { |
| 5386 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6336 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 5387 | |||
| 5388 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) | 6337 | if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) |
| 5389 | generic_delete_inode(inode); | 6338 | generic_delete_inode(inode); |
| 5390 | else | 6339 | else |
| @@ -5481,19 +6430,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 5481 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 6430 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
| 5482 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 6431 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 5483 | return -ENOTEMPTY; | 6432 | return -ENOTEMPTY; |
| 5484 | |||
| 5485 | /* | ||
| 5486 | * We want to reserve the absolute worst case amount of items. So if | ||
| 5487 | * both inodes are subvols and we need to unlink them then that would | ||
| 5488 | * require 4 item modifications, but if they are both normal inodes it | ||
| 5489 | * would require 5 item modifications, so we'll assume their normal | ||
| 5490 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
| 5491 | * should cover the worst case number of items we'll modify. | ||
| 5492 | */ | ||
| 5493 | ret = btrfs_reserve_metadata_space(root, 11); | ||
| 5494 | if (ret) | ||
| 5495 | return ret; | ||
| 5496 | |||
| 5497 | /* | 6433 | /* |
| 5498 | * we're using rename to replace one file with another. | 6434 | * we're using rename to replace one file with another. |
| 5499 | * and the replacement file is large. Start IO on it now so | 6435 | * and the replacement file is large. Start IO on it now so |
| @@ -5506,8 +6442,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 5506 | /* close the racy window with snapshot create/destroy ioctl */ | 6442 | /* close the racy window with snapshot create/destroy ioctl */ |
| 5507 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6443 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 5508 | down_read(&root->fs_info->subvol_sem); | 6444 | down_read(&root->fs_info->subvol_sem); |
| 6445 | /* | ||
| 6446 | * We want to reserve the absolute worst case amount of items. So if | ||
| 6447 | * both inodes are subvols and we need to unlink them then that would | ||
| 6448 | * require 4 item modifications, but if they are both normal inodes it | ||
| 6449 | * would require 5 item modifications, so we'll assume their normal | ||
| 6450 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
| 6451 | * should cover the worst case number of items we'll modify. | ||
| 6452 | */ | ||
| 6453 | trans = btrfs_start_transaction(root, 20); | ||
| 6454 | if (IS_ERR(trans)) | ||
| 6455 | return PTR_ERR(trans); | ||
| 5509 | 6456 | ||
| 5510 | trans = btrfs_start_transaction(root, 1); | ||
| 5511 | btrfs_set_trans_block_group(trans, new_dir); | 6457 | btrfs_set_trans_block_group(trans, new_dir); |
| 5512 | 6458 | ||
| 5513 | if (dest != root) | 6459 | if (dest != root) |
| @@ -5606,7 +6552,6 @@ out_fail: | |||
| 5606 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6552 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 5607 | up_read(&root->fs_info->subvol_sem); | 6553 | up_read(&root->fs_info->subvol_sem); |
| 5608 | 6554 | ||
| 5609 | btrfs_unreserve_metadata_space(root, 11); | ||
| 5610 | return ret; | 6555 | return ret; |
| 5611 | } | 6556 | } |
| 5612 | 6557 | ||
| @@ -5658,6 +6603,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 5658 | return 0; | 6603 | return 0; |
| 5659 | } | 6604 | } |
| 5660 | 6605 | ||
| 6606 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
| 6607 | { | ||
| 6608 | struct btrfs_inode *binode; | ||
| 6609 | struct inode *inode = NULL; | ||
| 6610 | |||
| 6611 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 6612 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
| 6613 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
| 6614 | struct btrfs_inode, delalloc_inodes); | ||
| 6615 | inode = igrab(&binode->vfs_inode); | ||
| 6616 | if (inode) { | ||
| 6617 | list_move_tail(&binode->delalloc_inodes, | ||
| 6618 | &root->fs_info->delalloc_inodes); | ||
| 6619 | break; | ||
| 6620 | } | ||
| 6621 | |||
| 6622 | list_del_init(&binode->delalloc_inodes); | ||
| 6623 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
| 6624 | } | ||
| 6625 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 6626 | |||
| 6627 | if (inode) { | ||
| 6628 | write_inode_now(inode, 0); | ||
| 6629 | if (delay_iput) | ||
| 6630 | btrfs_add_delayed_iput(inode); | ||
| 6631 | else | ||
| 6632 | iput(inode); | ||
| 6633 | return 1; | ||
| 6634 | } | ||
| 6635 | return 0; | ||
| 6636 | } | ||
| 6637 | |||
| 5661 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 6638 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
| 5662 | const char *symname) | 6639 | const char *symname) |
| 5663 | { | 6640 | { |
| @@ -5681,26 +6658,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 5681 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 6658 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| 5682 | return -ENAMETOOLONG; | 6659 | return -ENAMETOOLONG; |
| 5683 | 6660 | ||
| 6661 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 6662 | if (err) | ||
| 6663 | return err; | ||
| 5684 | /* | 6664 | /* |
| 5685 | * 2 items for inode item and ref | 6665 | * 2 items for inode item and ref |
| 5686 | * 2 items for dir items | 6666 | * 2 items for dir items |
| 5687 | * 1 item for xattr if selinux is on | 6667 | * 1 item for xattr if selinux is on |
| 5688 | */ | 6668 | */ |
| 5689 | err = btrfs_reserve_metadata_space(root, 5); | 6669 | trans = btrfs_start_transaction(root, 5); |
| 5690 | if (err) | 6670 | if (IS_ERR(trans)) |
| 5691 | return err; | 6671 | return PTR_ERR(trans); |
| 5692 | 6672 | ||
| 5693 | trans = btrfs_start_transaction(root, 1); | ||
| 5694 | if (!trans) | ||
| 5695 | goto out_fail; | ||
| 5696 | btrfs_set_trans_block_group(trans, dir); | 6673 | btrfs_set_trans_block_group(trans, dir); |
| 5697 | 6674 | ||
| 5698 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 5699 | if (err) { | ||
| 5700 | err = -ENOSPC; | ||
| 5701 | goto out_unlock; | ||
| 5702 | } | ||
| 5703 | |||
| 5704 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6675 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 5705 | dentry->d_name.len, | 6676 | dentry->d_name.len, |
| 5706 | dentry->d_parent->d_inode->i_ino, objectid, | 6677 | dentry->d_parent->d_inode->i_ino, objectid, |
| @@ -5772,8 +6743,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 5772 | out_unlock: | 6743 | out_unlock: |
| 5773 | nr = trans->blocks_used; | 6744 | nr = trans->blocks_used; |
| 5774 | btrfs_end_transaction_throttle(trans, root); | 6745 | btrfs_end_transaction_throttle(trans, root); |
| 5775 | out_fail: | ||
| 5776 | btrfs_unreserve_metadata_space(root, 5); | ||
| 5777 | if (drop_inode) { | 6746 | if (drop_inode) { |
| 5778 | inode_dec_link_count(inode); | 6747 | inode_dec_link_count(inode); |
| 5779 | iput(inode); | 6748 | iput(inode); |
| @@ -5782,36 +6751,28 @@ out_fail: | |||
| 5782 | return err; | 6751 | return err; |
| 5783 | } | 6752 | } |
| 5784 | 6753 | ||
| 5785 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 6754 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
| 5786 | u64 alloc_hint, int mode, loff_t actual_len) | 6755 | u64 start, u64 num_bytes, u64 min_size, |
| 6756 | loff_t actual_len, u64 *alloc_hint) | ||
| 5787 | { | 6757 | { |
| 5788 | struct btrfs_trans_handle *trans; | 6758 | struct btrfs_trans_handle *trans; |
| 5789 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6759 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 5790 | struct btrfs_key ins; | 6760 | struct btrfs_key ins; |
| 5791 | u64 alloc_size; | ||
| 5792 | u64 cur_offset = start; | 6761 | u64 cur_offset = start; |
| 5793 | u64 num_bytes = end - start; | ||
| 5794 | int ret = 0; | 6762 | int ret = 0; |
| 5795 | u64 i_size; | ||
| 5796 | 6763 | ||
| 5797 | while (num_bytes > 0) { | 6764 | while (num_bytes > 0) { |
| 5798 | alloc_size = min(num_bytes, root->fs_info->max_extent); | 6765 | trans = btrfs_start_transaction(root, 3); |
| 5799 | 6766 | if (IS_ERR(trans)) { | |
| 5800 | trans = btrfs_start_transaction(root, 1); | 6767 | ret = PTR_ERR(trans); |
| 5801 | 6768 | break; | |
| 5802 | ret = btrfs_reserve_extent(trans, root, alloc_size, | ||
| 5803 | root->sectorsize, 0, alloc_hint, | ||
| 5804 | (u64)-1, &ins, 1); | ||
| 5805 | if (ret) { | ||
| 5806 | WARN_ON(1); | ||
| 5807 | goto stop_trans; | ||
| 5808 | } | 6769 | } |
| 5809 | 6770 | ||
| 5810 | ret = btrfs_reserve_metadata_space(root, 3); | 6771 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, |
| 6772 | 0, *alloc_hint, (u64)-1, &ins, 1); | ||
| 5811 | if (ret) { | 6773 | if (ret) { |
| 5812 | btrfs_free_reserved_extent(root, ins.objectid, | 6774 | btrfs_end_transaction(trans, root); |
| 5813 | ins.offset); | 6775 | break; |
| 5814 | goto stop_trans; | ||
| 5815 | } | 6776 | } |
| 5816 | 6777 | ||
| 5817 | ret = insert_reserved_file_extent(trans, inode, | 6778 | ret = insert_reserved_file_extent(trans, inode, |
| @@ -5825,34 +6786,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
| 5825 | 6786 | ||
| 5826 | num_bytes -= ins.offset; | 6787 | num_bytes -= ins.offset; |
| 5827 | cur_offset += ins.offset; | 6788 | cur_offset += ins.offset; |
| 5828 | alloc_hint = ins.objectid + ins.offset; | 6789 | *alloc_hint = ins.objectid + ins.offset; |
| 5829 | 6790 | ||
| 5830 | inode->i_ctime = CURRENT_TIME; | 6791 | inode->i_ctime = CURRENT_TIME; |
| 5831 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 6792 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
| 5832 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 6793 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
| 5833 | (actual_len > inode->i_size) && | 6794 | (actual_len > inode->i_size) && |
| 5834 | (cur_offset > inode->i_size)) { | 6795 | (cur_offset > inode->i_size)) { |
| 5835 | |||
| 5836 | if (cur_offset > actual_len) | 6796 | if (cur_offset > actual_len) |
| 5837 | i_size = actual_len; | 6797 | i_size_write(inode, actual_len); |
| 5838 | else | 6798 | else |
| 5839 | i_size = cur_offset; | 6799 | i_size_write(inode, cur_offset); |
| 5840 | i_size_write(inode, i_size); | 6800 | i_size_write(inode, cur_offset); |
| 5841 | btrfs_ordered_update_i_size(inode, i_size, NULL); | 6801 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); |
| 5842 | } | 6802 | } |
| 5843 | 6803 | ||
| 5844 | ret = btrfs_update_inode(trans, root, inode); | 6804 | ret = btrfs_update_inode(trans, root, inode); |
| 5845 | BUG_ON(ret); | 6805 | BUG_ON(ret); |
| 5846 | 6806 | ||
| 5847 | btrfs_end_transaction(trans, root); | 6807 | btrfs_end_transaction(trans, root); |
| 5848 | btrfs_unreserve_metadata_space(root, 3); | ||
| 5849 | } | 6808 | } |
| 5850 | return ret; | 6809 | return ret; |
| 5851 | |||
| 5852 | stop_trans: | ||
| 5853 | btrfs_end_transaction(trans, root); | ||
| 5854 | return ret; | ||
| 5855 | |||
| 5856 | } | 6810 | } |
| 5857 | 6811 | ||
| 5858 | static long btrfs_fallocate(struct inode *inode, int mode, | 6812 | static long btrfs_fallocate(struct inode *inode, int mode, |
| @@ -5885,8 +6839,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
| 5885 | goto out; | 6839 | goto out; |
| 5886 | } | 6840 | } |
| 5887 | 6841 | ||
| 5888 | ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, | 6842 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
| 5889 | alloc_end - alloc_start); | ||
| 5890 | if (ret) | 6843 | if (ret) |
| 5891 | goto out; | 6844 | goto out; |
| 5892 | 6845 | ||
| @@ -5931,16 +6884,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
| 5931 | if (em->block_start == EXTENT_MAP_HOLE || | 6884 | if (em->block_start == EXTENT_MAP_HOLE || |
| 5932 | (cur_offset >= inode->i_size && | 6885 | (cur_offset >= inode->i_size && |
| 5933 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 6886 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
| 5934 | ret = prealloc_file_range(inode, | 6887 | ret = btrfs_prealloc_file_range(inode, mode, cur_offset, |
| 5935 | cur_offset, last_byte, | 6888 | last_byte - cur_offset, |
| 5936 | alloc_hint, mode, offset+len); | 6889 | 1 << inode->i_blkbits, |
| 6890 | offset + len, | ||
| 6891 | &alloc_hint); | ||
| 5937 | if (ret < 0) { | 6892 | if (ret < 0) { |
| 5938 | free_extent_map(em); | 6893 | free_extent_map(em); |
| 5939 | break; | 6894 | break; |
| 5940 | } | 6895 | } |
| 5941 | } | 6896 | } |
| 5942 | if (em->block_start <= EXTENT_MAP_LAST_BYTE) | ||
| 5943 | alloc_hint = em->block_start; | ||
| 5944 | free_extent_map(em); | 6897 | free_extent_map(em); |
| 5945 | 6898 | ||
| 5946 | cur_offset = last_byte; | 6899 | cur_offset = last_byte; |
| @@ -5952,8 +6905,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
| 5952 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 6905 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
| 5953 | &cached_state, GFP_NOFS); | 6906 | &cached_state, GFP_NOFS); |
| 5954 | 6907 | ||
| 5955 | btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, | 6908 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
| 5956 | alloc_end - alloc_start); | ||
| 5957 | out: | 6909 | out: |
| 5958 | mutex_unlock(&inode->i_mutex); | 6910 | mutex_unlock(&inode->i_mutex); |
| 5959 | return ret; | 6911 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2845c6ceecd2..9254b3d58dbe 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | #include <linux/security.h> | 39 | #include <linux/security.h> |
| 40 | #include <linux/xattr.h> | 40 | #include <linux/xattr.h> |
| 41 | #include <linux/vmalloc.h> | 41 | #include <linux/vmalloc.h> |
| 42 | #include <linux/slab.h> | ||
| 42 | #include "compat.h" | 43 | #include "compat.h" |
| 43 | #include "ctree.h" | 44 | #include "ctree.h" |
| 44 | #include "disk-io.h" | 45 | #include "disk-io.h" |
| @@ -48,7 +49,6 @@ | |||
| 48 | #include "print-tree.h" | 49 | #include "print-tree.h" |
| 49 | #include "volumes.h" | 50 | #include "volumes.h" |
| 50 | #include "locking.h" | 51 | #include "locking.h" |
| 51 | #include "ctree.h" | ||
| 52 | 52 | ||
| 53 | /* Mask out flags that are inappropriate for the given type of inode. */ | 53 | /* Mask out flags that are inappropriate for the given type of inode. */ |
| 54 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) | 54 | static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) |
| @@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
| 240 | u64 index = 0; | 240 | u64 index = 0; |
| 241 | 241 | ||
| 242 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | ||
| 243 | 0, &objectid); | ||
| 244 | if (ret) | ||
| 245 | return ret; | ||
| 242 | /* | 246 | /* |
| 243 | * 1 - inode item | 247 | * 1 - inode item |
| 244 | * 2 - refs | 248 | * 2 - refs |
| 245 | * 1 - root item | 249 | * 1 - root item |
| 246 | * 2 - dir items | 250 | * 2 - dir items |
| 247 | */ | 251 | */ |
| 248 | ret = btrfs_reserve_metadata_space(root, 6); | 252 | trans = btrfs_start_transaction(root, 6); |
| 249 | if (ret) | 253 | if (IS_ERR(trans)) |
| 250 | return ret; | 254 | return PTR_ERR(trans); |
| 251 | |||
| 252 | trans = btrfs_start_transaction(root, 1); | ||
| 253 | BUG_ON(!trans); | ||
| 254 | |||
| 255 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
| 256 | 0, &objectid); | ||
| 257 | if (ret) | ||
| 258 | goto fail; | ||
| 259 | 255 | ||
| 260 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 256 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
| 261 | 0, objectid, NULL, 0, 0, 0); | 257 | 0, objectid, NULL, 0, 0, 0); |
| @@ -345,13 +341,10 @@ fail: | |||
| 345 | err = btrfs_commit_transaction(trans, root); | 341 | err = btrfs_commit_transaction(trans, root); |
| 346 | if (err && !ret) | 342 | if (err && !ret) |
| 347 | ret = err; | 343 | ret = err; |
| 348 | |||
| 349 | btrfs_unreserve_metadata_space(root, 6); | ||
| 350 | return ret; | 344 | return ret; |
| 351 | } | 345 | } |
| 352 | 346 | ||
| 353 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 347 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) |
| 354 | char *name, int namelen) | ||
| 355 | { | 348 | { |
| 356 | struct inode *inode; | 349 | struct inode *inode; |
| 357 | struct btrfs_pending_snapshot *pending_snapshot; | 350 | struct btrfs_pending_snapshot *pending_snapshot; |
| @@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 361 | if (!root->ref_cows) | 354 | if (!root->ref_cows) |
| 362 | return -EINVAL; | 355 | return -EINVAL; |
| 363 | 356 | ||
| 364 | /* | ||
| 365 | * 1 - inode item | ||
| 366 | * 2 - refs | ||
| 367 | * 1 - root item | ||
| 368 | * 2 - dir items | ||
| 369 | */ | ||
| 370 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 371 | if (ret) | ||
| 372 | goto fail; | ||
| 373 | |||
| 374 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 357 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 375 | if (!pending_snapshot) { | 358 | if (!pending_snapshot) |
| 376 | ret = -ENOMEM; | 359 | return -ENOMEM; |
| 377 | btrfs_unreserve_metadata_space(root, 6); | 360 | |
| 378 | goto fail; | 361 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
| 379 | } | ||
| 380 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | ||
| 381 | if (!pending_snapshot->name) { | ||
| 382 | ret = -ENOMEM; | ||
| 383 | kfree(pending_snapshot); | ||
| 384 | btrfs_unreserve_metadata_space(root, 6); | ||
| 385 | goto fail; | ||
| 386 | } | ||
| 387 | memcpy(pending_snapshot->name, name, namelen); | ||
| 388 | pending_snapshot->name[namelen] = '\0'; | ||
| 389 | pending_snapshot->dentry = dentry; | 362 | pending_snapshot->dentry = dentry; |
| 390 | trans = btrfs_start_transaction(root, 1); | ||
| 391 | BUG_ON(!trans); | ||
| 392 | pending_snapshot->root = root; | 363 | pending_snapshot->root = root; |
| 364 | |||
| 365 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | ||
| 366 | if (IS_ERR(trans)) { | ||
| 367 | ret = PTR_ERR(trans); | ||
| 368 | goto fail; | ||
| 369 | } | ||
| 370 | |||
| 371 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | ||
| 372 | BUG_ON(ret); | ||
| 373 | |||
| 393 | list_add(&pending_snapshot->list, | 374 | list_add(&pending_snapshot->list, |
| 394 | &trans->transaction->pending_snapshots); | 375 | &trans->transaction->pending_snapshots); |
| 395 | ret = btrfs_commit_transaction(trans, root); | 376 | ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); |
| 396 | BUG_ON(ret); | 377 | BUG_ON(ret); |
| 397 | btrfs_unreserve_metadata_space(root, 6); | 378 | |
| 379 | ret = pending_snapshot->error; | ||
| 380 | if (ret) | ||
| 381 | goto fail; | ||
| 382 | |||
| 383 | btrfs_orphan_cleanup(pending_snapshot->snap); | ||
| 398 | 384 | ||
| 399 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 385 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
| 400 | if (IS_ERR(inode)) { | 386 | if (IS_ERR(inode)) { |
| @@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 405 | d_instantiate(dentry, inode); | 391 | d_instantiate(dentry, inode); |
| 406 | ret = 0; | 392 | ret = 0; |
| 407 | fail: | 393 | fail: |
| 394 | kfree(pending_snapshot); | ||
| 408 | return ret; | 395 | return ret; |
| 409 | } | 396 | } |
| 410 | 397 | ||
| @@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
| 456 | goto out_up_read; | 443 | goto out_up_read; |
| 457 | 444 | ||
| 458 | if (snap_src) { | 445 | if (snap_src) { |
| 459 | error = create_snapshot(snap_src, dentry, | 446 | error = create_snapshot(snap_src, dentry); |
| 460 | name, namelen); | ||
| 461 | } else { | 447 | } else { |
| 462 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 448 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
| 463 | name, namelen); | 449 | name, namelen); |
| @@ -511,7 +497,7 @@ static int should_defrag_range(struct inode *inode, u64 start, u64 len, | |||
| 511 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 497 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
| 512 | unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); | 498 | unlock_extent(io_tree, start, start + len - 1, GFP_NOFS); |
| 513 | 499 | ||
| 514 | if (!em) | 500 | if (IS_ERR(em)) |
| 515 | return 0; | 501 | return 0; |
| 516 | } | 502 | } |
| 517 | 503 | ||
| @@ -601,19 +587,9 @@ static int btrfs_defrag_file(struct file *file, | |||
| 601 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 587 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
| 602 | BTRFS_I(inode)->force_compress = 1; | 588 | BTRFS_I(inode)->force_compress = 1; |
| 603 | 589 | ||
| 604 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 590 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 605 | if (ret) { | 591 | if (ret) |
| 606 | ret = -ENOSPC; | 592 | goto err_unlock; |
| 607 | break; | ||
| 608 | } | ||
| 609 | |||
| 610 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 611 | if (ret) { | ||
| 612 | btrfs_free_reserved_data_space(root, inode, | ||
| 613 | PAGE_CACHE_SIZE); | ||
| 614 | ret = -ENOSPC; | ||
| 615 | break; | ||
| 616 | } | ||
| 617 | again: | 593 | again: |
| 618 | if (inode->i_size == 0 || | 594 | if (inode->i_size == 0 || |
| 619 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | 595 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { |
| @@ -622,8 +598,10 @@ again: | |||
| 622 | } | 598 | } |
| 623 | 599 | ||
| 624 | page = grab_cache_page(inode->i_mapping, i); | 600 | page = grab_cache_page(inode->i_mapping, i); |
| 625 | if (!page) | 601 | if (!page) { |
| 602 | ret = -ENOMEM; | ||
| 626 | goto err_reservations; | 603 | goto err_reservations; |
| 604 | } | ||
| 627 | 605 | ||
| 628 | if (!PageUptodate(page)) { | 606 | if (!PageUptodate(page)) { |
| 629 | btrfs_readpage(NULL, page); | 607 | btrfs_readpage(NULL, page); |
| @@ -631,6 +609,7 @@ again: | |||
| 631 | if (!PageUptodate(page)) { | 609 | if (!PageUptodate(page)) { |
| 632 | unlock_page(page); | 610 | unlock_page(page); |
| 633 | page_cache_release(page); | 611 | page_cache_release(page); |
| 612 | ret = -EIO; | ||
| 634 | goto err_reservations; | 613 | goto err_reservations; |
| 635 | } | 614 | } |
| 636 | } | 615 | } |
| @@ -644,8 +623,7 @@ again: | |||
| 644 | wait_on_page_writeback(page); | 623 | wait_on_page_writeback(page); |
| 645 | 624 | ||
| 646 | if (PageDirty(page)) { | 625 | if (PageDirty(page)) { |
| 647 | btrfs_free_reserved_data_space(root, inode, | 626 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 648 | PAGE_CACHE_SIZE); | ||
| 649 | goto loop_unlock; | 627 | goto loop_unlock; |
| 650 | } | 628 | } |
| 651 | 629 | ||
| @@ -683,7 +661,6 @@ loop_unlock: | |||
| 683 | page_cache_release(page); | 661 | page_cache_release(page); |
| 684 | mutex_unlock(&inode->i_mutex); | 662 | mutex_unlock(&inode->i_mutex); |
| 685 | 663 | ||
| 686 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 687 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 664 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
| 688 | i++; | 665 | i++; |
| 689 | } | 666 | } |
| @@ -713,9 +690,9 @@ loop_unlock: | |||
| 713 | return 0; | 690 | return 0; |
| 714 | 691 | ||
| 715 | err_reservations: | 692 | err_reservations: |
| 693 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
| 694 | err_unlock: | ||
| 716 | mutex_unlock(&inode->i_mutex); | 695 | mutex_unlock(&inode->i_mutex); |
| 717 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 718 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 719 | return ret; | 696 | return ret; |
| 720 | } | 697 | } |
| 721 | 698 | ||
| @@ -811,7 +788,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 811 | device->name, (unsigned long long)new_size); | 788 | device->name, (unsigned long long)new_size); |
| 812 | 789 | ||
| 813 | if (new_size > old_size) { | 790 | if (new_size > old_size) { |
| 814 | trans = btrfs_start_transaction(root, 1); | 791 | trans = btrfs_start_transaction(root, 0); |
| 815 | ret = btrfs_grow_device(trans, device, new_size); | 792 | ret = btrfs_grow_device(trans, device, new_size); |
| 816 | btrfs_commit_transaction(trans, root); | 793 | btrfs_commit_transaction(trans, root); |
| 817 | } else { | 794 | } else { |
| @@ -1212,6 +1189,9 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file, | |||
| 1212 | return -EPERM; | 1189 | return -EPERM; |
| 1213 | 1190 | ||
| 1214 | args = kmalloc(sizeof(*args), GFP_KERNEL); | 1191 | args = kmalloc(sizeof(*args), GFP_KERNEL); |
| 1192 | if (!args) | ||
| 1193 | return -ENOMEM; | ||
| 1194 | |||
| 1215 | if (copy_from_user(args, argp, sizeof(*args))) { | 1195 | if (copy_from_user(args, argp, sizeof(*args))) { |
| 1216 | kfree(args); | 1196 | kfree(args); |
| 1217 | return -EFAULT; | 1197 | return -EFAULT; |
| @@ -1297,7 +1277,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 1297 | if (err) | 1277 | if (err) |
| 1298 | goto out_up_write; | 1278 | goto out_up_write; |
| 1299 | 1279 | ||
| 1300 | trans = btrfs_start_transaction(root, 1); | 1280 | trans = btrfs_start_transaction(root, 0); |
| 1281 | if (IS_ERR(trans)) { | ||
| 1282 | err = PTR_ERR(trans); | ||
| 1283 | goto out_up_write; | ||
| 1284 | } | ||
| 1285 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
| 1286 | |||
| 1301 | ret = btrfs_unlink_subvol(trans, root, dir, | 1287 | ret = btrfs_unlink_subvol(trans, root, dir, |
| 1302 | dest->root_key.objectid, | 1288 | dest->root_key.objectid, |
| 1303 | dentry->d_name.name, | 1289 | dentry->d_name.name, |
| @@ -1311,10 +1297,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 1311 | dest->root_item.drop_level = 0; | 1297 | dest->root_item.drop_level = 0; |
| 1312 | btrfs_set_root_refs(&dest->root_item, 0); | 1298 | btrfs_set_root_refs(&dest->root_item, 0); |
| 1313 | 1299 | ||
| 1314 | ret = btrfs_insert_orphan_item(trans, | 1300 | if (!xchg(&dest->orphan_item_inserted, 1)) { |
| 1315 | root->fs_info->tree_root, | 1301 | ret = btrfs_insert_orphan_item(trans, |
| 1316 | dest->root_key.objectid); | 1302 | root->fs_info->tree_root, |
| 1317 | BUG_ON(ret); | 1303 | dest->root_key.objectid); |
| 1304 | BUG_ON(ret); | ||
| 1305 | } | ||
| 1318 | 1306 | ||
| 1319 | ret = btrfs_commit_transaction(trans, root); | 1307 | ret = btrfs_commit_transaction(trans, root); |
| 1320 | BUG_ON(ret); | 1308 | BUG_ON(ret); |
| @@ -1355,8 +1343,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 1355 | ret = -EPERM; | 1343 | ret = -EPERM; |
| 1356 | goto out; | 1344 | goto out; |
| 1357 | } | 1345 | } |
| 1358 | btrfs_defrag_root(root, 0); | 1346 | ret = btrfs_defrag_root(root, 0); |
| 1359 | btrfs_defrag_root(root->fs_info->extent_root, 0); | 1347 | if (ret) |
| 1348 | goto out; | ||
| 1349 | ret = btrfs_defrag_root(root->fs_info->extent_root, 0); | ||
| 1360 | break; | 1350 | break; |
| 1361 | case S_IFREG: | 1351 | case S_IFREG: |
| 1362 | if (!(file->f_mode & FMODE_WRITE)) { | 1352 | if (!(file->f_mode & FMODE_WRITE)) { |
| @@ -1375,6 +1365,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 1375 | sizeof(*range))) { | 1365 | sizeof(*range))) { |
| 1376 | ret = -EFAULT; | 1366 | ret = -EFAULT; |
| 1377 | kfree(range); | 1367 | kfree(range); |
| 1368 | goto out; | ||
| 1378 | } | 1369 | } |
| 1379 | /* compression requires us to start the IO */ | 1370 | /* compression requires us to start the IO */ |
| 1380 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | 1371 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { |
| @@ -1385,9 +1376,11 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 1385 | /* the rest are all set to zero by kzalloc */ | 1376 | /* the rest are all set to zero by kzalloc */ |
| 1386 | range->len = (u64)-1; | 1377 | range->len = (u64)-1; |
| 1387 | } | 1378 | } |
| 1388 | btrfs_defrag_file(file, range); | 1379 | ret = btrfs_defrag_file(file, range); |
| 1389 | kfree(range); | 1380 | kfree(range); |
| 1390 | break; | 1381 | break; |
| 1382 | default: | ||
| 1383 | ret = -EINVAL; | ||
| 1391 | } | 1384 | } |
| 1392 | out: | 1385 | out: |
| 1393 | mnt_drop_write(file->f_path.mnt); | 1386 | mnt_drop_write(file->f_path.mnt); |
| @@ -1465,7 +1458,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1465 | */ | 1458 | */ |
| 1466 | 1459 | ||
| 1467 | /* the destination must be opened for writing */ | 1460 | /* the destination must be opened for writing */ |
| 1468 | if (!(file->f_mode & FMODE_WRITE)) | 1461 | if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) |
| 1469 | return -EINVAL; | 1462 | return -EINVAL; |
| 1470 | 1463 | ||
| 1471 | ret = mnt_want_write(file->f_path.mnt); | 1464 | ret = mnt_want_write(file->f_path.mnt); |
| @@ -1477,12 +1470,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1477 | ret = -EBADF; | 1470 | ret = -EBADF; |
| 1478 | goto out_drop_write; | 1471 | goto out_drop_write; |
| 1479 | } | 1472 | } |
| 1473 | |||
| 1480 | src = src_file->f_dentry->d_inode; | 1474 | src = src_file->f_dentry->d_inode; |
| 1481 | 1475 | ||
| 1482 | ret = -EINVAL; | 1476 | ret = -EINVAL; |
| 1483 | if (src == inode) | 1477 | if (src == inode) |
| 1484 | goto out_fput; | 1478 | goto out_fput; |
| 1485 | 1479 | ||
| 1480 | /* the src must be open for reading */ | ||
| 1481 | if (!(src_file->f_mode & FMODE_READ)) | ||
| 1482 | goto out_fput; | ||
| 1483 | |||
| 1486 | ret = -EISDIR; | 1484 | ret = -EISDIR; |
| 1487 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) | 1485 | if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) |
| 1488 | goto out_fput; | 1486 | goto out_fput; |
| @@ -1513,7 +1511,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1513 | 1511 | ||
| 1514 | /* determine range to clone */ | 1512 | /* determine range to clone */ |
| 1515 | ret = -EINVAL; | 1513 | ret = -EINVAL; |
| 1516 | if (off >= src->i_size || off + len > src->i_size) | 1514 | if (off + len > src->i_size || off + len < off) |
| 1517 | goto out_unlock; | 1515 | goto out_unlock; |
| 1518 | if (len == 0) | 1516 | if (len == 0) |
| 1519 | olen = len = src->i_size - off; | 1517 | olen = len = src->i_size - off; |
| @@ -1541,12 +1539,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1541 | btrfs_wait_ordered_range(src, off, off+len); | 1539 | btrfs_wait_ordered_range(src, off, off+len); |
| 1542 | } | 1540 | } |
| 1543 | 1541 | ||
| 1544 | trans = btrfs_start_transaction(root, 1); | ||
| 1545 | BUG_ON(!trans); | ||
| 1546 | |||
| 1547 | /* punch hole in destination first */ | ||
| 1548 | btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); | ||
| 1549 | |||
| 1550 | /* clone data */ | 1542 | /* clone data */ |
| 1551 | key.objectid = src->i_ino; | 1543 | key.objectid = src->i_ino; |
| 1552 | key.type = BTRFS_EXTENT_DATA_KEY; | 1544 | key.type = BTRFS_EXTENT_DATA_KEY; |
| @@ -1557,7 +1549,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1557 | * note the key will change type as we walk through the | 1549 | * note the key will change type as we walk through the |
| 1558 | * tree. | 1550 | * tree. |
| 1559 | */ | 1551 | */ |
| 1560 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 1552 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 1561 | if (ret < 0) | 1553 | if (ret < 0) |
| 1562 | goto out; | 1554 | goto out; |
| 1563 | 1555 | ||
| @@ -1586,6 +1578,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1586 | u64 disko = 0, diskl = 0; | 1578 | u64 disko = 0, diskl = 0; |
| 1587 | u64 datao = 0, datal = 0; | 1579 | u64 datao = 0, datal = 0; |
| 1588 | u8 comp; | 1580 | u8 comp; |
| 1581 | u64 endoff; | ||
| 1589 | 1582 | ||
| 1590 | size = btrfs_item_size_nr(leaf, slot); | 1583 | size = btrfs_item_size_nr(leaf, slot); |
| 1591 | read_extent_buffer(leaf, buf, | 1584 | read_extent_buffer(leaf, buf, |
| @@ -1620,12 +1613,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1620 | new_key.objectid = inode->i_ino; | 1613 | new_key.objectid = inode->i_ino; |
| 1621 | new_key.offset = key.offset + destoff - off; | 1614 | new_key.offset = key.offset + destoff - off; |
| 1622 | 1615 | ||
| 1616 | trans = btrfs_start_transaction(root, 1); | ||
| 1617 | if (IS_ERR(trans)) { | ||
| 1618 | ret = PTR_ERR(trans); | ||
| 1619 | goto out; | ||
| 1620 | } | ||
| 1621 | |||
| 1623 | if (type == BTRFS_FILE_EXTENT_REG || | 1622 | if (type == BTRFS_FILE_EXTENT_REG || |
| 1624 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 1623 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
| 1624 | if (off > key.offset) { | ||
| 1625 | datao += off - key.offset; | ||
| 1626 | datal -= off - key.offset; | ||
| 1627 | } | ||
| 1628 | |||
| 1629 | if (key.offset + datal > off + len) | ||
| 1630 | datal = off + len - key.offset; | ||
| 1631 | |||
| 1632 | ret = btrfs_drop_extents(trans, inode, | ||
| 1633 | new_key.offset, | ||
| 1634 | new_key.offset + datal, | ||
| 1635 | &hint_byte, 1); | ||
| 1636 | BUG_ON(ret); | ||
| 1637 | |||
| 1625 | ret = btrfs_insert_empty_item(trans, root, path, | 1638 | ret = btrfs_insert_empty_item(trans, root, path, |
| 1626 | &new_key, size); | 1639 | &new_key, size); |
| 1627 | if (ret) | 1640 | BUG_ON(ret); |
| 1628 | goto out; | ||
| 1629 | 1641 | ||
| 1630 | leaf = path->nodes[0]; | 1642 | leaf = path->nodes[0]; |
| 1631 | slot = path->slots[0]; | 1643 | slot = path->slots[0]; |
| @@ -1636,14 +1648,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1636 | extent = btrfs_item_ptr(leaf, slot, | 1648 | extent = btrfs_item_ptr(leaf, slot, |
| 1637 | struct btrfs_file_extent_item); | 1649 | struct btrfs_file_extent_item); |
| 1638 | 1650 | ||
| 1639 | if (off > key.offset) { | ||
| 1640 | datao += off - key.offset; | ||
| 1641 | datal -= off - key.offset; | ||
| 1642 | } | ||
| 1643 | |||
| 1644 | if (key.offset + datal > off + len) | ||
| 1645 | datal = off + len - key.offset; | ||
| 1646 | |||
| 1647 | /* disko == 0 means it's a hole */ | 1651 | /* disko == 0 means it's a hole */ |
| 1648 | if (!disko) | 1652 | if (!disko) |
| 1649 | datao = 0; | 1653 | datao = 0; |
| @@ -1674,14 +1678,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1674 | 1678 | ||
| 1675 | if (comp && (skip || trim)) { | 1679 | if (comp && (skip || trim)) { |
| 1676 | ret = -EINVAL; | 1680 | ret = -EINVAL; |
| 1681 | btrfs_end_transaction(trans, root); | ||
| 1677 | goto out; | 1682 | goto out; |
| 1678 | } | 1683 | } |
| 1679 | size -= skip + trim; | 1684 | size -= skip + trim; |
| 1680 | datal -= skip + trim; | 1685 | datal -= skip + trim; |
| 1686 | |||
| 1687 | ret = btrfs_drop_extents(trans, inode, | ||
| 1688 | new_key.offset, | ||
| 1689 | new_key.offset + datal, | ||
| 1690 | &hint_byte, 1); | ||
| 1691 | BUG_ON(ret); | ||
| 1692 | |||
| 1681 | ret = btrfs_insert_empty_item(trans, root, path, | 1693 | ret = btrfs_insert_empty_item(trans, root, path, |
| 1682 | &new_key, size); | 1694 | &new_key, size); |
| 1683 | if (ret) | 1695 | BUG_ON(ret); |
| 1684 | goto out; | ||
| 1685 | 1696 | ||
| 1686 | if (skip) { | 1697 | if (skip) { |
| 1687 | u32 start = | 1698 | u32 start = |
| @@ -1699,8 +1710,26 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1699 | } | 1710 | } |
| 1700 | 1711 | ||
| 1701 | btrfs_mark_buffer_dirty(leaf); | 1712 | btrfs_mark_buffer_dirty(leaf); |
| 1702 | } | 1713 | btrfs_release_path(root, path); |
| 1714 | |||
| 1715 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 1703 | 1716 | ||
| 1717 | /* | ||
| 1718 | * we round up to the block size at eof when | ||
| 1719 | * determining which extents to clone above, | ||
| 1720 | * but shouldn't round up the file size | ||
| 1721 | */ | ||
| 1722 | endoff = new_key.offset + datal; | ||
| 1723 | if (endoff > off+olen) | ||
| 1724 | endoff = off+olen; | ||
| 1725 | if (endoff > inode->i_size) | ||
| 1726 | btrfs_i_size_write(inode, endoff); | ||
| 1727 | |||
| 1728 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
| 1729 | ret = btrfs_update_inode(trans, root, inode); | ||
| 1730 | BUG_ON(ret); | ||
| 1731 | btrfs_end_transaction(trans, root); | ||
| 1732 | } | ||
| 1704 | next: | 1733 | next: |
| 1705 | btrfs_release_path(root, path); | 1734 | btrfs_release_path(root, path); |
| 1706 | key.offset++; | 1735 | key.offset++; |
| @@ -1708,17 +1737,7 @@ next: | |||
| 1708 | ret = 0; | 1737 | ret = 0; |
| 1709 | out: | 1738 | out: |
| 1710 | btrfs_release_path(root, path); | 1739 | btrfs_release_path(root, path); |
| 1711 | if (ret == 0) { | ||
| 1712 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 1713 | if (destoff + olen > inode->i_size) | ||
| 1714 | btrfs_i_size_write(inode, destoff + olen); | ||
| 1715 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
| 1716 | ret = btrfs_update_inode(trans, root, inode); | ||
| 1717 | } | ||
| 1718 | btrfs_end_transaction(trans, root); | ||
| 1719 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 1740 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
| 1720 | if (ret) | ||
| 1721 | vmtruncate(inode, 0); | ||
| 1722 | out_unlock: | 1741 | out_unlock: |
| 1723 | mutex_unlock(&src->i_mutex); | 1742 | mutex_unlock(&src->i_mutex); |
| 1724 | mutex_unlock(&inode->i_mutex); | 1743 | mutex_unlock(&inode->i_mutex); |
| @@ -1836,7 +1855,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) | |||
| 1836 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 1855 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); |
| 1837 | di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, | 1856 | di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, |
| 1838 | dir_id, "default", 7, 1); | 1857 | dir_id, "default", 7, 1); |
| 1839 | if (!di) { | 1858 | if (IS_ERR_OR_NULL(di)) { |
| 1840 | btrfs_free_path(path); | 1859 | btrfs_free_path(path); |
| 1841 | btrfs_end_transaction(trans, root); | 1860 | btrfs_end_transaction(trans, root); |
| 1842 | printk(KERN_ERR "Umm, you don't have the default dir item, " | 1861 | printk(KERN_ERR "Umm, you don't have the default dir item, " |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 1c36e5cd8f55..6151f2ea38bb 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
| 17 | */ | 17 | */ |
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/gfp.h> | ||
| 20 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
| 21 | #include <linux/spinlock.h> | 20 | #include <linux/spinlock.h> |
| 22 | #include <linux/page-flags.h> | 21 | #include <linux/page-flags.h> |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a8ffecd0b491..e56c72bc5add 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -16,7 +16,6 @@ | |||
| 16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/gfp.h> | ||
| 20 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
| 21 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
| 22 | #include <linux/writeback.h> | 21 | #include <linux/writeback.h> |
| @@ -125,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
| 125 | return 1; | 124 | return 1; |
| 126 | } | 125 | } |
| 127 | 126 | ||
| 127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
| 128 | u64 len) | ||
| 129 | { | ||
| 130 | if (file_offset + len <= entry->file_offset || | ||
| 131 | entry->file_offset + entry->len <= file_offset) | ||
| 132 | return 0; | ||
| 133 | return 1; | ||
| 134 | } | ||
| 135 | |||
| 128 | /* | 136 | /* |
| 129 | * look find the first ordered struct that has this offset, otherwise | 137 | * look find the first ordered struct that has this offset, otherwise |
| 130 | * the first one less than this offset | 138 | * the first one less than this offset |
| @@ -162,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
| 162 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
| 163 | * inserted. | 171 | * inserted. |
| 164 | */ | 172 | */ |
| 165 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 166 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
| 175 | int type, int dio) | ||
| 167 | { | 176 | { |
| 168 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
| 169 | struct rb_node *node; | 178 | struct rb_node *node; |
| @@ -183,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 183 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
| 184 | set_bit(type, &entry->flags); | 193 | set_bit(type, &entry->flags); |
| 185 | 194 | ||
| 195 | if (dio) | ||
| 196 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
| 197 | |||
| 186 | /* one ref for the tree */ | 198 | /* one ref for the tree */ |
| 187 | atomic_set(&entry->refs, 1); | 199 | atomic_set(&entry->refs, 1); |
| 188 | init_waitqueue_head(&entry->wait); | 200 | init_waitqueue_head(&entry->wait); |
| @@ -204,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 204 | return 0; | 216 | return 0; |
| 205 | } | 217 | } |
| 206 | 218 | ||
| 219 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
| 220 | u64 start, u64 len, u64 disk_len, int type) | ||
| 221 | { | ||
| 222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
| 223 | disk_len, type, 0); | ||
| 224 | } | ||
| 225 | |||
| 226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
| 227 | u64 start, u64 len, u64 disk_len, int type) | ||
| 228 | { | ||
| 229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
| 230 | disk_len, type, 1); | ||
| 231 | } | ||
| 232 | |||
| 207 | /* | 233 | /* |
| 208 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 234 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
| 209 | * when an ordered extent is finished. If the list covers more than one | 235 | * when an ordered extent is finished. If the list covers more than one |
| @@ -303,6 +329,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
| 303 | struct btrfs_ordered_extent *entry) | 329 | struct btrfs_ordered_extent *entry) |
| 304 | { | 330 | { |
| 305 | struct btrfs_ordered_inode_tree *tree; | 331 | struct btrfs_ordered_inode_tree *tree; |
| 332 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 306 | struct rb_node *node; | 333 | struct rb_node *node; |
| 307 | 334 | ||
| 308 | tree = &BTRFS_I(inode)->ordered_tree; | 335 | tree = &BTRFS_I(inode)->ordered_tree; |
| @@ -311,13 +338,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
| 311 | tree->last = NULL; | 338 | tree->last = NULL; |
| 312 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 339 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
| 313 | 340 | ||
| 314 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 341 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 315 | BTRFS_I(inode)->outstanding_extents--; | ||
| 316 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 317 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
| 318 | inode, 1); | ||
| 319 | |||
| 320 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | ||
| 321 | list_del_init(&entry->root_extent_list); | 342 | list_del_init(&entry->root_extent_list); |
| 322 | 343 | ||
| 323 | /* | 344 | /* |
| @@ -329,7 +350,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
| 329 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | 350 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { |
| 330 | list_del_init(&BTRFS_I(inode)->ordered_operations); | 351 | list_del_init(&BTRFS_I(inode)->ordered_operations); |
| 331 | } | 352 | } |
| 332 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 353 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 333 | 354 | ||
| 334 | return 0; | 355 | return 0; |
| 335 | } | 356 | } |
| @@ -490,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 490 | * start IO on any dirty ones so the wait doesn't stall waiting | 511 | * start IO on any dirty ones so the wait doesn't stall waiting |
| 491 | * for pdflush to find them | 512 | * for pdflush to find them |
| 492 | */ | 513 | */ |
| 493 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 514 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
| 515 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
| 494 | if (wait) { | 516 | if (wait) { |
| 495 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 517 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
| 496 | &entry->flags)); | 518 | &entry->flags)); |
| @@ -587,6 +609,47 @@ out: | |||
| 587 | return entry; | 609 | return entry; |
| 588 | } | 610 | } |
| 589 | 611 | ||
| 612 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
| 613 | * extents that exist in the range, rather than just the start of the range. | ||
| 614 | */ | ||
| 615 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
| 616 | u64 file_offset, | ||
| 617 | u64 len) | ||
| 618 | { | ||
| 619 | struct btrfs_ordered_inode_tree *tree; | ||
| 620 | struct rb_node *node; | ||
| 621 | struct btrfs_ordered_extent *entry = NULL; | ||
| 622 | |||
| 623 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 624 | spin_lock(&tree->lock); | ||
| 625 | node = tree_search(tree, file_offset); | ||
| 626 | if (!node) { | ||
| 627 | node = tree_search(tree, file_offset + len); | ||
| 628 | if (!node) | ||
| 629 | goto out; | ||
| 630 | } | ||
| 631 | |||
| 632 | while (1) { | ||
| 633 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 634 | if (range_overlaps(entry, file_offset, len)) | ||
| 635 | break; | ||
| 636 | |||
| 637 | if (entry->file_offset >= file_offset + len) { | ||
| 638 | entry = NULL; | ||
| 639 | break; | ||
| 640 | } | ||
| 641 | entry = NULL; | ||
| 642 | node = rb_next(node); | ||
| 643 | if (!node) | ||
| 644 | break; | ||
| 645 | } | ||
| 646 | out: | ||
| 647 | if (entry) | ||
| 648 | atomic_inc(&entry->refs); | ||
| 649 | spin_unlock(&tree->lock); | ||
| 650 | return entry; | ||
| 651 | } | ||
| 652 | |||
| 590 | /* | 653 | /* |
| 591 | * lookup and return any extent before 'file_offset'. NULL is returned | 654 | * lookup and return any extent before 'file_offset'. NULL is returned |
| 592 | * if none is found | 655 | * if none is found |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c82f76a9f040..8ac365492a3f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -72,6 +72,8 @@ struct btrfs_ordered_sum { | |||
| 72 | 72 | ||
| 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
| 74 | 74 | ||
| 75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | ||
| 76 | |||
| 75 | struct btrfs_ordered_extent { | 77 | struct btrfs_ordered_extent { |
| 76 | /* logical offset in the file */ | 78 | /* logical offset in the file */ |
| 77 | u64 file_offset; | 79 | u64 file_offset; |
| @@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 140 | struct btrfs_ordered_extent **cached, | 142 | struct btrfs_ordered_extent **cached, |
| 141 | u64 file_offset, u64 io_size); | 143 | u64 file_offset, u64 io_size); |
| 142 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 143 | u64 start, u64 len, u64 disk_len, int tyep); | 145 | u64 start, u64 len, u64 disk_len, int type); |
| 146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
| 147 | u64 start, u64 len, u64 disk_len, int type); | ||
| 144 | int btrfs_add_ordered_sum(struct inode *inode, | 148 | int btrfs_add_ordered_sum(struct inode *inode, |
| 145 | struct btrfs_ordered_extent *entry, | 149 | struct btrfs_ordered_extent *entry, |
| 146 | struct btrfs_ordered_sum *sum); | 150 | struct btrfs_ordered_sum *sum); |
| @@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 151 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 155 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
| 152 | struct btrfs_ordered_extent * | 156 | struct btrfs_ordered_extent * |
| 153 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 157 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
| 158 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
| 159 | u64 file_offset, | ||
| 160 | u64 len); | ||
| 154 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 161 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
| 155 | struct btrfs_ordered_extent *ordered); | 162 | struct btrfs_ordered_extent *ordered); |
| 156 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 163 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c index d0cc62bccb94..a97314cf6bd6 100644 --- a/fs/btrfs/ref-cache.c +++ b/fs/btrfs/ref-cache.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include <linux/sort.h> | 21 | #include <linux/sort.h> |
| 21 | #include "ctree.h" | 22 | #include "ctree.h" |
| 22 | #include "ref-cache.h" | 23 | #include "ref-cache.h" |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 0b23942cbc0d..b37d723b9d4a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/writeback.h> | 21 | #include <linux/writeback.h> |
| 22 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
| 23 | #include <linux/rbtree.h> | 23 | #include <linux/rbtree.h> |
| 24 | #include <linux/slab.h> | ||
| 24 | #include "ctree.h" | 25 | #include "ctree.h" |
| 25 | #include "disk-io.h" | 26 | #include "disk-io.h" |
| 26 | #include "transaction.h" | 27 | #include "transaction.h" |
| @@ -43,8 +44,12 @@ struct tree_entry { | |||
| 43 | struct backref_node { | 44 | struct backref_node { |
| 44 | struct rb_node rb_node; | 45 | struct rb_node rb_node; |
| 45 | u64 bytenr; | 46 | u64 bytenr; |
| 46 | /* objectid tree block owner */ | 47 | |
| 48 | u64 new_bytenr; | ||
| 49 | /* objectid of tree block owner, can be not uptodate */ | ||
| 47 | u64 owner; | 50 | u64 owner; |
| 51 | /* link to pending, changed or detached list */ | ||
| 52 | struct list_head list; | ||
| 48 | /* list of upper level blocks reference this block */ | 53 | /* list of upper level blocks reference this block */ |
| 49 | struct list_head upper; | 54 | struct list_head upper; |
| 50 | /* list of child blocks in the cache */ | 55 | /* list of child blocks in the cache */ |
| @@ -55,9 +60,9 @@ struct backref_node { | |||
| 55 | struct extent_buffer *eb; | 60 | struct extent_buffer *eb; |
| 56 | /* level of tree block */ | 61 | /* level of tree block */ |
| 57 | unsigned int level:8; | 62 | unsigned int level:8; |
| 58 | /* 1 if the block is root of old snapshot */ | 63 | /* is the block in non-reference counted tree */ |
| 59 | unsigned int old_root:1; | 64 | unsigned int cowonly:1; |
| 60 | /* 1 if no child blocks in the cache */ | 65 | /* 1 if no child node in the cache */ |
| 61 | unsigned int lowest:1; | 66 | unsigned int lowest:1; |
| 62 | /* is the extent buffer locked */ | 67 | /* is the extent buffer locked */ |
| 63 | unsigned int locked:1; | 68 | unsigned int locked:1; |
| @@ -65,6 +70,16 @@ struct backref_node { | |||
| 65 | unsigned int processed:1; | 70 | unsigned int processed:1; |
| 66 | /* have backrefs of this block been checked */ | 71 | /* have backrefs of this block been checked */ |
| 67 | unsigned int checked:1; | 72 | unsigned int checked:1; |
| 73 | /* | ||
| 74 | * 1 if corresponding block has been cowed but some upper | ||
| 75 | * level block pointers may not point to the new location | ||
| 76 | */ | ||
| 77 | unsigned int pending:1; | ||
| 78 | /* | ||
| 79 | * 1 if the backref node isn't connected to any other | ||
| 80 | * backref node. | ||
| 81 | */ | ||
| 82 | unsigned int detached:1; | ||
| 68 | }; | 83 | }; |
| 69 | 84 | ||
| 70 | /* | 85 | /* |
| @@ -73,7 +88,6 @@ struct backref_node { | |||
| 73 | struct backref_edge { | 88 | struct backref_edge { |
| 74 | struct list_head list[2]; | 89 | struct list_head list[2]; |
| 75 | struct backref_node *node[2]; | 90 | struct backref_node *node[2]; |
| 76 | u64 blockptr; | ||
| 77 | }; | 91 | }; |
| 78 | 92 | ||
| 79 | #define LOWER 0 | 93 | #define LOWER 0 |
| @@ -82,9 +96,25 @@ struct backref_edge { | |||
| 82 | struct backref_cache { | 96 | struct backref_cache { |
| 83 | /* red black tree of all backref nodes in the cache */ | 97 | /* red black tree of all backref nodes in the cache */ |
| 84 | struct rb_root rb_root; | 98 | struct rb_root rb_root; |
| 85 | /* list of backref nodes with no child block in the cache */ | 99 | /* for passing backref nodes to btrfs_reloc_cow_block */ |
| 100 | struct backref_node *path[BTRFS_MAX_LEVEL]; | ||
| 101 | /* | ||
| 102 | * list of blocks that have been cowed but some block | ||
| 103 | * pointers in upper level blocks may not reflect the | ||
| 104 | * new location | ||
| 105 | */ | ||
| 86 | struct list_head pending[BTRFS_MAX_LEVEL]; | 106 | struct list_head pending[BTRFS_MAX_LEVEL]; |
| 87 | spinlock_t lock; | 107 | /* list of backref nodes with no child node */ |
| 108 | struct list_head leaves; | ||
| 109 | /* list of blocks that have been cowed in current transaction */ | ||
| 110 | struct list_head changed; | ||
| 111 | /* list of detached backref node. */ | ||
| 112 | struct list_head detached; | ||
| 113 | |||
| 114 | u64 last_trans; | ||
| 115 | |||
| 116 | int nr_nodes; | ||
| 117 | int nr_edges; | ||
| 88 | }; | 118 | }; |
| 89 | 119 | ||
| 90 | /* | 120 | /* |
| @@ -112,15 +142,6 @@ struct tree_block { | |||
| 112 | unsigned int key_ready:1; | 142 | unsigned int key_ready:1; |
| 113 | }; | 143 | }; |
| 114 | 144 | ||
| 115 | /* inode vector */ | ||
| 116 | #define INODEVEC_SIZE 16 | ||
| 117 | |||
| 118 | struct inodevec { | ||
| 119 | struct list_head list; | ||
| 120 | struct inode *inode[INODEVEC_SIZE]; | ||
| 121 | int nr; | ||
| 122 | }; | ||
| 123 | |||
| 124 | #define MAX_EXTENTS 128 | 145 | #define MAX_EXTENTS 128 |
| 125 | 146 | ||
| 126 | struct file_extent_cluster { | 147 | struct file_extent_cluster { |
| @@ -137,36 +158,43 @@ struct reloc_control { | |||
| 137 | struct btrfs_root *extent_root; | 158 | struct btrfs_root *extent_root; |
| 138 | /* inode for moving data */ | 159 | /* inode for moving data */ |
| 139 | struct inode *data_inode; | 160 | struct inode *data_inode; |
| 140 | struct btrfs_workers workers; | 161 | |
| 162 | struct btrfs_block_rsv *block_rsv; | ||
| 163 | |||
| 164 | struct backref_cache backref_cache; | ||
| 165 | |||
| 166 | struct file_extent_cluster cluster; | ||
| 141 | /* tree blocks have been processed */ | 167 | /* tree blocks have been processed */ |
| 142 | struct extent_io_tree processed_blocks; | 168 | struct extent_io_tree processed_blocks; |
| 143 | /* map start of tree root to corresponding reloc tree */ | 169 | /* map start of tree root to corresponding reloc tree */ |
| 144 | struct mapping_tree reloc_root_tree; | 170 | struct mapping_tree reloc_root_tree; |
| 145 | /* list of reloc trees */ | 171 | /* list of reloc trees */ |
| 146 | struct list_head reloc_roots; | 172 | struct list_head reloc_roots; |
| 173 | /* size of metadata reservation for merging reloc trees */ | ||
| 174 | u64 merging_rsv_size; | ||
| 175 | /* size of relocated tree nodes */ | ||
| 176 | u64 nodes_relocated; | ||
| 177 | |||
| 147 | u64 search_start; | 178 | u64 search_start; |
| 148 | u64 extents_found; | 179 | u64 extents_found; |
| 149 | u64 extents_skipped; | 180 | |
| 150 | int stage; | 181 | int block_rsv_retries; |
| 151 | int create_reloc_root; | 182 | |
| 183 | unsigned int stage:8; | ||
| 184 | unsigned int create_reloc_tree:1; | ||
| 185 | unsigned int merge_reloc_tree:1; | ||
| 152 | unsigned int found_file_extent:1; | 186 | unsigned int found_file_extent:1; |
| 153 | unsigned int found_old_snapshot:1; | 187 | unsigned int commit_transaction:1; |
| 154 | }; | 188 | }; |
| 155 | 189 | ||
| 156 | /* stages of data relocation */ | 190 | /* stages of data relocation */ |
| 157 | #define MOVE_DATA_EXTENTS 0 | 191 | #define MOVE_DATA_EXTENTS 0 |
| 158 | #define UPDATE_DATA_PTRS 1 | 192 | #define UPDATE_DATA_PTRS 1 |
| 159 | 193 | ||
| 160 | /* | 194 | static void remove_backref_node(struct backref_cache *cache, |
| 161 | * merge reloc tree to corresponding fs tree in worker threads | 195 | struct backref_node *node); |
| 162 | */ | 196 | static void __mark_block_processed(struct reloc_control *rc, |
| 163 | struct async_merge { | 197 | struct backref_node *node); |
| 164 | struct btrfs_work work; | ||
| 165 | struct reloc_control *rc; | ||
| 166 | struct btrfs_root *root; | ||
| 167 | struct completion *done; | ||
| 168 | atomic_t *num_pending; | ||
| 169 | }; | ||
| 170 | 198 | ||
| 171 | static void mapping_tree_init(struct mapping_tree *tree) | 199 | static void mapping_tree_init(struct mapping_tree *tree) |
| 172 | { | 200 | { |
| @@ -180,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache) | |||
| 180 | cache->rb_root = RB_ROOT; | 208 | cache->rb_root = RB_ROOT; |
| 181 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | 209 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
| 182 | INIT_LIST_HEAD(&cache->pending[i]); | 210 | INIT_LIST_HEAD(&cache->pending[i]); |
| 183 | spin_lock_init(&cache->lock); | 211 | INIT_LIST_HEAD(&cache->changed); |
| 212 | INIT_LIST_HEAD(&cache->detached); | ||
| 213 | INIT_LIST_HEAD(&cache->leaves); | ||
| 214 | } | ||
| 215 | |||
| 216 | static void backref_cache_cleanup(struct backref_cache *cache) | ||
| 217 | { | ||
| 218 | struct backref_node *node; | ||
| 219 | int i; | ||
| 220 | |||
| 221 | while (!list_empty(&cache->detached)) { | ||
| 222 | node = list_entry(cache->detached.next, | ||
| 223 | struct backref_node, list); | ||
| 224 | remove_backref_node(cache, node); | ||
| 225 | } | ||
| 226 | |||
| 227 | while (!list_empty(&cache->leaves)) { | ||
| 228 | node = list_entry(cache->leaves.next, | ||
| 229 | struct backref_node, lower); | ||
| 230 | remove_backref_node(cache, node); | ||
| 231 | } | ||
| 232 | |||
| 233 | cache->last_trans = 0; | ||
| 234 | |||
| 235 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | ||
| 236 | BUG_ON(!list_empty(&cache->pending[i])); | ||
| 237 | BUG_ON(!list_empty(&cache->changed)); | ||
| 238 | BUG_ON(!list_empty(&cache->detached)); | ||
| 239 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
| 240 | BUG_ON(cache->nr_nodes); | ||
| 241 | BUG_ON(cache->nr_edges); | ||
| 242 | } | ||
| 243 | |||
| 244 | static struct backref_node *alloc_backref_node(struct backref_cache *cache) | ||
| 245 | { | ||
| 246 | struct backref_node *node; | ||
| 247 | |||
| 248 | node = kzalloc(sizeof(*node), GFP_NOFS); | ||
| 249 | if (node) { | ||
| 250 | INIT_LIST_HEAD(&node->list); | ||
| 251 | INIT_LIST_HEAD(&node->upper); | ||
| 252 | INIT_LIST_HEAD(&node->lower); | ||
| 253 | RB_CLEAR_NODE(&node->rb_node); | ||
| 254 | cache->nr_nodes++; | ||
| 255 | } | ||
| 256 | return node; | ||
| 257 | } | ||
| 258 | |||
| 259 | static void free_backref_node(struct backref_cache *cache, | ||
| 260 | struct backref_node *node) | ||
| 261 | { | ||
| 262 | if (node) { | ||
| 263 | cache->nr_nodes--; | ||
| 264 | kfree(node); | ||
| 265 | } | ||
| 266 | } | ||
| 267 | |||
| 268 | static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) | ||
| 269 | { | ||
| 270 | struct backref_edge *edge; | ||
| 271 | |||
| 272 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
| 273 | if (edge) | ||
| 274 | cache->nr_edges++; | ||
| 275 | return edge; | ||
| 184 | } | 276 | } |
| 185 | 277 | ||
| 186 | static void backref_node_init(struct backref_node *node) | 278 | static void free_backref_edge(struct backref_cache *cache, |
| 279 | struct backref_edge *edge) | ||
| 187 | { | 280 | { |
| 188 | memset(node, 0, sizeof(*node)); | 281 | if (edge) { |
| 189 | INIT_LIST_HEAD(&node->upper); | 282 | cache->nr_edges--; |
| 190 | INIT_LIST_HEAD(&node->lower); | 283 | kfree(edge); |
| 191 | RB_CLEAR_NODE(&node->rb_node); | 284 | } |
| 192 | } | 285 | } |
| 193 | 286 | ||
| 194 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | 287 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, |
| @@ -249,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, | |||
| 249 | edges[idx++] = edge; | 342 | edges[idx++] = edge; |
| 250 | node = edge->node[UPPER]; | 343 | node = edge->node[UPPER]; |
| 251 | } | 344 | } |
| 345 | BUG_ON(node->detached); | ||
| 252 | *index = idx; | 346 | *index = idx; |
| 253 | return node; | 347 | return node; |
| 254 | } | 348 | } |
| @@ -280,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], | |||
| 280 | return NULL; | 374 | return NULL; |
| 281 | } | 375 | } |
| 282 | 376 | ||
| 377 | static void unlock_node_buffer(struct backref_node *node) | ||
| 378 | { | ||
| 379 | if (node->locked) { | ||
| 380 | btrfs_tree_unlock(node->eb); | ||
| 381 | node->locked = 0; | ||
| 382 | } | ||
| 383 | } | ||
| 384 | |||
| 283 | static void drop_node_buffer(struct backref_node *node) | 385 | static void drop_node_buffer(struct backref_node *node) |
| 284 | { | 386 | { |
| 285 | if (node->eb) { | 387 | if (node->eb) { |
| 286 | if (node->locked) { | 388 | unlock_node_buffer(node); |
| 287 | btrfs_tree_unlock(node->eb); | ||
| 288 | node->locked = 0; | ||
| 289 | } | ||
| 290 | free_extent_buffer(node->eb); | 389 | free_extent_buffer(node->eb); |
| 291 | node->eb = NULL; | 390 | node->eb = NULL; |
| 292 | } | 391 | } |
| @@ -295,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node) | |||
| 295 | static void drop_backref_node(struct backref_cache *tree, | 394 | static void drop_backref_node(struct backref_cache *tree, |
| 296 | struct backref_node *node) | 395 | struct backref_node *node) |
| 297 | { | 396 | { |
| 298 | BUG_ON(!node->lowest); | ||
| 299 | BUG_ON(!list_empty(&node->upper)); | 397 | BUG_ON(!list_empty(&node->upper)); |
| 300 | 398 | ||
| 301 | drop_node_buffer(node); | 399 | drop_node_buffer(node); |
| 400 | list_del(&node->list); | ||
| 302 | list_del(&node->lower); | 401 | list_del(&node->lower); |
| 303 | 402 | if (!RB_EMPTY_NODE(&node->rb_node)) | |
| 304 | rb_erase(&node->rb_node, &tree->rb_root); | 403 | rb_erase(&node->rb_node, &tree->rb_root); |
| 305 | kfree(node); | 404 | free_backref_node(tree, node); |
| 306 | } | 405 | } |
| 307 | 406 | ||
| 308 | /* | 407 | /* |
| @@ -317,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache, | |||
| 317 | if (!node) | 416 | if (!node) |
| 318 | return; | 417 | return; |
| 319 | 418 | ||
| 320 | BUG_ON(!node->lowest); | 419 | BUG_ON(!node->lowest && !node->detached); |
| 321 | while (!list_empty(&node->upper)) { | 420 | while (!list_empty(&node->upper)) { |
| 322 | edge = list_entry(node->upper.next, struct backref_edge, | 421 | edge = list_entry(node->upper.next, struct backref_edge, |
| 323 | list[LOWER]); | 422 | list[LOWER]); |
| 324 | upper = edge->node[UPPER]; | 423 | upper = edge->node[UPPER]; |
| 325 | list_del(&edge->list[LOWER]); | 424 | list_del(&edge->list[LOWER]); |
| 326 | list_del(&edge->list[UPPER]); | 425 | list_del(&edge->list[UPPER]); |
| 327 | kfree(edge); | 426 | free_backref_edge(cache, edge); |
| 427 | |||
| 428 | if (RB_EMPTY_NODE(&upper->rb_node)) { | ||
| 429 | BUG_ON(!list_empty(&node->upper)); | ||
| 430 | drop_backref_node(cache, node); | ||
| 431 | node = upper; | ||
| 432 | node->lowest = 1; | ||
| 433 | continue; | ||
| 434 | } | ||
| 328 | /* | 435 | /* |
| 329 | * add the node to pending list if no other | 436 | * add the node to leaf node list if no other |
| 330 | * child block cached. | 437 | * child block cached. |
| 331 | */ | 438 | */ |
| 332 | if (list_empty(&upper->lower)) { | 439 | if (list_empty(&upper->lower)) { |
| 333 | list_add_tail(&upper->lower, | 440 | list_add_tail(&upper->lower, &cache->leaves); |
| 334 | &cache->pending[upper->level]); | ||
| 335 | upper->lowest = 1; | 441 | upper->lowest = 1; |
| 336 | } | 442 | } |
| 337 | } | 443 | } |
| 444 | |||
| 338 | drop_backref_node(cache, node); | 445 | drop_backref_node(cache, node); |
| 339 | } | 446 | } |
| 340 | 447 | ||
| 448 | static void update_backref_node(struct backref_cache *cache, | ||
| 449 | struct backref_node *node, u64 bytenr) | ||
| 450 | { | ||
| 451 | struct rb_node *rb_node; | ||
| 452 | rb_erase(&node->rb_node, &cache->rb_root); | ||
| 453 | node->bytenr = bytenr; | ||
| 454 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | ||
| 455 | BUG_ON(rb_node); | ||
| 456 | } | ||
| 457 | |||
| 458 | /* | ||
| 459 | * update backref cache after a transaction commit | ||
| 460 | */ | ||
| 461 | static int update_backref_cache(struct btrfs_trans_handle *trans, | ||
| 462 | struct backref_cache *cache) | ||
| 463 | { | ||
| 464 | struct backref_node *node; | ||
| 465 | int level = 0; | ||
| 466 | |||
| 467 | if (cache->last_trans == 0) { | ||
| 468 | cache->last_trans = trans->transid; | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | |||
| 472 | if (cache->last_trans == trans->transid) | ||
| 473 | return 0; | ||
| 474 | |||
| 475 | /* | ||
| 476 | * detached nodes are used to avoid unnecessary backref | ||
| 477 | * lookup. transaction commit changes the extent tree. | ||
| 478 | * so the detached nodes are no longer useful. | ||
| 479 | */ | ||
| 480 | while (!list_empty(&cache->detached)) { | ||
| 481 | node = list_entry(cache->detached.next, | ||
| 482 | struct backref_node, list); | ||
| 483 | remove_backref_node(cache, node); | ||
| 484 | } | ||
| 485 | |||
| 486 | while (!list_empty(&cache->changed)) { | ||
| 487 | node = list_entry(cache->changed.next, | ||
| 488 | struct backref_node, list); | ||
| 489 | list_del_init(&node->list); | ||
| 490 | BUG_ON(node->pending); | ||
| 491 | update_backref_node(cache, node, node->new_bytenr); | ||
| 492 | } | ||
| 493 | |||
| 494 | /* | ||
| 495 | * some nodes can be left in the pending list if there were | ||
| 496 | * errors during processing the pending nodes. | ||
| 497 | */ | ||
| 498 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
| 499 | list_for_each_entry(node, &cache->pending[level], list) { | ||
| 500 | BUG_ON(!node->pending); | ||
| 501 | if (node->bytenr == node->new_bytenr) | ||
| 502 | continue; | ||
| 503 | update_backref_node(cache, node, node->new_bytenr); | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | cache->last_trans = 0; | ||
| 508 | return 1; | ||
| 509 | } | ||
| 510 | |||
| 511 | static int should_ignore_root(struct btrfs_root *root) | ||
| 512 | { | ||
| 513 | struct btrfs_root *reloc_root; | ||
| 514 | |||
| 515 | if (!root->ref_cows) | ||
| 516 | return 0; | ||
| 517 | |||
| 518 | reloc_root = root->reloc_root; | ||
| 519 | if (!reloc_root) | ||
| 520 | return 0; | ||
| 521 | |||
| 522 | if (btrfs_root_last_snapshot(&reloc_root->root_item) == | ||
| 523 | root->fs_info->running_transaction->transid - 1) | ||
| 524 | return 0; | ||
| 525 | /* | ||
| 526 | * if there is reloc tree and it was created in previous | ||
| 527 | * transaction backref lookup can find the reloc tree, | ||
| 528 | * so backref node for the fs tree root is useless for | ||
| 529 | * relocation. | ||
| 530 | */ | ||
| 531 | return 1; | ||
| 532 | } | ||
| 533 | |||
| 341 | /* | 534 | /* |
| 342 | * find reloc tree by address of tree root | 535 | * find reloc tree by address of tree root |
| 343 | */ | 536 | */ |
| @@ -452,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, | |||
| 452 | * for all upper level blocks that directly/indirectly reference the | 645 | * for all upper level blocks that directly/indirectly reference the |
| 453 | * block are also cached. | 646 | * block are also cached. |
| 454 | */ | 647 | */ |
| 455 | static struct backref_node *build_backref_tree(struct reloc_control *rc, | 648 | static noinline_for_stack |
| 456 | struct backref_cache *cache, | 649 | struct backref_node *build_backref_tree(struct reloc_control *rc, |
| 457 | struct btrfs_key *node_key, | 650 | struct btrfs_key *node_key, |
| 458 | int level, u64 bytenr) | 651 | int level, u64 bytenr) |
| 459 | { | 652 | { |
| 653 | struct backref_cache *cache = &rc->backref_cache; | ||
| 460 | struct btrfs_path *path1; | 654 | struct btrfs_path *path1; |
| 461 | struct btrfs_path *path2; | 655 | struct btrfs_path *path2; |
| 462 | struct extent_buffer *eb; | 656 | struct extent_buffer *eb; |
| @@ -472,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
| 472 | unsigned long end; | 666 | unsigned long end; |
| 473 | unsigned long ptr; | 667 | unsigned long ptr; |
| 474 | LIST_HEAD(list); | 668 | LIST_HEAD(list); |
| 669 | LIST_HEAD(useless); | ||
| 670 | int cowonly; | ||
| 475 | int ret; | 671 | int ret; |
| 476 | int err = 0; | 672 | int err = 0; |
| 477 | 673 | ||
| @@ -482,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
| 482 | goto out; | 678 | goto out; |
| 483 | } | 679 | } |
| 484 | 680 | ||
| 485 | node = kmalloc(sizeof(*node), GFP_NOFS); | 681 | node = alloc_backref_node(cache); |
| 486 | if (!node) { | 682 | if (!node) { |
| 487 | err = -ENOMEM; | 683 | err = -ENOMEM; |
| 488 | goto out; | 684 | goto out; |
| 489 | } | 685 | } |
| 490 | 686 | ||
| 491 | backref_node_init(node); | ||
| 492 | node->bytenr = bytenr; | 687 | node->bytenr = bytenr; |
| 493 | node->owner = 0; | ||
| 494 | node->level = level; | 688 | node->level = level; |
| 495 | node->lowest = 1; | 689 | node->lowest = 1; |
| 496 | cur = node; | 690 | cur = node; |
| @@ -586,17 +780,21 @@ again: | |||
| 586 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 780 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
| 587 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || | 781 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || |
| 588 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | 782 | key.type == BTRFS_EXTENT_REF_V0_KEY) { |
| 589 | if (key.objectid == key.offset && | 783 | if (key.type == BTRFS_EXTENT_REF_V0_KEY) { |
| 590 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
| 591 | struct btrfs_extent_ref_v0 *ref0; | 784 | struct btrfs_extent_ref_v0 *ref0; |
| 592 | ref0 = btrfs_item_ptr(eb, path1->slots[0], | 785 | ref0 = btrfs_item_ptr(eb, path1->slots[0], |
| 593 | struct btrfs_extent_ref_v0); | 786 | struct btrfs_extent_ref_v0); |
| 594 | root = find_tree_root(rc, eb, ref0); | 787 | if (key.objectid == key.offset) { |
| 595 | if (root) | 788 | root = find_tree_root(rc, eb, ref0); |
| 596 | cur->root = root; | 789 | if (root && !should_ignore_root(root)) |
| 597 | else | 790 | cur->root = root; |
| 598 | cur->old_root = 1; | 791 | else |
| 599 | break; | 792 | list_add(&cur->list, &useless); |
| 793 | break; | ||
| 794 | } | ||
| 795 | if (is_cowonly_root(btrfs_ref_root_v0(eb, | ||
| 796 | ref0))) | ||
| 797 | cur->cowonly = 1; | ||
| 600 | } | 798 | } |
| 601 | #else | 799 | #else |
| 602 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 800 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); |
| @@ -613,22 +811,20 @@ again: | |||
| 613 | break; | 811 | break; |
| 614 | } | 812 | } |
| 615 | 813 | ||
| 616 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 814 | edge = alloc_backref_edge(cache); |
| 617 | if (!edge) { | 815 | if (!edge) { |
| 618 | err = -ENOMEM; | 816 | err = -ENOMEM; |
| 619 | goto out; | 817 | goto out; |
| 620 | } | 818 | } |
| 621 | rb_node = tree_search(&cache->rb_root, key.offset); | 819 | rb_node = tree_search(&cache->rb_root, key.offset); |
| 622 | if (!rb_node) { | 820 | if (!rb_node) { |
| 623 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 821 | upper = alloc_backref_node(cache); |
| 624 | if (!upper) { | 822 | if (!upper) { |
| 625 | kfree(edge); | 823 | free_backref_edge(cache, edge); |
| 626 | err = -ENOMEM; | 824 | err = -ENOMEM; |
| 627 | goto out; | 825 | goto out; |
| 628 | } | 826 | } |
| 629 | backref_node_init(upper); | ||
| 630 | upper->bytenr = key.offset; | 827 | upper->bytenr = key.offset; |
| 631 | upper->owner = 0; | ||
| 632 | upper->level = cur->level + 1; | 828 | upper->level = cur->level + 1; |
| 633 | /* | 829 | /* |
| 634 | * backrefs for the upper level block isn't | 830 | * backrefs for the upper level block isn't |
| @@ -638,11 +834,12 @@ again: | |||
| 638 | } else { | 834 | } else { |
| 639 | upper = rb_entry(rb_node, struct backref_node, | 835 | upper = rb_entry(rb_node, struct backref_node, |
| 640 | rb_node); | 836 | rb_node); |
| 837 | BUG_ON(!upper->checked); | ||
| 641 | INIT_LIST_HEAD(&edge->list[UPPER]); | 838 | INIT_LIST_HEAD(&edge->list[UPPER]); |
| 642 | } | 839 | } |
| 643 | list_add(&edge->list[LOWER], &cur->upper); | 840 | list_add_tail(&edge->list[LOWER], &cur->upper); |
| 644 | edge->node[UPPER] = upper; | ||
| 645 | edge->node[LOWER] = cur; | 841 | edge->node[LOWER] = cur; |
| 842 | edge->node[UPPER] = upper; | ||
| 646 | 843 | ||
| 647 | goto next; | 844 | goto next; |
| 648 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { | 845 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { |
| @@ -656,11 +853,17 @@ again: | |||
| 656 | goto out; | 853 | goto out; |
| 657 | } | 854 | } |
| 658 | 855 | ||
| 856 | if (!root->ref_cows) | ||
| 857 | cur->cowonly = 1; | ||
| 858 | |||
| 659 | if (btrfs_root_level(&root->root_item) == cur->level) { | 859 | if (btrfs_root_level(&root->root_item) == cur->level) { |
| 660 | /* tree root */ | 860 | /* tree root */ |
| 661 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 861 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
| 662 | cur->bytenr); | 862 | cur->bytenr); |
| 663 | cur->root = root; | 863 | if (should_ignore_root(root)) |
| 864 | list_add(&cur->list, &useless); | ||
| 865 | else | ||
| 866 | cur->root = root; | ||
| 664 | break; | 867 | break; |
| 665 | } | 868 | } |
| 666 | 869 | ||
| @@ -691,11 +894,14 @@ again: | |||
| 691 | if (!path2->nodes[level]) { | 894 | if (!path2->nodes[level]) { |
| 692 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 895 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
| 693 | lower->bytenr); | 896 | lower->bytenr); |
| 694 | lower->root = root; | 897 | if (should_ignore_root(root)) |
| 898 | list_add(&lower->list, &useless); | ||
| 899 | else | ||
| 900 | lower->root = root; | ||
| 695 | break; | 901 | break; |
| 696 | } | 902 | } |
| 697 | 903 | ||
| 698 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 904 | edge = alloc_backref_edge(cache); |
| 699 | if (!edge) { | 905 | if (!edge) { |
| 700 | err = -ENOMEM; | 906 | err = -ENOMEM; |
| 701 | goto out; | 907 | goto out; |
| @@ -704,16 +910,17 @@ again: | |||
| 704 | eb = path2->nodes[level]; | 910 | eb = path2->nodes[level]; |
| 705 | rb_node = tree_search(&cache->rb_root, eb->start); | 911 | rb_node = tree_search(&cache->rb_root, eb->start); |
| 706 | if (!rb_node) { | 912 | if (!rb_node) { |
| 707 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 913 | upper = alloc_backref_node(cache); |
| 708 | if (!upper) { | 914 | if (!upper) { |
| 709 | kfree(edge); | 915 | free_backref_edge(cache, edge); |
| 710 | err = -ENOMEM; | 916 | err = -ENOMEM; |
| 711 | goto out; | 917 | goto out; |
| 712 | } | 918 | } |
| 713 | backref_node_init(upper); | ||
| 714 | upper->bytenr = eb->start; | 919 | upper->bytenr = eb->start; |
| 715 | upper->owner = btrfs_header_owner(eb); | 920 | upper->owner = btrfs_header_owner(eb); |
| 716 | upper->level = lower->level + 1; | 921 | upper->level = lower->level + 1; |
| 922 | if (!root->ref_cows) | ||
| 923 | upper->cowonly = 1; | ||
| 717 | 924 | ||
| 718 | /* | 925 | /* |
| 719 | * if we know the block isn't shared | 926 | * if we know the block isn't shared |
| @@ -743,10 +950,12 @@ again: | |||
| 743 | rb_node); | 950 | rb_node); |
| 744 | BUG_ON(!upper->checked); | 951 | BUG_ON(!upper->checked); |
| 745 | INIT_LIST_HEAD(&edge->list[UPPER]); | 952 | INIT_LIST_HEAD(&edge->list[UPPER]); |
| 953 | if (!upper->owner) | ||
| 954 | upper->owner = btrfs_header_owner(eb); | ||
| 746 | } | 955 | } |
| 747 | list_add_tail(&edge->list[LOWER], &lower->upper); | 956 | list_add_tail(&edge->list[LOWER], &lower->upper); |
| 748 | edge->node[UPPER] = upper; | ||
| 749 | edge->node[LOWER] = lower; | 957 | edge->node[LOWER] = lower; |
| 958 | edge->node[UPPER] = upper; | ||
| 750 | 959 | ||
| 751 | if (rb_node) | 960 | if (rb_node) |
| 752 | break; | 961 | break; |
| @@ -784,8 +993,13 @@ next: | |||
| 784 | * into the cache. | 993 | * into the cache. |
| 785 | */ | 994 | */ |
| 786 | BUG_ON(!node->checked); | 995 | BUG_ON(!node->checked); |
| 787 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | 996 | cowonly = node->cowonly; |
| 788 | BUG_ON(rb_node); | 997 | if (!cowonly) { |
| 998 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | ||
| 999 | &node->rb_node); | ||
| 1000 | BUG_ON(rb_node); | ||
| 1001 | list_add_tail(&node->lower, &cache->leaves); | ||
| 1002 | } | ||
| 789 | 1003 | ||
| 790 | list_for_each_entry(edge, &node->upper, list[LOWER]) | 1004 | list_for_each_entry(edge, &node->upper, list[LOWER]) |
| 791 | list_add_tail(&edge->list[UPPER], &list); | 1005 | list_add_tail(&edge->list[UPPER], &list); |
| @@ -794,6 +1008,14 @@ next: | |||
| 794 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | 1008 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); |
| 795 | list_del_init(&edge->list[UPPER]); | 1009 | list_del_init(&edge->list[UPPER]); |
| 796 | upper = edge->node[UPPER]; | 1010 | upper = edge->node[UPPER]; |
| 1011 | if (upper->detached) { | ||
| 1012 | list_del(&edge->list[LOWER]); | ||
| 1013 | lower = edge->node[LOWER]; | ||
| 1014 | free_backref_edge(cache, edge); | ||
| 1015 | if (list_empty(&lower->upper)) | ||
| 1016 | list_add(&lower->list, &useless); | ||
| 1017 | continue; | ||
| 1018 | } | ||
| 797 | 1019 | ||
| 798 | if (!RB_EMPTY_NODE(&upper->rb_node)) { | 1020 | if (!RB_EMPTY_NODE(&upper->rb_node)) { |
| 799 | if (upper->lowest) { | 1021 | if (upper->lowest) { |
| @@ -806,25 +1028,69 @@ next: | |||
| 806 | } | 1028 | } |
| 807 | 1029 | ||
| 808 | BUG_ON(!upper->checked); | 1030 | BUG_ON(!upper->checked); |
| 809 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1031 | BUG_ON(cowonly != upper->cowonly); |
| 810 | &upper->rb_node); | 1032 | if (!cowonly) { |
| 811 | BUG_ON(rb_node); | 1033 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
| 1034 | &upper->rb_node); | ||
| 1035 | BUG_ON(rb_node); | ||
| 1036 | } | ||
| 812 | 1037 | ||
| 813 | list_add_tail(&edge->list[UPPER], &upper->lower); | 1038 | list_add_tail(&edge->list[UPPER], &upper->lower); |
| 814 | 1039 | ||
| 815 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | 1040 | list_for_each_entry(edge, &upper->upper, list[LOWER]) |
| 816 | list_add_tail(&edge->list[UPPER], &list); | 1041 | list_add_tail(&edge->list[UPPER], &list); |
| 817 | } | 1042 | } |
| 1043 | /* | ||
| 1044 | * process useless backref nodes. backref nodes for tree leaves | ||
| 1045 | * are deleted from the cache. backref nodes for upper level | ||
| 1046 | * tree blocks are left in the cache to avoid unnecessary backref | ||
| 1047 | * lookup. | ||
| 1048 | */ | ||
| 1049 | while (!list_empty(&useless)) { | ||
| 1050 | upper = list_entry(useless.next, struct backref_node, list); | ||
| 1051 | list_del_init(&upper->list); | ||
| 1052 | BUG_ON(!list_empty(&upper->upper)); | ||
| 1053 | if (upper == node) | ||
| 1054 | node = NULL; | ||
| 1055 | if (upper->lowest) { | ||
| 1056 | list_del_init(&upper->lower); | ||
| 1057 | upper->lowest = 0; | ||
| 1058 | } | ||
| 1059 | while (!list_empty(&upper->lower)) { | ||
| 1060 | edge = list_entry(upper->lower.next, | ||
| 1061 | struct backref_edge, list[UPPER]); | ||
| 1062 | list_del(&edge->list[UPPER]); | ||
| 1063 | list_del(&edge->list[LOWER]); | ||
| 1064 | lower = edge->node[LOWER]; | ||
| 1065 | free_backref_edge(cache, edge); | ||
| 1066 | |||
| 1067 | if (list_empty(&lower->upper)) | ||
| 1068 | list_add(&lower->list, &useless); | ||
| 1069 | } | ||
| 1070 | __mark_block_processed(rc, upper); | ||
| 1071 | if (upper->level > 0) { | ||
| 1072 | list_add(&upper->list, &cache->detached); | ||
| 1073 | upper->detached = 1; | ||
| 1074 | } else { | ||
| 1075 | rb_erase(&upper->rb_node, &cache->rb_root); | ||
| 1076 | free_backref_node(cache, upper); | ||
| 1077 | } | ||
| 1078 | } | ||
| 818 | out: | 1079 | out: |
| 819 | btrfs_free_path(path1); | 1080 | btrfs_free_path(path1); |
| 820 | btrfs_free_path(path2); | 1081 | btrfs_free_path(path2); |
| 821 | if (err) { | 1082 | if (err) { |
| 822 | INIT_LIST_HEAD(&list); | 1083 | while (!list_empty(&useless)) { |
| 1084 | lower = list_entry(useless.next, | ||
| 1085 | struct backref_node, upper); | ||
| 1086 | list_del_init(&lower->upper); | ||
| 1087 | } | ||
| 823 | upper = node; | 1088 | upper = node; |
| 1089 | INIT_LIST_HEAD(&list); | ||
| 824 | while (upper) { | 1090 | while (upper) { |
| 825 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1091 | if (RB_EMPTY_NODE(&upper->rb_node)) { |
| 826 | list_splice_tail(&upper->upper, &list); | 1092 | list_splice_tail(&upper->upper, &list); |
| 827 | kfree(upper); | 1093 | free_backref_node(cache, upper); |
| 828 | } | 1094 | } |
| 829 | 1095 | ||
| 830 | if (list_empty(&list)) | 1096 | if (list_empty(&list)) |
| @@ -832,15 +1098,104 @@ out: | |||
| 832 | 1098 | ||
| 833 | edge = list_entry(list.next, struct backref_edge, | 1099 | edge = list_entry(list.next, struct backref_edge, |
| 834 | list[LOWER]); | 1100 | list[LOWER]); |
| 1101 | list_del(&edge->list[LOWER]); | ||
| 835 | upper = edge->node[UPPER]; | 1102 | upper = edge->node[UPPER]; |
| 836 | kfree(edge); | 1103 | free_backref_edge(cache, edge); |
| 837 | } | 1104 | } |
| 838 | return ERR_PTR(err); | 1105 | return ERR_PTR(err); |
| 839 | } | 1106 | } |
| 1107 | BUG_ON(node && node->detached); | ||
| 840 | return node; | 1108 | return node; |
| 841 | } | 1109 | } |
| 842 | 1110 | ||
| 843 | /* | 1111 | /* |
| 1112 | * helper to add backref node for the newly created snapshot. | ||
| 1113 | * the backref node is created by cloning backref node that | ||
| 1114 | * corresponds to root of source tree | ||
| 1115 | */ | ||
| 1116 | static int clone_backref_node(struct btrfs_trans_handle *trans, | ||
| 1117 | struct reloc_control *rc, | ||
| 1118 | struct btrfs_root *src, | ||
| 1119 | struct btrfs_root *dest) | ||
| 1120 | { | ||
| 1121 | struct btrfs_root *reloc_root = src->reloc_root; | ||
| 1122 | struct backref_cache *cache = &rc->backref_cache; | ||
| 1123 | struct backref_node *node = NULL; | ||
| 1124 | struct backref_node *new_node; | ||
| 1125 | struct backref_edge *edge; | ||
| 1126 | struct backref_edge *new_edge; | ||
| 1127 | struct rb_node *rb_node; | ||
| 1128 | |||
| 1129 | if (cache->last_trans > 0) | ||
| 1130 | update_backref_cache(trans, cache); | ||
| 1131 | |||
| 1132 | rb_node = tree_search(&cache->rb_root, src->commit_root->start); | ||
| 1133 | if (rb_node) { | ||
| 1134 | node = rb_entry(rb_node, struct backref_node, rb_node); | ||
| 1135 | if (node->detached) | ||
| 1136 | node = NULL; | ||
| 1137 | else | ||
| 1138 | BUG_ON(node->new_bytenr != reloc_root->node->start); | ||
| 1139 | } | ||
| 1140 | |||
| 1141 | if (!node) { | ||
| 1142 | rb_node = tree_search(&cache->rb_root, | ||
| 1143 | reloc_root->commit_root->start); | ||
| 1144 | if (rb_node) { | ||
| 1145 | node = rb_entry(rb_node, struct backref_node, | ||
| 1146 | rb_node); | ||
| 1147 | BUG_ON(node->detached); | ||
| 1148 | } | ||
| 1149 | } | ||
| 1150 | |||
| 1151 | if (!node) | ||
| 1152 | return 0; | ||
| 1153 | |||
| 1154 | new_node = alloc_backref_node(cache); | ||
| 1155 | if (!new_node) | ||
| 1156 | return -ENOMEM; | ||
| 1157 | |||
| 1158 | new_node->bytenr = dest->node->start; | ||
| 1159 | new_node->level = node->level; | ||
| 1160 | new_node->lowest = node->lowest; | ||
| 1161 | new_node->root = dest; | ||
| 1162 | |||
| 1163 | if (!node->lowest) { | ||
| 1164 | list_for_each_entry(edge, &node->lower, list[UPPER]) { | ||
| 1165 | new_edge = alloc_backref_edge(cache); | ||
| 1166 | if (!new_edge) | ||
| 1167 | goto fail; | ||
| 1168 | |||
| 1169 | new_edge->node[UPPER] = new_node; | ||
| 1170 | new_edge->node[LOWER] = edge->node[LOWER]; | ||
| 1171 | list_add_tail(&new_edge->list[UPPER], | ||
| 1172 | &new_node->lower); | ||
| 1173 | } | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, | ||
| 1177 | &new_node->rb_node); | ||
| 1178 | BUG_ON(rb_node); | ||
| 1179 | |||
| 1180 | if (!new_node->lowest) { | ||
| 1181 | list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { | ||
| 1182 | list_add_tail(&new_edge->list[LOWER], | ||
| 1183 | &new_edge->node[LOWER]->upper); | ||
| 1184 | } | ||
| 1185 | } | ||
| 1186 | return 0; | ||
| 1187 | fail: | ||
| 1188 | while (!list_empty(&new_node->lower)) { | ||
| 1189 | new_edge = list_entry(new_node->lower.next, | ||
| 1190 | struct backref_edge, list[UPPER]); | ||
| 1191 | list_del(&new_edge->list[UPPER]); | ||
| 1192 | free_backref_edge(cache, new_edge); | ||
| 1193 | } | ||
| 1194 | free_backref_node(cache, new_node); | ||
| 1195 | return -ENOMEM; | ||
| 1196 | } | ||
| 1197 | |||
| 1198 | /* | ||
| 844 | * helper to add 'address of tree root -> reloc tree' mapping | 1199 | * helper to add 'address of tree root -> reloc tree' mapping |
| 845 | */ | 1200 | */ |
| 846 | static int __add_reloc_root(struct btrfs_root *root) | 1201 | static int __add_reloc_root(struct btrfs_root *root) |
| @@ -900,12 +1255,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
| 900 | return 0; | 1255 | return 0; |
| 901 | } | 1256 | } |
| 902 | 1257 | ||
| 903 | /* | 1258 | static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, |
| 904 | * create reloc tree for a given fs tree. reloc tree is just a | 1259 | struct btrfs_root *root, u64 objectid) |
| 905 | * snapshot of the fs tree with special root objectid. | ||
| 906 | */ | ||
| 907 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
| 908 | struct btrfs_root *root) | ||
| 909 | { | 1260 | { |
| 910 | struct btrfs_root *reloc_root; | 1261 | struct btrfs_root *reloc_root; |
| 911 | struct extent_buffer *eb; | 1262 | struct extent_buffer *eb; |
| @@ -913,36 +1264,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
| 913 | struct btrfs_key root_key; | 1264 | struct btrfs_key root_key; |
| 914 | int ret; | 1265 | int ret; |
| 915 | 1266 | ||
| 916 | if (root->reloc_root) { | ||
| 917 | reloc_root = root->reloc_root; | ||
| 918 | reloc_root->last_trans = trans->transid; | ||
| 919 | return 0; | ||
| 920 | } | ||
| 921 | |||
| 922 | if (!root->fs_info->reloc_ctl || | ||
| 923 | !root->fs_info->reloc_ctl->create_reloc_root || | ||
| 924 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
| 925 | return 0; | ||
| 926 | |||
| 927 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1267 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
| 928 | BUG_ON(!root_item); | 1268 | BUG_ON(!root_item); |
| 929 | 1269 | ||
| 930 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 1270 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
| 931 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 1271 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
| 932 | root_key.offset = root->root_key.objectid; | 1272 | root_key.offset = objectid; |
| 933 | 1273 | ||
| 934 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, | 1274 | if (root->root_key.objectid == objectid) { |
| 935 | BTRFS_TREE_RELOC_OBJECTID); | 1275 | /* called by btrfs_init_reloc_root */ |
| 936 | BUG_ON(ret); | 1276 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, |
| 1277 | BTRFS_TREE_RELOC_OBJECTID); | ||
| 1278 | BUG_ON(ret); | ||
| 1279 | |||
| 1280 | btrfs_set_root_last_snapshot(&root->root_item, | ||
| 1281 | trans->transid - 1); | ||
| 1282 | } else { | ||
| 1283 | /* | ||
| 1284 | * called by btrfs_reloc_post_snapshot_hook. | ||
| 1285 | * the source tree is a reloc tree, all tree blocks | ||
| 1286 | * modified after it was created have RELOC flag | ||
| 1287 | * set in their headers. so it's OK to not update | ||
| 1288 | * the 'last_snapshot'. | ||
| 1289 | */ | ||
| 1290 | ret = btrfs_copy_root(trans, root, root->node, &eb, | ||
| 1291 | BTRFS_TREE_RELOC_OBJECTID); | ||
| 1292 | BUG_ON(ret); | ||
| 1293 | } | ||
| 937 | 1294 | ||
| 938 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); | ||
| 939 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | 1295 | memcpy(root_item, &root->root_item, sizeof(*root_item)); |
| 940 | btrfs_set_root_refs(root_item, 1); | ||
| 941 | btrfs_set_root_bytenr(root_item, eb->start); | 1296 | btrfs_set_root_bytenr(root_item, eb->start); |
| 942 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 1297 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
| 943 | btrfs_set_root_generation(root_item, trans->transid); | 1298 | btrfs_set_root_generation(root_item, trans->transid); |
| 944 | memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); | 1299 | |
| 945 | root_item->drop_level = 0; | 1300 | if (root->root_key.objectid == objectid) { |
| 1301 | btrfs_set_root_refs(root_item, 0); | ||
| 1302 | memset(&root_item->drop_progress, 0, | ||
| 1303 | sizeof(struct btrfs_disk_key)); | ||
| 1304 | root_item->drop_level = 0; | ||
| 1305 | } | ||
| 946 | 1306 | ||
| 947 | btrfs_tree_unlock(eb); | 1307 | btrfs_tree_unlock(eb); |
| 948 | free_extent_buffer(eb); | 1308 | free_extent_buffer(eb); |
| @@ -956,6 +1316,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
| 956 | &root_key); | 1316 | &root_key); |
| 957 | BUG_ON(IS_ERR(reloc_root)); | 1317 | BUG_ON(IS_ERR(reloc_root)); |
| 958 | reloc_root->last_trans = trans->transid; | 1318 | reloc_root->last_trans = trans->transid; |
| 1319 | return reloc_root; | ||
| 1320 | } | ||
| 1321 | |||
| 1322 | /* | ||
| 1323 | * create reloc tree for a given fs tree. reloc tree is just a | ||
| 1324 | * snapshot of the fs tree with special root objectid. | ||
| 1325 | */ | ||
| 1326 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
| 1327 | struct btrfs_root *root) | ||
| 1328 | { | ||
| 1329 | struct btrfs_root *reloc_root; | ||
| 1330 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
| 1331 | int clear_rsv = 0; | ||
| 1332 | |||
| 1333 | if (root->reloc_root) { | ||
| 1334 | reloc_root = root->reloc_root; | ||
| 1335 | reloc_root->last_trans = trans->transid; | ||
| 1336 | return 0; | ||
| 1337 | } | ||
| 1338 | |||
| 1339 | if (!rc || !rc->create_reloc_tree || | ||
| 1340 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
| 1341 | return 0; | ||
| 1342 | |||
| 1343 | if (!trans->block_rsv) { | ||
| 1344 | trans->block_rsv = rc->block_rsv; | ||
| 1345 | clear_rsv = 1; | ||
| 1346 | } | ||
| 1347 | reloc_root = create_reloc_root(trans, root, root->root_key.objectid); | ||
| 1348 | if (clear_rsv) | ||
| 1349 | trans->block_rsv = NULL; | ||
| 959 | 1350 | ||
| 960 | __add_reloc_root(reloc_root); | 1351 | __add_reloc_root(reloc_root); |
| 961 | root->reloc_root = reloc_root; | 1352 | root->reloc_root = reloc_root; |
| @@ -979,7 +1370,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 979 | reloc_root = root->reloc_root; | 1370 | reloc_root = root->reloc_root; |
| 980 | root_item = &reloc_root->root_item; | 1371 | root_item = &reloc_root->root_item; |
| 981 | 1372 | ||
| 982 | if (btrfs_root_refs(root_item) == 0) { | 1373 | if (root->fs_info->reloc_ctl->merge_reloc_tree && |
| 1374 | btrfs_root_refs(root_item) == 0) { | ||
| 983 | root->reloc_root = NULL; | 1375 | root->reloc_root = NULL; |
| 984 | del = 1; | 1376 | del = 1; |
| 985 | } | 1377 | } |
| @@ -1101,8 +1493,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | |||
| 1101 | goto out; | 1493 | goto out; |
| 1102 | } | 1494 | } |
| 1103 | 1495 | ||
| 1104 | if (new_bytenr) | 1496 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
| 1105 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 1106 | ret = 0; | 1497 | ret = 0; |
| 1107 | out: | 1498 | out: |
| 1108 | btrfs_free_path(path); | 1499 | btrfs_free_path(path); |
| @@ -1113,19 +1504,18 @@ out: | |||
| 1113 | * update file extent items in the tree leaf to point to | 1504 | * update file extent items in the tree leaf to point to |
| 1114 | * the new locations. | 1505 | * the new locations. |
| 1115 | */ | 1506 | */ |
| 1116 | static int replace_file_extents(struct btrfs_trans_handle *trans, | 1507 | static noinline_for_stack |
| 1117 | struct reloc_control *rc, | 1508 | int replace_file_extents(struct btrfs_trans_handle *trans, |
| 1118 | struct btrfs_root *root, | 1509 | struct reloc_control *rc, |
| 1119 | struct extent_buffer *leaf, | 1510 | struct btrfs_root *root, |
| 1120 | struct list_head *inode_list) | 1511 | struct extent_buffer *leaf) |
| 1121 | { | 1512 | { |
| 1122 | struct btrfs_key key; | 1513 | struct btrfs_key key; |
| 1123 | struct btrfs_file_extent_item *fi; | 1514 | struct btrfs_file_extent_item *fi; |
| 1124 | struct inode *inode = NULL; | 1515 | struct inode *inode = NULL; |
| 1125 | struct inodevec *ivec = NULL; | ||
| 1126 | u64 parent; | 1516 | u64 parent; |
| 1127 | u64 bytenr; | 1517 | u64 bytenr; |
| 1128 | u64 new_bytenr; | 1518 | u64 new_bytenr = 0; |
| 1129 | u64 num_bytes; | 1519 | u64 num_bytes; |
| 1130 | u64 end; | 1520 | u64 end; |
| 1131 | u32 nritems; | 1521 | u32 nritems; |
| @@ -1165,21 +1555,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 1165 | * to complete and drop the extent cache | 1555 | * to complete and drop the extent cache |
| 1166 | */ | 1556 | */ |
| 1167 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 1557 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
| 1168 | if (!ivec || ivec->nr == INODEVEC_SIZE) { | ||
| 1169 | ivec = kmalloc(sizeof(*ivec), GFP_NOFS); | ||
| 1170 | BUG_ON(!ivec); | ||
| 1171 | ivec->nr = 0; | ||
| 1172 | list_add_tail(&ivec->list, inode_list); | ||
| 1173 | } | ||
| 1174 | if (first) { | 1558 | if (first) { |
| 1175 | inode = find_next_inode(root, key.objectid); | 1559 | inode = find_next_inode(root, key.objectid); |
| 1176 | if (inode) | ||
| 1177 | ivec->inode[ivec->nr++] = inode; | ||
| 1178 | first = 0; | 1560 | first = 0; |
| 1179 | } else if (inode && inode->i_ino < key.objectid) { | 1561 | } else if (inode && inode->i_ino < key.objectid) { |
| 1562 | btrfs_add_delayed_iput(inode); | ||
| 1180 | inode = find_next_inode(root, key.objectid); | 1563 | inode = find_next_inode(root, key.objectid); |
| 1181 | if (inode) | ||
| 1182 | ivec->inode[ivec->nr++] = inode; | ||
| 1183 | } | 1564 | } |
| 1184 | if (inode && inode->i_ino == key.objectid) { | 1565 | if (inode && inode->i_ino == key.objectid) { |
| 1185 | end = key.offset + | 1566 | end = key.offset + |
| @@ -1203,8 +1584,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 1203 | 1584 | ||
| 1204 | ret = get_new_location(rc->data_inode, &new_bytenr, | 1585 | ret = get_new_location(rc->data_inode, &new_bytenr, |
| 1205 | bytenr, num_bytes); | 1586 | bytenr, num_bytes); |
| 1206 | if (ret > 0) | 1587 | if (ret > 0) { |
| 1588 | WARN_ON(1); | ||
| 1207 | continue; | 1589 | continue; |
| 1590 | } | ||
| 1208 | BUG_ON(ret < 0); | 1591 | BUG_ON(ret < 0); |
| 1209 | 1592 | ||
| 1210 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); | 1593 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); |
| @@ -1224,6 +1607,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 1224 | } | 1607 | } |
| 1225 | if (dirty) | 1608 | if (dirty) |
| 1226 | btrfs_mark_buffer_dirty(leaf); | 1609 | btrfs_mark_buffer_dirty(leaf); |
| 1610 | if (inode) | ||
| 1611 | btrfs_add_delayed_iput(inode); | ||
| 1227 | return 0; | 1612 | return 0; |
| 1228 | } | 1613 | } |
| 1229 | 1614 | ||
| @@ -1247,11 +1632,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, | |||
| 1247 | * if no block got replaced, 0 is returned. if there are other | 1632 | * if no block got replaced, 0 is returned. if there are other |
| 1248 | * errors, a negative error number is returned. | 1633 | * errors, a negative error number is returned. |
| 1249 | */ | 1634 | */ |
| 1250 | static int replace_path(struct btrfs_trans_handle *trans, | 1635 | static noinline_for_stack |
| 1251 | struct btrfs_root *dest, struct btrfs_root *src, | 1636 | int replace_path(struct btrfs_trans_handle *trans, |
| 1252 | struct btrfs_path *path, struct btrfs_key *next_key, | 1637 | struct btrfs_root *dest, struct btrfs_root *src, |
| 1253 | struct extent_buffer **leaf, | 1638 | struct btrfs_path *path, struct btrfs_key *next_key, |
| 1254 | int lowest_level, int max_level) | 1639 | int lowest_level, int max_level) |
| 1255 | { | 1640 | { |
| 1256 | struct extent_buffer *eb; | 1641 | struct extent_buffer *eb; |
| 1257 | struct extent_buffer *parent; | 1642 | struct extent_buffer *parent; |
| @@ -1262,16 +1647,16 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1262 | u64 new_ptr_gen; | 1647 | u64 new_ptr_gen; |
| 1263 | u64 last_snapshot; | 1648 | u64 last_snapshot; |
| 1264 | u32 blocksize; | 1649 | u32 blocksize; |
| 1650 | int cow = 0; | ||
| 1265 | int level; | 1651 | int level; |
| 1266 | int ret; | 1652 | int ret; |
| 1267 | int slot; | 1653 | int slot; |
| 1268 | 1654 | ||
| 1269 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 1655 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
| 1270 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); | 1656 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); |
| 1271 | BUG_ON(lowest_level > 1 && leaf); | ||
| 1272 | 1657 | ||
| 1273 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); | 1658 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); |
| 1274 | 1659 | again: | |
| 1275 | slot = path->slots[lowest_level]; | 1660 | slot = path->slots[lowest_level]; |
| 1276 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); | 1661 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); |
| 1277 | 1662 | ||
| @@ -1285,8 +1670,10 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1285 | return 0; | 1670 | return 0; |
| 1286 | } | 1671 | } |
| 1287 | 1672 | ||
| 1288 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); | 1673 | if (cow) { |
| 1289 | BUG_ON(ret); | 1674 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); |
| 1675 | BUG_ON(ret); | ||
| 1676 | } | ||
| 1290 | btrfs_set_lock_blocking(eb); | 1677 | btrfs_set_lock_blocking(eb); |
| 1291 | 1678 | ||
| 1292 | if (next_key) { | 1679 | if (next_key) { |
| @@ -1330,7 +1717,7 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1330 | 1717 | ||
| 1331 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || | 1718 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || |
| 1332 | memcmp_node_keys(parent, slot, path, level)) { | 1719 | memcmp_node_keys(parent, slot, path, level)) { |
| 1333 | if (level <= lowest_level && !leaf) { | 1720 | if (level <= lowest_level) { |
| 1334 | ret = 0; | 1721 | ret = 0; |
| 1335 | break; | 1722 | break; |
| 1336 | } | 1723 | } |
| @@ -1338,16 +1725,12 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1338 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1725 | eb = read_tree_block(dest, old_bytenr, blocksize, |
| 1339 | old_ptr_gen); | 1726 | old_ptr_gen); |
| 1340 | btrfs_tree_lock(eb); | 1727 | btrfs_tree_lock(eb); |
| 1341 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1728 | if (cow) { |
| 1342 | slot, &eb); | 1729 | ret = btrfs_cow_block(trans, dest, eb, parent, |
| 1343 | BUG_ON(ret); | 1730 | slot, &eb); |
| 1344 | btrfs_set_lock_blocking(eb); | 1731 | BUG_ON(ret); |
| 1345 | |||
| 1346 | if (level <= lowest_level) { | ||
| 1347 | *leaf = eb; | ||
| 1348 | ret = 0; | ||
| 1349 | break; | ||
| 1350 | } | 1732 | } |
| 1733 | btrfs_set_lock_blocking(eb); | ||
| 1351 | 1734 | ||
| 1352 | btrfs_tree_unlock(parent); | 1735 | btrfs_tree_unlock(parent); |
| 1353 | free_extent_buffer(parent); | 1736 | free_extent_buffer(parent); |
| @@ -1356,6 +1739,13 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1356 | continue; | 1739 | continue; |
| 1357 | } | 1740 | } |
| 1358 | 1741 | ||
| 1742 | if (!cow) { | ||
| 1743 | btrfs_tree_unlock(parent); | ||
| 1744 | free_extent_buffer(parent); | ||
| 1745 | cow = 1; | ||
| 1746 | goto again; | ||
| 1747 | } | ||
| 1748 | |||
| 1359 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 1749 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
| 1360 | path->slots[level]); | 1750 | path->slots[level]); |
| 1361 | btrfs_release_path(src, path); | 1751 | btrfs_release_path(src, path); |
| @@ -1561,20 +1951,6 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
| 1561 | return 0; | 1951 | return 0; |
| 1562 | } | 1952 | } |
| 1563 | 1953 | ||
| 1564 | static void put_inodes(struct list_head *list) | ||
| 1565 | { | ||
| 1566 | struct inodevec *ivec; | ||
| 1567 | while (!list_empty(list)) { | ||
| 1568 | ivec = list_entry(list->next, struct inodevec, list); | ||
| 1569 | list_del(&ivec->list); | ||
| 1570 | while (ivec->nr > 0) { | ||
| 1571 | ivec->nr--; | ||
| 1572 | iput(ivec->inode[ivec->nr]); | ||
| 1573 | } | ||
| 1574 | kfree(ivec); | ||
| 1575 | } | ||
| 1576 | } | ||
| 1577 | |||
| 1578 | static int find_next_key(struct btrfs_path *path, int level, | 1954 | static int find_next_key(struct btrfs_path *path, int level, |
| 1579 | struct btrfs_key *key) | 1955 | struct btrfs_key *key) |
| 1580 | 1956 | ||
| @@ -1607,13 +1983,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1607 | struct btrfs_root *reloc_root; | 1983 | struct btrfs_root *reloc_root; |
| 1608 | struct btrfs_root_item *root_item; | 1984 | struct btrfs_root_item *root_item; |
| 1609 | struct btrfs_path *path; | 1985 | struct btrfs_path *path; |
| 1610 | struct extent_buffer *leaf = NULL; | 1986 | struct extent_buffer *leaf; |
| 1611 | unsigned long nr; | 1987 | unsigned long nr; |
| 1612 | int level; | 1988 | int level; |
| 1613 | int max_level; | 1989 | int max_level; |
| 1614 | int replaced = 0; | 1990 | int replaced = 0; |
| 1615 | int ret; | 1991 | int ret; |
| 1616 | int err = 0; | 1992 | int err = 0; |
| 1993 | u32 min_reserved; | ||
| 1617 | 1994 | ||
| 1618 | path = btrfs_alloc_path(); | 1995 | path = btrfs_alloc_path(); |
| 1619 | if (!path) | 1996 | if (!path) |
| @@ -1647,34 +2024,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1647 | btrfs_unlock_up_safe(path, 0); | 2024 | btrfs_unlock_up_safe(path, 0); |
| 1648 | } | 2025 | } |
| 1649 | 2026 | ||
| 1650 | if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { | 2027 | min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
| 1651 | trans = btrfs_start_transaction(root, 1); | 2028 | memset(&next_key, 0, sizeof(next_key)); |
| 1652 | 2029 | ||
| 1653 | leaf = path->nodes[0]; | 2030 | while (1) { |
| 1654 | btrfs_item_key_to_cpu(leaf, &key, 0); | 2031 | trans = btrfs_start_transaction(root, 0); |
| 1655 | btrfs_release_path(reloc_root, path); | 2032 | trans->block_rsv = rc->block_rsv; |
| 1656 | 2033 | ||
| 1657 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 2034 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
| 1658 | if (ret < 0) { | 2035 | min_reserved, 0); |
| 1659 | err = ret; | 2036 | if (ret) { |
| 1660 | goto out; | 2037 | BUG_ON(ret != -EAGAIN); |
| 2038 | ret = btrfs_commit_transaction(trans, root); | ||
| 2039 | BUG_ON(ret); | ||
| 2040 | continue; | ||
| 1661 | } | 2041 | } |
| 1662 | 2042 | ||
| 1663 | leaf = path->nodes[0]; | ||
| 1664 | btrfs_unlock_up_safe(path, 1); | ||
| 1665 | ret = replace_file_extents(trans, rc, root, leaf, | ||
| 1666 | &inode_list); | ||
| 1667 | if (ret < 0) | ||
| 1668 | err = ret; | ||
| 1669 | goto out; | ||
| 1670 | } | ||
| 1671 | |||
| 1672 | memset(&next_key, 0, sizeof(next_key)); | ||
| 1673 | |||
| 1674 | while (1) { | ||
| 1675 | leaf = NULL; | ||
| 1676 | replaced = 0; | 2043 | replaced = 0; |
| 1677 | trans = btrfs_start_transaction(root, 1); | ||
| 1678 | max_level = level; | 2044 | max_level = level; |
| 1679 | 2045 | ||
| 1680 | ret = walk_down_reloc_tree(reloc_root, path, &level); | 2046 | ret = walk_down_reloc_tree(reloc_root, path, &level); |
| @@ -1688,14 +2054,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1688 | if (!find_next_key(path, level, &key) && | 2054 | if (!find_next_key(path, level, &key) && |
| 1689 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { | 2055 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { |
| 1690 | ret = 0; | 2056 | ret = 0; |
| 1691 | } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { | ||
| 1692 | ret = replace_path(trans, root, reloc_root, | ||
| 1693 | path, &next_key, &leaf, | ||
| 1694 | level, max_level); | ||
| 1695 | } else { | 2057 | } else { |
| 1696 | ret = replace_path(trans, root, reloc_root, | 2058 | ret = replace_path(trans, root, reloc_root, path, |
| 1697 | path, &next_key, NULL, | 2059 | &next_key, level, max_level); |
| 1698 | level, max_level); | ||
| 1699 | } | 2060 | } |
| 1700 | if (ret < 0) { | 2061 | if (ret < 0) { |
| 1701 | err = ret; | 2062 | err = ret; |
| @@ -1707,16 +2068,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1707 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 2068 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
| 1708 | path->slots[level]); | 2069 | path->slots[level]); |
| 1709 | replaced = 1; | 2070 | replaced = 1; |
| 1710 | } else if (leaf) { | ||
| 1711 | /* | ||
| 1712 | * no block got replaced, try replacing file extents | ||
| 1713 | */ | ||
| 1714 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
| 1715 | ret = replace_file_extents(trans, rc, root, leaf, | ||
| 1716 | &inode_list); | ||
| 1717 | btrfs_tree_unlock(leaf); | ||
| 1718 | free_extent_buffer(leaf); | ||
| 1719 | BUG_ON(ret < 0); | ||
| 1720 | } | 2071 | } |
| 1721 | 2072 | ||
| 1722 | ret = walk_up_reloc_tree(reloc_root, path, &level); | 2073 | ret = walk_up_reloc_tree(reloc_root, path, &level); |
| @@ -1733,15 +2084,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1733 | root_item->drop_level = level; | 2084 | root_item->drop_level = level; |
| 1734 | 2085 | ||
| 1735 | nr = trans->blocks_used; | 2086 | nr = trans->blocks_used; |
| 1736 | btrfs_end_transaction(trans, root); | 2087 | btrfs_end_transaction_throttle(trans, root); |
| 1737 | 2088 | ||
| 1738 | btrfs_btree_balance_dirty(root, nr); | 2089 | btrfs_btree_balance_dirty(root, nr); |
| 1739 | 2090 | ||
| 1740 | /* | ||
| 1741 | * put inodes outside transaction, otherwise we may deadlock. | ||
| 1742 | */ | ||
| 1743 | put_inodes(&inode_list); | ||
| 1744 | |||
| 1745 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2091 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 1746 | invalidate_extent_cache(root, &key, &next_key); | 2092 | invalidate_extent_cache(root, &key, &next_key); |
| 1747 | } | 2093 | } |
| @@ -1764,87 +2110,125 @@ out: | |||
| 1764 | sizeof(root_item->drop_progress)); | 2110 | sizeof(root_item->drop_progress)); |
| 1765 | root_item->drop_level = 0; | 2111 | root_item->drop_level = 0; |
| 1766 | btrfs_set_root_refs(root_item, 0); | 2112 | btrfs_set_root_refs(root_item, 0); |
| 2113 | btrfs_update_reloc_root(trans, root); | ||
| 1767 | } | 2114 | } |
| 1768 | 2115 | ||
| 1769 | nr = trans->blocks_used; | 2116 | nr = trans->blocks_used; |
| 1770 | btrfs_end_transaction(trans, root); | 2117 | btrfs_end_transaction_throttle(trans, root); |
| 1771 | 2118 | ||
| 1772 | btrfs_btree_balance_dirty(root, nr); | 2119 | btrfs_btree_balance_dirty(root, nr); |
| 1773 | 2120 | ||
| 1774 | put_inodes(&inode_list); | ||
| 1775 | |||
| 1776 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2121 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 1777 | invalidate_extent_cache(root, &key, &next_key); | 2122 | invalidate_extent_cache(root, &key, &next_key); |
| 1778 | 2123 | ||
| 1779 | return err; | 2124 | return err; |
| 1780 | } | 2125 | } |
| 1781 | 2126 | ||
| 1782 | /* | 2127 | static noinline_for_stack |
| 1783 | * callback for the work threads. | 2128 | int prepare_to_merge(struct reloc_control *rc, int err) |
| 1784 | * this function merges reloc tree with corresponding fs tree, | ||
| 1785 | * and then drops the reloc tree. | ||
| 1786 | */ | ||
| 1787 | static void merge_func(struct btrfs_work *work) | ||
| 1788 | { | 2129 | { |
| 1789 | struct btrfs_trans_handle *trans; | 2130 | struct btrfs_root *root = rc->extent_root; |
| 1790 | struct btrfs_root *root; | ||
| 1791 | struct btrfs_root *reloc_root; | 2131 | struct btrfs_root *reloc_root; |
| 1792 | struct async_merge *async; | 2132 | struct btrfs_trans_handle *trans; |
| 2133 | LIST_HEAD(reloc_roots); | ||
| 2134 | u64 num_bytes = 0; | ||
| 2135 | int ret; | ||
| 2136 | int retries = 0; | ||
| 2137 | |||
| 2138 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 2139 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | ||
| 2140 | rc->merging_rsv_size += rc->nodes_relocated * 2; | ||
| 2141 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 2142 | again: | ||
| 2143 | if (!err) { | ||
| 2144 | num_bytes = rc->merging_rsv_size; | ||
| 2145 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | ||
| 2146 | num_bytes, &retries); | ||
| 2147 | if (ret) | ||
| 2148 | err = ret; | ||
| 2149 | } | ||
| 2150 | |||
| 2151 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
| 2152 | |||
| 2153 | if (!err) { | ||
| 2154 | if (num_bytes != rc->merging_rsv_size) { | ||
| 2155 | btrfs_end_transaction(trans, rc->extent_root); | ||
| 2156 | btrfs_block_rsv_release(rc->extent_root, | ||
| 2157 | rc->block_rsv, num_bytes); | ||
| 2158 | retries = 0; | ||
| 2159 | goto again; | ||
| 2160 | } | ||
| 2161 | } | ||
| 1793 | 2162 | ||
| 1794 | async = container_of(work, struct async_merge, work); | 2163 | rc->merge_reloc_tree = 1; |
| 1795 | reloc_root = async->root; | 2164 | |
| 2165 | while (!list_empty(&rc->reloc_roots)) { | ||
| 2166 | reloc_root = list_entry(rc->reloc_roots.next, | ||
| 2167 | struct btrfs_root, root_list); | ||
| 2168 | list_del_init(&reloc_root->root_list); | ||
| 1796 | 2169 | ||
| 1797 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
| 1798 | root = read_fs_root(reloc_root->fs_info, | 2170 | root = read_fs_root(reloc_root->fs_info, |
| 1799 | reloc_root->root_key.offset); | 2171 | reloc_root->root_key.offset); |
| 1800 | BUG_ON(IS_ERR(root)); | 2172 | BUG_ON(IS_ERR(root)); |
| 1801 | BUG_ON(root->reloc_root != reloc_root); | 2173 | BUG_ON(root->reloc_root != reloc_root); |
| 1802 | 2174 | ||
| 1803 | merge_reloc_root(async->rc, root); | 2175 | /* |
| 1804 | 2176 | * set reference count to 1, so btrfs_recover_relocation | |
| 1805 | trans = btrfs_start_transaction(root, 1); | 2177 | * knows it should resumes merging |
| 2178 | */ | ||
| 2179 | if (!err) | ||
| 2180 | btrfs_set_root_refs(&reloc_root->root_item, 1); | ||
| 1806 | btrfs_update_reloc_root(trans, root); | 2181 | btrfs_update_reloc_root(trans, root); |
| 1807 | btrfs_end_transaction(trans, root); | ||
| 1808 | } | ||
| 1809 | 2182 | ||
| 1810 | btrfs_drop_snapshot(reloc_root, 0); | 2183 | list_add(&reloc_root->root_list, &reloc_roots); |
| 2184 | } | ||
| 1811 | 2185 | ||
| 1812 | if (atomic_dec_and_test(async->num_pending)) | 2186 | list_splice(&reloc_roots, &rc->reloc_roots); |
| 1813 | complete(async->done); | ||
| 1814 | 2187 | ||
| 1815 | kfree(async); | 2188 | if (!err) |
| 2189 | btrfs_commit_transaction(trans, rc->extent_root); | ||
| 2190 | else | ||
| 2191 | btrfs_end_transaction(trans, rc->extent_root); | ||
| 2192 | return err; | ||
| 1816 | } | 2193 | } |
| 1817 | 2194 | ||
| 1818 | static int merge_reloc_roots(struct reloc_control *rc) | 2195 | static noinline_for_stack |
| 2196 | int merge_reloc_roots(struct reloc_control *rc) | ||
| 1819 | { | 2197 | { |
| 1820 | struct async_merge *async; | ||
| 1821 | struct btrfs_root *root; | 2198 | struct btrfs_root *root; |
| 1822 | struct completion done; | 2199 | struct btrfs_root *reloc_root; |
| 1823 | atomic_t num_pending; | 2200 | LIST_HEAD(reloc_roots); |
| 2201 | int found = 0; | ||
| 2202 | int ret; | ||
| 2203 | again: | ||
| 2204 | root = rc->extent_root; | ||
| 2205 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 2206 | list_splice_init(&rc->reloc_roots, &reloc_roots); | ||
| 2207 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1824 | 2208 | ||
| 1825 | init_completion(&done); | 2209 | while (!list_empty(&reloc_roots)) { |
| 1826 | atomic_set(&num_pending, 1); | 2210 | found = 1; |
| 2211 | reloc_root = list_entry(reloc_roots.next, | ||
| 2212 | struct btrfs_root, root_list); | ||
| 1827 | 2213 | ||
| 1828 | while (!list_empty(&rc->reloc_roots)) { | 2214 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { |
| 1829 | root = list_entry(rc->reloc_roots.next, | 2215 | root = read_fs_root(reloc_root->fs_info, |
| 1830 | struct btrfs_root, root_list); | 2216 | reloc_root->root_key.offset); |
| 1831 | list_del_init(&root->root_list); | 2217 | BUG_ON(IS_ERR(root)); |
| 2218 | BUG_ON(root->reloc_root != reloc_root); | ||
| 1832 | 2219 | ||
| 1833 | async = kmalloc(sizeof(*async), GFP_NOFS); | 2220 | ret = merge_reloc_root(rc, root); |
| 1834 | BUG_ON(!async); | 2221 | BUG_ON(ret); |
| 1835 | async->work.func = merge_func; | 2222 | } else { |
| 1836 | async->work.flags = 0; | 2223 | list_del_init(&reloc_root->root_list); |
| 1837 | async->rc = rc; | 2224 | } |
| 1838 | async->root = root; | 2225 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); |
| 1839 | async->done = &done; | ||
| 1840 | async->num_pending = &num_pending; | ||
| 1841 | atomic_inc(&num_pending); | ||
| 1842 | btrfs_queue_worker(&rc->workers, &async->work); | ||
| 1843 | } | 2226 | } |
| 1844 | 2227 | ||
| 1845 | if (!atomic_dec_and_test(&num_pending)) | 2228 | if (found) { |
| 1846 | wait_for_completion(&done); | 2229 | found = 0; |
| 1847 | 2230 | goto again; | |
| 2231 | } | ||
| 1848 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2232 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
| 1849 | return 0; | 2233 | return 0; |
| 1850 | } | 2234 | } |
| @@ -1875,119 +2259,169 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 1875 | return btrfs_record_root_in_trans(trans, root); | 2259 | return btrfs_record_root_in_trans(trans, root); |
| 1876 | } | 2260 | } |
| 1877 | 2261 | ||
| 1878 | /* | 2262 | static noinline_for_stack |
| 1879 | * select one tree from trees that references the block. | 2263 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, |
| 1880 | * for blocks in refernce counted trees, we preper reloc tree. | 2264 | struct reloc_control *rc, |
| 1881 | * if no reloc tree found and reloc_only is true, NULL is returned. | 2265 | struct backref_node *node, |
| 1882 | */ | 2266 | struct backref_edge *edges[], int *nr) |
| 1883 | static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, | ||
| 1884 | struct backref_node *node, | ||
| 1885 | struct backref_edge *edges[], | ||
| 1886 | int *nr, int reloc_only) | ||
| 1887 | { | 2267 | { |
| 1888 | struct backref_node *next; | 2268 | struct backref_node *next; |
| 1889 | struct btrfs_root *root; | 2269 | struct btrfs_root *root; |
| 1890 | int index; | 2270 | int index = 0; |
| 1891 | int loop = 0; | 2271 | |
| 1892 | again: | ||
| 1893 | index = 0; | ||
| 1894 | next = node; | 2272 | next = node; |
| 1895 | while (1) { | 2273 | while (1) { |
| 1896 | cond_resched(); | 2274 | cond_resched(); |
| 1897 | next = walk_up_backref(next, edges, &index); | 2275 | next = walk_up_backref(next, edges, &index); |
| 1898 | root = next->root; | 2276 | root = next->root; |
| 1899 | if (!root) { | 2277 | BUG_ON(!root); |
| 1900 | BUG_ON(!node->old_root); | 2278 | BUG_ON(!root->ref_cows); |
| 1901 | goto skip; | ||
| 1902 | } | ||
| 1903 | |||
| 1904 | /* no other choice for non-refernce counted tree */ | ||
| 1905 | if (!root->ref_cows) { | ||
| 1906 | BUG_ON(reloc_only); | ||
| 1907 | break; | ||
| 1908 | } | ||
| 1909 | 2279 | ||
| 1910 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2280 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
| 1911 | record_reloc_root_in_trans(trans, root); | 2281 | record_reloc_root_in_trans(trans, root); |
| 1912 | break; | 2282 | break; |
| 1913 | } | 2283 | } |
| 1914 | 2284 | ||
| 1915 | if (loop) { | 2285 | btrfs_record_root_in_trans(trans, root); |
| 1916 | btrfs_record_root_in_trans(trans, root); | 2286 | root = root->reloc_root; |
| 2287 | |||
| 2288 | if (next->new_bytenr != root->node->start) { | ||
| 2289 | BUG_ON(next->new_bytenr); | ||
| 2290 | BUG_ON(!list_empty(&next->list)); | ||
| 2291 | next->new_bytenr = root->node->start; | ||
| 2292 | next->root = root; | ||
| 2293 | list_add_tail(&next->list, | ||
| 2294 | &rc->backref_cache.changed); | ||
| 2295 | __mark_block_processed(rc, next); | ||
| 1917 | break; | 2296 | break; |
| 1918 | } | 2297 | } |
| 1919 | 2298 | ||
| 1920 | if (reloc_only || next != node) { | 2299 | WARN_ON(1); |
| 1921 | if (!root->reloc_root) | ||
| 1922 | btrfs_record_root_in_trans(trans, root); | ||
| 1923 | root = root->reloc_root; | ||
| 1924 | /* | ||
| 1925 | * if the reloc tree was created in current | ||
| 1926 | * transation, there is no node in backref tree | ||
| 1927 | * corresponds to the root of the reloc tree. | ||
| 1928 | */ | ||
| 1929 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
| 1930 | trans->transid - 1) | ||
| 1931 | break; | ||
| 1932 | } | ||
| 1933 | skip: | ||
| 1934 | root = NULL; | 2300 | root = NULL; |
| 1935 | next = walk_down_backref(edges, &index); | 2301 | next = walk_down_backref(edges, &index); |
| 1936 | if (!next || next->level <= node->level) | 2302 | if (!next || next->level <= node->level) |
| 1937 | break; | 2303 | break; |
| 1938 | } | 2304 | } |
| 2305 | if (!root) | ||
| 2306 | return NULL; | ||
| 1939 | 2307 | ||
| 1940 | if (!root && !loop && !reloc_only) { | 2308 | *nr = index; |
| 1941 | loop = 1; | 2309 | next = node; |
| 1942 | goto again; | 2310 | /* setup backref node path for btrfs_reloc_cow_block */ |
| 2311 | while (1) { | ||
| 2312 | rc->backref_cache.path[next->level] = next; | ||
| 2313 | if (--index < 0) | ||
| 2314 | break; | ||
| 2315 | next = edges[index]->node[UPPER]; | ||
| 1943 | } | 2316 | } |
| 1944 | |||
| 1945 | if (root) | ||
| 1946 | *nr = index; | ||
| 1947 | else | ||
| 1948 | *nr = 0; | ||
| 1949 | |||
| 1950 | return root; | 2317 | return root; |
| 1951 | } | 2318 | } |
| 1952 | 2319 | ||
| 2320 | /* | ||
| 2321 | * select a tree root for relocation. return NULL if the block | ||
| 2322 | * is reference counted. we should use do_relocation() in this | ||
| 2323 | * case. return a tree root pointer if the block isn't reference | ||
| 2324 | * counted. return -ENOENT if the block is root of reloc tree. | ||
| 2325 | */ | ||
| 1953 | static noinline_for_stack | 2326 | static noinline_for_stack |
| 1954 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | 2327 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, |
| 1955 | struct backref_node *node) | 2328 | struct backref_node *node) |
| 1956 | { | 2329 | { |
| 2330 | struct backref_node *next; | ||
| 2331 | struct btrfs_root *root; | ||
| 2332 | struct btrfs_root *fs_root = NULL; | ||
| 1957 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | 2333 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; |
| 1958 | int nr; | 2334 | int index = 0; |
| 1959 | return __select_one_root(trans, node, edges, &nr, 0); | 2335 | |
| 2336 | next = node; | ||
| 2337 | while (1) { | ||
| 2338 | cond_resched(); | ||
| 2339 | next = walk_up_backref(next, edges, &index); | ||
| 2340 | root = next->root; | ||
| 2341 | BUG_ON(!root); | ||
| 2342 | |||
| 2343 | /* no other choice for non-refernce counted tree */ | ||
| 2344 | if (!root->ref_cows) | ||
| 2345 | return root; | ||
| 2346 | |||
| 2347 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | ||
| 2348 | fs_root = root; | ||
| 2349 | |||
| 2350 | if (next != node) | ||
| 2351 | return NULL; | ||
| 2352 | |||
| 2353 | next = walk_down_backref(edges, &index); | ||
| 2354 | if (!next || next->level <= node->level) | ||
| 2355 | break; | ||
| 2356 | } | ||
| 2357 | |||
| 2358 | if (!fs_root) | ||
| 2359 | return ERR_PTR(-ENOENT); | ||
| 2360 | return fs_root; | ||
| 1960 | } | 2361 | } |
| 1961 | 2362 | ||
| 1962 | static noinline_for_stack | 2363 | static noinline_for_stack |
| 1963 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | 2364 | u64 calcu_metadata_size(struct reloc_control *rc, |
| 1964 | struct backref_node *node, | 2365 | struct backref_node *node, int reserve) |
| 1965 | struct backref_edge *edges[], int *nr) | ||
| 1966 | { | 2366 | { |
| 1967 | return __select_one_root(trans, node, edges, nr, 1); | 2367 | struct backref_node *next = node; |
| 2368 | struct backref_edge *edge; | ||
| 2369 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
| 2370 | u64 num_bytes = 0; | ||
| 2371 | int index = 0; | ||
| 2372 | |||
| 2373 | BUG_ON(reserve && node->processed); | ||
| 2374 | |||
| 2375 | while (next) { | ||
| 2376 | cond_resched(); | ||
| 2377 | while (1) { | ||
| 2378 | if (next->processed && (reserve || next != node)) | ||
| 2379 | break; | ||
| 2380 | |||
| 2381 | num_bytes += btrfs_level_size(rc->extent_root, | ||
| 2382 | next->level); | ||
| 2383 | |||
| 2384 | if (list_empty(&next->upper)) | ||
| 2385 | break; | ||
| 2386 | |||
| 2387 | edge = list_entry(next->upper.next, | ||
| 2388 | struct backref_edge, list[LOWER]); | ||
| 2389 | edges[index++] = edge; | ||
| 2390 | next = edge->node[UPPER]; | ||
| 2391 | } | ||
| 2392 | next = walk_down_backref(edges, &index); | ||
| 2393 | } | ||
| 2394 | return num_bytes; | ||
| 1968 | } | 2395 | } |
| 1969 | 2396 | ||
| 1970 | static void grab_path_buffers(struct btrfs_path *path, | 2397 | static int reserve_metadata_space(struct btrfs_trans_handle *trans, |
| 1971 | struct backref_node *node, | 2398 | struct reloc_control *rc, |
| 1972 | struct backref_edge *edges[], int nr) | 2399 | struct backref_node *node) |
| 1973 | { | 2400 | { |
| 1974 | int i = 0; | 2401 | struct btrfs_root *root = rc->extent_root; |
| 1975 | while (1) { | 2402 | u64 num_bytes; |
| 1976 | drop_node_buffer(node); | 2403 | int ret; |
| 1977 | node->eb = path->nodes[node->level]; | 2404 | |
| 1978 | BUG_ON(!node->eb); | 2405 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
| 1979 | if (path->locks[node->level]) | ||
| 1980 | node->locked = 1; | ||
| 1981 | path->nodes[node->level] = NULL; | ||
| 1982 | path->locks[node->level] = 0; | ||
| 1983 | |||
| 1984 | if (i >= nr) | ||
| 1985 | break; | ||
| 1986 | 2406 | ||
| 1987 | edges[i]->blockptr = node->eb->start; | 2407 | trans->block_rsv = rc->block_rsv; |
| 1988 | node = edges[i]->node[UPPER]; | 2408 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, |
| 1989 | i++; | 2409 | &rc->block_rsv_retries); |
| 2410 | if (ret) { | ||
| 2411 | if (ret == -EAGAIN) | ||
| 2412 | rc->commit_transaction = 1; | ||
| 2413 | return ret; | ||
| 1990 | } | 2414 | } |
| 2415 | |||
| 2416 | rc->block_rsv_retries = 0; | ||
| 2417 | return 0; | ||
| 2418 | } | ||
| 2419 | |||
| 2420 | static void release_metadata_space(struct reloc_control *rc, | ||
| 2421 | struct backref_node *node) | ||
| 2422 | { | ||
| 2423 | u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; | ||
| 2424 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); | ||
| 1991 | } | 2425 | } |
| 1992 | 2426 | ||
| 1993 | /* | 2427 | /* |
| @@ -1998,6 +2432,7 @@ static void grab_path_buffers(struct btrfs_path *path, | |||
| 1998 | * in that case this function just updates pointers. | 2432 | * in that case this function just updates pointers. |
| 1999 | */ | 2433 | */ |
| 2000 | static int do_relocation(struct btrfs_trans_handle *trans, | 2434 | static int do_relocation(struct btrfs_trans_handle *trans, |
| 2435 | struct reloc_control *rc, | ||
| 2001 | struct backref_node *node, | 2436 | struct backref_node *node, |
| 2002 | struct btrfs_key *key, | 2437 | struct btrfs_key *key, |
| 2003 | struct btrfs_path *path, int lowest) | 2438 | struct btrfs_path *path, int lowest) |
| @@ -2018,18 +2453,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2018 | BUG_ON(lowest && node->eb); | 2453 | BUG_ON(lowest && node->eb); |
| 2019 | 2454 | ||
| 2020 | path->lowest_level = node->level + 1; | 2455 | path->lowest_level = node->level + 1; |
| 2456 | rc->backref_cache.path[node->level] = node; | ||
| 2021 | list_for_each_entry(edge, &node->upper, list[LOWER]) { | 2457 | list_for_each_entry(edge, &node->upper, list[LOWER]) { |
| 2022 | cond_resched(); | 2458 | cond_resched(); |
| 2023 | if (node->eb && node->eb->start == edge->blockptr) | ||
| 2024 | continue; | ||
| 2025 | 2459 | ||
| 2026 | upper = edge->node[UPPER]; | 2460 | upper = edge->node[UPPER]; |
| 2027 | root = select_reloc_root(trans, upper, edges, &nr); | 2461 | root = select_reloc_root(trans, rc, upper, edges, &nr); |
| 2028 | if (!root) | 2462 | BUG_ON(!root); |
| 2029 | continue; | 2463 | |
| 2030 | 2464 | if (upper->eb && !upper->locked) { | |
| 2031 | if (upper->eb && !upper->locked) | 2465 | if (!lowest) { |
| 2466 | ret = btrfs_bin_search(upper->eb, key, | ||
| 2467 | upper->level, &slot); | ||
| 2468 | BUG_ON(ret); | ||
| 2469 | bytenr = btrfs_node_blockptr(upper->eb, slot); | ||
| 2470 | if (node->eb->start == bytenr) | ||
| 2471 | goto next; | ||
| 2472 | } | ||
| 2032 | drop_node_buffer(upper); | 2473 | drop_node_buffer(upper); |
| 2474 | } | ||
| 2033 | 2475 | ||
| 2034 | if (!upper->eb) { | 2476 | if (!upper->eb) { |
| 2035 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 2477 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
| @@ -2039,11 +2481,17 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2039 | } | 2481 | } |
| 2040 | BUG_ON(ret > 0); | 2482 | BUG_ON(ret > 0); |
| 2041 | 2483 | ||
| 2042 | slot = path->slots[upper->level]; | 2484 | if (!upper->eb) { |
| 2485 | upper->eb = path->nodes[upper->level]; | ||
| 2486 | path->nodes[upper->level] = NULL; | ||
| 2487 | } else { | ||
| 2488 | BUG_ON(upper->eb != path->nodes[upper->level]); | ||
| 2489 | } | ||
| 2043 | 2490 | ||
| 2044 | btrfs_unlock_up_safe(path, upper->level + 1); | 2491 | upper->locked = 1; |
| 2045 | grab_path_buffers(path, upper, edges, nr); | 2492 | path->locks[upper->level] = 0; |
| 2046 | 2493 | ||
| 2494 | slot = path->slots[upper->level]; | ||
| 2047 | btrfs_release_path(NULL, path); | 2495 | btrfs_release_path(NULL, path); |
| 2048 | } else { | 2496 | } else { |
| 2049 | ret = btrfs_bin_search(upper->eb, key, upper->level, | 2497 | ret = btrfs_bin_search(upper->eb, key, upper->level, |
| @@ -2052,14 +2500,11 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2052 | } | 2500 | } |
| 2053 | 2501 | ||
| 2054 | bytenr = btrfs_node_blockptr(upper->eb, slot); | 2502 | bytenr = btrfs_node_blockptr(upper->eb, slot); |
| 2055 | if (!lowest) { | 2503 | if (lowest) { |
| 2056 | if (node->eb->start == bytenr) { | 2504 | BUG_ON(bytenr != node->bytenr); |
| 2057 | btrfs_tree_unlock(upper->eb); | ||
| 2058 | upper->locked = 0; | ||
| 2059 | continue; | ||
| 2060 | } | ||
| 2061 | } else { | 2505 | } else { |
| 2062 | BUG_ON(node->bytenr != bytenr); | 2506 | if (node->eb->start == bytenr) |
| 2507 | goto next; | ||
| 2063 | } | 2508 | } |
| 2064 | 2509 | ||
| 2065 | blocksize = btrfs_level_size(root, node->level); | 2510 | blocksize = btrfs_level_size(root, node->level); |
| @@ -2071,13 +2516,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2071 | if (!node->eb) { | 2516 | if (!node->eb) { |
| 2072 | ret = btrfs_cow_block(trans, root, eb, upper->eb, | 2517 | ret = btrfs_cow_block(trans, root, eb, upper->eb, |
| 2073 | slot, &eb); | 2518 | slot, &eb); |
| 2519 | btrfs_tree_unlock(eb); | ||
| 2520 | free_extent_buffer(eb); | ||
| 2074 | if (ret < 0) { | 2521 | if (ret < 0) { |
| 2075 | err = ret; | 2522 | err = ret; |
| 2076 | break; | 2523 | goto next; |
| 2077 | } | 2524 | } |
| 2078 | btrfs_set_lock_blocking(eb); | 2525 | BUG_ON(node->eb != eb); |
| 2079 | node->eb = eb; | ||
| 2080 | node->locked = 1; | ||
| 2081 | } else { | 2526 | } else { |
| 2082 | btrfs_set_node_blockptr(upper->eb, slot, | 2527 | btrfs_set_node_blockptr(upper->eb, slot, |
| 2083 | node->eb->start); | 2528 | node->eb->start); |
| @@ -2095,67 +2540,80 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2095 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | 2540 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); |
| 2096 | BUG_ON(ret); | 2541 | BUG_ON(ret); |
| 2097 | } | 2542 | } |
| 2098 | if (!lowest) { | 2543 | next: |
| 2099 | btrfs_tree_unlock(upper->eb); | 2544 | if (!upper->pending) |
| 2100 | upper->locked = 0; | 2545 | drop_node_buffer(upper); |
| 2101 | } | 2546 | else |
| 2547 | unlock_node_buffer(upper); | ||
| 2548 | if (err) | ||
| 2549 | break; | ||
| 2102 | } | 2550 | } |
| 2551 | |||
| 2552 | if (!err && node->pending) { | ||
| 2553 | drop_node_buffer(node); | ||
| 2554 | list_move_tail(&node->list, &rc->backref_cache.changed); | ||
| 2555 | node->pending = 0; | ||
| 2556 | } | ||
| 2557 | |||
| 2103 | path->lowest_level = 0; | 2558 | path->lowest_level = 0; |
| 2559 | BUG_ON(err == -ENOSPC); | ||
| 2104 | return err; | 2560 | return err; |
| 2105 | } | 2561 | } |
| 2106 | 2562 | ||
| 2107 | static int link_to_upper(struct btrfs_trans_handle *trans, | 2563 | static int link_to_upper(struct btrfs_trans_handle *trans, |
| 2564 | struct reloc_control *rc, | ||
| 2108 | struct backref_node *node, | 2565 | struct backref_node *node, |
| 2109 | struct btrfs_path *path) | 2566 | struct btrfs_path *path) |
| 2110 | { | 2567 | { |
| 2111 | struct btrfs_key key; | 2568 | struct btrfs_key key; |
| 2112 | if (!node->eb || list_empty(&node->upper)) | ||
| 2113 | return 0; | ||
| 2114 | 2569 | ||
| 2115 | btrfs_node_key_to_cpu(node->eb, &key, 0); | 2570 | btrfs_node_key_to_cpu(node->eb, &key, 0); |
| 2116 | return do_relocation(trans, node, &key, path, 0); | 2571 | return do_relocation(trans, rc, node, &key, path, 0); |
| 2117 | } | 2572 | } |
| 2118 | 2573 | ||
| 2119 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, | 2574 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, |
| 2120 | struct backref_cache *cache, | 2575 | struct reloc_control *rc, |
| 2121 | struct btrfs_path *path) | 2576 | struct btrfs_path *path, int err) |
| 2122 | { | 2577 | { |
| 2578 | LIST_HEAD(list); | ||
| 2579 | struct backref_cache *cache = &rc->backref_cache; | ||
| 2123 | struct backref_node *node; | 2580 | struct backref_node *node; |
| 2124 | int level; | 2581 | int level; |
| 2125 | int ret; | 2582 | int ret; |
| 2126 | int err = 0; | ||
| 2127 | 2583 | ||
| 2128 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2584 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
| 2129 | while (!list_empty(&cache->pending[level])) { | 2585 | while (!list_empty(&cache->pending[level])) { |
| 2130 | node = list_entry(cache->pending[level].next, | 2586 | node = list_entry(cache->pending[level].next, |
| 2131 | struct backref_node, lower); | 2587 | struct backref_node, list); |
| 2132 | BUG_ON(node->level != level); | 2588 | list_move_tail(&node->list, &list); |
| 2589 | BUG_ON(!node->pending); | ||
| 2133 | 2590 | ||
| 2134 | ret = link_to_upper(trans, node, path); | 2591 | if (!err) { |
| 2135 | if (ret < 0) | 2592 | ret = link_to_upper(trans, rc, node, path); |
| 2136 | err = ret; | 2593 | if (ret < 0) |
| 2137 | /* | 2594 | err = ret; |
| 2138 | * this remove the node from the pending list and | 2595 | } |
| 2139 | * may add some other nodes to the level + 1 | ||
| 2140 | * pending list | ||
| 2141 | */ | ||
| 2142 | remove_backref_node(cache, node); | ||
| 2143 | } | 2596 | } |
| 2597 | list_splice_init(&list, &cache->pending[level]); | ||
| 2144 | } | 2598 | } |
| 2145 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
| 2146 | return err; | 2599 | return err; |
| 2147 | } | 2600 | } |
| 2148 | 2601 | ||
| 2149 | static void mark_block_processed(struct reloc_control *rc, | 2602 | static void mark_block_processed(struct reloc_control *rc, |
| 2150 | struct backref_node *node) | 2603 | u64 bytenr, u32 blocksize) |
| 2604 | { | ||
| 2605 | set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, | ||
| 2606 | EXTENT_DIRTY, GFP_NOFS); | ||
| 2607 | } | ||
| 2608 | |||
| 2609 | static void __mark_block_processed(struct reloc_control *rc, | ||
| 2610 | struct backref_node *node) | ||
| 2151 | { | 2611 | { |
| 2152 | u32 blocksize; | 2612 | u32 blocksize; |
| 2153 | if (node->level == 0 || | 2613 | if (node->level == 0 || |
| 2154 | in_block_group(node->bytenr, rc->block_group)) { | 2614 | in_block_group(node->bytenr, rc->block_group)) { |
| 2155 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2615 | blocksize = btrfs_level_size(rc->extent_root, node->level); |
| 2156 | set_extent_bits(&rc->processed_blocks, node->bytenr, | 2616 | mark_block_processed(rc, node->bytenr, blocksize); |
| 2157 | node->bytenr + blocksize - 1, EXTENT_DIRTY, | ||
| 2158 | GFP_NOFS); | ||
| 2159 | } | 2617 | } |
| 2160 | node->processed = 1; | 2618 | node->processed = 1; |
| 2161 | } | 2619 | } |
| @@ -2178,7 +2636,7 @@ static void update_processed_blocks(struct reloc_control *rc, | |||
| 2178 | if (next->processed) | 2636 | if (next->processed) |
| 2179 | break; | 2637 | break; |
| 2180 | 2638 | ||
| 2181 | mark_block_processed(rc, next); | 2639 | __mark_block_processed(rc, next); |
| 2182 | 2640 | ||
| 2183 | if (list_empty(&next->upper)) | 2641 | if (list_empty(&next->upper)) |
| 2184 | break; | 2642 | break; |
| @@ -2201,138 +2659,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
| 2201 | return 0; | 2659 | return 0; |
| 2202 | } | 2660 | } |
| 2203 | 2661 | ||
| 2204 | /* | ||
| 2205 | * check if there are any file extent pointers in the leaf point to | ||
| 2206 | * data require processing | ||
| 2207 | */ | ||
| 2208 | static int check_file_extents(struct reloc_control *rc, | ||
| 2209 | u64 bytenr, u32 blocksize, u64 ptr_gen) | ||
| 2210 | { | ||
| 2211 | struct btrfs_key found_key; | ||
| 2212 | struct btrfs_file_extent_item *fi; | ||
| 2213 | struct extent_buffer *leaf; | ||
| 2214 | u32 nritems; | ||
| 2215 | int i; | ||
| 2216 | int ret = 0; | ||
| 2217 | |||
| 2218 | leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); | ||
| 2219 | |||
| 2220 | nritems = btrfs_header_nritems(leaf); | ||
| 2221 | for (i = 0; i < nritems; i++) { | ||
| 2222 | cond_resched(); | ||
| 2223 | btrfs_item_key_to_cpu(leaf, &found_key, i); | ||
| 2224 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 2225 | continue; | ||
| 2226 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
| 2227 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 2228 | BTRFS_FILE_EXTENT_INLINE) | ||
| 2229 | continue; | ||
| 2230 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 2231 | if (bytenr == 0) | ||
| 2232 | continue; | ||
| 2233 | if (in_block_group(bytenr, rc->block_group)) { | ||
| 2234 | ret = 1; | ||
| 2235 | break; | ||
| 2236 | } | ||
| 2237 | } | ||
| 2238 | free_extent_buffer(leaf); | ||
| 2239 | return ret; | ||
| 2240 | } | ||
| 2241 | |||
| 2242 | /* | ||
| 2243 | * scan child blocks of a given block to find blocks require processing | ||
| 2244 | */ | ||
| 2245 | static int add_child_blocks(struct btrfs_trans_handle *trans, | ||
| 2246 | struct reloc_control *rc, | ||
| 2247 | struct backref_node *node, | ||
| 2248 | struct rb_root *blocks) | ||
| 2249 | { | ||
| 2250 | struct tree_block *block; | ||
| 2251 | struct rb_node *rb_node; | ||
| 2252 | u64 bytenr; | ||
| 2253 | u64 ptr_gen; | ||
| 2254 | u32 blocksize; | ||
| 2255 | u32 nritems; | ||
| 2256 | int i; | ||
| 2257 | int err = 0; | ||
| 2258 | |||
| 2259 | nritems = btrfs_header_nritems(node->eb); | ||
| 2260 | blocksize = btrfs_level_size(rc->extent_root, node->level - 1); | ||
| 2261 | for (i = 0; i < nritems; i++) { | ||
| 2262 | cond_resched(); | ||
| 2263 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
| 2264 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
| 2265 | if (ptr_gen == trans->transid) | ||
| 2266 | continue; | ||
| 2267 | if (!in_block_group(bytenr, rc->block_group) && | ||
| 2268 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
| 2269 | continue; | ||
| 2270 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
| 2271 | continue; | ||
| 2272 | |||
| 2273 | readahead_tree_block(rc->extent_root, | ||
| 2274 | bytenr, blocksize, ptr_gen); | ||
| 2275 | } | ||
| 2276 | |||
| 2277 | for (i = 0; i < nritems; i++) { | ||
| 2278 | cond_resched(); | ||
| 2279 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
| 2280 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
| 2281 | if (ptr_gen == trans->transid) | ||
| 2282 | continue; | ||
| 2283 | if (!in_block_group(bytenr, rc->block_group) && | ||
| 2284 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
| 2285 | continue; | ||
| 2286 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
| 2287 | continue; | ||
| 2288 | if (!in_block_group(bytenr, rc->block_group) && | ||
| 2289 | !check_file_extents(rc, bytenr, blocksize, ptr_gen)) | ||
| 2290 | continue; | ||
| 2291 | |||
| 2292 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
| 2293 | if (!block) { | ||
| 2294 | err = -ENOMEM; | ||
| 2295 | break; | ||
| 2296 | } | ||
| 2297 | block->bytenr = bytenr; | ||
| 2298 | btrfs_node_key_to_cpu(node->eb, &block->key, i); | ||
| 2299 | block->level = node->level - 1; | ||
| 2300 | block->key_ready = 1; | ||
| 2301 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
| 2302 | BUG_ON(rb_node); | ||
| 2303 | } | ||
| 2304 | if (err) | ||
| 2305 | free_block_list(blocks); | ||
| 2306 | return err; | ||
| 2307 | } | ||
| 2308 | |||
| 2309 | /* | ||
| 2310 | * find adjacent blocks require processing | ||
| 2311 | */ | ||
| 2312 | static noinline_for_stack | ||
| 2313 | int add_adjacent_blocks(struct btrfs_trans_handle *trans, | ||
| 2314 | struct reloc_control *rc, | ||
| 2315 | struct backref_cache *cache, | ||
| 2316 | struct rb_root *blocks, int level, | ||
| 2317 | struct backref_node **upper) | ||
| 2318 | { | ||
| 2319 | struct backref_node *node; | ||
| 2320 | int ret = 0; | ||
| 2321 | |||
| 2322 | WARN_ON(!list_empty(&cache->pending[level])); | ||
| 2323 | |||
| 2324 | if (list_empty(&cache->pending[level + 1])) | ||
| 2325 | return 1; | ||
| 2326 | |||
| 2327 | node = list_entry(cache->pending[level + 1].next, | ||
| 2328 | struct backref_node, lower); | ||
| 2329 | if (node->eb) | ||
| 2330 | ret = add_child_blocks(trans, rc, node, blocks); | ||
| 2331 | |||
| 2332 | *upper = node; | ||
| 2333 | return ret; | ||
| 2334 | } | ||
| 2335 | |||
| 2336 | static int get_tree_block_key(struct reloc_control *rc, | 2662 | static int get_tree_block_key(struct reloc_control *rc, |
| 2337 | struct tree_block *block) | 2663 | struct tree_block *block) |
| 2338 | { | 2664 | { |
| @@ -2370,40 +2696,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
| 2370 | struct btrfs_path *path) | 2696 | struct btrfs_path *path) |
| 2371 | { | 2697 | { |
| 2372 | struct btrfs_root *root; | 2698 | struct btrfs_root *root; |
| 2373 | int ret; | 2699 | int release = 0; |
| 2700 | int ret = 0; | ||
| 2374 | 2701 | ||
| 2702 | if (!node) | ||
| 2703 | return 0; | ||
| 2704 | |||
| 2705 | BUG_ON(node->processed); | ||
| 2375 | root = select_one_root(trans, node); | 2706 | root = select_one_root(trans, node); |
| 2376 | if (unlikely(!root)) { | 2707 | if (root == ERR_PTR(-ENOENT)) { |
| 2377 | rc->found_old_snapshot = 1; | ||
| 2378 | update_processed_blocks(rc, node); | 2708 | update_processed_blocks(rc, node); |
| 2379 | return 0; | 2709 | goto out; |
| 2380 | } | 2710 | } |
| 2381 | 2711 | ||
| 2382 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2712 | if (!root || root->ref_cows) { |
| 2383 | ret = do_relocation(trans, node, key, path, 1); | 2713 | ret = reserve_metadata_space(trans, rc, node); |
| 2384 | if (ret < 0) | 2714 | if (ret) |
| 2385 | goto out; | ||
| 2386 | if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
| 2387 | ret = replace_file_extents(trans, rc, root, | ||
| 2388 | node->eb, NULL); | ||
| 2389 | if (ret < 0) | ||
| 2390 | goto out; | ||
| 2391 | } | ||
| 2392 | drop_node_buffer(node); | ||
| 2393 | } else if (!root->ref_cows) { | ||
| 2394 | path->lowest_level = node->level; | ||
| 2395 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
| 2396 | btrfs_release_path(root, path); | ||
| 2397 | if (ret < 0) | ||
| 2398 | goto out; | 2715 | goto out; |
| 2399 | } else if (root != node->root) { | 2716 | release = 1; |
| 2400 | WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); | ||
| 2401 | } | 2717 | } |
| 2402 | 2718 | ||
| 2403 | update_processed_blocks(rc, node); | 2719 | if (root) { |
| 2404 | ret = 0; | 2720 | if (root->ref_cows) { |
| 2721 | BUG_ON(node->new_bytenr); | ||
| 2722 | BUG_ON(!list_empty(&node->list)); | ||
| 2723 | btrfs_record_root_in_trans(trans, root); | ||
| 2724 | root = root->reloc_root; | ||
| 2725 | node->new_bytenr = root->node->start; | ||
| 2726 | node->root = root; | ||
| 2727 | list_add_tail(&node->list, &rc->backref_cache.changed); | ||
| 2728 | } else { | ||
| 2729 | path->lowest_level = node->level; | ||
| 2730 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
| 2731 | btrfs_release_path(root, path); | ||
| 2732 | if (ret > 0) | ||
| 2733 | ret = 0; | ||
| 2734 | } | ||
| 2735 | if (!ret) | ||
| 2736 | update_processed_blocks(rc, node); | ||
| 2737 | } else { | ||
| 2738 | ret = do_relocation(trans, rc, node, key, path, 1); | ||
| 2739 | } | ||
| 2405 | out: | 2740 | out: |
| 2406 | drop_node_buffer(node); | 2741 | if (ret || node->level == 0 || node->cowonly) { |
| 2742 | if (release) | ||
| 2743 | release_metadata_space(rc, node); | ||
| 2744 | remove_backref_node(&rc->backref_cache, node); | ||
| 2745 | } | ||
| 2407 | return ret; | 2746 | return ret; |
| 2408 | } | 2747 | } |
| 2409 | 2748 | ||
| @@ -2414,12 +2753,10 @@ static noinline_for_stack | |||
| 2414 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, | 2753 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, |
| 2415 | struct reloc_control *rc, struct rb_root *blocks) | 2754 | struct reloc_control *rc, struct rb_root *blocks) |
| 2416 | { | 2755 | { |
| 2417 | struct backref_cache *cache; | ||
| 2418 | struct backref_node *node; | 2756 | struct backref_node *node; |
| 2419 | struct btrfs_path *path; | 2757 | struct btrfs_path *path; |
| 2420 | struct tree_block *block; | 2758 | struct tree_block *block; |
| 2421 | struct rb_node *rb_node; | 2759 | struct rb_node *rb_node; |
| 2422 | int level = -1; | ||
| 2423 | int ret; | 2760 | int ret; |
| 2424 | int err = 0; | 2761 | int err = 0; |
| 2425 | 2762 | ||
| @@ -2427,21 +2764,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
| 2427 | if (!path) | 2764 | if (!path) |
| 2428 | return -ENOMEM; | 2765 | return -ENOMEM; |
| 2429 | 2766 | ||
| 2430 | cache = kmalloc(sizeof(*cache), GFP_NOFS); | ||
| 2431 | if (!cache) { | ||
| 2432 | btrfs_free_path(path); | ||
| 2433 | return -ENOMEM; | ||
| 2434 | } | ||
| 2435 | |||
| 2436 | backref_cache_init(cache); | ||
| 2437 | |||
| 2438 | rb_node = rb_first(blocks); | 2767 | rb_node = rb_first(blocks); |
| 2439 | while (rb_node) { | 2768 | while (rb_node) { |
| 2440 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2769 | block = rb_entry(rb_node, struct tree_block, rb_node); |
| 2441 | if (level == -1) | ||
| 2442 | level = block->level; | ||
| 2443 | else | ||
| 2444 | BUG_ON(level != block->level); | ||
| 2445 | if (!block->key_ready) | 2770 | if (!block->key_ready) |
| 2446 | reada_tree_block(rc, block); | 2771 | reada_tree_block(rc, block); |
| 2447 | rb_node = rb_next(rb_node); | 2772 | rb_node = rb_next(rb_node); |
| @@ -2459,7 +2784,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
| 2459 | while (rb_node) { | 2784 | while (rb_node) { |
| 2460 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2785 | block = rb_entry(rb_node, struct tree_block, rb_node); |
| 2461 | 2786 | ||
| 2462 | node = build_backref_tree(rc, cache, &block->key, | 2787 | node = build_backref_tree(rc, &block->key, |
| 2463 | block->level, block->bytenr); | 2788 | block->level, block->bytenr); |
| 2464 | if (IS_ERR(node)) { | 2789 | if (IS_ERR(node)) { |
| 2465 | err = PTR_ERR(node); | 2790 | err = PTR_ERR(node); |
| @@ -2469,79 +2794,62 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
| 2469 | ret = relocate_tree_block(trans, rc, node, &block->key, | 2794 | ret = relocate_tree_block(trans, rc, node, &block->key, |
| 2470 | path); | 2795 | path); |
| 2471 | if (ret < 0) { | 2796 | if (ret < 0) { |
| 2472 | err = ret; | 2797 | if (ret != -EAGAIN || rb_node == rb_first(blocks)) |
| 2798 | err = ret; | ||
| 2473 | goto out; | 2799 | goto out; |
| 2474 | } | 2800 | } |
| 2475 | remove_backref_node(cache, node); | ||
| 2476 | rb_node = rb_next(rb_node); | 2801 | rb_node = rb_next(rb_node); |
| 2477 | } | 2802 | } |
| 2478 | 2803 | out: | |
| 2479 | if (level > 0) | ||
| 2480 | goto out; | ||
| 2481 | |||
| 2482 | free_block_list(blocks); | 2804 | free_block_list(blocks); |
| 2805 | err = finish_pending_nodes(trans, rc, path, err); | ||
| 2483 | 2806 | ||
| 2484 | /* | 2807 | btrfs_free_path(path); |
| 2485 | * now backrefs of some upper level tree blocks have been cached, | 2808 | return err; |
| 2486 | * try relocating blocks referenced by these upper level blocks. | 2809 | } |
| 2487 | */ | ||
| 2488 | while (1) { | ||
| 2489 | struct backref_node *upper = NULL; | ||
| 2490 | if (trans->transaction->in_commit || | ||
| 2491 | trans->transaction->delayed_refs.flushing) | ||
| 2492 | break; | ||
| 2493 | 2810 | ||
| 2494 | ret = add_adjacent_blocks(trans, rc, cache, blocks, level, | 2811 | static noinline_for_stack |
| 2495 | &upper); | 2812 | int prealloc_file_extent_cluster(struct inode *inode, |
| 2496 | if (ret < 0) | 2813 | struct file_extent_cluster *cluster) |
| 2497 | err = ret; | 2814 | { |
| 2498 | if (ret != 0) | 2815 | u64 alloc_hint = 0; |
| 2499 | break; | 2816 | u64 start; |
| 2817 | u64 end; | ||
| 2818 | u64 offset = BTRFS_I(inode)->index_cnt; | ||
| 2819 | u64 num_bytes; | ||
| 2820 | int nr = 0; | ||
| 2821 | int ret = 0; | ||
| 2500 | 2822 | ||
| 2501 | rb_node = rb_first(blocks); | 2823 | BUG_ON(cluster->start != cluster->boundary[0]); |
| 2502 | while (rb_node) { | 2824 | mutex_lock(&inode->i_mutex); |
| 2503 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
| 2504 | if (trans->transaction->in_commit || | ||
| 2505 | trans->transaction->delayed_refs.flushing) | ||
| 2506 | goto out; | ||
| 2507 | BUG_ON(!block->key_ready); | ||
| 2508 | node = build_backref_tree(rc, cache, &block->key, | ||
| 2509 | level, block->bytenr); | ||
| 2510 | if (IS_ERR(node)) { | ||
| 2511 | err = PTR_ERR(node); | ||
| 2512 | goto out; | ||
| 2513 | } | ||
| 2514 | 2825 | ||
| 2515 | ret = relocate_tree_block(trans, rc, node, | 2826 | ret = btrfs_check_data_free_space(inode, cluster->end + |
| 2516 | &block->key, path); | 2827 | 1 - cluster->start); |
| 2517 | if (ret < 0) { | 2828 | if (ret) |
| 2518 | err = ret; | 2829 | goto out; |
| 2519 | goto out; | ||
| 2520 | } | ||
| 2521 | remove_backref_node(cache, node); | ||
| 2522 | rb_node = rb_next(rb_node); | ||
| 2523 | } | ||
| 2524 | free_block_list(blocks); | ||
| 2525 | 2830 | ||
| 2526 | if (upper) { | 2831 | while (nr < cluster->nr) { |
| 2527 | ret = link_to_upper(trans, upper, path); | 2832 | start = cluster->boundary[nr] - offset; |
| 2528 | if (ret < 0) { | 2833 | if (nr + 1 < cluster->nr) |
| 2529 | err = ret; | 2834 | end = cluster->boundary[nr + 1] - 1 - offset; |
| 2530 | break; | 2835 | else |
| 2531 | } | 2836 | end = cluster->end - offset; |
| 2532 | remove_backref_node(cache, upper); | 2837 | |
| 2533 | } | 2838 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); |
| 2839 | num_bytes = end + 1 - start; | ||
| 2840 | ret = btrfs_prealloc_file_range(inode, 0, start, | ||
| 2841 | num_bytes, num_bytes, | ||
| 2842 | end + 1, &alloc_hint); | ||
| 2843 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2844 | if (ret) | ||
| 2845 | break; | ||
| 2846 | nr++; | ||
| 2534 | } | 2847 | } |
| 2848 | btrfs_free_reserved_data_space(inode, cluster->end + | ||
| 2849 | 1 - cluster->start); | ||
| 2535 | out: | 2850 | out: |
| 2536 | free_block_list(blocks); | 2851 | mutex_unlock(&inode->i_mutex); |
| 2537 | 2852 | return ret; | |
| 2538 | ret = finish_pending_nodes(trans, cache, path); | ||
| 2539 | if (ret < 0) | ||
| 2540 | err = ret; | ||
| 2541 | |||
| 2542 | kfree(cache); | ||
| 2543 | btrfs_free_path(path); | ||
| 2544 | return err; | ||
| 2545 | } | 2853 | } |
| 2546 | 2854 | ||
| 2547 | static noinline_for_stack | 2855 | static noinline_for_stack |
| @@ -2587,7 +2895,6 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2587 | u64 offset = BTRFS_I(inode)->index_cnt; | 2895 | u64 offset = BTRFS_I(inode)->index_cnt; |
| 2588 | unsigned long index; | 2896 | unsigned long index; |
| 2589 | unsigned long last_index; | 2897 | unsigned long last_index; |
| 2590 | unsigned int dirty_page = 0; | ||
| 2591 | struct page *page; | 2898 | struct page *page; |
| 2592 | struct file_ra_state *ra; | 2899 | struct file_ra_state *ra; |
| 2593 | int nr = 0; | 2900 | int nr = 0; |
| @@ -2600,21 +2907,24 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2600 | if (!ra) | 2907 | if (!ra) |
| 2601 | return -ENOMEM; | 2908 | return -ENOMEM; |
| 2602 | 2909 | ||
| 2603 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2910 | ret = prealloc_file_extent_cluster(inode, cluster); |
| 2604 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2911 | if (ret) |
| 2912 | goto out; | ||
| 2605 | 2913 | ||
| 2606 | mutex_lock(&inode->i_mutex); | 2914 | file_ra_state_init(ra, inode->i_mapping); |
| 2607 | 2915 | ||
| 2608 | i_size_write(inode, cluster->end + 1 - offset); | ||
| 2609 | ret = setup_extent_mapping(inode, cluster->start - offset, | 2916 | ret = setup_extent_mapping(inode, cluster->start - offset, |
| 2610 | cluster->end - offset, cluster->start); | 2917 | cluster->end - offset, cluster->start); |
| 2611 | if (ret) | 2918 | if (ret) |
| 2612 | goto out_unlock; | 2919 | goto out; |
| 2613 | |||
| 2614 | file_ra_state_init(ra, inode->i_mapping); | ||
| 2615 | 2920 | ||
| 2616 | WARN_ON(cluster->start != cluster->boundary[0]); | 2921 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
| 2922 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
| 2617 | while (index <= last_index) { | 2923 | while (index <= last_index) { |
| 2924 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | ||
| 2925 | if (ret) | ||
| 2926 | goto out; | ||
| 2927 | |||
| 2618 | page = find_lock_page(inode->i_mapping, index); | 2928 | page = find_lock_page(inode->i_mapping, index); |
| 2619 | if (!page) { | 2929 | if (!page) { |
| 2620 | page_cache_sync_readahead(inode->i_mapping, | 2930 | page_cache_sync_readahead(inode->i_mapping, |
| @@ -2622,8 +2932,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2622 | last_index + 1 - index); | 2932 | last_index + 1 - index); |
| 2623 | page = grab_cache_page(inode->i_mapping, index); | 2933 | page = grab_cache_page(inode->i_mapping, index); |
| 2624 | if (!page) { | 2934 | if (!page) { |
| 2935 | btrfs_delalloc_release_metadata(inode, | ||
| 2936 | PAGE_CACHE_SIZE); | ||
| 2625 | ret = -ENOMEM; | 2937 | ret = -ENOMEM; |
| 2626 | goto out_unlock; | 2938 | goto out; |
| 2627 | } | 2939 | } |
| 2628 | } | 2940 | } |
| 2629 | 2941 | ||
| @@ -2639,8 +2951,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2639 | if (!PageUptodate(page)) { | 2951 | if (!PageUptodate(page)) { |
| 2640 | unlock_page(page); | 2952 | unlock_page(page); |
| 2641 | page_cache_release(page); | 2953 | page_cache_release(page); |
| 2954 | btrfs_delalloc_release_metadata(inode, | ||
| 2955 | PAGE_CACHE_SIZE); | ||
| 2642 | ret = -EIO; | 2956 | ret = -EIO; |
| 2643 | goto out_unlock; | 2957 | goto out; |
| 2644 | } | 2958 | } |
| 2645 | } | 2959 | } |
| 2646 | 2960 | ||
| @@ -2659,10 +2973,9 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2659 | EXTENT_BOUNDARY, GFP_NOFS); | 2973 | EXTENT_BOUNDARY, GFP_NOFS); |
| 2660 | nr++; | 2974 | nr++; |
| 2661 | } | 2975 | } |
| 2662 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
| 2663 | 2976 | ||
| 2977 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
| 2664 | set_page_dirty(page); | 2978 | set_page_dirty(page); |
| 2665 | dirty_page++; | ||
| 2666 | 2979 | ||
| 2667 | unlock_extent(&BTRFS_I(inode)->io_tree, | 2980 | unlock_extent(&BTRFS_I(inode)->io_tree, |
| 2668 | page_start, page_end, GFP_NOFS); | 2981 | page_start, page_end, GFP_NOFS); |
| @@ -2670,20 +2983,11 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2670 | page_cache_release(page); | 2983 | page_cache_release(page); |
| 2671 | 2984 | ||
| 2672 | index++; | 2985 | index++; |
| 2673 | if (nr < cluster->nr && | 2986 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| 2674 | page_end + 1 + offset == cluster->boundary[nr]) { | 2987 | btrfs_throttle(BTRFS_I(inode)->root); |
| 2675 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2676 | dirty_page); | ||
| 2677 | dirty_page = 0; | ||
| 2678 | } | ||
| 2679 | } | ||
| 2680 | if (dirty_page) { | ||
| 2681 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2682 | dirty_page); | ||
| 2683 | } | 2988 | } |
| 2684 | WARN_ON(nr != cluster->nr); | 2989 | WARN_ON(nr != cluster->nr); |
| 2685 | out_unlock: | 2990 | out: |
| 2686 | mutex_unlock(&inode->i_mutex); | ||
| 2687 | kfree(ra); | 2991 | kfree(ra); |
| 2688 | return ret; | 2992 | return ret; |
| 2689 | } | 2993 | } |
| @@ -2869,9 +3173,6 @@ out: | |||
| 2869 | static int block_use_full_backref(struct reloc_control *rc, | 3173 | static int block_use_full_backref(struct reloc_control *rc, |
| 2870 | struct extent_buffer *eb) | 3174 | struct extent_buffer *eb) |
| 2871 | { | 3175 | { |
| 2872 | struct btrfs_path *path; | ||
| 2873 | struct btrfs_extent_item *ei; | ||
| 2874 | struct btrfs_key key; | ||
| 2875 | u64 flags; | 3176 | u64 flags; |
| 2876 | int ret; | 3177 | int ret; |
| 2877 | 3178 | ||
| @@ -2879,28 +3180,14 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
| 2879 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) | 3180 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) |
| 2880 | return 1; | 3181 | return 1; |
| 2881 | 3182 | ||
| 2882 | path = btrfs_alloc_path(); | 3183 | ret = btrfs_lookup_extent_info(NULL, rc->extent_root, |
| 2883 | BUG_ON(!path); | 3184 | eb->start, eb->len, NULL, &flags); |
| 2884 | |||
| 2885 | key.objectid = eb->start; | ||
| 2886 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 2887 | key.offset = eb->len; | ||
| 2888 | |||
| 2889 | path->search_commit_root = 1; | ||
| 2890 | path->skip_locking = 1; | ||
| 2891 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
| 2892 | &key, path, 0, 0); | ||
| 2893 | BUG_ON(ret); | 3185 | BUG_ON(ret); |
| 2894 | 3186 | ||
| 2895 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 2896 | struct btrfs_extent_item); | ||
| 2897 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
| 2898 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
| 2899 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) | 3187 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
| 2900 | ret = 1; | 3188 | ret = 1; |
| 2901 | else | 3189 | else |
| 2902 | ret = 0; | 3190 | ret = 0; |
| 2903 | btrfs_free_path(path); | ||
| 2904 | return ret; | 3191 | return ret; |
| 2905 | } | 3192 | } |
| 2906 | 3193 | ||
| @@ -3073,22 +3360,10 @@ int add_data_references(struct reloc_control *rc, | |||
| 3073 | struct btrfs_extent_inline_ref *iref; | 3360 | struct btrfs_extent_inline_ref *iref; |
| 3074 | unsigned long ptr; | 3361 | unsigned long ptr; |
| 3075 | unsigned long end; | 3362 | unsigned long end; |
| 3076 | u32 blocksize; | 3363 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); |
| 3077 | int ret; | 3364 | int ret; |
| 3078 | int err = 0; | 3365 | int err = 0; |
| 3079 | 3366 | ||
| 3080 | ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, | ||
| 3081 | extent_key->offset); | ||
| 3082 | BUG_ON(ret < 0); | ||
| 3083 | if (ret > 0) { | ||
| 3084 | /* the relocated data is fragmented */ | ||
| 3085 | rc->extents_skipped++; | ||
| 3086 | btrfs_release_path(rc->extent_root, path); | ||
| 3087 | return 0; | ||
| 3088 | } | ||
| 3089 | |||
| 3090 | blocksize = btrfs_level_size(rc->extent_root, 0); | ||
| 3091 | |||
| 3092 | eb = path->nodes[0]; | 3367 | eb = path->nodes[0]; |
| 3093 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); | 3368 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); |
| 3094 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); | 3369 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); |
| @@ -3169,7 +3444,8 @@ int add_data_references(struct reloc_control *rc, | |||
| 3169 | */ | 3444 | */ |
| 3170 | static noinline_for_stack | 3445 | static noinline_for_stack |
| 3171 | int find_next_extent(struct btrfs_trans_handle *trans, | 3446 | int find_next_extent(struct btrfs_trans_handle *trans, |
| 3172 | struct reloc_control *rc, struct btrfs_path *path) | 3447 | struct reloc_control *rc, struct btrfs_path *path, |
| 3448 | struct btrfs_key *extent_key) | ||
| 3173 | { | 3449 | { |
| 3174 | struct btrfs_key key; | 3450 | struct btrfs_key key; |
| 3175 | struct extent_buffer *leaf; | 3451 | struct extent_buffer *leaf; |
| @@ -3224,6 +3500,7 @@ next: | |||
| 3224 | rc->search_start = end + 1; | 3500 | rc->search_start = end + 1; |
| 3225 | } else { | 3501 | } else { |
| 3226 | rc->search_start = key.objectid + key.offset; | 3502 | rc->search_start = key.objectid + key.offset; |
| 3503 | memcpy(extent_key, &key, sizeof(key)); | ||
| 3227 | return 0; | 3504 | return 0; |
| 3228 | } | 3505 | } |
| 3229 | } | 3506 | } |
| @@ -3261,12 +3538,49 @@ static int check_extent_flags(u64 flags) | |||
| 3261 | return 0; | 3538 | return 0; |
| 3262 | } | 3539 | } |
| 3263 | 3540 | ||
| 3541 | static noinline_for_stack | ||
| 3542 | int prepare_to_relocate(struct reloc_control *rc) | ||
| 3543 | { | ||
| 3544 | struct btrfs_trans_handle *trans; | ||
| 3545 | int ret; | ||
| 3546 | |||
| 3547 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | ||
| 3548 | if (!rc->block_rsv) | ||
| 3549 | return -ENOMEM; | ||
| 3550 | |||
| 3551 | /* | ||
| 3552 | * reserve some space for creating reloc trees. | ||
| 3553 | * btrfs_init_reloc_root will use them when there | ||
| 3554 | * is no reservation in transaction handle. | ||
| 3555 | */ | ||
| 3556 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | ||
| 3557 | rc->extent_root->nodesize * 256, | ||
| 3558 | &rc->block_rsv_retries); | ||
| 3559 | if (ret) | ||
| 3560 | return ret; | ||
| 3561 | |||
| 3562 | rc->block_rsv->refill_used = 1; | ||
| 3563 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
| 3564 | |||
| 3565 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | ||
| 3566 | rc->search_start = rc->block_group->key.objectid; | ||
| 3567 | rc->extents_found = 0; | ||
| 3568 | rc->nodes_relocated = 0; | ||
| 3569 | rc->merging_rsv_size = 0; | ||
| 3570 | rc->block_rsv_retries = 0; | ||
| 3571 | |||
| 3572 | rc->create_reloc_tree = 1; | ||
| 3573 | set_reloc_control(rc); | ||
| 3574 | |||
| 3575 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
| 3576 | btrfs_commit_transaction(trans, rc->extent_root); | ||
| 3577 | return 0; | ||
| 3578 | } | ||
| 3264 | 3579 | ||
| 3265 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3580 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
| 3266 | { | 3581 | { |
| 3267 | struct rb_root blocks = RB_ROOT; | 3582 | struct rb_root blocks = RB_ROOT; |
| 3268 | struct btrfs_key key; | 3583 | struct btrfs_key key; |
| 3269 | struct file_extent_cluster *cluster; | ||
| 3270 | struct btrfs_trans_handle *trans = NULL; | 3584 | struct btrfs_trans_handle *trans = NULL; |
| 3271 | struct btrfs_path *path; | 3585 | struct btrfs_path *path; |
| 3272 | struct btrfs_extent_item *ei; | 3586 | struct btrfs_extent_item *ei; |
| @@ -3276,33 +3590,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3276 | int ret; | 3590 | int ret; |
| 3277 | int err = 0; | 3591 | int err = 0; |
| 3278 | 3592 | ||
| 3279 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
| 3280 | if (!cluster) | ||
| 3281 | return -ENOMEM; | ||
| 3282 | |||
| 3283 | path = btrfs_alloc_path(); | 3593 | path = btrfs_alloc_path(); |
| 3284 | if (!path) { | 3594 | if (!path) |
| 3285 | kfree(cluster); | ||
| 3286 | return -ENOMEM; | 3595 | return -ENOMEM; |
| 3287 | } | ||
| 3288 | |||
| 3289 | rc->extents_found = 0; | ||
| 3290 | rc->extents_skipped = 0; | ||
| 3291 | |||
| 3292 | rc->search_start = rc->block_group->key.objectid; | ||
| 3293 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
| 3294 | GFP_NOFS); | ||
| 3295 | |||
| 3296 | rc->create_reloc_root = 1; | ||
| 3297 | set_reloc_control(rc); | ||
| 3298 | 3596 | ||
| 3299 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3597 | ret = prepare_to_relocate(rc); |
| 3300 | btrfs_commit_transaction(trans, rc->extent_root); | 3598 | if (ret) { |
| 3599 | err = ret; | ||
| 3600 | goto out_free; | ||
| 3601 | } | ||
| 3301 | 3602 | ||
| 3302 | while (1) { | 3603 | while (1) { |
| 3303 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3604 | trans = btrfs_start_transaction(rc->extent_root, 0); |
| 3605 | |||
| 3606 | if (update_backref_cache(trans, &rc->backref_cache)) { | ||
| 3607 | btrfs_end_transaction(trans, rc->extent_root); | ||
| 3608 | continue; | ||
| 3609 | } | ||
| 3304 | 3610 | ||
| 3305 | ret = find_next_extent(trans, rc, path); | 3611 | ret = find_next_extent(trans, rc, path, &key); |
| 3306 | if (ret < 0) | 3612 | if (ret < 0) |
| 3307 | err = ret; | 3613 | err = ret; |
| 3308 | if (ret != 0) | 3614 | if (ret != 0) |
| @@ -3312,9 +3618,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3312 | 3618 | ||
| 3313 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3619 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 3314 | struct btrfs_extent_item); | 3620 | struct btrfs_extent_item); |
| 3315 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3621 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); |
| 3316 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
| 3317 | path->slots[0]); | ||
| 3318 | if (item_size >= sizeof(*ei)) { | 3622 | if (item_size >= sizeof(*ei)) { |
| 3319 | flags = btrfs_extent_flags(path->nodes[0], ei); | 3623 | flags = btrfs_extent_flags(path->nodes[0], ei); |
| 3320 | ret = check_extent_flags(flags); | 3624 | ret = check_extent_flags(flags); |
| @@ -3355,73 +3659,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3355 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 3659 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 3356 | ret = add_tree_block(rc, &key, path, &blocks); | 3660 | ret = add_tree_block(rc, &key, path, &blocks); |
| 3357 | } else if (rc->stage == UPDATE_DATA_PTRS && | 3661 | } else if (rc->stage == UPDATE_DATA_PTRS && |
| 3358 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3662 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
| 3359 | ret = add_data_references(rc, &key, path, &blocks); | 3663 | ret = add_data_references(rc, &key, path, &blocks); |
| 3360 | } else { | 3664 | } else { |
| 3361 | btrfs_release_path(rc->extent_root, path); | 3665 | btrfs_release_path(rc->extent_root, path); |
| 3362 | ret = 0; | 3666 | ret = 0; |
| 3363 | } | 3667 | } |
| 3364 | if (ret < 0) { | 3668 | if (ret < 0) { |
| 3365 | err = 0; | 3669 | err = ret; |
| 3366 | break; | 3670 | break; |
| 3367 | } | 3671 | } |
| 3368 | 3672 | ||
| 3369 | if (!RB_EMPTY_ROOT(&blocks)) { | 3673 | if (!RB_EMPTY_ROOT(&blocks)) { |
| 3370 | ret = relocate_tree_blocks(trans, rc, &blocks); | 3674 | ret = relocate_tree_blocks(trans, rc, &blocks); |
| 3371 | if (ret < 0) { | 3675 | if (ret < 0) { |
| 3676 | if (ret != -EAGAIN) { | ||
| 3677 | err = ret; | ||
| 3678 | break; | ||
| 3679 | } | ||
| 3680 | rc->extents_found--; | ||
| 3681 | rc->search_start = key.objectid; | ||
| 3682 | } | ||
| 3683 | } | ||
| 3684 | |||
| 3685 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | ||
| 3686 | rc->block_rsv, 0, 5); | ||
| 3687 | if (ret < 0) { | ||
| 3688 | if (ret != -EAGAIN) { | ||
| 3372 | err = ret; | 3689 | err = ret; |
| 3690 | WARN_ON(1); | ||
| 3373 | break; | 3691 | break; |
| 3374 | } | 3692 | } |
| 3693 | rc->commit_transaction = 1; | ||
| 3375 | } | 3694 | } |
| 3376 | 3695 | ||
| 3377 | nr = trans->blocks_used; | 3696 | if (rc->commit_transaction) { |
| 3378 | btrfs_end_transaction(trans, rc->extent_root); | 3697 | rc->commit_transaction = 0; |
| 3698 | ret = btrfs_commit_transaction(trans, rc->extent_root); | ||
| 3699 | BUG_ON(ret); | ||
| 3700 | } else { | ||
| 3701 | nr = trans->blocks_used; | ||
| 3702 | btrfs_end_transaction_throttle(trans, rc->extent_root); | ||
| 3703 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
| 3704 | } | ||
| 3379 | trans = NULL; | 3705 | trans = NULL; |
| 3380 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
| 3381 | 3706 | ||
| 3382 | if (rc->stage == MOVE_DATA_EXTENTS && | 3707 | if (rc->stage == MOVE_DATA_EXTENTS && |
| 3383 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3708 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
| 3384 | rc->found_file_extent = 1; | 3709 | rc->found_file_extent = 1; |
| 3385 | ret = relocate_data_extent(rc->data_inode, | 3710 | ret = relocate_data_extent(rc->data_inode, |
| 3386 | &key, cluster); | 3711 | &key, &rc->cluster); |
| 3387 | if (ret < 0) { | 3712 | if (ret < 0) { |
| 3388 | err = ret; | 3713 | err = ret; |
| 3389 | break; | 3714 | break; |
| 3390 | } | 3715 | } |
| 3391 | } | 3716 | } |
| 3392 | } | 3717 | } |
| 3393 | btrfs_free_path(path); | 3718 | |
| 3719 | btrfs_release_path(rc->extent_root, path); | ||
| 3720 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
| 3721 | GFP_NOFS); | ||
| 3394 | 3722 | ||
| 3395 | if (trans) { | 3723 | if (trans) { |
| 3396 | nr = trans->blocks_used; | 3724 | nr = trans->blocks_used; |
| 3397 | btrfs_end_transaction(trans, rc->extent_root); | 3725 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
| 3398 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3726 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
| 3399 | } | 3727 | } |
| 3400 | 3728 | ||
| 3401 | if (!err) { | 3729 | if (!err) { |
| 3402 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | 3730 | ret = relocate_file_extent_cluster(rc->data_inode, |
| 3731 | &rc->cluster); | ||
| 3403 | if (ret < 0) | 3732 | if (ret < 0) |
| 3404 | err = ret; | 3733 | err = ret; |
| 3405 | } | 3734 | } |
| 3406 | 3735 | ||
| 3407 | kfree(cluster); | 3736 | rc->create_reloc_tree = 0; |
| 3737 | set_reloc_control(rc); | ||
| 3408 | 3738 | ||
| 3409 | rc->create_reloc_root = 0; | 3739 | backref_cache_cleanup(&rc->backref_cache); |
| 3410 | smp_mb(); | 3740 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
| 3411 | 3741 | ||
| 3412 | if (rc->extents_found > 0) { | 3742 | err = prepare_to_merge(rc, err); |
| 3413 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
| 3414 | btrfs_commit_transaction(trans, rc->extent_root); | ||
| 3415 | } | ||
| 3416 | 3743 | ||
| 3417 | merge_reloc_roots(rc); | 3744 | merge_reloc_roots(rc); |
| 3418 | 3745 | ||
| 3746 | rc->merge_reloc_tree = 0; | ||
| 3419 | unset_reloc_control(rc); | 3747 | unset_reloc_control(rc); |
| 3748 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | ||
| 3420 | 3749 | ||
| 3421 | /* get rid of pinned extents */ | 3750 | /* get rid of pinned extents */ |
| 3422 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3751 | trans = btrfs_join_transaction(rc->extent_root, 1); |
| 3423 | btrfs_commit_transaction(trans, rc->extent_root); | 3752 | btrfs_commit_transaction(trans, rc->extent_root); |
| 3424 | 3753 | out_free: | |
| 3754 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | ||
| 3755 | btrfs_free_path(path); | ||
| 3425 | return err; | 3756 | return err; |
| 3426 | } | 3757 | } |
| 3427 | 3758 | ||
| @@ -3447,7 +3778,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
| 3447 | btrfs_set_inode_generation(leaf, item, 1); | 3778 | btrfs_set_inode_generation(leaf, item, 1); |
| 3448 | btrfs_set_inode_size(leaf, item, 0); | 3779 | btrfs_set_inode_size(leaf, item, 0); |
| 3449 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3780 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
| 3450 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3781 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
| 3782 | BTRFS_INODE_PREALLOC); | ||
| 3451 | btrfs_mark_buffer_dirty(leaf); | 3783 | btrfs_mark_buffer_dirty(leaf); |
| 3452 | btrfs_release_path(root, path); | 3784 | btrfs_release_path(root, path); |
| 3453 | out: | 3785 | out: |
| @@ -3459,8 +3791,9 @@ out: | |||
| 3459 | * helper to create inode for data relocation. | 3791 | * helper to create inode for data relocation. |
| 3460 | * the inode is in data relocation tree and its link count is 0 | 3792 | * the inode is in data relocation tree and its link count is 0 |
| 3461 | */ | 3793 | */ |
| 3462 | static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | 3794 | static noinline_for_stack |
| 3463 | struct btrfs_block_group_cache *group) | 3795 | struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, |
| 3796 | struct btrfs_block_group_cache *group) | ||
| 3464 | { | 3797 | { |
| 3465 | struct inode *inode = NULL; | 3798 | struct inode *inode = NULL; |
| 3466 | struct btrfs_trans_handle *trans; | 3799 | struct btrfs_trans_handle *trans; |
| @@ -3474,8 +3807,9 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3474 | if (IS_ERR(root)) | 3807 | if (IS_ERR(root)) |
| 3475 | return ERR_CAST(root); | 3808 | return ERR_CAST(root); |
| 3476 | 3809 | ||
| 3477 | trans = btrfs_start_transaction(root, 1); | 3810 | trans = btrfs_start_transaction(root, 6); |
| 3478 | BUG_ON(!trans); | 3811 | if (IS_ERR(trans)) |
| 3812 | return ERR_CAST(trans); | ||
| 3479 | 3813 | ||
| 3480 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 3814 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); |
| 3481 | if (err) | 3815 | if (err) |
| @@ -3495,7 +3829,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3495 | out: | 3829 | out: |
| 3496 | nr = trans->blocks_used; | 3830 | nr = trans->blocks_used; |
| 3497 | btrfs_end_transaction(trans, root); | 3831 | btrfs_end_transaction(trans, root); |
| 3498 | |||
| 3499 | btrfs_btree_balance_dirty(root, nr); | 3832 | btrfs_btree_balance_dirty(root, nr); |
| 3500 | if (err) { | 3833 | if (err) { |
| 3501 | if (inode) | 3834 | if (inode) |
| @@ -3505,6 +3838,21 @@ out: | |||
| 3505 | return inode; | 3838 | return inode; |
| 3506 | } | 3839 | } |
| 3507 | 3840 | ||
| 3841 | static struct reloc_control *alloc_reloc_control(void) | ||
| 3842 | { | ||
| 3843 | struct reloc_control *rc; | ||
| 3844 | |||
| 3845 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
| 3846 | if (!rc) | ||
| 3847 | return NULL; | ||
| 3848 | |||
| 3849 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
| 3850 | backref_cache_init(&rc->backref_cache); | ||
| 3851 | mapping_tree_init(&rc->reloc_root_tree); | ||
| 3852 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
| 3853 | return rc; | ||
| 3854 | } | ||
| 3855 | |||
| 3508 | /* | 3856 | /* |
| 3509 | * function to relocate all extents in a block group. | 3857 | * function to relocate all extents in a block group. |
| 3510 | */ | 3858 | */ |
| @@ -3513,24 +3861,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3513 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3861 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
| 3514 | struct reloc_control *rc; | 3862 | struct reloc_control *rc; |
| 3515 | int ret; | 3863 | int ret; |
| 3864 | int rw = 0; | ||
| 3516 | int err = 0; | 3865 | int err = 0; |
| 3517 | 3866 | ||
| 3518 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 3867 | rc = alloc_reloc_control(); |
| 3519 | if (!rc) | 3868 | if (!rc) |
| 3520 | return -ENOMEM; | 3869 | return -ENOMEM; |
| 3521 | 3870 | ||
| 3522 | mapping_tree_init(&rc->reloc_root_tree); | 3871 | rc->extent_root = extent_root; |
| 3523 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
| 3524 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
| 3525 | 3872 | ||
| 3526 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); | 3873 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); |
| 3527 | BUG_ON(!rc->block_group); | 3874 | BUG_ON(!rc->block_group); |
| 3528 | 3875 | ||
| 3529 | btrfs_init_workers(&rc->workers, "relocate", | 3876 | if (!rc->block_group->ro) { |
| 3530 | fs_info->thread_pool_size, NULL); | 3877 | ret = btrfs_set_block_group_ro(extent_root, rc->block_group); |
| 3531 | 3878 | if (ret) { | |
| 3532 | rc->extent_root = extent_root; | 3879 | err = ret; |
| 3533 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3880 | goto out; |
| 3881 | } | ||
| 3882 | rw = 1; | ||
| 3883 | } | ||
| 3534 | 3884 | ||
| 3535 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | 3885 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); |
| 3536 | if (IS_ERR(rc->data_inode)) { | 3886 | if (IS_ERR(rc->data_inode)) { |
| @@ -3547,9 +3897,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3547 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 3897 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); |
| 3548 | 3898 | ||
| 3549 | while (1) { | 3899 | while (1) { |
| 3550 | rc->extents_found = 0; | ||
| 3551 | rc->extents_skipped = 0; | ||
| 3552 | |||
| 3553 | mutex_lock(&fs_info->cleaner_mutex); | 3900 | mutex_lock(&fs_info->cleaner_mutex); |
| 3554 | 3901 | ||
| 3555 | btrfs_clean_old_snapshots(fs_info->tree_root); | 3902 | btrfs_clean_old_snapshots(fs_info->tree_root); |
| @@ -3558,7 +3905,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3558 | mutex_unlock(&fs_info->cleaner_mutex); | 3905 | mutex_unlock(&fs_info->cleaner_mutex); |
| 3559 | if (ret < 0) { | 3906 | if (ret < 0) { |
| 3560 | err = ret; | 3907 | err = ret; |
| 3561 | break; | 3908 | goto out; |
| 3562 | } | 3909 | } |
| 3563 | 3910 | ||
| 3564 | if (rc->extents_found == 0) | 3911 | if (rc->extents_found == 0) |
| @@ -3572,18 +3919,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3572 | invalidate_mapping_pages(rc->data_inode->i_mapping, | 3919 | invalidate_mapping_pages(rc->data_inode->i_mapping, |
| 3573 | 0, -1); | 3920 | 0, -1); |
| 3574 | rc->stage = UPDATE_DATA_PTRS; | 3921 | rc->stage = UPDATE_DATA_PTRS; |
| 3575 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
| 3576 | rc->extents_skipped >= rc->extents_found) { | ||
| 3577 | iput(rc->data_inode); | ||
| 3578 | rc->data_inode = create_reloc_inode(fs_info, | ||
| 3579 | rc->block_group); | ||
| 3580 | if (IS_ERR(rc->data_inode)) { | ||
| 3581 | err = PTR_ERR(rc->data_inode); | ||
| 3582 | rc->data_inode = NULL; | ||
| 3583 | break; | ||
| 3584 | } | ||
| 3585 | rc->stage = MOVE_DATA_EXTENTS; | ||
| 3586 | rc->found_file_extent = 0; | ||
| 3587 | } | 3922 | } |
| 3588 | } | 3923 | } |
| 3589 | 3924 | ||
| @@ -3596,8 +3931,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3596 | WARN_ON(rc->block_group->reserved > 0); | 3931 | WARN_ON(rc->block_group->reserved > 0); |
| 3597 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); | 3932 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); |
| 3598 | out: | 3933 | out: |
| 3934 | if (err && rw) | ||
| 3935 | btrfs_set_block_group_rw(extent_root, rc->block_group); | ||
| 3599 | iput(rc->data_inode); | 3936 | iput(rc->data_inode); |
| 3600 | btrfs_stop_workers(&rc->workers); | ||
| 3601 | btrfs_put_block_group(rc->block_group); | 3937 | btrfs_put_block_group(rc->block_group); |
| 3602 | kfree(rc); | 3938 | kfree(rc); |
| 3603 | return err; | 3939 | return err; |
| @@ -3608,7 +3944,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
| 3608 | struct btrfs_trans_handle *trans; | 3944 | struct btrfs_trans_handle *trans; |
| 3609 | int ret; | 3945 | int ret; |
| 3610 | 3946 | ||
| 3611 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | 3947 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
| 3612 | 3948 | ||
| 3613 | memset(&root->root_item.drop_progress, 0, | 3949 | memset(&root->root_item.drop_progress, 0, |
| 3614 | sizeof(root->root_item.drop_progress)); | 3950 | sizeof(root->root_item.drop_progress)); |
| @@ -3701,20 +4037,20 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3701 | if (list_empty(&reloc_roots)) | 4037 | if (list_empty(&reloc_roots)) |
| 3702 | goto out; | 4038 | goto out; |
| 3703 | 4039 | ||
| 3704 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 4040 | rc = alloc_reloc_control(); |
| 3705 | if (!rc) { | 4041 | if (!rc) { |
| 3706 | err = -ENOMEM; | 4042 | err = -ENOMEM; |
| 3707 | goto out; | 4043 | goto out; |
| 3708 | } | 4044 | } |
| 3709 | 4045 | ||
| 3710 | mapping_tree_init(&rc->reloc_root_tree); | ||
| 3711 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
| 3712 | btrfs_init_workers(&rc->workers, "relocate", | ||
| 3713 | root->fs_info->thread_pool_size, NULL); | ||
| 3714 | rc->extent_root = root->fs_info->extent_root; | 4046 | rc->extent_root = root->fs_info->extent_root; |
| 3715 | 4047 | ||
| 3716 | set_reloc_control(rc); | 4048 | set_reloc_control(rc); |
| 3717 | 4049 | ||
| 4050 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
| 4051 | |||
| 4052 | rc->merge_reloc_tree = 1; | ||
| 4053 | |||
| 3718 | while (!list_empty(&reloc_roots)) { | 4054 | while (!list_empty(&reloc_roots)) { |
| 3719 | reloc_root = list_entry(reloc_roots.next, | 4055 | reloc_root = list_entry(reloc_roots.next, |
| 3720 | struct btrfs_root, root_list); | 4056 | struct btrfs_root, root_list); |
| @@ -3734,20 +4070,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3734 | fs_root->reloc_root = reloc_root; | 4070 | fs_root->reloc_root = reloc_root; |
| 3735 | } | 4071 | } |
| 3736 | 4072 | ||
| 3737 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
| 3738 | btrfs_commit_transaction(trans, rc->extent_root); | 4073 | btrfs_commit_transaction(trans, rc->extent_root); |
| 3739 | 4074 | ||
| 3740 | merge_reloc_roots(rc); | 4075 | merge_reloc_roots(rc); |
| 3741 | 4076 | ||
| 3742 | unset_reloc_control(rc); | 4077 | unset_reloc_control(rc); |
| 3743 | 4078 | ||
| 3744 | trans = btrfs_start_transaction(rc->extent_root, 1); | 4079 | trans = btrfs_join_transaction(rc->extent_root, 1); |
| 3745 | btrfs_commit_transaction(trans, rc->extent_root); | 4080 | btrfs_commit_transaction(trans, rc->extent_root); |
| 3746 | out: | 4081 | out: |
| 3747 | if (rc) { | 4082 | kfree(rc); |
| 3748 | btrfs_stop_workers(&rc->workers); | ||
| 3749 | kfree(rc); | ||
| 3750 | } | ||
| 3751 | while (!list_empty(&reloc_roots)) { | 4083 | while (!list_empty(&reloc_roots)) { |
| 3752 | reloc_root = list_entry(reloc_roots.next, | 4084 | reloc_root = list_entry(reloc_roots.next, |
| 3753 | struct btrfs_root, root_list); | 4085 | struct btrfs_root, root_list); |
| @@ -3813,3 +4145,130 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
| 3813 | btrfs_put_ordered_extent(ordered); | 4145 | btrfs_put_ordered_extent(ordered); |
| 3814 | return 0; | 4146 | return 0; |
| 3815 | } | 4147 | } |
| 4148 | |||
| 4149 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
| 4150 | struct btrfs_root *root, struct extent_buffer *buf, | ||
| 4151 | struct extent_buffer *cow) | ||
| 4152 | { | ||
| 4153 | struct reloc_control *rc; | ||
| 4154 | struct backref_node *node; | ||
| 4155 | int first_cow = 0; | ||
| 4156 | int level; | ||
| 4157 | int ret; | ||
| 4158 | |||
| 4159 | rc = root->fs_info->reloc_ctl; | ||
| 4160 | if (!rc) | ||
| 4161 | return; | ||
| 4162 | |||
| 4163 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && | ||
| 4164 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
| 4165 | |||
| 4166 | level = btrfs_header_level(buf); | ||
| 4167 | if (btrfs_header_generation(buf) <= | ||
| 4168 | btrfs_root_last_snapshot(&root->root_item)) | ||
| 4169 | first_cow = 1; | ||
| 4170 | |||
| 4171 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && | ||
| 4172 | rc->create_reloc_tree) { | ||
| 4173 | WARN_ON(!first_cow && level == 0); | ||
| 4174 | |||
| 4175 | node = rc->backref_cache.path[level]; | ||
| 4176 | BUG_ON(node->bytenr != buf->start && | ||
| 4177 | node->new_bytenr != buf->start); | ||
| 4178 | |||
| 4179 | drop_node_buffer(node); | ||
| 4180 | extent_buffer_get(cow); | ||
| 4181 | node->eb = cow; | ||
| 4182 | node->new_bytenr = cow->start; | ||
| 4183 | |||
| 4184 | if (!node->pending) { | ||
| 4185 | list_move_tail(&node->list, | ||
| 4186 | &rc->backref_cache.pending[level]); | ||
| 4187 | node->pending = 1; | ||
| 4188 | } | ||
| 4189 | |||
| 4190 | if (first_cow) | ||
| 4191 | __mark_block_processed(rc, node); | ||
| 4192 | |||
| 4193 | if (first_cow && level > 0) | ||
| 4194 | rc->nodes_relocated += buf->len; | ||
| 4195 | } | ||
| 4196 | |||
| 4197 | if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { | ||
| 4198 | ret = replace_file_extents(trans, rc, root, cow); | ||
| 4199 | BUG_ON(ret); | ||
| 4200 | } | ||
| 4201 | } | ||
| 4202 | |||
| 4203 | /* | ||
| 4204 | * called before creating snapshot. it calculates metadata reservation | ||
| 4205 | * requried for relocating tree blocks in the snapshot | ||
| 4206 | */ | ||
| 4207 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 4208 | struct btrfs_pending_snapshot *pending, | ||
| 4209 | u64 *bytes_to_reserve) | ||
| 4210 | { | ||
| 4211 | struct btrfs_root *root; | ||
| 4212 | struct reloc_control *rc; | ||
| 4213 | |||
| 4214 | root = pending->root; | ||
| 4215 | if (!root->reloc_root) | ||
| 4216 | return; | ||
| 4217 | |||
| 4218 | rc = root->fs_info->reloc_ctl; | ||
| 4219 | if (!rc->merge_reloc_tree) | ||
| 4220 | return; | ||
| 4221 | |||
| 4222 | root = root->reloc_root; | ||
| 4223 | BUG_ON(btrfs_root_refs(&root->root_item) == 0); | ||
| 4224 | /* | ||
| 4225 | * relocation is in the stage of merging trees. the space | ||
| 4226 | * used by merging a reloc tree is twice the size of | ||
| 4227 | * relocated tree nodes in the worst case. half for cowing | ||
| 4228 | * the reloc tree, half for cowing the fs tree. the space | ||
| 4229 | * used by cowing the reloc tree will be freed after the | ||
| 4230 | * tree is dropped. if we create snapshot, cowing the fs | ||
| 4231 | * tree may use more space than it frees. so we need | ||
| 4232 | * reserve extra space. | ||
| 4233 | */ | ||
| 4234 | *bytes_to_reserve += rc->nodes_relocated; | ||
| 4235 | } | ||
| 4236 | |||
| 4237 | /* | ||
| 4238 | * called after snapshot is created. migrate block reservation | ||
| 4239 | * and create reloc root for the newly created snapshot | ||
| 4240 | */ | ||
| 4241 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 4242 | struct btrfs_pending_snapshot *pending) | ||
| 4243 | { | ||
| 4244 | struct btrfs_root *root = pending->root; | ||
| 4245 | struct btrfs_root *reloc_root; | ||
| 4246 | struct btrfs_root *new_root; | ||
| 4247 | struct reloc_control *rc; | ||
| 4248 | int ret; | ||
| 4249 | |||
| 4250 | if (!root->reloc_root) | ||
| 4251 | return; | ||
| 4252 | |||
| 4253 | rc = root->fs_info->reloc_ctl; | ||
| 4254 | rc->merging_rsv_size += rc->nodes_relocated; | ||
| 4255 | |||
| 4256 | if (rc->merge_reloc_tree) { | ||
| 4257 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 4258 | rc->block_rsv, | ||
| 4259 | rc->nodes_relocated); | ||
| 4260 | BUG_ON(ret); | ||
| 4261 | } | ||
| 4262 | |||
| 4263 | new_root = pending->snap; | ||
| 4264 | reloc_root = create_reloc_root(trans, root->reloc_root, | ||
| 4265 | new_root->root_key.objectid); | ||
| 4266 | |||
| 4267 | __add_reloc_root(reloc_root); | ||
| 4268 | new_root->reloc_root = reloc_root; | ||
| 4269 | |||
| 4270 | if (rc->create_reloc_tree) { | ||
| 4271 | ret = clone_backref_node(trans, rc, root, reloc_root); | ||
| 4272 | BUG_ON(ret); | ||
| 4273 | } | ||
| 4274 | } | ||
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 67fa2d29d663..2d958be761c8 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 259 | struct extent_buffer *leaf; | 259 | struct extent_buffer *leaf; |
| 260 | struct btrfs_path *path; | 260 | struct btrfs_path *path; |
| 261 | struct btrfs_key key; | 261 | struct btrfs_key key; |
| 262 | struct btrfs_key root_key; | ||
| 263 | struct btrfs_root *root; | ||
| 262 | int err = 0; | 264 | int err = 0; |
| 263 | int ret; | 265 | int ret; |
| 264 | 266 | ||
| @@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 270 | key.type = BTRFS_ORPHAN_ITEM_KEY; | 272 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
| 271 | key.offset = 0; | 273 | key.offset = 0; |
| 272 | 274 | ||
| 275 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 276 | root_key.offset = (u64)-1; | ||
| 277 | |||
| 273 | while (1) { | 278 | while (1) { |
| 274 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | 279 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); |
| 275 | if (ret < 0) { | 280 | if (ret < 0) { |
| @@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 294 | key.type != BTRFS_ORPHAN_ITEM_KEY) | 299 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
| 295 | break; | 300 | break; |
| 296 | 301 | ||
| 297 | ret = btrfs_find_dead_roots(tree_root, key.offset); | 302 | root_key.objectid = key.offset; |
| 298 | if (ret) { | 303 | key.offset++; |
| 304 | |||
| 305 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | ||
| 306 | &root_key); | ||
| 307 | if (!IS_ERR(root)) | ||
| 308 | continue; | ||
| 309 | |||
| 310 | ret = PTR_ERR(root); | ||
| 311 | if (ret != -ENOENT) { | ||
| 299 | err = ret; | 312 | err = ret; |
| 300 | break; | 313 | break; |
| 301 | } | 314 | } |
| 302 | 315 | ||
| 303 | key.offset++; | 316 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); |
| 317 | if (ret) { | ||
| 318 | err = ret; | ||
| 319 | break; | ||
| 320 | } | ||
| 304 | } | 321 | } |
| 305 | 322 | ||
| 306 | btrfs_free_path(path); | 323 | btrfs_free_path(path); |
| @@ -313,7 +330,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 313 | { | 330 | { |
| 314 | struct btrfs_path *path; | 331 | struct btrfs_path *path; |
| 315 | int ret; | 332 | int ret; |
| 316 | u32 refs; | ||
| 317 | struct btrfs_root_item *ri; | 333 | struct btrfs_root_item *ri; |
| 318 | struct extent_buffer *leaf; | 334 | struct extent_buffer *leaf; |
| 319 | 335 | ||
| @@ -327,8 +343,6 @@ int btrfs_del_root(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 327 | leaf = path->nodes[0]; | 343 | leaf = path->nodes[0]; |
| 328 | ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); | 344 | ri = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_item); |
| 329 | 345 | ||
| 330 | refs = btrfs_disk_root_refs(leaf, ri); | ||
| 331 | BUG_ON(refs != 0); | ||
| 332 | ret = btrfs_del_item(trans, root, path); | 346 | ret = btrfs_del_item(trans, root, path); |
| 333 | out: | 347 | out: |
| 334 | btrfs_free_path(path); | 348 | btrfs_free_path(path); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 9ac612e6ca60..f2393b390318 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | #include <linux/namei.h> | 38 | #include <linux/namei.h> |
| 39 | #include <linux/miscdevice.h> | 39 | #include <linux/miscdevice.h> |
| 40 | #include <linux/magic.h> | 40 | #include <linux/magic.h> |
| 41 | #include <linux/slab.h> | ||
| 41 | #include "compat.h" | 42 | #include "compat.h" |
| 42 | #include "ctree.h" | 43 | #include "ctree.h" |
| 43 | #include "disk-io.h" | 44 | #include "disk-io.h" |
| @@ -64,10 +65,9 @@ static void btrfs_put_super(struct super_block *sb) | |||
| 64 | 65 | ||
| 65 | enum { | 66 | enum { |
| 66 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, | 67 | Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum, |
| 67 | Opt_nodatacow, Opt_max_extent, Opt_max_inline, Opt_alloc_start, | 68 | Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, |
| 68 | Opt_nobarrier, Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, | 69 | Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, |
| 69 | Opt_noacl, Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, | 70 | Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, |
| 70 | Opt_flushoncommit, | ||
| 71 | Opt_discard, Opt_err, | 71 | Opt_discard, Opt_err, |
| 72 | }; | 72 | }; |
| 73 | 73 | ||
| @@ -79,7 +79,6 @@ static match_table_t tokens = { | |||
| 79 | {Opt_nodatasum, "nodatasum"}, | 79 | {Opt_nodatasum, "nodatasum"}, |
| 80 | {Opt_nodatacow, "nodatacow"}, | 80 | {Opt_nodatacow, "nodatacow"}, |
| 81 | {Opt_nobarrier, "nobarrier"}, | 81 | {Opt_nobarrier, "nobarrier"}, |
| 82 | {Opt_max_extent, "max_extent=%s"}, | ||
| 83 | {Opt_max_inline, "max_inline=%s"}, | 82 | {Opt_max_inline, "max_inline=%s"}, |
| 84 | {Opt_alloc_start, "alloc_start=%s"}, | 83 | {Opt_alloc_start, "alloc_start=%s"}, |
| 85 | {Opt_thread_pool, "thread_pool=%d"}, | 84 | {Opt_thread_pool, "thread_pool=%d"}, |
| @@ -188,18 +187,6 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
| 188 | info->thread_pool_size); | 187 | info->thread_pool_size); |
| 189 | } | 188 | } |
| 190 | break; | 189 | break; |
| 191 | case Opt_max_extent: | ||
| 192 | num = match_strdup(&args[0]); | ||
| 193 | if (num) { | ||
| 194 | info->max_extent = memparse(num, NULL); | ||
| 195 | kfree(num); | ||
| 196 | |||
| 197 | info->max_extent = max_t(u64, | ||
| 198 | info->max_extent, root->sectorsize); | ||
| 199 | printk(KERN_INFO "btrfs: max_extent at %llu\n", | ||
| 200 | (unsigned long long)info->max_extent); | ||
| 201 | } | ||
| 202 | break; | ||
| 203 | case Opt_max_inline: | 190 | case Opt_max_inline: |
| 204 | num = match_strdup(&args[0]); | 191 | num = match_strdup(&args[0]); |
| 205 | if (num) { | 192 | if (num) { |
| @@ -373,6 +360,8 @@ static struct dentry *get_default_root(struct super_block *sb, | |||
| 373 | */ | 360 | */ |
| 374 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); | 361 | dir_id = btrfs_super_root_dir(&root->fs_info->super_copy); |
| 375 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); | 362 | di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0); |
| 363 | if (IS_ERR(di)) | ||
| 364 | return ERR_CAST(di); | ||
| 376 | if (!di) { | 365 | if (!di) { |
| 377 | /* | 366 | /* |
| 378 | * Ok the default dir item isn't there. This is weird since | 367 | * Ok the default dir item isn't there. This is weird since |
| @@ -403,8 +392,8 @@ setup_root: | |||
| 403 | location.offset = 0; | 392 | location.offset = 0; |
| 404 | 393 | ||
| 405 | inode = btrfs_iget(sb, &location, new_root, &new); | 394 | inode = btrfs_iget(sb, &location, new_root, &new); |
| 406 | if (!inode) | 395 | if (IS_ERR(inode)) |
| 407 | return ERR_PTR(-ENOMEM); | 396 | return ERR_CAST(inode); |
| 408 | 397 | ||
| 409 | /* | 398 | /* |
| 410 | * If we're just mounting the root most subvol put the inode and return | 399 | * If we're just mounting the root most subvol put the inode and return |
| @@ -511,7 +500,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
| 511 | btrfs_start_delalloc_inodes(root, 0); | 500 | btrfs_start_delalloc_inodes(root, 0); |
| 512 | btrfs_wait_ordered_extents(root, 0, 0); | 501 | btrfs_wait_ordered_extents(root, 0, 0); |
| 513 | 502 | ||
| 514 | trans = btrfs_start_transaction(root, 1); | 503 | trans = btrfs_start_transaction(root, 0); |
| 515 | ret = btrfs_commit_transaction(trans, root); | 504 | ret = btrfs_commit_transaction(trans, root); |
| 516 | return ret; | 505 | return ret; |
| 517 | } | 506 | } |
| @@ -529,9 +518,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 529 | seq_puts(seq, ",nodatacow"); | 518 | seq_puts(seq, ",nodatacow"); |
| 530 | if (btrfs_test_opt(root, NOBARRIER)) | 519 | if (btrfs_test_opt(root, NOBARRIER)) |
| 531 | seq_puts(seq, ",nobarrier"); | 520 | seq_puts(seq, ",nobarrier"); |
| 532 | if (info->max_extent != (u64)-1) | ||
| 533 | seq_printf(seq, ",max_extent=%llu", | ||
| 534 | (unsigned long long)info->max_extent); | ||
| 535 | if (info->max_inline != 8192 * 1024) | 521 | if (info->max_inline != 8192 * 1024) |
| 536 | seq_printf(seq, ",max_inline=%llu", | 522 | seq_printf(seq, ",max_inline=%llu", |
| 537 | (unsigned long long)info->max_inline); | 523 | (unsigned long long)info->max_inline); |
| @@ -710,11 +696,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 710 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 696 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) |
| 711 | return -EINVAL; | 697 | return -EINVAL; |
| 712 | 698 | ||
| 713 | /* recover relocation */ | 699 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
| 714 | ret = btrfs_recover_relocation(root); | ||
| 715 | WARN_ON(ret); | 700 | WARN_ON(ret); |
| 716 | 701 | ||
| 717 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 702 | /* recover relocation */ |
| 703 | ret = btrfs_recover_relocation(root); | ||
| 718 | WARN_ON(ret); | 704 | WARN_ON(ret); |
| 719 | 705 | ||
| 720 | sb->s_flags &= ~MS_RDONLY; | 706 | sb->s_flags &= ~MS_RDONLY; |
| @@ -730,34 +716,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 730 | struct list_head *head = &root->fs_info->space_info; | 716 | struct list_head *head = &root->fs_info->space_info; |
| 731 | struct btrfs_space_info *found; | 717 | struct btrfs_space_info *found; |
| 732 | u64 total_used = 0; | 718 | u64 total_used = 0; |
| 733 | u64 data_used = 0; | ||
| 734 | int bits = dentry->d_sb->s_blocksize_bits; | 719 | int bits = dentry->d_sb->s_blocksize_bits; |
| 735 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 720 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
| 736 | 721 | ||
| 737 | rcu_read_lock(); | 722 | rcu_read_lock(); |
| 738 | list_for_each_entry_rcu(found, head, list) { | 723 | list_for_each_entry_rcu(found, head, list) |
| 739 | if (found->flags & (BTRFS_BLOCK_GROUP_DUP| | 724 | total_used += found->disk_used; |
| 740 | BTRFS_BLOCK_GROUP_RAID10| | ||
| 741 | BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 742 | total_used += found->bytes_used; | ||
| 743 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
| 744 | data_used += found->bytes_used; | ||
| 745 | else | ||
| 746 | data_used += found->total_bytes; | ||
| 747 | } | ||
| 748 | |||
| 749 | total_used += found->bytes_used; | ||
| 750 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
| 751 | data_used += found->bytes_used; | ||
| 752 | else | ||
| 753 | data_used += found->total_bytes; | ||
| 754 | } | ||
| 755 | rcu_read_unlock(); | 725 | rcu_read_unlock(); |
| 756 | 726 | ||
| 757 | buf->f_namelen = BTRFS_NAME_LEN; | 727 | buf->f_namelen = BTRFS_NAME_LEN; |
| 758 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 728 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
| 759 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 729 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
| 760 | buf->f_bavail = buf->f_blocks - (data_used >> bits); | 730 | buf->f_bavail = buf->f_bfree; |
| 761 | buf->f_bsize = dentry->d_sb->s_blocksize; | 731 | buf->f_bsize = dentry->d_sb->s_blocksize; |
| 762 | buf->f_type = BTRFS_SUPER_MAGIC; | 732 | buf->f_type = BTRFS_SUPER_MAGIC; |
| 763 | 733 | ||
| @@ -848,11 +818,14 @@ static const struct file_operations btrfs_ctl_fops = { | |||
| 848 | }; | 818 | }; |
| 849 | 819 | ||
| 850 | static struct miscdevice btrfs_misc = { | 820 | static struct miscdevice btrfs_misc = { |
| 851 | .minor = MISC_DYNAMIC_MINOR, | 821 | .minor = BTRFS_MINOR, |
| 852 | .name = "btrfs-control", | 822 | .name = "btrfs-control", |
| 853 | .fops = &btrfs_ctl_fops | 823 | .fops = &btrfs_ctl_fops |
| 854 | }; | 824 | }; |
| 855 | 825 | ||
| 826 | MODULE_ALIAS_MISCDEV(BTRFS_MINOR); | ||
| 827 | MODULE_ALIAS("devname:btrfs-control"); | ||
| 828 | |||
| 856 | static int btrfs_interface_init(void) | 829 | static int btrfs_interface_init(void) |
| 857 | { | 830 | { |
| 858 | return misc_register(&btrfs_misc); | 831 | return misc_register(&btrfs_misc); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2d654c1c794d..66e4c66cc63b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
| 21 | #include <linux/writeback.h> | 22 | #include <linux/writeback.h> |
| 22 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
| @@ -147,18 +148,13 @@ static void wait_current_trans(struct btrfs_root *root) | |||
| 147 | while (1) { | 148 | while (1) { |
| 148 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, | 149 | prepare_to_wait(&root->fs_info->transaction_wait, &wait, |
| 149 | TASK_UNINTERRUPTIBLE); | 150 | TASK_UNINTERRUPTIBLE); |
| 150 | if (cur_trans->blocked) { | 151 | if (!cur_trans->blocked) |
| 151 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 152 | schedule(); | ||
| 153 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 154 | finish_wait(&root->fs_info->transaction_wait, | ||
| 155 | &wait); | ||
| 156 | } else { | ||
| 157 | finish_wait(&root->fs_info->transaction_wait, | ||
| 158 | &wait); | ||
| 159 | break; | 152 | break; |
| 160 | } | 153 | mutex_unlock(&root->fs_info->trans_mutex); |
| 154 | schedule(); | ||
| 155 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 161 | } | 156 | } |
| 157 | finish_wait(&root->fs_info->transaction_wait, &wait); | ||
| 162 | put_transaction(cur_trans); | 158 | put_transaction(cur_trans); |
| 163 | } | 159 | } |
| 164 | } | 160 | } |
| @@ -169,54 +165,89 @@ enum btrfs_trans_type { | |||
| 169 | TRANS_USERSPACE, | 165 | TRANS_USERSPACE, |
| 170 | }; | 166 | }; |
| 171 | 167 | ||
| 168 | static int may_wait_transaction(struct btrfs_root *root, int type) | ||
| 169 | { | ||
| 170 | if (!root->fs_info->log_root_recovering && | ||
| 171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
| 172 | type == TRANS_USERSPACE)) | ||
| 173 | return 1; | ||
| 174 | return 0; | ||
| 175 | } | ||
| 176 | |||
| 172 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 177 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
| 173 | int num_blocks, int type) | 178 | u64 num_items, int type) |
| 174 | { | 179 | { |
| 175 | struct btrfs_trans_handle *h = | 180 | struct btrfs_trans_handle *h; |
| 176 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 181 | struct btrfs_transaction *cur_trans; |
| 182 | int retries = 0; | ||
| 177 | int ret; | 183 | int ret; |
| 184 | again: | ||
| 185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
| 186 | if (!h) | ||
| 187 | return ERR_PTR(-ENOMEM); | ||
| 178 | 188 | ||
| 179 | mutex_lock(&root->fs_info->trans_mutex); | 189 | mutex_lock(&root->fs_info->trans_mutex); |
| 180 | if (!root->fs_info->log_root_recovering && | 190 | if (may_wait_transaction(root, type)) |
| 181 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
| 182 | type == TRANS_USERSPACE)) | ||
| 183 | wait_current_trans(root); | 191 | wait_current_trans(root); |
| 192 | |||
| 184 | ret = join_transaction(root); | 193 | ret = join_transaction(root); |
| 185 | BUG_ON(ret); | 194 | BUG_ON(ret); |
| 186 | 195 | ||
| 187 | h->transid = root->fs_info->running_transaction->transid; | 196 | cur_trans = root->fs_info->running_transaction; |
| 188 | h->transaction = root->fs_info->running_transaction; | 197 | cur_trans->use_count++; |
| 189 | h->blocks_reserved = num_blocks; | 198 | mutex_unlock(&root->fs_info->trans_mutex); |
| 199 | |||
| 200 | h->transid = cur_trans->transid; | ||
| 201 | h->transaction = cur_trans; | ||
| 190 | h->blocks_used = 0; | 202 | h->blocks_used = 0; |
| 191 | h->block_group = 0; | 203 | h->block_group = 0; |
| 192 | h->alloc_exclude_nr = 0; | 204 | h->bytes_reserved = 0; |
| 193 | h->alloc_exclude_start = 0; | ||
| 194 | h->delayed_ref_updates = 0; | 205 | h->delayed_ref_updates = 0; |
| 206 | h->block_rsv = NULL; | ||
| 195 | 207 | ||
| 196 | if (!current->journal_info && type != TRANS_USERSPACE) | 208 | smp_mb(); |
| 197 | current->journal_info = h; | 209 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
| 210 | btrfs_commit_transaction(h, root); | ||
| 211 | goto again; | ||
| 212 | } | ||
| 213 | |||
| 214 | if (num_items > 0) { | ||
| 215 | ret = btrfs_trans_reserve_metadata(h, root, num_items, | ||
| 216 | &retries); | ||
| 217 | if (ret == -EAGAIN) { | ||
| 218 | btrfs_commit_transaction(h, root); | ||
| 219 | goto again; | ||
| 220 | } | ||
| 221 | if (ret < 0) { | ||
| 222 | btrfs_end_transaction(h, root); | ||
| 223 | return ERR_PTR(ret); | ||
| 224 | } | ||
| 225 | } | ||
| 198 | 226 | ||
| 199 | root->fs_info->running_transaction->use_count++; | 227 | mutex_lock(&root->fs_info->trans_mutex); |
| 200 | record_root_in_trans(h, root); | 228 | record_root_in_trans(h, root); |
| 201 | mutex_unlock(&root->fs_info->trans_mutex); | 229 | mutex_unlock(&root->fs_info->trans_mutex); |
| 230 | |||
| 231 | if (!current->journal_info && type != TRANS_USERSPACE) | ||
| 232 | current->journal_info = h; | ||
| 202 | return h; | 233 | return h; |
| 203 | } | 234 | } |
| 204 | 235 | ||
| 205 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 236 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 206 | int num_blocks) | 237 | int num_items) |
| 207 | { | 238 | { |
| 208 | return start_transaction(root, num_blocks, TRANS_START); | 239 | return start_transaction(root, num_items, TRANS_START); |
| 209 | } | 240 | } |
| 210 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 241 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
| 211 | int num_blocks) | 242 | int num_blocks) |
| 212 | { | 243 | { |
| 213 | return start_transaction(root, num_blocks, TRANS_JOIN); | 244 | return start_transaction(root, 0, TRANS_JOIN); |
| 214 | } | 245 | } |
| 215 | 246 | ||
| 216 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 247 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
| 217 | int num_blocks) | 248 | int num_blocks) |
| 218 | { | 249 | { |
| 219 | return start_transaction(r, num_blocks, TRANS_USERSPACE); | 250 | return start_transaction(r, 0, TRANS_USERSPACE); |
| 220 | } | 251 | } |
| 221 | 252 | ||
| 222 | /* wait for a transaction commit to be fully complete */ | 253 | /* wait for a transaction commit to be fully complete */ |
| @@ -290,10 +321,36 @@ void btrfs_throttle(struct btrfs_root *root) | |||
| 290 | mutex_unlock(&root->fs_info->trans_mutex); | 321 | mutex_unlock(&root->fs_info->trans_mutex); |
| 291 | } | 322 | } |
| 292 | 323 | ||
| 324 | static int should_end_transaction(struct btrfs_trans_handle *trans, | ||
| 325 | struct btrfs_root *root) | ||
| 326 | { | ||
| 327 | int ret; | ||
| 328 | ret = btrfs_block_rsv_check(trans, root, | ||
| 329 | &root->fs_info->global_block_rsv, 0, 5); | ||
| 330 | return ret ? 1 : 0; | ||
| 331 | } | ||
| 332 | |||
| 333 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
| 334 | struct btrfs_root *root) | ||
| 335 | { | ||
| 336 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
| 337 | int updates; | ||
| 338 | |||
| 339 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | ||
| 340 | return 1; | ||
| 341 | |||
| 342 | updates = trans->delayed_ref_updates; | ||
| 343 | trans->delayed_ref_updates = 0; | ||
| 344 | if (updates) | ||
| 345 | btrfs_run_delayed_refs(trans, root, updates); | ||
| 346 | |||
| 347 | return should_end_transaction(trans, root); | ||
| 348 | } | ||
| 349 | |||
| 293 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 350 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 294 | struct btrfs_root *root, int throttle) | 351 | struct btrfs_root *root, int throttle) |
| 295 | { | 352 | { |
| 296 | struct btrfs_transaction *cur_trans; | 353 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 297 | struct btrfs_fs_info *info = root->fs_info; | 354 | struct btrfs_fs_info *info = root->fs_info; |
| 298 | int count = 0; | 355 | int count = 0; |
| 299 | 356 | ||
| @@ -317,9 +374,21 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 317 | count++; | 374 | count++; |
| 318 | } | 375 | } |
| 319 | 376 | ||
| 377 | btrfs_trans_release_metadata(trans, root); | ||
| 378 | |||
| 379 | if (!root->fs_info->open_ioctl_trans && | ||
| 380 | should_end_transaction(trans, root)) | ||
| 381 | trans->transaction->blocked = 1; | ||
| 382 | |||
| 383 | if (cur_trans->blocked && !cur_trans->in_commit) { | ||
| 384 | if (throttle) | ||
| 385 | return btrfs_commit_transaction(trans, root); | ||
| 386 | else | ||
| 387 | wake_up_process(info->transaction_kthread); | ||
| 388 | } | ||
| 389 | |||
| 320 | mutex_lock(&info->trans_mutex); | 390 | mutex_lock(&info->trans_mutex); |
| 321 | cur_trans = info->running_transaction; | 391 | WARN_ON(cur_trans != info->running_transaction); |
| 322 | WARN_ON(cur_trans != trans->transaction); | ||
| 323 | WARN_ON(cur_trans->num_writers < 1); | 392 | WARN_ON(cur_trans->num_writers < 1); |
| 324 | cur_trans->num_writers--; | 393 | cur_trans->num_writers--; |
| 325 | 394 | ||
| @@ -607,6 +676,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
| 607 | 676 | ||
| 608 | btrfs_free_log(trans, root); | 677 | btrfs_free_log(trans, root); |
| 609 | btrfs_update_reloc_root(trans, root); | 678 | btrfs_update_reloc_root(trans, root); |
| 679 | btrfs_orphan_commit_root(trans, root); | ||
| 610 | 680 | ||
| 611 | if (root->commit_root != root->node) { | 681 | if (root->commit_root != root->node) { |
| 612 | switch_commit_root(root); | 682 | switch_commit_root(root); |
| @@ -631,30 +701,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
| 631 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 701 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) |
| 632 | { | 702 | { |
| 633 | struct btrfs_fs_info *info = root->fs_info; | 703 | struct btrfs_fs_info *info = root->fs_info; |
| 634 | int ret; | ||
| 635 | struct btrfs_trans_handle *trans; | 704 | struct btrfs_trans_handle *trans; |
| 705 | int ret; | ||
| 636 | unsigned long nr; | 706 | unsigned long nr; |
| 637 | 707 | ||
| 638 | smp_mb(); | 708 | if (xchg(&root->defrag_running, 1)) |
| 639 | if (root->defrag_running) | ||
| 640 | return 0; | 709 | return 0; |
| 641 | trans = btrfs_start_transaction(root, 1); | 710 | |
| 642 | while (1) { | 711 | while (1) { |
| 643 | root->defrag_running = 1; | 712 | trans = btrfs_start_transaction(root, 0); |
| 713 | if (IS_ERR(trans)) | ||
| 714 | return PTR_ERR(trans); | ||
| 715 | |||
| 644 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 716 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
| 717 | |||
| 645 | nr = trans->blocks_used; | 718 | nr = trans->blocks_used; |
| 646 | btrfs_end_transaction(trans, root); | 719 | btrfs_end_transaction(trans, root); |
| 647 | btrfs_btree_balance_dirty(info->tree_root, nr); | 720 | btrfs_btree_balance_dirty(info->tree_root, nr); |
| 648 | cond_resched(); | 721 | cond_resched(); |
| 649 | 722 | ||
| 650 | trans = btrfs_start_transaction(root, 1); | ||
| 651 | if (root->fs_info->closing || ret != -EAGAIN) | 723 | if (root->fs_info->closing || ret != -EAGAIN) |
| 652 | break; | 724 | break; |
| 653 | } | 725 | } |
| 654 | root->defrag_running = 0; | 726 | root->defrag_running = 0; |
| 655 | smp_mb(); | 727 | return ret; |
| 656 | btrfs_end_transaction(trans, root); | ||
| 657 | return 0; | ||
| 658 | } | 728 | } |
| 659 | 729 | ||
| 660 | #if 0 | 730 | #if 0 |
| @@ -760,29 +830,72 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 760 | struct btrfs_root_item *new_root_item; | 830 | struct btrfs_root_item *new_root_item; |
| 761 | struct btrfs_root *tree_root = fs_info->tree_root; | 831 | struct btrfs_root *tree_root = fs_info->tree_root; |
| 762 | struct btrfs_root *root = pending->root; | 832 | struct btrfs_root *root = pending->root; |
| 833 | struct btrfs_root *parent_root; | ||
| 834 | struct inode *parent_inode; | ||
| 835 | struct dentry *dentry; | ||
| 763 | struct extent_buffer *tmp; | 836 | struct extent_buffer *tmp; |
| 764 | struct extent_buffer *old; | 837 | struct extent_buffer *old; |
| 765 | int ret; | 838 | int ret; |
| 839 | int retries = 0; | ||
| 840 | u64 to_reserve = 0; | ||
| 841 | u64 index = 0; | ||
| 766 | u64 objectid; | 842 | u64 objectid; |
| 767 | 843 | ||
| 768 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 844 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
| 769 | if (!new_root_item) { | 845 | if (!new_root_item) { |
| 770 | ret = -ENOMEM; | 846 | pending->error = -ENOMEM; |
| 771 | goto fail; | 847 | goto fail; |
| 772 | } | 848 | } |
| 849 | |||
| 773 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | 850 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); |
| 774 | if (ret) | 851 | if (ret) { |
| 852 | pending->error = ret; | ||
| 775 | goto fail; | 853 | goto fail; |
| 854 | } | ||
| 855 | |||
| 856 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | ||
| 857 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
| 858 | |||
| 859 | if (to_reserve > 0) { | ||
| 860 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | ||
| 861 | to_reserve, &retries); | ||
| 862 | if (ret) { | ||
| 863 | pending->error = ret; | ||
| 864 | goto fail; | ||
| 865 | } | ||
| 866 | } | ||
| 867 | |||
| 868 | key.objectid = objectid; | ||
| 869 | key.offset = (u64)-1; | ||
| 870 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 871 | |||
| 872 | trans->block_rsv = &pending->block_rsv; | ||
| 873 | |||
| 874 | dentry = pending->dentry; | ||
| 875 | parent_inode = dentry->d_parent->d_inode; | ||
| 876 | parent_root = BTRFS_I(parent_inode)->root; | ||
| 877 | record_root_in_trans(trans, parent_root); | ||
| 878 | |||
| 879 | /* | ||
| 880 | * insert the directory item | ||
| 881 | */ | ||
| 882 | ret = btrfs_set_inode_index(parent_inode, &index); | ||
| 883 | BUG_ON(ret); | ||
| 884 | ret = btrfs_insert_dir_item(trans, parent_root, | ||
| 885 | dentry->d_name.name, dentry->d_name.len, | ||
| 886 | parent_inode->i_ino, &key, | ||
| 887 | BTRFS_FT_DIR, index); | ||
| 888 | BUG_ON(ret); | ||
| 889 | |||
| 890 | btrfs_i_size_write(parent_inode, parent_inode->i_size + | ||
| 891 | dentry->d_name.len * 2); | ||
| 892 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
| 893 | BUG_ON(ret); | ||
| 776 | 894 | ||
| 777 | record_root_in_trans(trans, root); | 895 | record_root_in_trans(trans, root); |
| 778 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); | 896 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid); |
| 779 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); | 897 | memcpy(new_root_item, &root->root_item, sizeof(*new_root_item)); |
| 780 | 898 | ||
| 781 | key.objectid = objectid; | ||
| 782 | /* record when the snapshot was created in key.offset */ | ||
| 783 | key.offset = trans->transid; | ||
| 784 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 785 | |||
| 786 | old = btrfs_lock_root_node(root); | 899 | old = btrfs_lock_root_node(root); |
| 787 | btrfs_cow_block(trans, root, old, NULL, 0, &old); | 900 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
| 788 | btrfs_set_lock_blocking(old); | 901 | btrfs_set_lock_blocking(old); |
| @@ -792,62 +905,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 792 | free_extent_buffer(old); | 905 | free_extent_buffer(old); |
| 793 | 906 | ||
| 794 | btrfs_set_root_node(new_root_item, tmp); | 907 | btrfs_set_root_node(new_root_item, tmp); |
| 795 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 908 | /* record when the snapshot was created in key.offset */ |
| 796 | new_root_item); | 909 | key.offset = trans->transid; |
| 910 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); | ||
| 797 | btrfs_tree_unlock(tmp); | 911 | btrfs_tree_unlock(tmp); |
| 798 | free_extent_buffer(tmp); | 912 | free_extent_buffer(tmp); |
| 799 | if (ret) | 913 | BUG_ON(ret); |
| 800 | goto fail; | ||
| 801 | |||
| 802 | key.offset = (u64)-1; | ||
| 803 | memcpy(&pending->root_key, &key, sizeof(key)); | ||
| 804 | fail: | ||
| 805 | kfree(new_root_item); | ||
| 806 | return ret; | ||
| 807 | } | ||
| 808 | |||
| 809 | static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info, | ||
| 810 | struct btrfs_pending_snapshot *pending) | ||
| 811 | { | ||
| 812 | int ret; | ||
| 813 | int namelen; | ||
| 814 | u64 index = 0; | ||
| 815 | struct btrfs_trans_handle *trans; | ||
| 816 | struct inode *parent_inode; | ||
| 817 | struct btrfs_root *parent_root; | ||
| 818 | |||
| 819 | parent_inode = pending->dentry->d_parent->d_inode; | ||
| 820 | parent_root = BTRFS_I(parent_inode)->root; | ||
| 821 | trans = btrfs_join_transaction(parent_root, 1); | ||
| 822 | 914 | ||
| 823 | /* | 915 | /* |
| 824 | * insert the directory item | 916 | * insert root back/forward references |
| 825 | */ | 917 | */ |
| 826 | namelen = strlen(pending->name); | 918 | ret = btrfs_add_root_ref(trans, tree_root, objectid, |
| 827 | ret = btrfs_set_inode_index(parent_inode, &index); | ||
| 828 | ret = btrfs_insert_dir_item(trans, parent_root, | ||
| 829 | pending->name, namelen, | ||
| 830 | parent_inode->i_ino, | ||
| 831 | &pending->root_key, BTRFS_FT_DIR, index); | ||
| 832 | |||
| 833 | if (ret) | ||
| 834 | goto fail; | ||
| 835 | |||
| 836 | btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); | ||
| 837 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | ||
| 838 | BUG_ON(ret); | ||
| 839 | |||
| 840 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | ||
| 841 | pending->root_key.objectid, | ||
| 842 | parent_root->root_key.objectid, | 919 | parent_root->root_key.objectid, |
| 843 | parent_inode->i_ino, index, pending->name, | 920 | parent_inode->i_ino, index, |
| 844 | namelen); | 921 | dentry->d_name.name, dentry->d_name.len); |
| 845 | |||
| 846 | BUG_ON(ret); | 922 | BUG_ON(ret); |
| 847 | 923 | ||
| 924 | key.offset = (u64)-1; | ||
| 925 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
| 926 | BUG_ON(IS_ERR(pending->snap)); | ||
| 927 | |||
| 928 | btrfs_reloc_post_snapshot(trans, pending); | ||
| 929 | btrfs_orphan_post_snapshot(trans, pending); | ||
| 848 | fail: | 930 | fail: |
| 849 | btrfs_end_transaction(trans, fs_info->fs_root); | 931 | kfree(new_root_item); |
| 850 | return ret; | 932 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
| 933 | return 0; | ||
| 851 | } | 934 | } |
| 852 | 935 | ||
| 853 | /* | 936 | /* |
| @@ -867,25 +950,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans, | |||
| 867 | return 0; | 950 | return 0; |
| 868 | } | 951 | } |
| 869 | 952 | ||
| 870 | static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans, | ||
| 871 | struct btrfs_fs_info *fs_info) | ||
| 872 | { | ||
| 873 | struct btrfs_pending_snapshot *pending; | ||
| 874 | struct list_head *head = &trans->transaction->pending_snapshots; | ||
| 875 | int ret; | ||
| 876 | |||
| 877 | while (!list_empty(head)) { | ||
| 878 | pending = list_entry(head->next, | ||
| 879 | struct btrfs_pending_snapshot, list); | ||
| 880 | ret = finish_pending_snapshot(fs_info, pending); | ||
| 881 | BUG_ON(ret); | ||
| 882 | list_del(&pending->list); | ||
| 883 | kfree(pending->name); | ||
| 884 | kfree(pending); | ||
| 885 | } | ||
| 886 | return 0; | ||
| 887 | } | ||
| 888 | |||
| 889 | static void update_super_roots(struct btrfs_root *root) | 953 | static void update_super_roots(struct btrfs_root *root) |
| 890 | { | 954 | { |
| 891 | struct btrfs_root_item *root_item; | 955 | struct btrfs_root_item *root_item; |
| @@ -914,6 +978,16 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | |||
| 914 | return ret; | 978 | return ret; |
| 915 | } | 979 | } |
| 916 | 980 | ||
| 981 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | ||
| 982 | { | ||
| 983 | int ret = 0; | ||
| 984 | spin_lock(&info->new_trans_lock); | ||
| 985 | if (info->running_transaction) | ||
| 986 | ret = info->running_transaction->blocked; | ||
| 987 | spin_unlock(&info->new_trans_lock); | ||
| 988 | return ret; | ||
| 989 | } | ||
| 990 | |||
| 917 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 991 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
| 918 | struct btrfs_root *root) | 992 | struct btrfs_root *root) |
| 919 | { | 993 | { |
| @@ -935,6 +1009,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 935 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1009 | ret = btrfs_run_delayed_refs(trans, root, 0); |
| 936 | BUG_ON(ret); | 1010 | BUG_ON(ret); |
| 937 | 1011 | ||
| 1012 | btrfs_trans_release_metadata(trans, root); | ||
| 1013 | |||
| 938 | cur_trans = trans->transaction; | 1014 | cur_trans = trans->transaction; |
| 939 | /* | 1015 | /* |
| 940 | * set the flushing flag so procs in this transaction have to | 1016 | * set the flushing flag so procs in this transaction have to |
| @@ -987,9 +1063,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 987 | snap_pending = 1; | 1063 | snap_pending = 1; |
| 988 | 1064 | ||
| 989 | WARN_ON(cur_trans != trans->transaction); | 1065 | WARN_ON(cur_trans != trans->transaction); |
| 990 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
| 991 | TASK_UNINTERRUPTIBLE); | ||
| 992 | |||
| 993 | if (cur_trans->num_writers > 1) | 1066 | if (cur_trans->num_writers > 1) |
| 994 | timeout = MAX_SCHEDULE_TIMEOUT; | 1067 | timeout = MAX_SCHEDULE_TIMEOUT; |
| 995 | else if (should_grow) | 1068 | else if (should_grow) |
| @@ -1012,6 +1085,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1012 | */ | 1085 | */ |
| 1013 | btrfs_run_ordered_operations(root, 1); | 1086 | btrfs_run_ordered_operations(root, 1); |
| 1014 | 1087 | ||
| 1088 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
| 1089 | TASK_UNINTERRUPTIBLE); | ||
| 1090 | |||
| 1015 | smp_mb(); | 1091 | smp_mb(); |
| 1016 | if (cur_trans->num_writers > 1 || should_grow) | 1092 | if (cur_trans->num_writers > 1 || should_grow) |
| 1017 | schedule_timeout(timeout); | 1093 | schedule_timeout(timeout); |
| @@ -1097,9 +1173,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 1097 | 1173 | ||
| 1098 | btrfs_finish_extent_commit(trans, root); | 1174 | btrfs_finish_extent_commit(trans, root); |
| 1099 | 1175 | ||
| 1100 | /* do the directory inserts of any pending snapshot creations */ | ||
| 1101 | finish_pending_snapshots(trans, root->fs_info); | ||
| 1102 | |||
| 1103 | mutex_lock(&root->fs_info->trans_mutex); | 1176 | mutex_lock(&root->fs_info->trans_mutex); |
| 1104 | 1177 | ||
| 1105 | cur_trans->commit_done = 1; | 1178 | cur_trans->commit_done = 1; |
| @@ -1142,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
| 1142 | 1215 | ||
| 1143 | if (btrfs_header_backref_rev(root->node) < | 1216 | if (btrfs_header_backref_rev(root->node) < |
| 1144 | BTRFS_MIXED_BACKREF_REV) | 1217 | BTRFS_MIXED_BACKREF_REV) |
| 1145 | btrfs_drop_snapshot(root, 0); | 1218 | btrfs_drop_snapshot(root, NULL, 0); |
| 1146 | else | 1219 | else |
| 1147 | btrfs_drop_snapshot(root, 1); | 1220 | btrfs_drop_snapshot(root, NULL, 1); |
| 1148 | } | 1221 | } |
| 1149 | return 0; | 1222 | return 0; |
| 1150 | } | 1223 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 93c7ccb33118..e104986d0bfd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -45,20 +45,23 @@ struct btrfs_transaction { | |||
| 45 | 45 | ||
| 46 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
| 47 | u64 transid; | 47 | u64 transid; |
| 48 | u64 block_group; | ||
| 49 | u64 bytes_reserved; | ||
| 48 | unsigned long blocks_reserved; | 50 | unsigned long blocks_reserved; |
| 49 | unsigned long blocks_used; | 51 | unsigned long blocks_used; |
| 50 | struct btrfs_transaction *transaction; | ||
| 51 | u64 block_group; | ||
| 52 | u64 alloc_exclude_start; | ||
| 53 | u64 alloc_exclude_nr; | ||
| 54 | unsigned long delayed_ref_updates; | 52 | unsigned long delayed_ref_updates; |
| 53 | struct btrfs_transaction *transaction; | ||
| 54 | struct btrfs_block_rsv *block_rsv; | ||
| 55 | }; | 55 | }; |
| 56 | 56 | ||
| 57 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
| 58 | struct dentry *dentry; | 58 | struct dentry *dentry; |
| 59 | struct btrfs_root *root; | 59 | struct btrfs_root *root; |
| 60 | char *name; | 60 | struct btrfs_root *snap; |
| 61 | struct btrfs_key root_key; | 61 | /* block reservation for the operation */ |
| 62 | struct btrfs_block_rsv block_rsv; | ||
| 63 | /* extra metadata reseration for relocation */ | ||
| 64 | int error; | ||
| 62 | struct list_head list; | 65 | struct list_head list; |
| 63 | }; | 66 | }; |
| 64 | 67 | ||
| @@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
| 85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 88 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 86 | struct btrfs_root *root); | 89 | struct btrfs_root *root); |
| 87 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 90 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 88 | int num_blocks); | 91 | int num_items); |
| 89 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 92 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
| 90 | int num_blocks); | 93 | int num_blocks); |
| 91 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 94 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
| 92 | int num_blocks); | 95 | int num_blocks); |
| 93 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 96 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
| 94 | struct btrfs_root *root); | 97 | struct btrfs_root *root); |
| 95 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 98 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
| @@ -103,6 +106,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 103 | struct btrfs_root *root); | 106 | struct btrfs_root *root); |
| 104 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 107 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
| 105 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
| 109 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
| 110 | struct btrfs_root *root); | ||
| 106 | void btrfs_throttle(struct btrfs_root *root); | 111 | void btrfs_throttle(struct btrfs_root *root); |
| 107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 112 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
| 108 | struct btrfs_root *root); | 113 | struct btrfs_root *root); |
| @@ -112,5 +117,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
| 112 | struct extent_io_tree *dirty_pages, int mark); | 117 | struct extent_io_tree *dirty_pages, int mark); |
| 113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 118 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
| 114 | struct extent_io_tree *dirty_pages, int mark); | 119 | struct extent_io_tree *dirty_pages, int mark); |
| 120 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); | ||
| 115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 121 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
| 116 | #endif | 122 | #endif |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b10eacdb1620..f7ac8e013ed7 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
| @@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
| 117 | path->nodes[1], 0, | 117 | path->nodes[1], 0, |
| 118 | cache_only, &last_ret, | 118 | cache_only, &last_ret, |
| 119 | &root->defrag_progress); | 119 | &root->defrag_progress); |
| 120 | WARN_ON(ret && ret != -EAGAIN); | 120 | if (ret) { |
| 121 | WARN_ON(ret == -EAGAIN); | ||
| 122 | goto out; | ||
| 123 | } | ||
| 121 | if (next_key_ret == 0) { | 124 | if (next_key_ret == 0) { |
| 122 | memcpy(&root->defrag_progress, &key, sizeof(key)); | 125 | memcpy(&root->defrag_progress, &key, sizeof(key)); |
| 123 | ret = -EAGAIN; | 126 | ret = -EAGAIN; |
| 124 | } | 127 | } |
| 125 | |||
| 126 | btrfs_release_path(root, path); | ||
| 127 | out: | 128 | out: |
| 128 | if (path) | 129 | if (path) |
| 129 | btrfs_free_path(path); | 130 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1255fcc8ade5..fb102a9aee9c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | 18 | ||
| 19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include "ctree.h" | 21 | #include "ctree.h" |
| 21 | #include "transaction.h" | 22 | #include "transaction.h" |
| 22 | #include "disk-io.h" | 23 | #include "disk-io.h" |
| @@ -134,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 134 | struct btrfs_root *root) | 135 | struct btrfs_root *root) |
| 135 | { | 136 | { |
| 136 | int ret; | 137 | int ret; |
| 138 | int err = 0; | ||
| 137 | 139 | ||
| 138 | mutex_lock(&root->log_mutex); | 140 | mutex_lock(&root->log_mutex); |
| 139 | if (root->log_root) { | 141 | if (root->log_root) { |
| @@ -154,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 154 | mutex_lock(&root->fs_info->tree_log_mutex); | 156 | mutex_lock(&root->fs_info->tree_log_mutex); |
| 155 | if (!root->fs_info->log_root_tree) { | 157 | if (!root->fs_info->log_root_tree) { |
| 156 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 158 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
| 157 | BUG_ON(ret); | 159 | if (ret) |
| 160 | err = ret; | ||
| 158 | } | 161 | } |
| 159 | if (!root->log_root) { | 162 | if (err == 0 && !root->log_root) { |
| 160 | ret = btrfs_add_log_tree(trans, root); | 163 | ret = btrfs_add_log_tree(trans, root); |
| 161 | BUG_ON(ret); | 164 | if (ret) |
| 165 | err = ret; | ||
| 162 | } | 166 | } |
| 163 | mutex_unlock(&root->fs_info->tree_log_mutex); | 167 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 164 | root->log_batch++; | 168 | root->log_batch++; |
| 165 | atomic_inc(&root->log_writers); | 169 | atomic_inc(&root->log_writers); |
| 166 | mutex_unlock(&root->log_mutex); | 170 | mutex_unlock(&root->log_mutex); |
| 167 | return 0; | 171 | return err; |
| 168 | } | 172 | } |
| 169 | 173 | ||
| 170 | /* | 174 | /* |
| @@ -375,7 +379,7 @@ insert: | |||
| 375 | BUG_ON(ret); | 379 | BUG_ON(ret); |
| 376 | } | 380 | } |
| 377 | } else if (ret) { | 381 | } else if (ret) { |
| 378 | BUG(); | 382 | return ret; |
| 379 | } | 383 | } |
| 380 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], | 384 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], |
| 381 | path->slots[0]); | 385 | path->slots[0]); |
| @@ -1698,9 +1702,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
| 1698 | 1702 | ||
| 1699 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1703 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
| 1700 | 1704 | ||
| 1701 | wc->process_func(root, next, wc, ptr_gen); | ||
| 1702 | |||
| 1703 | if (*level == 1) { | 1705 | if (*level == 1) { |
| 1706 | wc->process_func(root, next, wc, ptr_gen); | ||
| 1707 | |||
| 1704 | path->slots[*level]++; | 1708 | path->slots[*level]++; |
| 1705 | if (wc->free) { | 1709 | if (wc->free) { |
| 1706 | btrfs_read_buffer(next, ptr_gen); | 1710 | btrfs_read_buffer(next, ptr_gen); |
| @@ -1733,35 +1737,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
| 1733 | WARN_ON(*level < 0); | 1737 | WARN_ON(*level < 0); |
| 1734 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 1738 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
| 1735 | 1739 | ||
| 1736 | if (path->nodes[*level] == root->node) | 1740 | path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); |
| 1737 | parent = path->nodes[*level]; | ||
| 1738 | else | ||
| 1739 | parent = path->nodes[*level + 1]; | ||
| 1740 | |||
| 1741 | bytenr = path->nodes[*level]->start; | ||
| 1742 | |||
| 1743 | blocksize = btrfs_level_size(root, *level); | ||
| 1744 | root_owner = btrfs_header_owner(parent); | ||
| 1745 | root_gen = btrfs_header_generation(parent); | ||
| 1746 | |||
| 1747 | wc->process_func(root, path->nodes[*level], wc, | ||
| 1748 | btrfs_header_generation(path->nodes[*level])); | ||
| 1749 | |||
| 1750 | if (wc->free) { | ||
| 1751 | next = path->nodes[*level]; | ||
| 1752 | btrfs_tree_lock(next); | ||
| 1753 | clean_tree_block(trans, root, next); | ||
| 1754 | btrfs_set_lock_blocking(next); | ||
| 1755 | btrfs_wait_tree_block_writeback(next); | ||
| 1756 | btrfs_tree_unlock(next); | ||
| 1757 | |||
| 1758 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
| 1759 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | ||
| 1760 | BUG_ON(ret); | ||
| 1761 | } | ||
| 1762 | free_extent_buffer(path->nodes[*level]); | ||
| 1763 | path->nodes[*level] = NULL; | ||
| 1764 | *level += 1; | ||
| 1765 | 1741 | ||
| 1766 | cond_resched(); | 1742 | cond_resched(); |
| 1767 | return 0; | 1743 | return 0; |
| @@ -1780,7 +1756,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
| 1780 | 1756 | ||
| 1781 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 1757 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { |
| 1782 | slot = path->slots[i]; | 1758 | slot = path->slots[i]; |
| 1783 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 1759 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
| 1784 | struct extent_buffer *node; | 1760 | struct extent_buffer *node; |
| 1785 | node = path->nodes[i]; | 1761 | node = path->nodes[i]; |
| 1786 | path->slots[i]++; | 1762 | path->slots[i]++; |
| @@ -2046,7 +2022,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2046 | mutex_unlock(&log_root_tree->log_mutex); | 2022 | mutex_unlock(&log_root_tree->log_mutex); |
| 2047 | 2023 | ||
| 2048 | ret = update_log_root(trans, log); | 2024 | ret = update_log_root(trans, log); |
| 2049 | BUG_ON(ret); | ||
| 2050 | 2025 | ||
| 2051 | mutex_lock(&log_root_tree->log_mutex); | 2026 | mutex_lock(&log_root_tree->log_mutex); |
| 2052 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { | 2027 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { |
| @@ -2055,6 +2030,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2055 | wake_up(&log_root_tree->log_writer_wait); | 2030 | wake_up(&log_root_tree->log_writer_wait); |
| 2056 | } | 2031 | } |
| 2057 | 2032 | ||
| 2033 | if (ret) { | ||
| 2034 | BUG_ON(ret != -ENOSPC); | ||
| 2035 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2036 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | ||
| 2037 | mutex_unlock(&log_root_tree->log_mutex); | ||
| 2038 | ret = -EAGAIN; | ||
| 2039 | goto out; | ||
| 2040 | } | ||
| 2041 | |||
| 2058 | index2 = log_root_tree->log_transid % 2; | 2042 | index2 = log_root_tree->log_transid % 2; |
| 2059 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2043 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
| 2060 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2044 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| @@ -2128,15 +2112,10 @@ out: | |||
| 2128 | return 0; | 2112 | return 0; |
| 2129 | } | 2113 | } |
| 2130 | 2114 | ||
| 2131 | /* | 2115 | static void free_log_tree(struct btrfs_trans_handle *trans, |
| 2132 | * free all the extents used by the tree log. This should be called | 2116 | struct btrfs_root *log) |
| 2133 | * at commit time of the full transaction | ||
| 2134 | */ | ||
| 2135 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
| 2136 | { | 2117 | { |
| 2137 | int ret; | 2118 | int ret; |
| 2138 | struct btrfs_root *log; | ||
| 2139 | struct key; | ||
| 2140 | u64 start; | 2119 | u64 start; |
| 2141 | u64 end; | 2120 | u64 end; |
| 2142 | struct walk_control wc = { | 2121 | struct walk_control wc = { |
| @@ -2144,10 +2123,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
| 2144 | .process_func = process_one_buffer | 2123 | .process_func = process_one_buffer |
| 2145 | }; | 2124 | }; |
| 2146 | 2125 | ||
| 2147 | if (!root->log_root || root->fs_info->log_root_recovering) | ||
| 2148 | return 0; | ||
| 2149 | |||
| 2150 | log = root->log_root; | ||
| 2151 | ret = walk_log_tree(trans, log, &wc); | 2126 | ret = walk_log_tree(trans, log, &wc); |
| 2152 | BUG_ON(ret); | 2127 | BUG_ON(ret); |
| 2153 | 2128 | ||
| @@ -2161,14 +2136,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
| 2161 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2136 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
| 2162 | } | 2137 | } |
| 2163 | 2138 | ||
| 2164 | if (log->log_transid > 0) { | ||
| 2165 | ret = btrfs_del_root(trans, root->fs_info->log_root_tree, | ||
| 2166 | &log->root_key); | ||
| 2167 | BUG_ON(ret); | ||
| 2168 | } | ||
| 2169 | root->log_root = NULL; | ||
| 2170 | free_extent_buffer(log->node); | 2139 | free_extent_buffer(log->node); |
| 2171 | kfree(log); | 2140 | kfree(log); |
| 2141 | } | ||
| 2142 | |||
| 2143 | /* | ||
| 2144 | * free all the extents used by the tree log. This should be called | ||
| 2145 | * at commit time of the full transaction | ||
| 2146 | */ | ||
| 2147 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
| 2148 | { | ||
| 2149 | if (root->log_root) { | ||
| 2150 | free_log_tree(trans, root->log_root); | ||
| 2151 | root->log_root = NULL; | ||
| 2152 | } | ||
| 2153 | return 0; | ||
| 2154 | } | ||
| 2155 | |||
| 2156 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 2157 | struct btrfs_fs_info *fs_info) | ||
| 2158 | { | ||
| 2159 | if (fs_info->log_root_tree) { | ||
| 2160 | free_log_tree(trans, fs_info->log_root_tree); | ||
| 2161 | fs_info->log_root_tree = NULL; | ||
| 2162 | } | ||
| 2172 | return 0; | 2163 | return 0; |
| 2173 | } | 2164 | } |
| 2174 | 2165 | ||
| @@ -2202,6 +2193,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2202 | struct btrfs_dir_item *di; | 2193 | struct btrfs_dir_item *di; |
| 2203 | struct btrfs_path *path; | 2194 | struct btrfs_path *path; |
| 2204 | int ret; | 2195 | int ret; |
| 2196 | int err = 0; | ||
| 2205 | int bytes_del = 0; | 2197 | int bytes_del = 0; |
| 2206 | 2198 | ||
| 2207 | if (BTRFS_I(dir)->logged_trans < trans->transid) | 2199 | if (BTRFS_I(dir)->logged_trans < trans->transid) |
| @@ -2217,7 +2209,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2217 | path = btrfs_alloc_path(); | 2209 | path = btrfs_alloc_path(); |
| 2218 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2210 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
| 2219 | name, name_len, -1); | 2211 | name, name_len, -1); |
| 2220 | if (di && !IS_ERR(di)) { | 2212 | if (IS_ERR(di)) { |
| 2213 | err = PTR_ERR(di); | ||
| 2214 | goto fail; | ||
| 2215 | } | ||
| 2216 | if (di) { | ||
| 2221 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2217 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
| 2222 | bytes_del += name_len; | 2218 | bytes_del += name_len; |
| 2223 | BUG_ON(ret); | 2219 | BUG_ON(ret); |
| @@ -2225,7 +2221,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2225 | btrfs_release_path(log, path); | 2221 | btrfs_release_path(log, path); |
| 2226 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | 2222 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, |
| 2227 | index, name, name_len, -1); | 2223 | index, name, name_len, -1); |
| 2228 | if (di && !IS_ERR(di)) { | 2224 | if (IS_ERR(di)) { |
| 2225 | err = PTR_ERR(di); | ||
| 2226 | goto fail; | ||
| 2227 | } | ||
| 2228 | if (di) { | ||
| 2229 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2229 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
| 2230 | bytes_del += name_len; | 2230 | bytes_del += name_len; |
| 2231 | BUG_ON(ret); | 2231 | BUG_ON(ret); |
| @@ -2243,6 +2243,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2243 | btrfs_release_path(log, path); | 2243 | btrfs_release_path(log, path); |
| 2244 | 2244 | ||
| 2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | 2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); |
| 2246 | if (ret < 0) { | ||
| 2247 | err = ret; | ||
| 2248 | goto fail; | ||
| 2249 | } | ||
| 2246 | if (ret == 0) { | 2250 | if (ret == 0) { |
| 2247 | struct btrfs_inode_item *item; | 2251 | struct btrfs_inode_item *item; |
| 2248 | u64 i_size; | 2252 | u64 i_size; |
| @@ -2260,9 +2264,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2260 | ret = 0; | 2264 | ret = 0; |
| 2261 | btrfs_release_path(log, path); | 2265 | btrfs_release_path(log, path); |
| 2262 | } | 2266 | } |
| 2263 | 2267 | fail: | |
| 2264 | btrfs_free_path(path); | 2268 | btrfs_free_path(path); |
| 2265 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2269 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
| 2270 | if (ret == -ENOSPC) { | ||
| 2271 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2272 | ret = 0; | ||
| 2273 | } | ||
| 2266 | btrfs_end_log_trans(root); | 2274 | btrfs_end_log_trans(root); |
| 2267 | 2275 | ||
| 2268 | return 0; | 2276 | return 0; |
| @@ -2290,6 +2298,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
| 2290 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2298 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
| 2291 | dirid, &index); | 2299 | dirid, &index); |
| 2292 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2300 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2301 | if (ret == -ENOSPC) { | ||
| 2302 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2303 | ret = 0; | ||
| 2304 | } | ||
| 2293 | btrfs_end_log_trans(root); | 2305 | btrfs_end_log_trans(root); |
| 2294 | 2306 | ||
| 2295 | return ret; | 2307 | return ret; |
| @@ -2317,7 +2329,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | |||
| 2317 | else | 2329 | else |
| 2318 | key.type = BTRFS_DIR_LOG_INDEX_KEY; | 2330 | key.type = BTRFS_DIR_LOG_INDEX_KEY; |
| 2319 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); | 2331 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); |
| 2320 | BUG_ON(ret); | 2332 | if (ret) |
| 2333 | return ret; | ||
| 2321 | 2334 | ||
| 2322 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2335 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 2323 | struct btrfs_dir_log_item); | 2336 | struct btrfs_dir_log_item); |
| @@ -2342,6 +2355,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2342 | struct btrfs_key max_key; | 2355 | struct btrfs_key max_key; |
| 2343 | struct btrfs_root *log = root->log_root; | 2356 | struct btrfs_root *log = root->log_root; |
| 2344 | struct extent_buffer *src; | 2357 | struct extent_buffer *src; |
| 2358 | int err = 0; | ||
| 2345 | int ret; | 2359 | int ret; |
| 2346 | int i; | 2360 | int i; |
| 2347 | int nritems; | 2361 | int nritems; |
| @@ -2404,6 +2418,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2404 | ret = overwrite_item(trans, log, dst_path, | 2418 | ret = overwrite_item(trans, log, dst_path, |
| 2405 | path->nodes[0], path->slots[0], | 2419 | path->nodes[0], path->slots[0], |
| 2406 | &tmp); | 2420 | &tmp); |
| 2421 | if (ret) { | ||
| 2422 | err = ret; | ||
| 2423 | goto done; | ||
| 2424 | } | ||
| 2407 | } | 2425 | } |
| 2408 | } | 2426 | } |
| 2409 | btrfs_release_path(root, path); | 2427 | btrfs_release_path(root, path); |
| @@ -2431,7 +2449,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2431 | goto done; | 2449 | goto done; |
| 2432 | ret = overwrite_item(trans, log, dst_path, src, i, | 2450 | ret = overwrite_item(trans, log, dst_path, src, i, |
| 2433 | &min_key); | 2451 | &min_key); |
| 2434 | BUG_ON(ret); | 2452 | if (ret) { |
| 2453 | err = ret; | ||
| 2454 | goto done; | ||
| 2455 | } | ||
| 2435 | } | 2456 | } |
| 2436 | path->slots[0] = nritems; | 2457 | path->slots[0] = nritems; |
| 2437 | 2458 | ||
| @@ -2453,22 +2474,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2453 | ret = overwrite_item(trans, log, dst_path, | 2474 | ret = overwrite_item(trans, log, dst_path, |
| 2454 | path->nodes[0], path->slots[0], | 2475 | path->nodes[0], path->slots[0], |
| 2455 | &tmp); | 2476 | &tmp); |
| 2456 | 2477 | if (ret) | |
| 2457 | BUG_ON(ret); | 2478 | err = ret; |
| 2458 | last_offset = tmp.offset; | 2479 | else |
| 2480 | last_offset = tmp.offset; | ||
| 2459 | goto done; | 2481 | goto done; |
| 2460 | } | 2482 | } |
| 2461 | } | 2483 | } |
| 2462 | done: | 2484 | done: |
| 2463 | *last_offset_ret = last_offset; | ||
| 2464 | btrfs_release_path(root, path); | 2485 | btrfs_release_path(root, path); |
| 2465 | btrfs_release_path(log, dst_path); | 2486 | btrfs_release_path(log, dst_path); |
| 2466 | 2487 | ||
| 2467 | /* insert the log range keys to indicate where the log is valid */ | 2488 | if (err == 0) { |
| 2468 | ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, | 2489 | *last_offset_ret = last_offset; |
| 2469 | first_offset, last_offset); | 2490 | /* |
| 2470 | BUG_ON(ret); | 2491 | * insert the log range keys to indicate where the log |
| 2471 | return 0; | 2492 | * is valid |
| 2493 | */ | ||
| 2494 | ret = insert_dir_log_key(trans, log, path, key_type, | ||
| 2495 | inode->i_ino, first_offset, | ||
| 2496 | last_offset); | ||
| 2497 | if (ret) | ||
| 2498 | err = ret; | ||
| 2499 | } | ||
| 2500 | return err; | ||
| 2472 | } | 2501 | } |
| 2473 | 2502 | ||
| 2474 | /* | 2503 | /* |
| @@ -2500,7 +2529,8 @@ again: | |||
| 2500 | ret = log_dir_items(trans, root, inode, path, | 2529 | ret = log_dir_items(trans, root, inode, path, |
| 2501 | dst_path, key_type, min_key, | 2530 | dst_path, key_type, min_key, |
| 2502 | &max_key); | 2531 | &max_key); |
| 2503 | BUG_ON(ret); | 2532 | if (ret) |
| 2533 | return ret; | ||
| 2504 | if (max_key == (u64)-1) | 2534 | if (max_key == (u64)-1) |
| 2505 | break; | 2535 | break; |
| 2506 | min_key = max_key + 1; | 2536 | min_key = max_key + 1; |
| @@ -2534,8 +2564,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2534 | 2564 | ||
| 2535 | while (1) { | 2565 | while (1) { |
| 2536 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); | 2566 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); |
| 2537 | 2567 | BUG_ON(ret == 0); | |
| 2538 | if (ret != 1) | 2568 | if (ret < 0) |
| 2539 | break; | 2569 | break; |
| 2540 | 2570 | ||
| 2541 | if (path->slots[0] == 0) | 2571 | if (path->slots[0] == 0) |
| @@ -2553,7 +2583,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2553 | btrfs_release_path(log, path); | 2583 | btrfs_release_path(log, path); |
| 2554 | } | 2584 | } |
| 2555 | btrfs_release_path(log, path); | 2585 | btrfs_release_path(log, path); |
| 2556 | return 0; | 2586 | return ret; |
| 2557 | } | 2587 | } |
| 2558 | 2588 | ||
| 2559 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2589 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
| @@ -2586,7 +2616,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2586 | } | 2616 | } |
| 2587 | ret = btrfs_insert_empty_items(trans, log, dst_path, | 2617 | ret = btrfs_insert_empty_items(trans, log, dst_path, |
| 2588 | ins_keys, ins_sizes, nr); | 2618 | ins_keys, ins_sizes, nr); |
| 2589 | BUG_ON(ret); | 2619 | if (ret) { |
| 2620 | kfree(ins_data); | ||
| 2621 | return ret; | ||
| 2622 | } | ||
| 2590 | 2623 | ||
| 2591 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { | 2624 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { |
| 2592 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | 2625 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], |
| @@ -2659,16 +2692,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2659 | * we have to do this after the loop above to avoid changing the | 2692 | * we have to do this after the loop above to avoid changing the |
| 2660 | * log tree while trying to change the log tree. | 2693 | * log tree while trying to change the log tree. |
| 2661 | */ | 2694 | */ |
| 2695 | ret = 0; | ||
| 2662 | while (!list_empty(&ordered_sums)) { | 2696 | while (!list_empty(&ordered_sums)) { |
| 2663 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, | 2697 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, |
| 2664 | struct btrfs_ordered_sum, | 2698 | struct btrfs_ordered_sum, |
| 2665 | list); | 2699 | list); |
| 2666 | ret = btrfs_csum_file_blocks(trans, log, sums); | 2700 | if (!ret) |
| 2667 | BUG_ON(ret); | 2701 | ret = btrfs_csum_file_blocks(trans, log, sums); |
| 2668 | list_del(&sums->list); | 2702 | list_del(&sums->list); |
| 2669 | kfree(sums); | 2703 | kfree(sums); |
| 2670 | } | 2704 | } |
| 2671 | return 0; | 2705 | return ret; |
| 2672 | } | 2706 | } |
| 2673 | 2707 | ||
| 2674 | /* log a single inode in the tree log. | 2708 | /* log a single inode in the tree log. |
| @@ -2696,6 +2730,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2696 | struct btrfs_root *log = root->log_root; | 2730 | struct btrfs_root *log = root->log_root; |
| 2697 | struct extent_buffer *src = NULL; | 2731 | struct extent_buffer *src = NULL; |
| 2698 | u32 size; | 2732 | u32 size; |
| 2733 | int err = 0; | ||
| 2699 | int ret; | 2734 | int ret; |
| 2700 | int nritems; | 2735 | int nritems; |
| 2701 | int ins_start_slot = 0; | 2736 | int ins_start_slot = 0; |
| @@ -2738,7 +2773,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2738 | } else { | 2773 | } else { |
| 2739 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 2774 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); |
| 2740 | } | 2775 | } |
| 2741 | BUG_ON(ret); | 2776 | if (ret) { |
| 2777 | err = ret; | ||
| 2778 | goto out_unlock; | ||
| 2779 | } | ||
| 2742 | path->keep_locks = 1; | 2780 | path->keep_locks = 1; |
| 2743 | 2781 | ||
| 2744 | while (1) { | 2782 | while (1) { |
| @@ -2767,7 +2805,10 @@ again: | |||
| 2767 | 2805 | ||
| 2768 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 2806 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, |
| 2769 | ins_nr, inode_only); | 2807 | ins_nr, inode_only); |
| 2770 | BUG_ON(ret); | 2808 | if (ret) { |
| 2809 | err = ret; | ||
| 2810 | goto out_unlock; | ||
| 2811 | } | ||
| 2771 | ins_nr = 1; | 2812 | ins_nr = 1; |
| 2772 | ins_start_slot = path->slots[0]; | 2813 | ins_start_slot = path->slots[0]; |
| 2773 | next_slot: | 2814 | next_slot: |
| @@ -2783,7 +2824,10 @@ next_slot: | |||
| 2783 | ret = copy_items(trans, log, dst_path, src, | 2824 | ret = copy_items(trans, log, dst_path, src, |
| 2784 | ins_start_slot, | 2825 | ins_start_slot, |
| 2785 | ins_nr, inode_only); | 2826 | ins_nr, inode_only); |
| 2786 | BUG_ON(ret); | 2827 | if (ret) { |
| 2828 | err = ret; | ||
| 2829 | goto out_unlock; | ||
| 2830 | } | ||
| 2787 | ins_nr = 0; | 2831 | ins_nr = 0; |
| 2788 | } | 2832 | } |
| 2789 | btrfs_release_path(root, path); | 2833 | btrfs_release_path(root, path); |
| @@ -2801,7 +2845,10 @@ next_slot: | |||
| 2801 | ret = copy_items(trans, log, dst_path, src, | 2845 | ret = copy_items(trans, log, dst_path, src, |
| 2802 | ins_start_slot, | 2846 | ins_start_slot, |
| 2803 | ins_nr, inode_only); | 2847 | ins_nr, inode_only); |
| 2804 | BUG_ON(ret); | 2848 | if (ret) { |
| 2849 | err = ret; | ||
| 2850 | goto out_unlock; | ||
| 2851 | } | ||
| 2805 | ins_nr = 0; | 2852 | ins_nr = 0; |
| 2806 | } | 2853 | } |
| 2807 | WARN_ON(ins_nr); | 2854 | WARN_ON(ins_nr); |
| @@ -2809,14 +2856,18 @@ next_slot: | |||
| 2809 | btrfs_release_path(root, path); | 2856 | btrfs_release_path(root, path); |
| 2810 | btrfs_release_path(log, dst_path); | 2857 | btrfs_release_path(log, dst_path); |
| 2811 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2858 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
| 2812 | BUG_ON(ret); | 2859 | if (ret) { |
| 2860 | err = ret; | ||
| 2861 | goto out_unlock; | ||
| 2862 | } | ||
| 2813 | } | 2863 | } |
| 2814 | BTRFS_I(inode)->logged_trans = trans->transid; | 2864 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 2865 | out_unlock: | ||
| 2815 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2866 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2816 | 2867 | ||
| 2817 | btrfs_free_path(path); | 2868 | btrfs_free_path(path); |
| 2818 | btrfs_free_path(dst_path); | 2869 | btrfs_free_path(dst_path); |
| 2819 | return 0; | 2870 | return err; |
| 2820 | } | 2871 | } |
| 2821 | 2872 | ||
| 2822 | /* | 2873 | /* |
| @@ -2941,10 +2992,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2941 | goto end_no_trans; | 2992 | goto end_no_trans; |
| 2942 | } | 2993 | } |
| 2943 | 2994 | ||
| 2944 | start_log_trans(trans, root); | 2995 | ret = start_log_trans(trans, root); |
| 2996 | if (ret) | ||
| 2997 | goto end_trans; | ||
| 2945 | 2998 | ||
| 2946 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2999 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 2947 | BUG_ON(ret); | 3000 | if (ret) |
| 3001 | goto end_trans; | ||
| 2948 | 3002 | ||
| 2949 | /* | 3003 | /* |
| 2950 | * for regular files, if its inode is already on disk, we don't | 3004 | * for regular files, if its inode is already on disk, we don't |
| @@ -2954,8 +3008,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2954 | */ | 3008 | */ |
| 2955 | if (S_ISREG(inode->i_mode) && | 3009 | if (S_ISREG(inode->i_mode) && |
| 2956 | BTRFS_I(inode)->generation <= last_committed && | 3010 | BTRFS_I(inode)->generation <= last_committed && |
| 2957 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | 3011 | BTRFS_I(inode)->last_unlink_trans <= last_committed) { |
| 2958 | goto no_parent; | 3012 | ret = 0; |
| 3013 | goto end_trans; | ||
| 3014 | } | ||
| 2959 | 3015 | ||
| 2960 | inode_only = LOG_INODE_EXISTS; | 3016 | inode_only = LOG_INODE_EXISTS; |
| 2961 | while (1) { | 3017 | while (1) { |
| @@ -2969,15 +3025,21 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2969 | if (BTRFS_I(inode)->generation > | 3025 | if (BTRFS_I(inode)->generation > |
| 2970 | root->fs_info->last_trans_committed) { | 3026 | root->fs_info->last_trans_committed) { |
| 2971 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 3027 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 2972 | BUG_ON(ret); | 3028 | if (ret) |
| 3029 | goto end_trans; | ||
| 2973 | } | 3030 | } |
| 2974 | if (IS_ROOT(parent)) | 3031 | if (IS_ROOT(parent)) |
| 2975 | break; | 3032 | break; |
| 2976 | 3033 | ||
| 2977 | parent = parent->d_parent; | 3034 | parent = parent->d_parent; |
| 2978 | } | 3035 | } |
| 2979 | no_parent: | ||
| 2980 | ret = 0; | 3036 | ret = 0; |
| 3037 | end_trans: | ||
| 3038 | if (ret < 0) { | ||
| 3039 | BUG_ON(ret != -ENOSPC); | ||
| 3040 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 3041 | ret = 1; | ||
| 3042 | } | ||
| 2981 | btrfs_end_log_trans(root); | 3043 | btrfs_end_log_trans(root); |
| 2982 | end_no_trans: | 3044 | end_no_trans: |
| 2983 | return ret; | 3045 | return ret; |
| @@ -3019,7 +3081,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
| 3019 | path = btrfs_alloc_path(); | 3081 | path = btrfs_alloc_path(); |
| 3020 | BUG_ON(!path); | 3082 | BUG_ON(!path); |
| 3021 | 3083 | ||
| 3022 | trans = btrfs_start_transaction(fs_info->tree_root, 1); | 3084 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
| 3023 | 3085 | ||
| 3024 | wc.trans = trans; | 3086 | wc.trans = trans; |
| 3025 | wc.pin = 1; | 3087 | wc.pin = 1; |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 0776eacb5083..3dfae84c8cc8 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 26 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
| 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
| 28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 29 | struct btrfs_fs_info *fs_info); | ||
| 28 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
| 29 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 30 | struct btrfs_root *root, struct dentry *dentry); | 32 | struct btrfs_root *root, struct dentry *dentry); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9df8e3f1ccab..d6e3af8be95b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -17,6 +17,7 @@ | |||
| 17 | */ | 17 | */ |
| 18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
| 19 | #include <linux/bio.h> | 19 | #include <linux/bio.h> |
| 20 | #include <linux/slab.h> | ||
| 20 | #include <linux/buffer_head.h> | 21 | #include <linux/buffer_head.h> |
| 21 | #include <linux/blkdev.h> | 22 | #include <linux/blkdev.h> |
| 22 | #include <linux/random.h> | 23 | #include <linux/random.h> |
| @@ -1096,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
| 1096 | if (!path) | 1097 | if (!path) |
| 1097 | return -ENOMEM; | 1098 | return -ENOMEM; |
| 1098 | 1099 | ||
| 1099 | trans = btrfs_start_transaction(root, 1); | 1100 | trans = btrfs_start_transaction(root, 0); |
| 1100 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
| 1101 | key.type = BTRFS_DEV_ITEM_KEY; | 1102 | key.type = BTRFS_DEV_ITEM_KEY; |
| 1102 | key.offset = device->devid; | 1103 | key.offset = device->devid; |
| @@ -1485,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1485 | goto error; | 1486 | goto error; |
| 1486 | } | 1487 | } |
| 1487 | 1488 | ||
| 1488 | trans = btrfs_start_transaction(root, 1); | 1489 | trans = btrfs_start_transaction(root, 0); |
| 1489 | lock_chunks(root); | 1490 | lock_chunks(root); |
| 1490 | 1491 | ||
| 1491 | device->barriers = 1; | 1492 | device->barriers = 1; |
| @@ -1750,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1750 | 1751 | ||
| 1751 | /* step one, relocate all the extents inside this chunk */ | 1752 | /* step one, relocate all the extents inside this chunk */ |
| 1752 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
| 1753 | BUG_ON(ret); | 1754 | if (ret) |
| 1755 | return ret; | ||
| 1754 | 1756 | ||
| 1755 | trans = btrfs_start_transaction(root, 1); | 1757 | trans = btrfs_start_transaction(root, 0); |
| 1756 | BUG_ON(!trans); | 1758 | BUG_ON(!trans); |
| 1757 | 1759 | ||
| 1758 | lock_chunks(root); | 1760 | lock_chunks(root); |
| @@ -1924,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1924 | break; | 1926 | break; |
| 1925 | BUG_ON(ret); | 1927 | BUG_ON(ret); |
| 1926 | 1928 | ||
| 1927 | trans = btrfs_start_transaction(dev_root, 1); | 1929 | trans = btrfs_start_transaction(dev_root, 0); |
| 1928 | BUG_ON(!trans); | 1930 | BUG_ON(!trans); |
| 1929 | 1931 | ||
| 1930 | ret = btrfs_grow_device(trans, device, old_size); | 1932 | ret = btrfs_grow_device(trans, device, old_size); |
| @@ -2093,11 +2095,7 @@ again: | |||
| 2093 | } | 2095 | } |
| 2094 | 2096 | ||
| 2095 | /* Shrinking succeeded, else we would be at "done". */ | 2097 | /* Shrinking succeeded, else we would be at "done". */ |
| 2096 | trans = btrfs_start_transaction(root, 1); | 2098 | trans = btrfs_start_transaction(root, 0); |
| 2097 | if (!trans) { | ||
| 2098 | ret = -ENOMEM; | ||
| 2099 | goto done; | ||
| 2100 | } | ||
| 2101 | lock_chunks(root); | 2099 | lock_chunks(root); |
| 2102 | 2100 | ||
| 2103 | device->disk_total_bytes = new_size; | 2101 | device->disk_total_bytes = new_size; |
| @@ -2198,9 +2196,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
| 2198 | min_stripes = 2; | 2196 | min_stripes = 2; |
| 2199 | } | 2197 | } |
| 2200 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { | 2198 | if (type & (BTRFS_BLOCK_GROUP_RAID1)) { |
| 2201 | num_stripes = min_t(u64, 2, fs_devices->rw_devices); | 2199 | if (fs_devices->rw_devices < 2) |
| 2202 | if (num_stripes < 2) | ||
| 2203 | return -ENOSPC; | 2200 | return -ENOSPC; |
| 2201 | num_stripes = 2; | ||
| 2204 | min_stripes = 2; | 2202 | min_stripes = 2; |
| 2205 | } | 2203 | } |
| 2206 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { | 2204 | if (type & (BTRFS_BLOCK_GROUP_RAID10)) { |
| @@ -2244,8 +2242,16 @@ again: | |||
| 2244 | do_div(calc_size, stripe_len); | 2242 | do_div(calc_size, stripe_len); |
| 2245 | calc_size *= stripe_len; | 2243 | calc_size *= stripe_len; |
| 2246 | } | 2244 | } |
| 2245 | |||
| 2247 | /* we don't want tiny stripes */ | 2246 | /* we don't want tiny stripes */ |
| 2248 | calc_size = max_t(u64, min_stripe_size, calc_size); | 2247 | if (!looped) |
| 2248 | calc_size = max_t(u64, min_stripe_size, calc_size); | ||
| 2249 | |||
| 2250 | /* | ||
| 2251 | * we're about to do_div by the stripe_len so lets make sure | ||
| 2252 | * we end up with something bigger than a stripe | ||
| 2253 | */ | ||
| 2254 | calc_size = max_t(u64, calc_size, stripe_len * 4); | ||
| 2249 | 2255 | ||
| 2250 | do_div(calc_size, stripe_len); | 2256 | do_div(calc_size, stripe_len); |
| 2251 | calc_size *= stripe_len; | 2257 | calc_size *= stripe_len; |
| @@ -3389,6 +3395,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) | |||
| 3389 | key.type = 0; | 3395 | key.type = 0; |
| 3390 | again: | 3396 | again: |
| 3391 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 3397 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 3398 | if (ret < 0) | ||
| 3399 | goto error; | ||
| 3392 | while (1) { | 3400 | while (1) { |
| 3393 | leaf = path->nodes[0]; | 3401 | leaf = path->nodes[0]; |
| 3394 | slot = path->slots[0]; | 3402 | slot = path->slots[0]; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 193b58f7d3f3..88ecbb215878 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
| 154 | if (trans) | 154 | if (trans) |
| 155 | return do_setxattr(trans, inode, name, value, size, flags); | 155 | return do_setxattr(trans, inode, name, value, size, flags); |
| 156 | 156 | ||
| 157 | ret = btrfs_reserve_metadata_space(root, 2); | 157 | trans = btrfs_start_transaction(root, 2); |
| 158 | if (ret) | 158 | if (IS_ERR(trans)) |
| 159 | return ret; | 159 | return PTR_ERR(trans); |
| 160 | 160 | ||
| 161 | trans = btrfs_start_transaction(root, 1); | ||
| 162 | if (!trans) { | ||
| 163 | ret = -ENOMEM; | ||
| 164 | goto out; | ||
| 165 | } | ||
| 166 | btrfs_set_trans_block_group(trans, inode); | 161 | btrfs_set_trans_block_group(trans, inode); |
| 167 | 162 | ||
| 168 | ret = do_setxattr(trans, inode, name, value, size, flags); | 163 | ret = do_setxattr(trans, inode, name, value, size, flags); |
| @@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
| 174 | BUG_ON(ret); | 169 | BUG_ON(ret); |
| 175 | out: | 170 | out: |
| 176 | btrfs_end_transaction_throttle(trans, root); | 171 | btrfs_end_transaction_throttle(trans, root); |
| 177 | btrfs_unreserve_metadata_space(root, 2); | ||
| 178 | return ret; | 172 | return ret; |
| 179 | } | 173 | } |
| 180 | 174 | ||
| @@ -282,7 +276,7 @@ err: | |||
| 282 | * List of handlers for synthetic system.* attributes. All real ondisk | 276 | * List of handlers for synthetic system.* attributes. All real ondisk |
| 283 | * attributes are handled directly. | 277 | * attributes are handled directly. |
| 284 | */ | 278 | */ |
| 285 | struct xattr_handler *btrfs_xattr_handlers[] = { | 279 | const struct xattr_handler *btrfs_xattr_handlers[] = { |
| 286 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL | 280 | #ifdef CONFIG_BTRFS_FS_POSIX_ACL |
| 287 | &btrfs_xattr_acl_access_handler, | 281 | &btrfs_xattr_acl_access_handler, |
| 288 | &btrfs_xattr_acl_default_handler, | 282 | &btrfs_xattr_acl_default_handler, |
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h index 721efa0346e0..7a43fd640bbb 100644 --- a/fs/btrfs/xattr.h +++ b/fs/btrfs/xattr.h | |||
| @@ -21,9 +21,9 @@ | |||
| 21 | 21 | ||
| 22 | #include <linux/xattr.h> | 22 | #include <linux/xattr.h> |
| 23 | 23 | ||
| 24 | extern struct xattr_handler btrfs_xattr_acl_access_handler; | 24 | extern const struct xattr_handler btrfs_xattr_acl_access_handler; |
| 25 | extern struct xattr_handler btrfs_xattr_acl_default_handler; | 25 | extern const struct xattr_handler btrfs_xattr_acl_default_handler; |
| 26 | extern struct xattr_handler *btrfs_xattr_handlers[]; | 26 | extern const struct xattr_handler *btrfs_xattr_handlers[]; |
| 27 | 27 | ||
| 28 | extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, | 28 | extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name, |
| 29 | void *buffer, size_t size); | 29 | void *buffer, size_t size); |
