author    Linus Torvalds <torvalds@linux-foundation.org>  2017-11-14 16:35:29 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-11-14 16:35:29 -0500
commit    5cea7647e64657138138a3794ae172ee0fc175da (patch)
tree      38adc54cba508db574e190e9d9aa601c36a8fd7c
parent    808eb24e0e0939b487bf90e3888a9636f1c83acb (diff)
parent    d28e649a5c58b779b303c252c66ee84a0f2c3b32 (diff)
Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"There are some new user features and the usual load of invisible
enhancements or cleanups.
New features:
- extend mount options to specify zlib compression level, -o
compress=zlib:9 (a short sketch of the level encoding follows this
list)
- v2 of the "extent to inode mapping" ioctl, addressing a use case
where we want to retrieve more results, even if less precise, and do
the postprocessing in userspace, aiding defragmentation or
deduplication tools (a userspace sketch follows the commit list
below)
- populate compression heuristic logic: do data sampling and try to
guess compressibility by looking for repeated patterns, counting
unique byte values and their distribution, and calculating Shannon
entropy; this will need more benchmarking and possibly fine tuning,
but the base should be good enough (an illustrative sketch of the
entropy step follows the quoted message)
- enable indexing for btrfs as lower filesystem in overlayfs
- speedup page cache readahead during send on large files
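To illustrate the first feature: the level parsed from "zlib:9" travels
together with the algorithm id in a single type_level word that
btrfs_compress_pages() unpacks again (see the fs/btrfs/compression.c
and compression.h hunks below, where the algorithm sits in bits 0-3 and
the level in bits 4-7). The following standalone sketch is not kernel
code: pack_type_level() is a hypothetical helper, and str2level() merely
mirrors the new btrfs_compress_str2level():

#include <stdio.h>
#include <string.h>

/*
 * Hypothetical mirror of the kernel's encoding: compression algorithm
 * in bits 0-3, zlib level in bits 4-7 (0 means "algorithm default").
 */
static unsigned int pack_type_level(unsigned int type, unsigned int level)
{
	return (level << 4) | (type & 0xF);
}

/*
 * Accept "zlib:1" up to "zlib:9", like the new btrfs_compress_str2level();
 * anything else falls back to the default level 0.
 */
static unsigned int str2level(const char *str)
{
	if (strncmp(str, "zlib", 4) != 0)
		return 0;
	if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
		return str[5] - '0';
	return 0;
}

int main(void)
{
	/* 1 plays the role of BTRFS_COMPRESS_ZLIB from compression.h */
	unsigned int type_level = pack_type_level(1, str2level("zlib:9"));

	printf("algo %u, level %u\n", type_level & 0xF, type_level >> 4);
	return 0;
}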
Internal enhancements:
- more sanity checks of b-tree items when reading them from disk
- more EINVAL/EUCLEAN fixups, missing BLK_STS_* conversion, other
errno or error handling fixes
- remove some homegrown IO-related logic that has been obsoleted by
core block layer changes (batching, plug/unplug, own counters)
- add ref-verify, an optional debugging feature to verify extent
reference accounting
- simplify code handling outstanding extents, make it more clear
where and how the accounting is done
- make delalloc reservations per-inode, simplify the code and make
the logic more straightforward
- extensive cleanup of delayed refs code
Notable fixes:
- fix send ioctl on 32bit with 64bit kernel"
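A rough standalone illustration of the entropy step of the new
heuristic may help before the commit list. This is not kernel code: the
bucket contents below are made up for the demo, and entropy_percent()
only mirrors the shannon_entropy()/ilog2_w() logic added to
fs/btrfs/compression.c further down (taking ilog2 of n^4 buys two extra
bits of precision, and the result is a percentage of the 8 bits/byte
maximum):

#include <stdint.h>
#include <stdio.h>

/* ilog2(n^4) gives two extra bits of precision over plain ilog2(n). */
static uint32_t ilog2_w(uint64_t n)
{
	uint64_t v = n * n * n * n;
	uint32_t r = 0;

	while (v >>= 1)
		r++;
	return r;
}

/*
 * Entropy as a percentage of the 8 bits/byte maximum, mirroring the
 * kernel's shannon_entropy() over a byte-count bucket array.
 */
static uint32_t entropy_percent(const uint32_t bucket[256], uint32_t sample_size)
{
	const uint32_t entropy_max = 8 * ilog2_w(2);
	uint32_t sz_base = ilog2_w(sample_size);
	uint32_t sum = 0;

	for (int i = 0; i < 256; i++)
		if (bucket[i])
			sum += bucket[i] * (sz_base - ilog2_w(bucket[i]));

	return sum / sample_size * 100 / entropy_max;
}

int main(void)
{
	uint32_t bucket[256] = { 0 };

	/* Text-like sample: 4096 bytes spread over just 16 byte values. */
	for (int i = 0; i < 16; i++)
		bucket['a' + i] = 256;
	printf("low-entropy sample: %u%%\n", entropy_percent(bucket, 4096));

	/* Random-like sample: 8192 bytes spread evenly over all 256 values. */
	for (int i = 0; i < 256; i++)
		bucket[i] = 32;
	printf("uniform sample:     %u%%\n", entropy_percent(bucket, 8192));
	return 0;
}

The first case prints 50% (16 equally likely symbols need 4 of 8 bits),
the second 100%, which is the region the kernel treats as incompressible.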
* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (102 commits)
btrfs: Fix bug for misused dev_t when lookup in dev state hash table.
Btrfs: heuristic: add Shannon entropy calculation
Btrfs: heuristic: add byte core set calculation
Btrfs: heuristic: add byte set calculation
Btrfs: heuristic: add detection of repeated data patterns
Btrfs: heuristic: implement sampling logic
Btrfs: heuristic: add bucket and sample counters and other defines
Btrfs: compression: separate heuristic/compression workspaces
btrfs: move btrfs_truncate_block out of trans handle
btrfs: don't call btrfs_start_delalloc_roots in flushoncommit
btrfs: track refs in a rb_tree instead of a list
btrfs: add a comp_refs() helper
btrfs: switch args for comp_*_refs
btrfs: make the delalloc block rsv per inode
btrfs: add tracepoints for outstanding extents mods
Btrfs: rework outstanding_extents
btrfs: increase output size for LOGICAL_INO_V2 ioctl
btrfs: add a flags argument to LOGICAL_INO and call it LOGICAL_INO_V2
btrfs: add a flag to iterate_inodes_from_logical to find all extent refs for uncompressed extents
btrfs: send: remove unused code
...
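For the LOGICAL_INO_V2 commits above, a hedged userspace sketch of the
intended use follows. It is not part of the patch and assumes the v2
uAPI as merged (linux/btrfs.h providing BTRFS_IOC_LOGICAL_INO_V2, a
flags field in struct btrfs_logical_ino_args, and the
BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET bit); verify the names against the
headers actually installed:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	struct btrfs_logical_ino_args args = { 0 };
	struct btrfs_data_container *inodes;
	const uint64_t bufsize = 1024 * 1024;	/* v2 accepts buffers > 64KiB */
	unsigned int i;
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <mountpoint> <logical>\n", argv[0]);
		return 1;
	}

	inodes = calloc(1, bufsize);
	fd = open(argv[1], O_RDONLY);
	if (!inodes || fd < 0)
		return 1;

	args.logical = strtoull(argv[2], NULL, 0);
	args.size = bufsize;
	/* v2 extension: return refs for all extents, ignoring their offsets */
	args.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
	args.inodes = (uintptr_t)inodes;

	if (ioctl(fd, BTRFS_IOC_LOGICAL_INO_V2, &args) < 0) {
		perror("BTRFS_IOC_LOGICAL_INO_V2");
		return 1;
	}

	/* val[] carries (inode, offset, root) triples for each reference */
	for (i = 0; i < inodes->elem_cnt; i += 3)
		printf("inode %llu offset %llu root %llu\n",
		       (unsigned long long)inodes->val[i],
		       (unsigned long long)inodes->val[i + 1],
		       (unsigned long long)inodes->val[i + 2]);
	return 0;
}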
51 files changed, 3356 insertions(+), 1556 deletions(-)
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index a26c63b4ad68..2e558227931a 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -91,3 +91,14 @@ config BTRFS_ASSERT
 	  any of the assertions trip. This is meant for btrfs developers only.
 
 	  If unsure, say N.
+
+config BTRFS_FS_REF_VERIFY
+	bool "Btrfs with the ref verify tool compiled in"
+	depends on BTRFS_FS
+	default n
+	help
+	  Enable run-time extent reference verification instrumentation. This
+	  is meant to be used by btrfs developers for tracking down extent
+	  reference problems or verifying they didn't break something.
+
+	  If unsure, say N.
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f2cd9dedb037..6fe881d5cb38 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -10,10 +10,11 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
-	   uuid-tree.o props.o hash.o free-space-tree.o
+	   uuid-tree.o props.o hash.o free-space-tree.o tree-checker.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
 
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
 	   tests/extent-buffer-tests.o tests/btrfs-tests.o \
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index e00c8a9fd5bb..d5540749f0e5 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -67,7 +67,7 @@ struct btrfs_workqueue {
 static void normal_work_helper(struct btrfs_work *work);
 
 #define BTRFS_WORK_HELPER(name)					\
-void btrfs_##name(struct work_struct *arg)				\
+noinline_for_stack void btrfs_##name(struct work_struct *arg)	\
 {									\
 	struct btrfs_work *work = container_of(arg, struct btrfs_work,	\
 					       normal_work);		\
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index b517ef1477ea..7d0dc100a09a 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -40,12 +40,14 @@ static int check_extent_in_eb(const struct btrfs_key *key,
 			      const struct extent_buffer *eb,
 			      const struct btrfs_file_extent_item *fi,
 			      u64 extent_item_pos,
-			      struct extent_inode_elem **eie)
+			      struct extent_inode_elem **eie,
+			      bool ignore_offset)
 {
 	u64 offset = 0;
 	struct extent_inode_elem *e;
 
-	if (!btrfs_file_extent_compression(eb, fi) &&
+	if (!ignore_offset &&
+	    !btrfs_file_extent_compression(eb, fi) &&
 	    !btrfs_file_extent_encryption(eb, fi) &&
 	    !btrfs_file_extent_other_encoding(eb, fi)) {
 		u64 data_offset;
@@ -84,7 +86,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie)
 
 static int find_extent_in_eb(const struct extent_buffer *eb,
 			     u64 wanted_disk_byte, u64 extent_item_pos,
-			     struct extent_inode_elem **eie)
+			     struct extent_inode_elem **eie,
+			     bool ignore_offset)
 {
 	u64 disk_byte;
 	struct btrfs_key key;
@@ -113,7 +116,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb,
 		if (disk_byte != wanted_disk_byte)
 			continue;
 
-		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie);
+		ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset);
 		if (ret < 0)
 			return ret;
 	}
@@ -419,7 +422,7 @@ static int add_indirect_ref(const struct btrfs_fs_info *fs_info,
 static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			   struct ulist *parents, struct prelim_ref *ref,
 			   int level, u64 time_seq, const u64 *extent_item_pos,
-			   u64 total_refs)
+			   u64 total_refs, bool ignore_offset)
 {
 	int ret = 0;
 	int slot;
@@ -472,7 +475,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path,
 			if (extent_item_pos) {
 				ret = check_extent_in_eb(&key, eb, fi,
 						*extent_item_pos,
-						&eie);
+						&eie, ignore_offset);
 				if (ret < 0)
 					break;
 			}
@@ -510,7 +513,8 @@ next:
 static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 				struct btrfs_path *path, u64 time_seq,
 				struct prelim_ref *ref, struct ulist *parents,
-				const u64 *extent_item_pos, u64 total_refs)
+				const u64 *extent_item_pos, u64 total_refs,
+				bool ignore_offset)
 {
 	struct btrfs_root *root;
 	struct btrfs_key root_key;
@@ -581,7 +585,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info,
 	}
 
 	ret = add_all_parents(root, path, parents, ref, level, time_seq,
-			      extent_item_pos, total_refs);
+			      extent_item_pos, total_refs, ignore_offset);
 out:
 	path->lowest_level = 0;
 	btrfs_release_path(path);
@@ -616,7 +620,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 				 struct btrfs_path *path, u64 time_seq,
 				 struct preftrees *preftrees,
 				 const u64 *extent_item_pos, u64 total_refs,
-				 struct share_check *sc)
+				 struct share_check *sc, bool ignore_offset)
 {
 	int err;
 	int ret = 0;
@@ -661,7 +665,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 		}
 		err = resolve_indirect_ref(fs_info, path, time_seq, ref,
 					   parents, extent_item_pos,
-					   total_refs);
+					   total_refs, ignore_offset);
 		/*
 		 * we can only tolerate ENOENT,otherwise,we should catch error
 		 * and return directly.
@@ -769,6 +773,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 	struct btrfs_key key;
 	struct btrfs_key tmp_op_key;
 	struct btrfs_key *op_key = NULL;
+	struct rb_node *n;
 	int count;
 	int ret = 0;
 
@@ -778,7 +783,9 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info,
 	}
 
 	spin_lock(&head->lock);
-	list_for_each_entry(node, &head->ref_list, list) {
+	for (n = rb_first(&head->ref_tree); n; n = rb_next(n)) {
+		node = rb_entry(n, struct btrfs_delayed_ref_node,
+				ref_node);
 		if (node->seq > seq)
 			continue;
 
@@ -1107,13 +1114,17 @@ static int add_keyed_refs(struct btrfs_fs_info *fs_info,
 *
 * Otherwise this returns 0 for success and <0 for an error.
 *
+ * If ignore_offset is set to false, only extent refs whose offsets match
+ * extent_item_pos are returned. If true, every extent ref is returned
+ * and extent_item_pos is ignored.
+ *
 * FIXME some caching might speed things up
 */
 static int find_parent_nodes(struct btrfs_trans_handle *trans,
 			     struct btrfs_fs_info *fs_info, u64 bytenr,
 			     u64 time_seq, struct ulist *refs,
 			     struct ulist *roots, const u64 *extent_item_pos,
-			     struct share_check *sc)
+			     struct share_check *sc, bool ignore_offset)
 {
 	struct btrfs_key key;
 	struct btrfs_path *path;
@@ -1178,7 +1189,7 @@ again:
 	head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
 	if (head) {
 		if (!mutex_trylock(&head->mutex)) {
-			refcount_inc(&head->node.refs);
+			refcount_inc(&head->refs);
 			spin_unlock(&delayed_refs->lock);
 
 			btrfs_release_path(path);
@@ -1189,7 +1200,7 @@ again:
 			 */
 			mutex_lock(&head->mutex);
 			mutex_unlock(&head->mutex);
-			btrfs_put_delayed_ref(&head->node);
+			btrfs_put_delayed_ref_head(head);
 			goto again;
 		}
 		spin_unlock(&delayed_refs->lock);
@@ -1235,7 +1246,7 @@ again:
 	WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root));
 
 	ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees,
-				    extent_item_pos, total_refs, sc);
+				    extent_item_pos, total_refs, sc, ignore_offset);
 	if (ret)
 		goto out;
 
@@ -1282,7 +1293,7 @@ again:
 				btrfs_tree_read_lock(eb);
 				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 				ret = find_extent_in_eb(eb, bytenr,
-							*extent_item_pos, &eie);
+							*extent_item_pos, &eie, ignore_offset);
 				btrfs_tree_read_unlock_blocking(eb);
 				free_extent_buffer(eb);
 				if (ret < 0)
@@ -1350,7 +1361,7 @@ static void free_leaf_list(struct ulist *blocks)
 static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 				struct btrfs_fs_info *fs_info, u64 bytenr,
 				u64 time_seq, struct ulist **leafs,
-				const u64 *extent_item_pos)
+				const u64 *extent_item_pos, bool ignore_offset)
 {
 	int ret;
 
@@ -1359,7 +1370,7 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
-				*leafs, NULL, extent_item_pos, NULL);
+				*leafs, NULL, extent_item_pos, NULL, ignore_offset);
 	if (ret < 0 && ret != -ENOENT) {
 		free_leaf_list(*leafs);
 		return ret;
@@ -1383,7 +1394,8 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
 */
 static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 				     struct btrfs_fs_info *fs_info, u64 bytenr,
-				     u64 time_seq, struct ulist **roots)
+				     u64 time_seq, struct ulist **roots,
+				     bool ignore_offset)
 {
 	struct ulist *tmp;
 	struct ulist_node *node = NULL;
@@ -1402,7 +1414,7 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
 		ret = find_parent_nodes(trans, fs_info, bytenr, time_seq,
-					tmp, *roots, NULL, NULL);
+					tmp, *roots, NULL, NULL, ignore_offset);
 		if (ret < 0 && ret != -ENOENT) {
 			ulist_free(tmp);
 			ulist_free(*roots);
@@ -1421,14 +1433,15 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots)
+			 u64 time_seq, struct ulist **roots,
+			 bool ignore_offset)
 {
 	int ret;
 
 	if (!trans)
 		down_read(&fs_info->commit_root_sem);
 	ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
-					time_seq, roots);
+					time_seq, roots, ignore_offset);
 	if (!trans)
 		up_read(&fs_info->commit_root_sem);
 	return ret;
@@ -1483,7 +1496,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr)
 	ULIST_ITER_INIT(&uiter);
 	while (1) {
 		ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
-					roots, NULL, &shared);
+					roots, NULL, &shared, false);
 		if (ret == BACKREF_FOUND_SHARED) {
 			/* this is the only condition under which we return 1 */
 			ret = 1;
@@ -1877,7 +1890,8 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info,
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 				u64 extent_item_objectid, u64 extent_item_pos,
 				int search_commit_root,
-				iterate_extent_inodes_t *iterate, void *ctx)
+				iterate_extent_inodes_t *iterate, void *ctx,
+				bool ignore_offset)
 {
 	int ret;
 	struct btrfs_trans_handle *trans = NULL;
@@ -1903,14 +1917,15 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 
 	ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
 				   tree_mod_seq_elem.seq, &refs,
-				   &extent_item_pos);
+				   &extent_item_pos, ignore_offset);
 	if (ret)
 		goto out;
 
 	ULIST_ITER_INIT(&ref_uiter);
 	while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
 		ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val,
-						tree_mod_seq_elem.seq, &roots);
+						tree_mod_seq_elem.seq, &roots,
+						ignore_offset);
 		if (ret)
 			break;
 		ULIST_ITER_INIT(&root_uiter);
@@ -1943,7 +1958,8 @@ out:
 
 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 				struct btrfs_path *path,
-				iterate_extent_inodes_t *iterate, void *ctx)
+				iterate_extent_inodes_t *iterate, void *ctx,
+				bool ignore_offset)
 {
 	int ret;
 	u64 extent_item_pos;
@@ -1961,7 +1977,7 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 	extent_item_pos = logical - found_key.objectid;
 	ret = iterate_extent_inodes(fs_info, found_key.objectid,
 					extent_item_pos, search_commit_root,
-					iterate, ctx);
+					iterate, ctx, ignore_offset);
 
 	return ret;
 }
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index e410335841aa..0c2fab8514ff 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -43,17 +43,19 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
 int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 			  u64 extent_item_objectid,
 			  u64 extent_offset, int search_commit_root,
-			  iterate_extent_inodes_t *iterate, void *ctx);
+			  iterate_extent_inodes_t *iterate, void *ctx,
+			  bool ignore_offset);
 
 int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info,
 				struct btrfs_path *path,
-				iterate_extent_inodes_t *iterate, void *ctx);
+				iterate_extent_inodes_t *iterate, void *ctx,
+				bool ignore_offset);
 
 int paths_from_inode(u64 inum, struct inode_fs_paths *ipath);
 
 int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
 			 struct btrfs_fs_info *fs_info, u64 bytenr,
-			 u64 time_seq, struct ulist **roots);
+			 u64 time_seq, struct ulist **roots, bool ignore_offset);
 char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
 			u32 name_len, unsigned long name_off,
 			struct extent_buffer *eb_in, u64 parent,
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index eccadb5f62a5..63f0ccc92a71 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -36,14 +36,13 @@
 #define BTRFS_INODE_ORPHAN_META_RESERVED	1
 #define BTRFS_INODE_DUMMY			2
 #define BTRFS_INODE_IN_DEFRAG			3
-#define BTRFS_INODE_DELALLOC_META_RESERVED	4
-#define BTRFS_INODE_HAS_ORPHAN_ITEM		5
-#define BTRFS_INODE_HAS_ASYNC_EXTENT		6
-#define BTRFS_INODE_NEEDS_FULL_SYNC		7
-#define BTRFS_INODE_COPY_EVERYTHING		8
-#define BTRFS_INODE_IN_DELALLOC_LIST		9
-#define BTRFS_INODE_READDIO_NEED_LOCK		10
-#define BTRFS_INODE_HAS_PROPS			11
+#define BTRFS_INODE_HAS_ORPHAN_ITEM		4
+#define BTRFS_INODE_HAS_ASYNC_EXTENT		5
+#define BTRFS_INODE_NEEDS_FULL_SYNC		6
+#define BTRFS_INODE_COPY_EVERYTHING		7
+#define BTRFS_INODE_IN_DELALLOC_LIST		8
+#define BTRFS_INODE_READDIO_NEED_LOCK		9
+#define BTRFS_INODE_HAS_PROPS			10
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -176,7 +175,8 @@ struct btrfs_inode {
 	 * of extent items we've reserved metadata for.
 	 */
 	unsigned outstanding_extents;
-	unsigned reserved_extents;
+
+	struct btrfs_block_rsv block_rsv;
 
 	/*
 	 * Cached values of inode properties
@@ -267,6 +267,17 @@ static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
 	return false;
 }
 
+static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
+						 int mod)
+{
+	lockdep_assert_held(&inode->lock);
+	inode->outstanding_extents += mod;
+	if (btrfs_is_free_space_inode(inode))
+		return;
+	trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
+						  mod);
+}
+
 static inline int btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
 	int ret = 0;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 7d5a9b51f0d7..7d51b5a5b505 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -613,7 +613,7 @@ static void btrfsic_dev_state_hashtable_add(
 		struct btrfsic_dev_state_hashtable *h)
 {
 	const unsigned int hashval =
-	    (((unsigned int)((uintptr_t)ds->bdev)) &
+	    (((unsigned int)((uintptr_t)ds->bdev->bd_dev)) &
 	     (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
 
 	list_add(&ds->collision_resolving_node, h->table + hashval);
@@ -2803,7 +2803,7 @@ static void __btrfsic_submit_bio(struct bio *bio)
 	mutex_lock(&btrfsic_mutex);
 	/* since btrfsic_submit_bio() is also called before
	 * btrfsic_mount(), this might return NULL */
-	dev_state = btrfsic_dev_state_lookup(bio_dev(bio));
+	dev_state = btrfsic_dev_state_lookup(bio_dev(bio) + bio->bi_partno);
 	if (NULL != dev_state &&
 	    (bio_op(bio) == REQ_OP_WRITE) && bio_has_data(bio)) {
 		unsigned int i = 0;
@@ -2913,7 +2913,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 	state = kvzalloc(sizeof(*state), GFP_KERNEL);
 	if (!state) {
 		pr_info("btrfs check-integrity: allocation failed!\n");
-		return -1;
+		return -ENOMEM;
 	}
 
 	if (!btrfsic_is_initialized) {
@@ -2945,7 +2945,7 @@ int btrfsic_mount(struct btrfs_fs_info *fs_info,
 	if (NULL == ds) {
 		pr_info("btrfs check-integrity: kmalloc() failed!\n");
 		mutex_unlock(&btrfsic_mutex);
-		return -1;
+		return -ENOMEM;
 	}
 	ds->bdev = device->bdev;
 	ds->state = state;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index 280384bf34f1..b35ce16b3df3 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -33,6 +33,8 @@ | |||
33 | #include <linux/bit_spinlock.h> | 33 | #include <linux/bit_spinlock.h> |
34 | #include <linux/slab.h> | 34 | #include <linux/slab.h> |
35 | #include <linux/sched/mm.h> | 35 | #include <linux/sched/mm.h> |
36 | #include <linux/sort.h> | ||
37 | #include <linux/log2.h> | ||
36 | #include "ctree.h" | 38 | #include "ctree.h" |
37 | #include "disk-io.h" | 39 | #include "disk-io.h" |
38 | #include "transaction.h" | 40 | #include "transaction.h" |
@@ -255,7 +257,8 @@ static void end_compressed_bio_write(struct bio *bio) | |||
255 | cb->start, | 257 | cb->start, |
256 | cb->start + cb->len - 1, | 258 | cb->start + cb->len - 1, |
257 | NULL, | 259 | NULL, |
258 | bio->bi_status ? 0 : 1); | 260 | bio->bi_status ? |
261 | BLK_STS_OK : BLK_STS_NOTSUPP); | ||
259 | cb->compressed_pages[0]->mapping = NULL; | 262 | cb->compressed_pages[0]->mapping = NULL; |
260 | 263 | ||
261 | end_compressed_writeback(inode, cb); | 264 | end_compressed_writeback(inode, cb); |
@@ -706,7 +709,86 @@ out: | |||
706 | return ret; | 709 | return ret; |
707 | } | 710 | } |
708 | 711 | ||
709 | static struct { | 712 | /* |
713 | * Heuristic uses systematic sampling to collect data from the input data | ||
714 | * range, the logic can be tuned by the following constants: | ||
715 | * | ||
716 | * @SAMPLING_READ_SIZE - how many bytes will be copied from for each sample | ||
717 | * @SAMPLING_INTERVAL - range from which the sampled data can be collected | ||
718 | */ | ||
719 | #define SAMPLING_READ_SIZE (16) | ||
720 | #define SAMPLING_INTERVAL (256) | ||
721 | |||
722 | /* | ||
723 | * For statistical analysis of the input data we consider bytes that form a | ||
724 | * Galois Field of 256 objects. Each object has an attribute count, ie. how | ||
725 | * many times the object appeared in the sample. | ||
726 | */ | ||
727 | #define BUCKET_SIZE (256) | ||
728 | |||
729 | /* | ||
730 | * The size of the sample is based on a statistical sampling rule of thumb. | ||
731 | * The common way is to perform sampling tests as long as the number of | ||
732 | * elements in each cell is at least 5. | ||
733 | * | ||
734 | * Instead of 5, we choose 32 to obtain more accurate results. | ||
735 | * If the data contain the maximum number of symbols, which is 256, we obtain a | ||
736 | * sample size bound by 8192. | ||
737 | * | ||
738 | * For a sample of at most 8KB of data per data range: 16 consecutive bytes | ||
739 | * from up to 512 locations. | ||
740 | */ | ||
741 | #define MAX_SAMPLE_SIZE (BTRFS_MAX_UNCOMPRESSED * \ | ||
742 | SAMPLING_READ_SIZE / SAMPLING_INTERVAL) | ||
743 | |||
744 | struct bucket_item { | ||
745 | u32 count; | ||
746 | }; | ||
747 | |||
748 | struct heuristic_ws { | ||
749 | /* Partial copy of input data */ | ||
750 | u8 *sample; | ||
751 | u32 sample_size; | ||
752 | /* Buckets store counters for each byte value */ | ||
753 | struct bucket_item *bucket; | ||
754 | struct list_head list; | ||
755 | }; | ||
756 | |||
757 | static void free_heuristic_ws(struct list_head *ws) | ||
758 | { | ||
759 | struct heuristic_ws *workspace; | ||
760 | |||
761 | workspace = list_entry(ws, struct heuristic_ws, list); | ||
762 | |||
763 | kvfree(workspace->sample); | ||
764 | kfree(workspace->bucket); | ||
765 | kfree(workspace); | ||
766 | } | ||
767 | |||
768 | static struct list_head *alloc_heuristic_ws(void) | ||
769 | { | ||
770 | struct heuristic_ws *ws; | ||
771 | |||
772 | ws = kzalloc(sizeof(*ws), GFP_KERNEL); | ||
773 | if (!ws) | ||
774 | return ERR_PTR(-ENOMEM); | ||
775 | |||
776 | ws->sample = kvmalloc(MAX_SAMPLE_SIZE, GFP_KERNEL); | ||
777 | if (!ws->sample) | ||
778 | goto fail; | ||
779 | |||
780 | ws->bucket = kcalloc(BUCKET_SIZE, sizeof(*ws->bucket), GFP_KERNEL); | ||
781 | if (!ws->bucket) | ||
782 | goto fail; | ||
783 | |||
784 | INIT_LIST_HEAD(&ws->list); | ||
785 | return &ws->list; | ||
786 | fail: | ||
787 | free_heuristic_ws(&ws->list); | ||
788 | return ERR_PTR(-ENOMEM); | ||
789 | } | ||
790 | |||
791 | struct workspaces_list { | ||
710 | struct list_head idle_ws; | 792 | struct list_head idle_ws; |
711 | spinlock_t ws_lock; | 793 | spinlock_t ws_lock; |
712 | /* Number of free workspaces */ | 794 | /* Number of free workspaces */ |
@@ -715,7 +797,11 @@ static struct { | |||
715 | atomic_t total_ws; | 797 | atomic_t total_ws; |
716 | /* Waiters for a free workspace */ | 798 | /* Waiters for a free workspace */ |
717 | wait_queue_head_t ws_wait; | 799 | wait_queue_head_t ws_wait; |
718 | } btrfs_comp_ws[BTRFS_COMPRESS_TYPES]; | 800 | }; |
801 | |||
802 | static struct workspaces_list btrfs_comp_ws[BTRFS_COMPRESS_TYPES]; | ||
803 | |||
804 | static struct workspaces_list btrfs_heuristic_ws; | ||
719 | 805 | ||
720 | static const struct btrfs_compress_op * const btrfs_compress_op[] = { | 806 | static const struct btrfs_compress_op * const btrfs_compress_op[] = { |
721 | &btrfs_zlib_compress, | 807 | &btrfs_zlib_compress, |
@@ -725,11 +811,25 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = { | |||
725 | 811 | ||
726 | void __init btrfs_init_compress(void) | 812 | void __init btrfs_init_compress(void) |
727 | { | 813 | { |
814 | struct list_head *workspace; | ||
728 | int i; | 815 | int i; |
729 | 816 | ||
730 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | 817 | INIT_LIST_HEAD(&btrfs_heuristic_ws.idle_ws); |
731 | struct list_head *workspace; | 818 | spin_lock_init(&btrfs_heuristic_ws.ws_lock); |
819 | atomic_set(&btrfs_heuristic_ws.total_ws, 0); | ||
820 | init_waitqueue_head(&btrfs_heuristic_ws.ws_wait); | ||
821 | |||
822 | workspace = alloc_heuristic_ws(); | ||
823 | if (IS_ERR(workspace)) { | ||
824 | pr_warn( | ||
825 | "BTRFS: cannot preallocate heuristic workspace, will try later\n"); | ||
826 | } else { | ||
827 | atomic_set(&btrfs_heuristic_ws.total_ws, 1); | ||
828 | btrfs_heuristic_ws.free_ws = 1; | ||
829 | list_add(workspace, &btrfs_heuristic_ws.idle_ws); | ||
830 | } | ||
732 | 831 | ||
832 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | ||
733 | INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws); | 833 | INIT_LIST_HEAD(&btrfs_comp_ws[i].idle_ws); |
734 | spin_lock_init(&btrfs_comp_ws[i].ws_lock); | 834 | spin_lock_init(&btrfs_comp_ws[i].ws_lock); |
735 | atomic_set(&btrfs_comp_ws[i].total_ws, 0); | 835 | atomic_set(&btrfs_comp_ws[i].total_ws, 0); |
@@ -756,18 +856,32 @@ void __init btrfs_init_compress(void) | |||
756 | * Preallocation makes a forward progress guarantees and we do not return | 856 | * Preallocation makes a forward progress guarantees and we do not return |
757 | * errors. | 857 | * errors. |
758 | */ | 858 | */ |
759 | static struct list_head *find_workspace(int type) | 859 | static struct list_head *__find_workspace(int type, bool heuristic) |
760 | { | 860 | { |
761 | struct list_head *workspace; | 861 | struct list_head *workspace; |
762 | int cpus = num_online_cpus(); | 862 | int cpus = num_online_cpus(); |
763 | int idx = type - 1; | 863 | int idx = type - 1; |
764 | unsigned nofs_flag; | 864 | unsigned nofs_flag; |
865 | struct list_head *idle_ws; | ||
866 | spinlock_t *ws_lock; | ||
867 | atomic_t *total_ws; | ||
868 | wait_queue_head_t *ws_wait; | ||
869 | int *free_ws; | ||
870 | |||
871 | if (heuristic) { | ||
872 | idle_ws = &btrfs_heuristic_ws.idle_ws; | ||
873 | ws_lock = &btrfs_heuristic_ws.ws_lock; | ||
874 | total_ws = &btrfs_heuristic_ws.total_ws; | ||
875 | ws_wait = &btrfs_heuristic_ws.ws_wait; | ||
876 | free_ws = &btrfs_heuristic_ws.free_ws; | ||
877 | } else { | ||
878 | idle_ws = &btrfs_comp_ws[idx].idle_ws; | ||
879 | ws_lock = &btrfs_comp_ws[idx].ws_lock; | ||
880 | total_ws = &btrfs_comp_ws[idx].total_ws; | ||
881 | ws_wait = &btrfs_comp_ws[idx].ws_wait; | ||
882 | free_ws = &btrfs_comp_ws[idx].free_ws; | ||
883 | } | ||
765 | 884 | ||
766 | struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; | ||
767 | spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; | ||
768 | atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws; | ||
769 | wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; | ||
770 | int *free_ws = &btrfs_comp_ws[idx].free_ws; | ||
771 | again: | 885 | again: |
772 | spin_lock(ws_lock); | 886 | spin_lock(ws_lock); |
773 | if (!list_empty(idle_ws)) { | 887 | if (!list_empty(idle_ws)) { |
@@ -797,7 +911,10 @@ again: | |||
797 | * context of btrfs_compress_bio/btrfs_compress_pages | 911 | * context of btrfs_compress_bio/btrfs_compress_pages |
798 | */ | 912 | */ |
799 | nofs_flag = memalloc_nofs_save(); | 913 | nofs_flag = memalloc_nofs_save(); |
800 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | 914 | if (heuristic) |
915 | workspace = alloc_heuristic_ws(); | ||
916 | else | ||
917 | workspace = btrfs_compress_op[idx]->alloc_workspace(); | ||
801 | memalloc_nofs_restore(nofs_flag); | 918 | memalloc_nofs_restore(nofs_flag); |
802 | 919 | ||
803 | if (IS_ERR(workspace)) { | 920 | if (IS_ERR(workspace)) { |
@@ -828,18 +945,38 @@ again: | |||
828 | return workspace; | 945 | return workspace; |
829 | } | 946 | } |
830 | 947 | ||
948 | static struct list_head *find_workspace(int type) | ||
949 | { | ||
950 | return __find_workspace(type, false); | ||
951 | } | ||
952 | |||
831 | /* | 953 | /* |
832 | * put a workspace struct back on the list or free it if we have enough | 954 | * put a workspace struct back on the list or free it if we have enough |
833 | * idle ones sitting around | 955 | * idle ones sitting around |
834 | */ | 956 | */ |
835 | static void free_workspace(int type, struct list_head *workspace) | 957 | static void __free_workspace(int type, struct list_head *workspace, |
958 | bool heuristic) | ||
836 | { | 959 | { |
837 | int idx = type - 1; | 960 | int idx = type - 1; |
838 | struct list_head *idle_ws = &btrfs_comp_ws[idx].idle_ws; | 961 | struct list_head *idle_ws; |
839 | spinlock_t *ws_lock = &btrfs_comp_ws[idx].ws_lock; | 962 | spinlock_t *ws_lock; |
840 | atomic_t *total_ws = &btrfs_comp_ws[idx].total_ws; | 963 | atomic_t *total_ws; |
841 | wait_queue_head_t *ws_wait = &btrfs_comp_ws[idx].ws_wait; | 964 | wait_queue_head_t *ws_wait; |
842 | int *free_ws = &btrfs_comp_ws[idx].free_ws; | 965 | int *free_ws; |
966 | |||
967 | if (heuristic) { | ||
968 | idle_ws = &btrfs_heuristic_ws.idle_ws; | ||
969 | ws_lock = &btrfs_heuristic_ws.ws_lock; | ||
970 | total_ws = &btrfs_heuristic_ws.total_ws; | ||
971 | ws_wait = &btrfs_heuristic_ws.ws_wait; | ||
972 | free_ws = &btrfs_heuristic_ws.free_ws; | ||
973 | } else { | ||
974 | idle_ws = &btrfs_comp_ws[idx].idle_ws; | ||
975 | ws_lock = &btrfs_comp_ws[idx].ws_lock; | ||
976 | total_ws = &btrfs_comp_ws[idx].total_ws; | ||
977 | ws_wait = &btrfs_comp_ws[idx].ws_wait; | ||
978 | free_ws = &btrfs_comp_ws[idx].free_ws; | ||
979 | } | ||
843 | 980 | ||
844 | spin_lock(ws_lock); | 981 | spin_lock(ws_lock); |
845 | if (*free_ws <= num_online_cpus()) { | 982 | if (*free_ws <= num_online_cpus()) { |
@@ -850,7 +987,10 @@ static void free_workspace(int type, struct list_head *workspace) | |||
850 | } | 987 | } |
851 | spin_unlock(ws_lock); | 988 | spin_unlock(ws_lock); |
852 | 989 | ||
853 | btrfs_compress_op[idx]->free_workspace(workspace); | 990 | if (heuristic) |
991 | free_heuristic_ws(workspace); | ||
992 | else | ||
993 | btrfs_compress_op[idx]->free_workspace(workspace); | ||
854 | atomic_dec(total_ws); | 994 | atomic_dec(total_ws); |
855 | wake: | 995 | wake: |
856 | /* | 996 | /* |
@@ -861,6 +1001,11 @@ wake: | |||
861 | wake_up(ws_wait); | 1001 | wake_up(ws_wait); |
862 | } | 1002 | } |
863 | 1003 | ||
1004 | static void free_workspace(int type, struct list_head *ws) | ||
1005 | { | ||
1006 | return __free_workspace(type, ws, false); | ||
1007 | } | ||
1008 | |||
864 | /* | 1009 | /* |
865 | * cleanup function for module exit | 1010 | * cleanup function for module exit |
866 | */ | 1011 | */ |
@@ -869,6 +1014,13 @@ static void free_workspaces(void) | |||
869 | struct list_head *workspace; | 1014 | struct list_head *workspace; |
870 | int i; | 1015 | int i; |
871 | 1016 | ||
1017 | while (!list_empty(&btrfs_heuristic_ws.idle_ws)) { | ||
1018 | workspace = btrfs_heuristic_ws.idle_ws.next; | ||
1019 | list_del(workspace); | ||
1020 | free_heuristic_ws(workspace); | ||
1021 | atomic_dec(&btrfs_heuristic_ws.total_ws); | ||
1022 | } | ||
1023 | |||
872 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { | 1024 | for (i = 0; i < BTRFS_COMPRESS_TYPES; i++) { |
873 | while (!list_empty(&btrfs_comp_ws[i].idle_ws)) { | 1025 | while (!list_empty(&btrfs_comp_ws[i].idle_ws)) { |
874 | workspace = btrfs_comp_ws[i].idle_ws.next; | 1026 | workspace = btrfs_comp_ws[i].idle_ws.next; |
@@ -883,6 +1035,11 @@ static void free_workspaces(void) | |||
883 | * Given an address space and start and length, compress the bytes into @pages | 1035 | * Given an address space and start and length, compress the bytes into @pages |
884 | * that are allocated on demand. | 1036 | * that are allocated on demand. |
885 | * | 1037 | * |
1038 | * @type_level is encoded algorithm and level, where level 0 means whatever | ||
1039 | * default the algorithm chooses and is opaque here; | ||
1040 | * - compression algo are 0-3 | ||
1041 | * - the level are bits 4-7 | ||
1042 | * | ||
886 | * @out_pages is an in/out parameter, holds maximum number of pages to allocate | 1043 | * @out_pages is an in/out parameter, holds maximum number of pages to allocate |
887 | * and returns number of actually allocated pages | 1044 | * and returns number of actually allocated pages |
888 | * | 1045 | * |
@@ -897,7 +1054,7 @@ static void free_workspaces(void) | |||
897 | * @max_out tells us the max number of bytes that we're allowed to | 1054 | * @max_out tells us the max number of bytes that we're allowed to |
898 | * stuff into pages | 1055 | * stuff into pages |
899 | */ | 1056 | */ |
900 | int btrfs_compress_pages(int type, struct address_space *mapping, | 1057 | int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, |
901 | u64 start, struct page **pages, | 1058 | u64 start, struct page **pages, |
902 | unsigned long *out_pages, | 1059 | unsigned long *out_pages, |
903 | unsigned long *total_in, | 1060 | unsigned long *total_in, |
@@ -905,9 +1062,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping, | |||
905 | { | 1062 | { |
906 | struct list_head *workspace; | 1063 | struct list_head *workspace; |
907 | int ret; | 1064 | int ret; |
1065 | int type = type_level & 0xF; | ||
908 | 1066 | ||
909 | workspace = find_workspace(type); | 1067 | workspace = find_workspace(type); |
910 | 1068 | ||
1069 | btrfs_compress_op[type - 1]->set_level(workspace, type_level); | ||
911 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, | 1070 | ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping, |
912 | start, pages, | 1071 | start, pages, |
913 | out_pages, | 1072 | out_pages, |
@@ -1066,6 +1225,211 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, | |||
1066 | } | 1225 | } |
1067 | 1226 | ||
1068 | /* | 1227 | /* |
1228 | * Shannon Entropy calculation | ||
1229 | * | ||
1230 | * Pure byte distribution analysis fails to determine compressiability of data. | ||
1231 | * Try calculating entropy to estimate the average minimum number of bits | ||
1232 | * needed to encode the sampled data. | ||
1233 | * | ||
1234 | * For convenience, return the percentage of needed bits, instead of amount of | ||
1235 | * bits directly. | ||
1236 | * | ||
1237 | * @ENTROPY_LVL_ACEPTABLE - below that threshold, sample has low byte entropy | ||
1238 | * and can be compressible with high probability | ||
1239 | * | ||
1240 | * @ENTROPY_LVL_HIGH - data are not compressible with high probability | ||
1241 | * | ||
1242 | * Use of ilog2() decreases precision, we lower the LVL to 5 to compensate. | ||
1243 | */ | ||
1244 | #define ENTROPY_LVL_ACEPTABLE (65) | ||
1245 | #define ENTROPY_LVL_HIGH (80) | ||
1246 | |||
1247 | /* | ||
1248 | * For increasead precision in shannon_entropy calculation, | ||
1249 | * let's do pow(n, M) to save more digits after comma: | ||
1250 | * | ||
1251 | * - maximum int bit length is 64 | ||
1252 | * - ilog2(MAX_SAMPLE_SIZE) -> 13 | ||
1253 | * - 13 * 4 = 52 < 64 -> M = 4 | ||
1254 | * | ||
1255 | * So use pow(n, 4). | ||
1256 | */ | ||
1257 | static inline u32 ilog2_w(u64 n) | ||
1258 | { | ||
1259 | return ilog2(n * n * n * n); | ||
1260 | } | ||
1261 | |||
1262 | static u32 shannon_entropy(struct heuristic_ws *ws) | ||
1263 | { | ||
1264 | const u32 entropy_max = 8 * ilog2_w(2); | ||
1265 | u32 entropy_sum = 0; | ||
1266 | u32 p, p_base, sz_base; | ||
1267 | u32 i; | ||
1268 | |||
1269 | sz_base = ilog2_w(ws->sample_size); | ||
1270 | for (i = 0; i < BUCKET_SIZE && ws->bucket[i].count > 0; i++) { | ||
1271 | p = ws->bucket[i].count; | ||
1272 | p_base = ilog2_w(p); | ||
1273 | entropy_sum += p * (sz_base - p_base); | ||
1274 | } | ||
1275 | |||
1276 | entropy_sum /= ws->sample_size; | ||
1277 | return entropy_sum * 100 / entropy_max; | ||
1278 | } | ||
1279 | |||
1280 | /* Compare buckets by size, ascending */ | ||
1281 | static int bucket_comp_rev(const void *lv, const void *rv) | ||
1282 | { | ||
1283 | const struct bucket_item *l = (const struct bucket_item *)lv; | ||
1284 | const struct bucket_item *r = (const struct bucket_item *)rv; | ||
1285 | |||
1286 | return r->count - l->count; | ||
1287 | } | ||
1288 | |||
1289 | /* | ||
1290 | * Size of the core byte set - how many bytes cover 90% of the sample | ||
1291 | * | ||
1292 | * There are several types of structured binary data that use nearly all byte | ||
1293 | * values. The distribution can be uniform and counts in all buckets will be | ||
1294 | * nearly the same (eg. encrypted data). Unlikely to be compressible. | ||
1295 | * | ||
1296 | * Other possibility is normal (Gaussian) distribution, where the data could | ||
1297 | * be potentially compressible, but we have to take a few more steps to decide | ||
1298 | * how much. | ||
1299 | * | ||
1300 | * @BYTE_CORE_SET_LOW - main part of byte values repeated frequently, | ||
1301 | * compression algo can easy fix that | ||
1302 | * @BYTE_CORE_SET_HIGH - data have uniform distribution and with high | ||
1303 | * probability is not compressible | ||
1304 | */ | ||
1305 | #define BYTE_CORE_SET_LOW (64) | ||
1306 | #define BYTE_CORE_SET_HIGH (200) | ||
1307 | |||
1308 | static int byte_core_set_size(struct heuristic_ws *ws) | ||
1309 | { | ||
1310 | u32 i; | ||
1311 | u32 coreset_sum = 0; | ||
1312 | const u32 core_set_threshold = ws->sample_size * 90 / 100; | ||
1313 | struct bucket_item *bucket = ws->bucket; | ||
1314 | |||
1315 | /* Sort in reverse order */ | ||
1316 | sort(bucket, BUCKET_SIZE, sizeof(*bucket), &bucket_comp_rev, NULL); | ||
1317 | |||
1318 | for (i = 0; i < BYTE_CORE_SET_LOW; i++) | ||
1319 | coreset_sum += bucket[i].count; | ||
1320 | |||
1321 | if (coreset_sum > core_set_threshold) | ||
1322 | return i; | ||
1323 | |||
1324 | for (; i < BYTE_CORE_SET_HIGH && bucket[i].count > 0; i++) { | ||
1325 | coreset_sum += bucket[i].count; | ||
1326 | if (coreset_sum > core_set_threshold) | ||
1327 | break; | ||
1328 | } | ||
1329 | |||
1330 | return i; | ||
1331 | } | ||
1332 | |||
1333 | /* | ||
1334 | * Count byte values in buckets. | ||
1335 | * This heuristic can detect textual data (configs, xml, json, html, etc). | ||
1336 | * Because in most text-like data byte set is restricted to limited number of | ||
1337 | * possible characters, and that restriction in most cases makes data easy to | ||
1338 | * compress. | ||
1339 | * | ||
1340 | * @BYTE_SET_THRESHOLD - consider all data within this byte set size: | ||
1341 | * less - compressible | ||
1342 | * more - need additional analysis | ||
1343 | */ | ||
1344 | #define BYTE_SET_THRESHOLD (64) | ||
1345 | |||
1346 | static u32 byte_set_size(const struct heuristic_ws *ws) | ||
1347 | { | ||
1348 | u32 i; | ||
1349 | u32 byte_set_size = 0; | ||
1350 | |||
1351 | for (i = 0; i < BYTE_SET_THRESHOLD; i++) { | ||
1352 | if (ws->bucket[i].count > 0) | ||
1353 | byte_set_size++; | ||
1354 | } | ||
1355 | |||
1356 | /* | ||
1357 | * Continue collecting count of byte values in buckets. If the byte | ||
1358 | * set size is bigger then the threshold, it's pointless to continue, | ||
1359 | * the detection technique would fail for this type of data. | ||
1360 | */ | ||
1361 | for (; i < BUCKET_SIZE; i++) { | ||
1362 | if (ws->bucket[i].count > 0) { | ||
1363 | byte_set_size++; | ||
1364 | if (byte_set_size > BYTE_SET_THRESHOLD) | ||
1365 | return byte_set_size; | ||
1366 | } | ||
1367 | } | ||
1368 | |||
1369 | return byte_set_size; | ||
1370 | } | ||
1371 | |||
1372 | static bool sample_repeated_patterns(struct heuristic_ws *ws) | ||
1373 | { | ||
1374 | const u32 half_of_sample = ws->sample_size / 2; | ||
1375 | const u8 *data = ws->sample; | ||
1376 | |||
1377 | return memcmp(&data[0], &data[half_of_sample], half_of_sample) == 0; | ||
1378 | } | ||
1379 | |||
1380 | static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end, | ||
1381 | struct heuristic_ws *ws) | ||
1382 | { | ||
1383 | struct page *page; | ||
1384 | u64 index, index_end; | ||
1385 | u32 i, curr_sample_pos; | ||
1386 | u8 *in_data; | ||
1387 | |||
1388 | /* | ||
1389 | * Compression handles the input data by chunks of 128KiB | ||
1390 | * (defined by BTRFS_MAX_UNCOMPRESSED) | ||
1391 | * | ||
1392 | * We do the same for the heuristic and loop over the whole range. | ||
1393 | * | ||
1394 | * MAX_SAMPLE_SIZE - calculated under assumption that heuristic will | ||
1395 | * process no more than BTRFS_MAX_UNCOMPRESSED at a time. | ||
1396 | */ | ||
1397 | if (end - start > BTRFS_MAX_UNCOMPRESSED) | ||
1398 | end = start + BTRFS_MAX_UNCOMPRESSED; | ||
1399 | |||
1400 | index = start >> PAGE_SHIFT; | ||
1401 | index_end = end >> PAGE_SHIFT; | ||
1402 | |||
1403 | /* Don't miss unaligned end */ | ||
1404 | if (!IS_ALIGNED(end, PAGE_SIZE)) | ||
1405 | index_end++; | ||
1406 | |||
1407 | curr_sample_pos = 0; | ||
1408 | while (index < index_end) { | ||
1409 | page = find_get_page(inode->i_mapping, index); | ||
1410 | in_data = kmap(page); | ||
1411 | /* Handle case where the start is not aligned to PAGE_SIZE */ | ||
1412 | i = start % PAGE_SIZE; | ||
1413 | while (i < PAGE_SIZE - SAMPLING_READ_SIZE) { | ||
1414 | /* Don't sample any garbage from the last page */ | ||
1415 | if (start > end - SAMPLING_READ_SIZE) | ||
1416 | break; | ||
1417 | memcpy(&ws->sample[curr_sample_pos], &in_data[i], | ||
1418 | SAMPLING_READ_SIZE); | ||
1419 | i += SAMPLING_INTERVAL; | ||
1420 | start += SAMPLING_INTERVAL; | ||
1421 | curr_sample_pos += SAMPLING_READ_SIZE; | ||
1422 | } | ||
1423 | kunmap(page); | ||
1424 | put_page(page); | ||
1425 | |||
1426 | index++; | ||
1427 | } | ||
1428 | |||
1429 | ws->sample_size = curr_sample_pos; | ||
1430 | } | ||
1431 | |||
1432 | /* | ||
1069 | * Compression heuristic. | 1433 | * Compression heuristic. |
1070 | * | 1434 | * |
1071 | * For now is's a naive and optimistic 'return true', we'll extend the logic to | 1435 | * For now is's a naive and optimistic 'return true', we'll extend the logic to |
@@ -1082,18 +1446,87 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start, | |||
1082 | */ | 1446 | */ |
1083 | int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) | 1447 | int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end) |
1084 | { | 1448 | { |
1085 | u64 index = start >> PAGE_SHIFT; | 1449 | struct list_head *ws_list = __find_workspace(0, true); |
1086 | u64 end_index = end >> PAGE_SHIFT; | 1450 | struct heuristic_ws *ws; |
1087 | struct page *page; | 1451 | u32 i; |
1088 | int ret = 1; | 1452 | u8 byte; |
1453 | int ret = 0; | ||
1089 | 1454 | ||
1090 | while (index <= end_index) { | 1455 | ws = list_entry(ws_list, struct heuristic_ws, list); |
1091 | page = find_get_page(inode->i_mapping, index); | 1456 | |
1092 | kmap(page); | 1457 | heuristic_collect_sample(inode, start, end, ws); |
1093 | kunmap(page); | 1458 | |
1094 | put_page(page); | 1459 | if (sample_repeated_patterns(ws)) { |
1095 | index++; | 1460 | ret = 1; |
1461 | goto out; | ||
1462 | } | ||
1463 | |||
1464 | memset(ws->bucket, 0, sizeof(*ws->bucket)*BUCKET_SIZE); | ||
1465 | |||
1466 | for (i = 0; i < ws->sample_size; i++) { | ||
1467 | byte = ws->sample[i]; | ||
1468 | ws->bucket[byte].count++; | ||
1469 | } | ||
1470 | |||
1471 | i = byte_set_size(ws); | ||
1472 | if (i < BYTE_SET_THRESHOLD) { | ||
1473 | ret = 2; | ||
1474 | goto out; | ||
1475 | } | ||
1476 | |||
1477 | i = byte_core_set_size(ws); | ||
1478 | if (i <= BYTE_CORE_SET_LOW) { | ||
1479 | ret = 3; | ||
1480 | goto out; | ||
1096 | } | 1481 | } |
1097 | 1482 | ||
1483 | if (i >= BYTE_CORE_SET_HIGH) { | ||
1484 | ret = 0; | ||
1485 | goto out; | ||
1486 | } | ||
1487 | |||
1488 | i = shannon_entropy(ws); | ||
1489 | if (i <= ENTROPY_LVL_ACEPTABLE) { | ||
1490 | ret = 4; | ||
1491 | goto out; | ||
1492 | } | ||
1493 | |||
1494 | /* | ||
1495 | * For the levels below ENTROPY_LVL_HIGH, additional analysis would be | ||
1496 | * needed to give green light to compression. | ||
1497 | * | ||
1498 | * For now just assume that compression at that level is not worth the | ||
1499 | * resources because: | ||
1500 | * | ||
1501 | * 1. it is possible to defrag the data later | ||
1502 | * | ||
1503 | * 2. the data would turn out to be hardly compressible, eg. 150 byte | ||
1504 | * values, every bucket has counter at level ~54. The heuristic would | ||
1505 | * be confused. This can happen when data have some internal repeated | ||
1506 | * patterns like "abbacbbc...". This can be detected by analyzing | ||
1507 | * pairs of bytes, which is too costly. | ||
1508 | */ | ||
1509 | if (i < ENTROPY_LVL_HIGH) { | ||
1510 | ret = 5; | ||
1511 | goto out; | ||
1512 | } else { | ||
1513 | ret = 0; | ||
1514 | goto out; | ||
1515 | } | ||
1516 | |||
1517 | out: | ||
1518 | __free_workspace(0, ws_list, true); | ||
1098 | return ret; | 1519 | return ret; |
1099 | } | 1520 | } |
1521 | |||
1522 | unsigned int btrfs_compress_str2level(const char *str) | ||
1523 | { | ||
1524 | if (strncmp(str, "zlib", 4) != 0) | ||
1525 | return 0; | ||
1526 | |||
1527 | /* Accepted form: zlib:1 up to zlib:9 and nothing left after the number */ | ||
1528 | if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0) | ||
1529 | return str[5] - '0'; | ||
1530 | |||
1531 | return 0; | ||
1532 | } | ||
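The parser is strict: only the exact forms zlib:1 through zlib:9 yield a level, and everything else, including plain "zlib", zlib:0, or any trailing character after the digit, falls back to 0 (the default level). A self-contained userspace mirror of the function, useful as a quick sanity check of those edge cases:

	#include <assert.h>
	#include <string.h>

	/* Userspace copy of btrfs_compress_str2level()'s logic. */
	static unsigned int str2level(const char *str)
	{
		if (strncmp(str, "zlib", 4) != 0)
			return 0;
		/* Accepted form: zlib:1 up to zlib:9, nothing after the number. */
		if (str[4] == ':' && '1' <= str[5] && str[5] <= '9' && str[6] == 0)
			return str[5] - '0';
		return 0;
	}

	int main(void)
	{
		assert(str2level("zlib:3") == 3);
		assert(str2level("zlib") == 0);		/* plain "zlib": default level */
		assert(str2level("zlib:0") == 0);	/* below the accepted range */
		assert(str2level("zlib:10") == 0);	/* trailing character after the digit */
		assert(str2level("lzo") == 0);		/* not zlib at all */
		return 0;
	}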
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h index d2781ff8f994..da20755ebf21 100644 --- a/fs/btrfs/compression.h +++ b/fs/btrfs/compression.h | |||
@@ -76,7 +76,7 @@ struct compressed_bio { | |||
76 | void btrfs_init_compress(void); | 76 | void btrfs_init_compress(void); |
77 | void btrfs_exit_compress(void); | 77 | void btrfs_exit_compress(void); |
78 | 78 | ||
79 | int btrfs_compress_pages(int type, struct address_space *mapping, | 79 | int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping, |
80 | u64 start, struct page **pages, | 80 | u64 start, struct page **pages, |
81 | unsigned long *out_pages, | 81 | unsigned long *out_pages, |
82 | unsigned long *total_in, | 82 | unsigned long *total_in, |
@@ -95,6 +95,8 @@ blk_status_t btrfs_submit_compressed_write(struct inode *inode, u64 start, | |||
95 | blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, | 95 | blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, |
96 | int mirror_num, unsigned long bio_flags); | 96 | int mirror_num, unsigned long bio_flags); |
97 | 97 | ||
98 | unsigned btrfs_compress_str2level(const char *str); | ||
99 | |||
98 | enum btrfs_compression_type { | 100 | enum btrfs_compression_type { |
99 | BTRFS_COMPRESS_NONE = 0, | 101 | BTRFS_COMPRESS_NONE = 0, |
100 | BTRFS_COMPRESS_ZLIB = 1, | 102 | BTRFS_COMPRESS_ZLIB = 1, |
@@ -124,6 +126,8 @@ struct btrfs_compress_op { | |||
124 | struct page *dest_page, | 126 | struct page *dest_page, |
125 | unsigned long start_byte, | 127 | unsigned long start_byte, |
126 | size_t srclen, size_t destlen); | 128 | size_t srclen, size_t destlen); |
129 | |||
130 | void (*set_level)(struct list_head *ws, unsigned int type); | ||
127 | }; | 131 | }; |
128 | 132 | ||
129 | extern const struct btrfs_compress_op btrfs_zlib_compress; | 133 | extern const struct btrfs_compress_op btrfs_zlib_compress; |
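The btrfs_compress_pages() change folds the compression type and the new per-filesystem compress_level into a single type_level argument, and the new set_level() operation lets each algorithm extract its effective level from that word. How the bits are laid out is not visible in this hunk; the helpers below are a hypothetical sketch assuming the low nibble carries the btrfs_compression_type and the next nibble the level:

	/* Hypothetical packing helpers for the combined type_level word. */
	static inline unsigned int pack_type_level(unsigned int type,
						   unsigned int level)
	{
		return type | (level << 4);
	}

	static inline unsigned int type_of(unsigned int type_level)
	{
		return type_level & 0xF;
	}

	static inline unsigned int level_of(unsigned int type_level)
	{
		return (type_level & 0xF0) >> 4;	/* 0 would mean "use the default" */
	}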
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6d49db7d86be..531e0a8645b0 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -192,7 +192,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) | |||
192 | * tree until you end up with a lock on the root. A locked buffer | 192 | * tree until you end up with a lock on the root. A locked buffer |
193 | * is returned, with a reference held. | 193 | * is returned, with a reference held. |
194 | */ | 194 | */ |
195 | static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) | 195 | struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) |
196 | { | 196 | { |
197 | struct extent_buffer *eb; | 197 | struct extent_buffer *eb; |
198 | 198 | ||
@@ -5496,8 +5496,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5496 | goto out; | 5496 | goto out; |
5497 | } else if (left_end_reached) { | 5497 | } else if (left_end_reached) { |
5498 | if (right_level == 0) { | 5498 | if (right_level == 0) { |
5499 | ret = changed_cb(left_root, right_root, | 5499 | ret = changed_cb(left_path, right_path, |
5500 | left_path, right_path, | ||
5501 | &right_key, | 5500 | &right_key, |
5502 | BTRFS_COMPARE_TREE_DELETED, | 5501 | BTRFS_COMPARE_TREE_DELETED, |
5503 | ctx); | 5502 | ctx); |
@@ -5508,8 +5507,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5508 | continue; | 5507 | continue; |
5509 | } else if (right_end_reached) { | 5508 | } else if (right_end_reached) { |
5510 | if (left_level == 0) { | 5509 | if (left_level == 0) { |
5511 | ret = changed_cb(left_root, right_root, | 5510 | ret = changed_cb(left_path, right_path, |
5512 | left_path, right_path, | ||
5513 | &left_key, | 5511 | &left_key, |
5514 | BTRFS_COMPARE_TREE_NEW, | 5512 | BTRFS_COMPARE_TREE_NEW, |
5515 | ctx); | 5513 | ctx); |
@@ -5523,8 +5521,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5523 | if (left_level == 0 && right_level == 0) { | 5521 | if (left_level == 0 && right_level == 0) { |
5524 | cmp = btrfs_comp_cpu_keys(&left_key, &right_key); | 5522 | cmp = btrfs_comp_cpu_keys(&left_key, &right_key); |
5525 | if (cmp < 0) { | 5523 | if (cmp < 0) { |
5526 | ret = changed_cb(left_root, right_root, | 5524 | ret = changed_cb(left_path, right_path, |
5527 | left_path, right_path, | ||
5528 | &left_key, | 5525 | &left_key, |
5529 | BTRFS_COMPARE_TREE_NEW, | 5526 | BTRFS_COMPARE_TREE_NEW, |
5530 | ctx); | 5527 | ctx); |
@@ -5532,8 +5529,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5532 | goto out; | 5529 | goto out; |
5533 | advance_left = ADVANCE; | 5530 | advance_left = ADVANCE; |
5534 | } else if (cmp > 0) { | 5531 | } else if (cmp > 0) { |
5535 | ret = changed_cb(left_root, right_root, | 5532 | ret = changed_cb(left_path, right_path, |
5536 | left_path, right_path, | ||
5537 | &right_key, | 5533 | &right_key, |
5538 | BTRFS_COMPARE_TREE_DELETED, | 5534 | BTRFS_COMPARE_TREE_DELETED, |
5539 | ctx); | 5535 | ctx); |
@@ -5550,8 +5546,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5550 | result = BTRFS_COMPARE_TREE_CHANGED; | 5546 | result = BTRFS_COMPARE_TREE_CHANGED; |
5551 | else | 5547 | else |
5552 | result = BTRFS_COMPARE_TREE_SAME; | 5548 | result = BTRFS_COMPARE_TREE_SAME; |
5553 | ret = changed_cb(left_root, right_root, | 5549 | ret = changed_cb(left_path, right_path, |
5554 | left_path, right_path, | ||
5555 | &left_key, result, ctx); | 5550 | &left_key, result, ctx); |
5556 | if (ret < 0) | 5551 | if (ret < 0) |
5557 | goto out; | 5552 | goto out; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8fc690384c58..f7df5536ab61 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -523,7 +523,7 @@ struct btrfs_caching_control { | |||
523 | }; | 523 | }; |
524 | 524 | ||
525 | /* Once caching_thread() finds this much free space, it will wake up waiters. */ | 525 | /* Once caching_thread() finds this much free space, it will wake up waiters. */ |
526 | #define CACHING_CTL_WAKE_UP (1024 * 1024 * 2) | 526 | #define CACHING_CTL_WAKE_UP SZ_2M |
527 | 527 | ||
528 | struct btrfs_io_ctl { | 528 | struct btrfs_io_ctl { |
529 | void *cur, *orig; | 529 | void *cur, *orig; |
@@ -763,8 +763,6 @@ struct btrfs_fs_info { | |||
763 | * delayed dir index item | 763 | * delayed dir index item |
764 | */ | 764 | */ |
765 | struct btrfs_block_rsv global_block_rsv; | 765 | struct btrfs_block_rsv global_block_rsv; |
766 | /* block reservation for delay allocation */ | ||
767 | struct btrfs_block_rsv delalloc_block_rsv; | ||
768 | /* block reservation for metadata operations */ | 766 | /* block reservation for metadata operations */ |
769 | struct btrfs_block_rsv trans_block_rsv; | 767 | struct btrfs_block_rsv trans_block_rsv; |
770 | /* block reservation for chunk tree */ | 768 | /* block reservation for chunk tree */ |
@@ -790,6 +788,7 @@ struct btrfs_fs_info { | |||
790 | */ | 788 | */ |
791 | unsigned long pending_changes; | 789 | unsigned long pending_changes; |
792 | unsigned long compress_type:4; | 790 | unsigned long compress_type:4; |
791 | unsigned int compress_level; | ||
793 | int commit_interval; | 792 | int commit_interval; |
794 | /* | 793 | /* |
795 | * It is a suggestive number, the read side is safe even it gets a | 794 | * It is a suggestive number, the read side is safe even it gets a |
@@ -878,9 +877,6 @@ struct btrfs_fs_info { | |||
878 | rwlock_t tree_mod_log_lock; | 877 | rwlock_t tree_mod_log_lock; |
879 | struct rb_root tree_mod_log; | 878 | struct rb_root tree_mod_log; |
880 | 879 | ||
881 | atomic_t nr_async_submits; | ||
882 | atomic_t async_submit_draining; | ||
883 | atomic_t nr_async_bios; | ||
884 | atomic_t async_delalloc_pages; | 880 | atomic_t async_delalloc_pages; |
885 | atomic_t open_ioctl_trans; | 881 | atomic_t open_ioctl_trans; |
886 | 882 | ||
@@ -1100,6 +1096,11 @@ struct btrfs_fs_info { | |||
1100 | u32 nodesize; | 1096 | u32 nodesize; |
1101 | u32 sectorsize; | 1097 | u32 sectorsize; |
1102 | u32 stripesize; | 1098 | u32 stripesize; |
1099 | |||
1100 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY | ||
1101 | spinlock_t ref_verify_lock; | ||
1102 | struct rb_root block_tree; | ||
1103 | #endif | ||
1103 | }; | 1104 | }; |
1104 | 1105 | ||
1105 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) | 1106 | static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb) |
@@ -1338,6 +1339,7 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) | |||
1338 | #define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) | 1339 | #define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25) |
1339 | #define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) | 1340 | #define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26) |
1340 | #define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) | 1341 | #define BTRFS_MOUNT_NOLOGREPLAY (1 << 27) |
1342 | #define BTRFS_MOUNT_REF_VERIFY (1 << 28) | ||
1341 | 1343 | ||
1342 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) | 1344 | #define BTRFS_DEFAULT_COMMIT_INTERVAL (30) |
1343 | #define BTRFS_DEFAULT_MAX_INLINE (2048) | 1345 | #define BTRFS_DEFAULT_MAX_INLINE (2048) |
@@ -2639,7 +2641,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
2639 | struct extent_buffer *buf, | 2641 | struct extent_buffer *buf, |
2640 | u64 parent, int last_ref); | 2642 | u64 parent, int last_ref); |
2641 | int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | 2643 | int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
2642 | u64 root_objectid, u64 owner, | 2644 | struct btrfs_root *root, u64 owner, |
2643 | u64 offset, u64 ram_bytes, | 2645 | u64 offset, u64 ram_bytes, |
2644 | struct btrfs_key *ins); | 2646 | struct btrfs_key *ins); |
2645 | int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | 2647 | int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, |
@@ -2658,7 +2660,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | |||
2658 | u64 bytenr, u64 num_bytes, u64 flags, | 2660 | u64 bytenr, u64 num_bytes, u64 flags, |
2659 | int level, int is_data); | 2661 | int level, int is_data); |
2660 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 2662 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
2661 | struct btrfs_fs_info *fs_info, | 2663 | struct btrfs_root *root, |
2662 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, | 2664 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
2663 | u64 owner, u64 offset); | 2665 | u64 owner, u64 offset); |
2664 | 2666 | ||
@@ -2670,7 +2672,7 @@ void btrfs_prepare_extent_commit(struct btrfs_fs_info *fs_info); | |||
2670 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | 2672 | int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, |
2671 | struct btrfs_fs_info *fs_info); | 2673 | struct btrfs_fs_info *fs_info); |
2672 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 2674 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
2673 | struct btrfs_fs_info *fs_info, | 2675 | struct btrfs_root *root, |
2674 | u64 bytenr, u64 num_bytes, u64 parent, | 2676 | u64 bytenr, u64 num_bytes, u64 parent, |
2675 | u64 root_objectid, u64 owner, u64 offset); | 2677 | u64 root_objectid, u64 owner, u64 offset); |
2676 | 2678 | ||
@@ -2744,6 +2746,8 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, | |||
2744 | u64 *qgroup_reserved, bool use_global_rsv); | 2746 | u64 *qgroup_reserved, bool use_global_rsv); |
2745 | void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, | 2747 | void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, |
2746 | struct btrfs_block_rsv *rsv); | 2748 | struct btrfs_block_rsv *rsv); |
2749 | void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes); | ||
2750 | |||
2747 | int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); | 2751 | int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes); |
2748 | void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes); | 2752 | void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes); |
2749 | int btrfs_delalloc_reserve_space(struct inode *inode, | 2753 | int btrfs_delalloc_reserve_space(struct inode *inode, |
@@ -2751,6 +2755,9 @@ int btrfs_delalloc_reserve_space(struct inode *inode, | |||
2751 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); | 2755 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type); |
2752 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, | 2756 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, |
2753 | unsigned short type); | 2757 | unsigned short type); |
2758 | void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, | ||
2759 | struct btrfs_block_rsv *rsv, | ||
2760 | unsigned short type); | ||
2754 | void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, | 2761 | void btrfs_free_block_rsv(struct btrfs_fs_info *fs_info, |
2755 | struct btrfs_block_rsv *rsv); | 2762 | struct btrfs_block_rsv *rsv); |
2756 | void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv); | 2763 | void __btrfs_free_block_rsv(struct btrfs_block_rsv *rsv); |
@@ -2809,6 +2816,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info, | |||
2809 | const struct btrfs_key *new_key); | 2816 | const struct btrfs_key *new_key); |
2810 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root); | 2817 | struct extent_buffer *btrfs_root_node(struct btrfs_root *root); |
2811 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); | 2818 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); |
2819 | struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root); | ||
2812 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, | 2820 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, |
2813 | struct btrfs_key *key, int lowest_level, | 2821 | struct btrfs_key *key, int lowest_level, |
2814 | u64 min_trans); | 2822 | u64 min_trans); |
@@ -2821,9 +2829,7 @@ enum btrfs_compare_tree_result { | |||
2821 | BTRFS_COMPARE_TREE_CHANGED, | 2829 | BTRFS_COMPARE_TREE_CHANGED, |
2822 | BTRFS_COMPARE_TREE_SAME, | 2830 | BTRFS_COMPARE_TREE_SAME, |
2823 | }; | 2831 | }; |
2824 | typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root, | 2832 | typedef int (*btrfs_changed_cb_t)(struct btrfs_path *left_path, |
2825 | struct btrfs_root *right_root, | ||
2826 | struct btrfs_path *left_path, | ||
2827 | struct btrfs_path *right_path, | 2833 | struct btrfs_path *right_path, |
2828 | struct btrfs_key *key, | 2834 | struct btrfs_key *key, |
2829 | enum btrfs_compare_tree_result result, | 2835 | enum btrfs_compare_tree_result result, |
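With the root arguments dropped, a conforming send/compare callback shrinks accordingly. A hedged sketch is below; the trailing void *ctx parameter is inferred from the changed_cb() call sites in the ctree.c hunks above, since this hunk cuts off before the end of the typedef:

	/* Hypothetical callback matching the slimmed btrfs_changed_cb_t. */
	static int example_changed_cb(struct btrfs_path *left_path,
				      struct btrfs_path *right_path,
				      struct btrfs_key *key,
				      enum btrfs_compare_tree_result result,
				      void *ctx)
	{
		/* The roots are gone from the signature; anything a callback
		 * needs beyond the two paths must now travel through ctx. */
		return 0;
	}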
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 19e4ad2f3f2e..5d73f79ded8b 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -581,7 +581,6 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
581 | struct btrfs_block_rsv *dst_rsv; | 581 | struct btrfs_block_rsv *dst_rsv; |
582 | u64 num_bytes; | 582 | u64 num_bytes; |
583 | int ret; | 583 | int ret; |
584 | bool release = false; | ||
585 | 584 | ||
586 | src_rsv = trans->block_rsv; | 585 | src_rsv = trans->block_rsv; |
587 | dst_rsv = &fs_info->delayed_block_rsv; | 586 | dst_rsv = &fs_info->delayed_block_rsv; |
@@ -589,36 +588,13 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
589 | num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); | 588 | num_bytes = btrfs_calc_trans_metadata_size(fs_info, 1); |
590 | 589 | ||
591 | /* | 590 | /* |
592 | * If our block_rsv is the delalloc block reserve then check and see if | ||
593 | * we have our extra reservation for updating the inode. If not fall | ||
594 | * through and try to reserve space quickly. | ||
595 | * | ||
596 | * We used to try and steal from the delalloc block rsv or the global | ||
597 | * reserve, but we'd steal a full reservation, which isn't kind. We are | ||
598 | * here through delalloc which means we've likely just cowed down close | ||
599 | * to the leaf that contains the inode, so we would steal less just | ||
600 | * doing the fallback inode update, so if we do end up having to steal | ||
601 | * from the global block rsv we hopefully only steal one or two blocks | ||
602 | * worth which is less likely to hurt us. | ||
603 | */ | ||
604 | if (src_rsv && src_rsv->type == BTRFS_BLOCK_RSV_DELALLOC) { | ||
605 | spin_lock(&inode->lock); | ||
606 | if (test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | ||
607 | &inode->runtime_flags)) | ||
608 | release = true; | ||
609 | else | ||
610 | src_rsv = NULL; | ||
611 | spin_unlock(&inode->lock); | ||
612 | } | ||
613 | |||
614 | /* | ||
615 | * btrfs_dirty_inode will update the inode under btrfs_join_transaction | 591 | * btrfs_dirty_inode will update the inode under btrfs_join_transaction |
616 | * which doesn't reserve space for speed. This is a problem since we | 592 | * which doesn't reserve space for speed. This is a problem since we |
617 | * still need to reserve space for this update, so try to reserve the | 593 | * still need to reserve space for this update, so try to reserve the |
618 | * space. | 594 | * space. |
619 | * | 595 | * |
620 | * Now if src_rsv == delalloc_block_rsv we'll let it just steal since | 596 | * Now if src_rsv == delalloc_block_rsv we'll let it just steal since |
621 | * we're accounted for. | 597 | * we always reserve enough to update the inode item. |
622 | */ | 598 | */ |
623 | if (!src_rsv || (!trans->bytes_reserved && | 599 | if (!src_rsv || (!trans->bytes_reserved && |
624 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { | 600 | src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { |
@@ -643,32 +619,12 @@ static int btrfs_delayed_inode_reserve_metadata( | |||
643 | } | 619 | } |
644 | 620 | ||
645 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); | 621 | ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes, 1); |
646 | |||
647 | /* | ||
648 | * Migrate only takes a reservation, it doesn't touch the size of the | ||
649 | * block_rsv. This is to simplify people who don't normally have things | ||
650 | * migrated from their block rsv. If they go to release their | ||
651 | * reservation, that will decrease the size as well, so if migrate | ||
652 | * reduced size we'd end up with a negative size. But for the | ||
653 | * delalloc_meta_reserved stuff we will only know to drop 1 reservation, | ||
654 | * but we could in fact do this reserve/migrate dance several times | ||
655 | * between the time we did the original reservation and we'd clean it | ||
656 | * up. So to take care of this, release the space for the meta | ||
657 | * reservation here. I think it may be time for a documentation page on | ||
658 | * how block rsvs work. | ||
659 | */ | ||
660 | if (!ret) { | 622 | if (!ret) { |
661 | trace_btrfs_space_reservation(fs_info, "delayed_inode", | 623 | trace_btrfs_space_reservation(fs_info, "delayed_inode", |
662 | btrfs_ino(inode), num_bytes, 1); | 624 | btrfs_ino(inode), num_bytes, 1); |
663 | node->bytes_reserved = num_bytes; | 625 | node->bytes_reserved = num_bytes; |
664 | } | 626 | } |
665 | 627 | ||
666 | if (release) { | ||
667 | trace_btrfs_space_reservation(fs_info, "delalloc", | ||
668 | btrfs_ino(inode), num_bytes, 0); | ||
669 | btrfs_block_rsv_release(fs_info, src_rsv, num_bytes); | ||
670 | } | ||
671 | |||
672 | return ret; | 628 | return ret; |
673 | } | 629 | } |
674 | 630 | ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 93ffa898df6d..83be8f9fd906 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -40,10 +40,10 @@ struct kmem_cache *btrfs_delayed_extent_op_cachep; | |||
40 | /* | 40 | /* |
41 | * compare two delayed tree backrefs with same bytenr and type | 41 | * compare two delayed tree backrefs with same bytenr and type |
42 | */ | 42 | */ |
43 | static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, | 43 | static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref1, |
44 | struct btrfs_delayed_tree_ref *ref1, int type) | 44 | struct btrfs_delayed_tree_ref *ref2) |
45 | { | 45 | { |
46 | if (type == BTRFS_TREE_BLOCK_REF_KEY) { | 46 | if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) { |
47 | if (ref1->root < ref2->root) | 47 | if (ref1->root < ref2->root) |
48 | return -1; | 48 | return -1; |
49 | if (ref1->root > ref2->root) | 49 | if (ref1->root > ref2->root) |
@@ -60,8 +60,8 @@ static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2, | |||
60 | /* | 60 | /* |
61 | * compare two delayed data backrefs with same bytenr and type | 61 | * compare two delayed data backrefs with same bytenr and type |
62 | */ | 62 | */ |
63 | static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, | 63 | static int comp_data_refs(struct btrfs_delayed_data_ref *ref1, |
64 | struct btrfs_delayed_data_ref *ref1) | 64 | struct btrfs_delayed_data_ref *ref2) |
65 | { | 65 | { |
66 | if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) { | 66 | if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) { |
67 | if (ref1->root < ref2->root) | 67 | if (ref1->root < ref2->root) |
@@ -85,6 +85,34 @@ static int comp_data_refs(struct btrfs_delayed_data_ref *ref2, | |||
85 | return 0; | 85 | return 0; |
86 | } | 86 | } |
87 | 87 | ||
88 | static int comp_refs(struct btrfs_delayed_ref_node *ref1, | ||
89 | struct btrfs_delayed_ref_node *ref2, | ||
90 | bool check_seq) | ||
91 | { | ||
92 | int ret = 0; | ||
93 | |||
94 | if (ref1->type < ref2->type) | ||
95 | return -1; | ||
96 | if (ref1->type > ref2->type) | ||
97 | return 1; | ||
98 | if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY || | ||
99 | ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) | ||
100 | ret = comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref1), | ||
101 | btrfs_delayed_node_to_tree_ref(ref2)); | ||
102 | else | ||
103 | ret = comp_data_refs(btrfs_delayed_node_to_data_ref(ref1), | ||
104 | btrfs_delayed_node_to_data_ref(ref2)); | ||
105 | if (ret) | ||
106 | return ret; | ||
107 | if (check_seq) { | ||
108 | if (ref1->seq < ref2->seq) | ||
109 | return -1; | ||
110 | if (ref1->seq > ref2->seq) | ||
111 | return 1; | ||
112 | } | ||
113 | return 0; | ||
114 | } | ||
115 | |||
88 | /* insert a new ref to head ref rbtree */ | 116 | /* insert a new ref to head ref rbtree */ |
89 | static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, | 117 | static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, |
90 | struct rb_node *node) | 118 | struct rb_node *node) |
@@ -96,15 +124,43 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, | |||
96 | u64 bytenr; | 124 | u64 bytenr; |
97 | 125 | ||
98 | ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node); | 126 | ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node); |
99 | bytenr = ins->node.bytenr; | 127 | bytenr = ins->bytenr; |
100 | while (*p) { | 128 | while (*p) { |
101 | parent_node = *p; | 129 | parent_node = *p; |
102 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_head, | 130 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_head, |
103 | href_node); | 131 | href_node); |
104 | 132 | ||
105 | if (bytenr < entry->node.bytenr) | 133 | if (bytenr < entry->bytenr) |
134 | p = &(*p)->rb_left; | ||
135 | else if (bytenr > entry->bytenr) | ||
136 | p = &(*p)->rb_right; | ||
137 | else | ||
138 | return entry; | ||
139 | } | ||
140 | |||
141 | rb_link_node(node, parent_node, p); | ||
142 | rb_insert_color(node, root); | ||
143 | return NULL; | ||
144 | } | ||
145 | |||
146 | static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | ||
147 | struct btrfs_delayed_ref_node *ins) | ||
148 | { | ||
149 | struct rb_node **p = &root->rb_node; | ||
150 | struct rb_node *node = &ins->ref_node; | ||
151 | struct rb_node *parent_node = NULL; | ||
152 | struct btrfs_delayed_ref_node *entry; | ||
153 | |||
154 | while (*p) { | ||
155 | int comp; | ||
156 | |||
157 | parent_node = *p; | ||
158 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, | ||
159 | ref_node); | ||
160 | comp = comp_refs(ins, entry, true); | ||
161 | if (comp < 0) | ||
106 | p = &(*p)->rb_left; | 162 | p = &(*p)->rb_left; |
107 | else if (bytenr > entry->node.bytenr) | 163 | else if (comp > 0) |
108 | p = &(*p)->rb_right; | 164 | p = &(*p)->rb_right; |
109 | else | 165 | else |
110 | return entry; | 166 | return entry; |
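Note the two comparison modes: tree_insert() orders the rb-tree with comp_refs(..., true), so only refs identical up to and including their sequence number collide on insert and get merged by the caller, while merge_ref() further down passes check_seq=false so that, once no tree-mod-log reader depends on the intervening sequence points, neighbouring refs that differ only in seq can cancel. A toy, self-contained illustration of the two modes (struct and field names here are invented, not the kernel's):

	#include <stdio.h>

	struct toy_ref { int type; int root; unsigned long long seq; int ref_mod; };

	/* Same shape as comp_refs(): type first, then the ref fields, then
	 * optionally the sequence number. */
	static int toy_comp(const struct toy_ref *a, const struct toy_ref *b,
			    int check_seq)
	{
		if (a->type != b->type)
			return a->type < b->type ? -1 : 1;
		if (a->root != b->root)
			return a->root < b->root ? -1 : 1;
		if (check_seq && a->seq != b->seq)
			return a->seq < b->seq ? -1 : 1;
		return 0;
	}

	int main(void)
	{
		struct toy_ref add  = { 1, 5, 10,  1 };
		struct toy_ref drop = { 1, 5, 11, -1 };

		printf("insert-time: %d\n", toy_comp(&add, &drop, 1));	/* -1: kept apart */
		printf("merge-time:  %d\n", toy_comp(&add, &drop, 0));	/*  0: can cancel */
		return 0;
	}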
@@ -133,15 +189,15 @@ find_ref_head(struct rb_root *root, u64 bytenr, | |||
133 | while (n) { | 189 | while (n) { |
134 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); | 190 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); |
135 | 191 | ||
136 | if (bytenr < entry->node.bytenr) | 192 | if (bytenr < entry->bytenr) |
137 | n = n->rb_left; | 193 | n = n->rb_left; |
138 | else if (bytenr > entry->node.bytenr) | 194 | else if (bytenr > entry->bytenr) |
139 | n = n->rb_right; | 195 | n = n->rb_right; |
140 | else | 196 | else |
141 | return entry; | 197 | return entry; |
142 | } | 198 | } |
143 | if (entry && return_bigger) { | 199 | if (entry && return_bigger) { |
144 | if (bytenr > entry->node.bytenr) { | 200 | if (bytenr > entry->bytenr) { |
145 | n = rb_next(&entry->href_node); | 201 | n = rb_next(&entry->href_node); |
146 | if (!n) | 202 | if (!n) |
147 | n = rb_first(root); | 203 | n = rb_first(root); |
@@ -164,17 +220,17 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | |||
164 | if (mutex_trylock(&head->mutex)) | 220 | if (mutex_trylock(&head->mutex)) |
165 | return 0; | 221 | return 0; |
166 | 222 | ||
167 | refcount_inc(&head->node.refs); | 223 | refcount_inc(&head->refs); |
168 | spin_unlock(&delayed_refs->lock); | 224 | spin_unlock(&delayed_refs->lock); |
169 | 225 | ||
170 | mutex_lock(&head->mutex); | 226 | mutex_lock(&head->mutex); |
171 | spin_lock(&delayed_refs->lock); | 227 | spin_lock(&delayed_refs->lock); |
172 | if (!head->node.in_tree) { | 228 | if (RB_EMPTY_NODE(&head->href_node)) { |
173 | mutex_unlock(&head->mutex); | 229 | mutex_unlock(&head->mutex); |
174 | btrfs_put_delayed_ref(&head->node); | 230 | btrfs_put_delayed_ref_head(head); |
175 | return -EAGAIN; | 231 | return -EAGAIN; |
176 | } | 232 | } |
177 | btrfs_put_delayed_ref(&head->node); | 233 | btrfs_put_delayed_ref_head(head); |
178 | return 0; | 234 | return 0; |
179 | } | 235 | } |
180 | 236 | ||
@@ -183,15 +239,11 @@ static inline void drop_delayed_ref(struct btrfs_trans_handle *trans, | |||
183 | struct btrfs_delayed_ref_head *head, | 239 | struct btrfs_delayed_ref_head *head, |
184 | struct btrfs_delayed_ref_node *ref) | 240 | struct btrfs_delayed_ref_node *ref) |
185 | { | 241 | { |
186 | if (btrfs_delayed_ref_is_head(ref)) { | 242 | assert_spin_locked(&head->lock); |
187 | head = btrfs_delayed_node_to_head(ref); | 243 | rb_erase(&ref->ref_node, &head->ref_tree); |
188 | rb_erase(&head->href_node, &delayed_refs->href_root); | 244 | RB_CLEAR_NODE(&ref->ref_node); |
189 | } else { | 245 | if (!list_empty(&ref->add_list)) |
190 | assert_spin_locked(&head->lock); | 246 | list_del(&ref->add_list); |
191 | list_del(&ref->list); | ||
192 | if (!list_empty(&ref->add_list)) | ||
193 | list_del(&ref->add_list); | ||
194 | } | ||
195 | ref->in_tree = 0; | 247 | ref->in_tree = 0; |
196 | btrfs_put_delayed_ref(ref); | 248 | btrfs_put_delayed_ref(ref); |
197 | atomic_dec(&delayed_refs->num_entries); | 249 | atomic_dec(&delayed_refs->num_entries); |
@@ -206,36 +258,18 @@ static bool merge_ref(struct btrfs_trans_handle *trans, | |||
206 | u64 seq) | 258 | u64 seq) |
207 | { | 259 | { |
208 | struct btrfs_delayed_ref_node *next; | 260 | struct btrfs_delayed_ref_node *next; |
261 | struct rb_node *node = rb_next(&ref->ref_node); | ||
209 | bool done = false; | 262 | bool done = false; |
210 | 263 | ||
211 | next = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, | 264 | while (!done && node) { |
212 | list); | ||
213 | while (!done && &next->list != &head->ref_list) { | ||
214 | int mod; | 265 | int mod; |
215 | struct btrfs_delayed_ref_node *next2; | ||
216 | |||
217 | next2 = list_next_entry(next, list); | ||
218 | |||
219 | if (next == ref) | ||
220 | goto next; | ||
221 | 266 | ||
267 | next = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); | ||
268 | node = rb_next(node); | ||
222 | if (seq && next->seq >= seq) | 269 | if (seq && next->seq >= seq) |
223 | goto next; | 270 | break; |
224 | 271 | if (comp_refs(ref, next, false)) | |
225 | if (next->type != ref->type) | 272 | break; |
226 | goto next; | ||
227 | |||
228 | if ((ref->type == BTRFS_TREE_BLOCK_REF_KEY || | ||
229 | ref->type == BTRFS_SHARED_BLOCK_REF_KEY) && | ||
230 | comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref), | ||
231 | btrfs_delayed_node_to_tree_ref(next), | ||
232 | ref->type)) | ||
233 | goto next; | ||
234 | if ((ref->type == BTRFS_EXTENT_DATA_REF_KEY || | ||
235 | ref->type == BTRFS_SHARED_DATA_REF_KEY) && | ||
236 | comp_data_refs(btrfs_delayed_node_to_data_ref(ref), | ||
237 | btrfs_delayed_node_to_data_ref(next))) | ||
238 | goto next; | ||
239 | 273 | ||
240 | if (ref->action == next->action) { | 274 | if (ref->action == next->action) { |
241 | mod = next->ref_mod; | 275 | mod = next->ref_mod; |
@@ -259,8 +293,6 @@ static bool merge_ref(struct btrfs_trans_handle *trans, | |||
259 | WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || | 293 | WARN_ON(ref->type == BTRFS_TREE_BLOCK_REF_KEY || |
260 | ref->type == BTRFS_SHARED_BLOCK_REF_KEY); | 294 | ref->type == BTRFS_SHARED_BLOCK_REF_KEY); |
261 | } | 295 | } |
262 | next: | ||
263 | next = next2; | ||
264 | } | 296 | } |
265 | 297 | ||
266 | return done; | 298 | return done; |
@@ -272,11 +304,12 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, | |||
272 | struct btrfs_delayed_ref_head *head) | 304 | struct btrfs_delayed_ref_head *head) |
273 | { | 305 | { |
274 | struct btrfs_delayed_ref_node *ref; | 306 | struct btrfs_delayed_ref_node *ref; |
307 | struct rb_node *node; | ||
275 | u64 seq = 0; | 308 | u64 seq = 0; |
276 | 309 | ||
277 | assert_spin_locked(&head->lock); | 310 | assert_spin_locked(&head->lock); |
278 | 311 | ||
279 | if (list_empty(&head->ref_list)) | 312 | if (RB_EMPTY_ROOT(&head->ref_tree)) |
280 | return; | 313 | return; |
281 | 314 | ||
282 | /* We don't have too many refs to merge for data. */ | 315 | /* We don't have too many refs to merge for data. */ |
@@ -293,22 +326,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, | |||
293 | } | 326 | } |
294 | spin_unlock(&fs_info->tree_mod_seq_lock); | 327 | spin_unlock(&fs_info->tree_mod_seq_lock); |
295 | 328 | ||
296 | ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, | 329 | again: |
297 | list); | 330 | for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) { |
298 | while (&ref->list != &head->ref_list) { | 331 | ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); |
299 | if (seq && ref->seq >= seq) | 332 | if (seq && ref->seq >= seq) |
300 | goto next; | ||
301 | |||
302 | if (merge_ref(trans, delayed_refs, head, ref, seq)) { | ||
303 | if (list_empty(&head->ref_list)) | ||
304 | break; | ||
305 | ref = list_first_entry(&head->ref_list, | ||
306 | struct btrfs_delayed_ref_node, | ||
307 | list); | ||
308 | continue; | 333 | continue; |
309 | } | 334 | if (merge_ref(trans, delayed_refs, head, ref, seq)) |
310 | next: | 335 | goto again; |
311 | ref = list_next_entry(ref, list); | ||
312 | } | 336 | } |
313 | } | 337 | } |
314 | 338 | ||
@@ -380,8 +404,8 @@ again: | |||
380 | head->processing = 1; | 404 | head->processing = 1; |
381 | WARN_ON(delayed_refs->num_heads_ready == 0); | 405 | WARN_ON(delayed_refs->num_heads_ready == 0); |
382 | delayed_refs->num_heads_ready--; | 406 | delayed_refs->num_heads_ready--; |
383 | delayed_refs->run_delayed_start = head->node.bytenr + | 407 | delayed_refs->run_delayed_start = head->bytenr + |
384 | head->node.num_bytes; | 408 | head->num_bytes; |
385 | return head; | 409 | return head; |
386 | } | 410 | } |
387 | 411 | ||
@@ -391,37 +415,19 @@ again: | |||
391 | * Return 0 for insert. | 415 | * Return 0 for insert. |
392 | * Return >0 for merge. | 416 | * Return >0 for merge. |
393 | */ | 417 | */ |
394 | static int | 418 | static int insert_delayed_ref(struct btrfs_trans_handle *trans, |
395 | add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, | 419 | struct btrfs_delayed_ref_root *root, |
396 | struct btrfs_delayed_ref_root *root, | 420 | struct btrfs_delayed_ref_head *href, |
397 | struct btrfs_delayed_ref_head *href, | 421 | struct btrfs_delayed_ref_node *ref) |
398 | struct btrfs_delayed_ref_node *ref) | ||
399 | { | 422 | { |
400 | struct btrfs_delayed_ref_node *exist; | 423 | struct btrfs_delayed_ref_node *exist; |
401 | int mod; | 424 | int mod; |
402 | int ret = 0; | 425 | int ret = 0; |
403 | 426 | ||
404 | spin_lock(&href->lock); | 427 | spin_lock(&href->lock); |
405 | /* Check whether we can merge the tail node with ref */ | 428 | exist = tree_insert(&href->ref_tree, ref); |
406 | if (list_empty(&href->ref_list)) | 429 | if (!exist) |
407 | goto add_tail; | 430 | goto inserted; |
408 | exist = list_entry(href->ref_list.prev, struct btrfs_delayed_ref_node, | ||
409 | list); | ||
410 | /* No need to compare bytenr nor is_head */ | ||
411 | if (exist->type != ref->type || exist->seq != ref->seq) | ||
412 | goto add_tail; | ||
413 | |||
414 | if ((exist->type == BTRFS_TREE_BLOCK_REF_KEY || | ||
415 | exist->type == BTRFS_SHARED_BLOCK_REF_KEY) && | ||
416 | comp_tree_refs(btrfs_delayed_node_to_tree_ref(exist), | ||
417 | btrfs_delayed_node_to_tree_ref(ref), | ||
418 | ref->type)) | ||
419 | goto add_tail; | ||
420 | if ((exist->type == BTRFS_EXTENT_DATA_REF_KEY || | ||
421 | exist->type == BTRFS_SHARED_DATA_REF_KEY) && | ||
422 | comp_data_refs(btrfs_delayed_node_to_data_ref(exist), | ||
423 | btrfs_delayed_node_to_data_ref(ref))) | ||
424 | goto add_tail; | ||
425 | 431 | ||
426 | /* Now we are sure we can merge */ | 432 | /* Now we are sure we can merge */ |
427 | ret = 1; | 433 | ret = 1; |
@@ -452,9 +458,7 @@ add_delayed_ref_tail_merge(struct btrfs_trans_handle *trans, | |||
452 | drop_delayed_ref(trans, root, href, exist); | 458 | drop_delayed_ref(trans, root, href, exist); |
453 | spin_unlock(&href->lock); | 459 | spin_unlock(&href->lock); |
454 | return ret; | 460 | return ret; |
455 | 461 | inserted: | |
456 | add_tail: | ||
457 | list_add_tail(&ref->list, &href->ref_list); | ||
458 | if (ref->action == BTRFS_ADD_DELAYED_REF) | 462 | if (ref->action == BTRFS_ADD_DELAYED_REF) |
459 | list_add_tail(&ref->add_list, &href->ref_add_list); | 463 | list_add_tail(&ref->add_list, &href->ref_add_list); |
460 | atomic_inc(&root->num_entries); | 464 | atomic_inc(&root->num_entries); |
@@ -469,20 +473,16 @@ add_tail: | |||
469 | */ | 473 | */ |
470 | static noinline void | 474 | static noinline void |
471 | update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, | 475 | update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, |
472 | struct btrfs_delayed_ref_node *existing, | 476 | struct btrfs_delayed_ref_head *existing, |
473 | struct btrfs_delayed_ref_node *update, | 477 | struct btrfs_delayed_ref_head *update, |
474 | int *old_ref_mod_ret) | 478 | int *old_ref_mod_ret) |
475 | { | 479 | { |
476 | struct btrfs_delayed_ref_head *existing_ref; | ||
477 | struct btrfs_delayed_ref_head *ref; | ||
478 | int old_ref_mod; | 480 | int old_ref_mod; |
479 | 481 | ||
480 | existing_ref = btrfs_delayed_node_to_head(existing); | 482 | BUG_ON(existing->is_data != update->is_data); |
481 | ref = btrfs_delayed_node_to_head(update); | ||
482 | BUG_ON(existing_ref->is_data != ref->is_data); | ||
483 | 483 | ||
484 | spin_lock(&existing_ref->lock); | 484 | spin_lock(&existing->lock); |
485 | if (ref->must_insert_reserved) { | 485 | if (update->must_insert_reserved) { |
486 | /* if the extent was freed and then | 486 | /* if the extent was freed and then |
487 | * reallocated before the delayed ref | 487 | * reallocated before the delayed ref |
488 | * entries were processed, we can end up | 488 | * entries were processed, we can end up |
@@ -490,7 +490,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, | |||
490 | * the must_insert_reserved flag set. | 490 | * the must_insert_reserved flag set. |
491 | * Set it again here | 491 | * Set it again here |
492 | */ | 492 | */ |
493 | existing_ref->must_insert_reserved = ref->must_insert_reserved; | 493 | existing->must_insert_reserved = update->must_insert_reserved; |
494 | 494 | ||
495 | /* | 495 | /* |
496 | * update the num_bytes so we make sure the accounting | 496 | * update the num_bytes so we make sure the accounting |
@@ -500,22 +500,22 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, | |||
500 | 500 | ||
501 | } | 501 | } |
502 | 502 | ||
503 | if (ref->extent_op) { | 503 | if (update->extent_op) { |
504 | if (!existing_ref->extent_op) { | 504 | if (!existing->extent_op) { |
505 | existing_ref->extent_op = ref->extent_op; | 505 | existing->extent_op = update->extent_op; |
506 | } else { | 506 | } else { |
507 | if (ref->extent_op->update_key) { | 507 | if (update->extent_op->update_key) { |
508 | memcpy(&existing_ref->extent_op->key, | 508 | memcpy(&existing->extent_op->key, |
509 | &ref->extent_op->key, | 509 | &update->extent_op->key, |
510 | sizeof(ref->extent_op->key)); | 510 | sizeof(update->extent_op->key)); |
511 | existing_ref->extent_op->update_key = true; | 511 | existing->extent_op->update_key = true; |
512 | } | 512 | } |
513 | if (ref->extent_op->update_flags) { | 513 | if (update->extent_op->update_flags) { |
514 | existing_ref->extent_op->flags_to_set |= | 514 | existing->extent_op->flags_to_set |= |
515 | ref->extent_op->flags_to_set; | 515 | update->extent_op->flags_to_set; |
516 | existing_ref->extent_op->update_flags = true; | 516 | existing->extent_op->update_flags = true; |
517 | } | 517 | } |
518 | btrfs_free_delayed_extent_op(ref->extent_op); | 518 | btrfs_free_delayed_extent_op(update->extent_op); |
519 | } | 519 | } |
520 | } | 520 | } |
521 | /* | 521 | /* |
@@ -523,23 +523,23 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, | |||
523 | * only need the lock for this case cause we could be processing it | 523 | * only need the lock for this case cause we could be processing it |
524 | * currently, for refs we just added we know we're a-ok. | 524 | * currently, for refs we just added we know we're a-ok. |
525 | */ | 525 | */ |
526 | old_ref_mod = existing_ref->total_ref_mod; | 526 | old_ref_mod = existing->total_ref_mod; |
527 | if (old_ref_mod_ret) | 527 | if (old_ref_mod_ret) |
528 | *old_ref_mod_ret = old_ref_mod; | 528 | *old_ref_mod_ret = old_ref_mod; |
529 | existing->ref_mod += update->ref_mod; | 529 | existing->ref_mod += update->ref_mod; |
530 | existing_ref->total_ref_mod += update->ref_mod; | 530 | existing->total_ref_mod += update->ref_mod; |
531 | 531 | ||
532 | /* | 532 | /* |
533 | * If we are going to from a positive ref mod to a negative or vice | 533 | * If we are going to from a positive ref mod to a negative or vice |
534 | * versa we need to make sure to adjust pending_csums accordingly. | 534 | * versa we need to make sure to adjust pending_csums accordingly. |
535 | */ | 535 | */ |
536 | if (existing_ref->is_data) { | 536 | if (existing->is_data) { |
537 | if (existing_ref->total_ref_mod >= 0 && old_ref_mod < 0) | 537 | if (existing->total_ref_mod >= 0 && old_ref_mod < 0) |
538 | delayed_refs->pending_csums -= existing->num_bytes; | 538 | delayed_refs->pending_csums -= existing->num_bytes; |
539 | if (existing_ref->total_ref_mod < 0 && old_ref_mod >= 0) | 539 | if (existing->total_ref_mod < 0 && old_ref_mod >= 0) |
540 | delayed_refs->pending_csums += existing->num_bytes; | 540 | delayed_refs->pending_csums += existing->num_bytes; |
541 | } | 541 | } |
542 | spin_unlock(&existing_ref->lock); | 542 | spin_unlock(&existing->lock); |
543 | } | 543 | } |
544 | 544 | ||
545 | /* | 545 | /* |
@@ -550,14 +550,13 @@ update_existing_head_ref(struct btrfs_delayed_ref_root *delayed_refs, | |||
550 | static noinline struct btrfs_delayed_ref_head * | 550 | static noinline struct btrfs_delayed_ref_head * |
551 | add_delayed_ref_head(struct btrfs_fs_info *fs_info, | 551 | add_delayed_ref_head(struct btrfs_fs_info *fs_info, |
552 | struct btrfs_trans_handle *trans, | 552 | struct btrfs_trans_handle *trans, |
553 | struct btrfs_delayed_ref_node *ref, | 553 | struct btrfs_delayed_ref_head *head_ref, |
554 | struct btrfs_qgroup_extent_record *qrecord, | 554 | struct btrfs_qgroup_extent_record *qrecord, |
555 | u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, | 555 | u64 bytenr, u64 num_bytes, u64 ref_root, u64 reserved, |
556 | int action, int is_data, int *qrecord_inserted_ret, | 556 | int action, int is_data, int *qrecord_inserted_ret, |
557 | int *old_ref_mod, int *new_ref_mod) | 557 | int *old_ref_mod, int *new_ref_mod) |
558 | { | 558 | { |
559 | struct btrfs_delayed_ref_head *existing; | 559 | struct btrfs_delayed_ref_head *existing; |
560 | struct btrfs_delayed_ref_head *head_ref = NULL; | ||
561 | struct btrfs_delayed_ref_root *delayed_refs; | 560 | struct btrfs_delayed_ref_root *delayed_refs; |
562 | int count_mod = 1; | 561 | int count_mod = 1; |
563 | int must_insert_reserved = 0; | 562 | int must_insert_reserved = 0; |
@@ -593,26 +592,21 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, | |||
593 | 592 | ||
594 | delayed_refs = &trans->transaction->delayed_refs; | 593 | delayed_refs = &trans->transaction->delayed_refs; |
595 | 594 | ||
596 | /* first set the basic ref node struct up */ | 595 | refcount_set(&head_ref->refs, 1); |
597 | refcount_set(&ref->refs, 1); | 596 | head_ref->bytenr = bytenr; |
598 | ref->bytenr = bytenr; | 597 | head_ref->num_bytes = num_bytes; |
599 | ref->num_bytes = num_bytes; | 598 | head_ref->ref_mod = count_mod; |
600 | ref->ref_mod = count_mod; | ||
601 | ref->type = 0; | ||
602 | ref->action = 0; | ||
603 | ref->is_head = 1; | ||
604 | ref->in_tree = 1; | ||
605 | ref->seq = 0; | ||
606 | |||
607 | head_ref = btrfs_delayed_node_to_head(ref); | ||
608 | head_ref->must_insert_reserved = must_insert_reserved; | 599 | head_ref->must_insert_reserved = must_insert_reserved; |
609 | head_ref->is_data = is_data; | 600 | head_ref->is_data = is_data; |
610 | INIT_LIST_HEAD(&head_ref->ref_list); | 601 | head_ref->ref_tree = RB_ROOT; |
611 | INIT_LIST_HEAD(&head_ref->ref_add_list); | 602 | INIT_LIST_HEAD(&head_ref->ref_add_list); |
603 | RB_CLEAR_NODE(&head_ref->href_node); | ||
612 | head_ref->processing = 0; | 604 | head_ref->processing = 0; |
613 | head_ref->total_ref_mod = count_mod; | 605 | head_ref->total_ref_mod = count_mod; |
614 | head_ref->qgroup_reserved = 0; | 606 | head_ref->qgroup_reserved = 0; |
615 | head_ref->qgroup_ref_root = 0; | 607 | head_ref->qgroup_ref_root = 0; |
608 | spin_lock_init(&head_ref->lock); | ||
609 | mutex_init(&head_ref->mutex); | ||
616 | 610 | ||
617 | /* Record qgroup extent info if provided */ | 611 | /* Record qgroup extent info if provided */ |
618 | if (qrecord) { | 612 | if (qrecord) { |
@@ -632,17 +626,14 @@ add_delayed_ref_head(struct btrfs_fs_info *fs_info, | |||
632 | qrecord_inserted = 1; | 626 | qrecord_inserted = 1; |
633 | } | 627 | } |
634 | 628 | ||
635 | spin_lock_init(&head_ref->lock); | 629 | trace_add_delayed_ref_head(fs_info, head_ref, action); |
636 | mutex_init(&head_ref->mutex); | ||
637 | |||
638 | trace_add_delayed_ref_head(fs_info, ref, head_ref, action); | ||
639 | 630 | ||
640 | existing = htree_insert(&delayed_refs->href_root, | 631 | existing = htree_insert(&delayed_refs->href_root, |
641 | &head_ref->href_node); | 632 | &head_ref->href_node); |
642 | if (existing) { | 633 | if (existing) { |
643 | WARN_ON(ref_root && reserved && existing->qgroup_ref_root | 634 | WARN_ON(ref_root && reserved && existing->qgroup_ref_root |
644 | && existing->qgroup_reserved); | 635 | && existing->qgroup_reserved); |
645 | update_existing_head_ref(delayed_refs, &existing->node, ref, | 636 | update_existing_head_ref(delayed_refs, existing, head_ref, |
646 | old_ref_mod); | 637 | old_ref_mod); |
647 | /* | 638 | /* |
648 | * we've updated the existing ref, free the newly | 639 | * we've updated the existing ref, free the newly |
@@ -699,7 +690,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
699 | ref->is_head = 0; | 690 | ref->is_head = 0; |
700 | ref->in_tree = 1; | 691 | ref->in_tree = 1; |
701 | ref->seq = seq; | 692 | ref->seq = seq; |
702 | INIT_LIST_HEAD(&ref->list); | 693 | RB_CLEAR_NODE(&ref->ref_node); |
703 | INIT_LIST_HEAD(&ref->add_list); | 694 | INIT_LIST_HEAD(&ref->add_list); |
704 | 695 | ||
705 | full_ref = btrfs_delayed_node_to_tree_ref(ref); | 696 | full_ref = btrfs_delayed_node_to_tree_ref(ref); |
@@ -713,7 +704,7 @@ add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
713 | 704 | ||
714 | trace_add_delayed_tree_ref(fs_info, ref, full_ref, action); | 705 | trace_add_delayed_tree_ref(fs_info, ref, full_ref, action); |
715 | 706 | ||
716 | ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); | 707 | ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref); |
717 | 708 | ||
718 | /* | 709 | /* |
719 | * XXX: memory should be freed at the same level allocated. | 710 | * XXX: memory should be freed at the same level allocated. |
@@ -756,7 +747,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
756 | ref->is_head = 0; | 747 | ref->is_head = 0; |
757 | ref->in_tree = 1; | 748 | ref->in_tree = 1; |
758 | ref->seq = seq; | 749 | ref->seq = seq; |
759 | INIT_LIST_HEAD(&ref->list); | 750 | RB_CLEAR_NODE(&ref->ref_node); |
760 | INIT_LIST_HEAD(&ref->add_list); | 751 | INIT_LIST_HEAD(&ref->add_list); |
761 | 752 | ||
762 | full_ref = btrfs_delayed_node_to_data_ref(ref); | 753 | full_ref = btrfs_delayed_node_to_data_ref(ref); |
@@ -772,8 +763,7 @@ add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
772 | 763 | ||
773 | trace_add_delayed_data_ref(fs_info, ref, full_ref, action); | 764 | trace_add_delayed_data_ref(fs_info, ref, full_ref, action); |
774 | 765 | ||
775 | ret = add_delayed_ref_tail_merge(trans, delayed_refs, head_ref, ref); | 766 | ret = insert_delayed_ref(trans, delayed_refs, head_ref, ref); |
776 | |||
777 | if (ret > 0) | 767 | if (ret > 0) |
778 | kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); | 768 | kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); |
779 | } | 769 | } |
@@ -821,7 +811,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
821 | * insert both the head node and the new ref without dropping | 811 | * insert both the head node and the new ref without dropping |
822 | * the spin lock | 812 | * the spin lock |
823 | */ | 813 | */ |
824 | head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, | 814 | head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, |
825 | bytenr, num_bytes, 0, 0, action, 0, | 815 | bytenr, num_bytes, 0, 0, action, 0, |
826 | &qrecord_inserted, old_ref_mod, | 816 | &qrecord_inserted, old_ref_mod, |
827 | new_ref_mod); | 817 | new_ref_mod); |
@@ -888,7 +878,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
888 | * insert both the head node and the new ref without dropping | 878 | * insert both the head node and the new ref without dropping |
889 | * the spin lock | 879 | * the spin lock |
890 | */ | 880 | */ |
891 | head_ref = add_delayed_ref_head(fs_info, trans, &head_ref->node, record, | 881 | head_ref = add_delayed_ref_head(fs_info, trans, head_ref, record, |
892 | bytenr, num_bytes, ref_root, reserved, | 882 | bytenr, num_bytes, ref_root, reserved, |
893 | action, 1, &qrecord_inserted, | 883 | action, 1, &qrecord_inserted, |
894 | old_ref_mod, new_ref_mod); | 884 | old_ref_mod, new_ref_mod); |
@@ -920,7 +910,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | |||
920 | delayed_refs = &trans->transaction->delayed_refs; | 910 | delayed_refs = &trans->transaction->delayed_refs; |
921 | spin_lock(&delayed_refs->lock); | 911 | spin_lock(&delayed_refs->lock); |
922 | 912 | ||
923 | add_delayed_ref_head(fs_info, trans, &head_ref->node, NULL, bytenr, | 913 | add_delayed_ref_head(fs_info, trans, head_ref, NULL, bytenr, |
924 | num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, | 914 | num_bytes, 0, 0, BTRFS_UPDATE_DELAYED_HEAD, |
925 | extent_op->is_data, NULL, NULL, NULL); | 915 | extent_op->is_data, NULL, NULL, NULL); |
926 | 916 | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ce88e4ac5276..a43af432f859 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -26,18 +26,8 @@ | |||
26 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ | 26 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ |
27 | #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ | 27 | #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ |
28 | 28 | ||
29 | /* | ||
30 | * XXX: Qu: I really hate the design that ref_head and tree/data ref share the | ||
31 | * same ref_node structure. | ||
32 | * Ref_head is in a higher logic level than tree/data ref, and duplicated | ||
33 | * bytenr/num_bytes in ref_node is really a waste or memory, they should be | ||
34 | * referred from ref_head. | ||
35 | * This gets more disgusting after we use list to store tree/data ref in | ||
36 | * ref_head. Must clean this mess up later. | ||
37 | */ | ||
38 | struct btrfs_delayed_ref_node { | 29 | struct btrfs_delayed_ref_node { |
39 | /*data/tree ref use list, stored in ref_head->ref_list. */ | 30 | struct rb_node ref_node; |
40 | struct list_head list; | ||
41 | /* | 31 | /* |
42 | * If action is BTRFS_ADD_DELAYED_REF, also link this node to | 32 | * If action is BTRFS_ADD_DELAYED_REF, also link this node to |
43 | * ref_head->ref_add_list, then we do not need to iterate the | 33 | * ref_head->ref_add_list, then we do not need to iterate the |
@@ -91,8 +81,9 @@ struct btrfs_delayed_extent_op { | |||
91 | * reference count modifications we've queued up. | 81 | * reference count modifications we've queued up. |
92 | */ | 82 | */ |
93 | struct btrfs_delayed_ref_head { | 83 | struct btrfs_delayed_ref_head { |
94 | struct btrfs_delayed_ref_node node; | 84 | u64 bytenr; |
95 | 85 | u64 num_bytes; | |
86 | refcount_t refs; | ||
96 | /* | 87 | /* |
97 | * the mutex is held while running the refs, and it is also | 88 | * the mutex is held while running the refs, and it is also |
98 | * held when checking the sum of reference modifications. | 89 | * held when checking the sum of reference modifications. |
@@ -100,7 +91,7 @@ struct btrfs_delayed_ref_head { | |||
100 | struct mutex mutex; | 91 | struct mutex mutex; |
101 | 92 | ||
102 | spinlock_t lock; | 93 | spinlock_t lock; |
103 | struct list_head ref_list; | 94 | struct rb_root ref_tree; |
104 | /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */ | 95 | /* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */ |
105 | struct list_head ref_add_list; | 96 | struct list_head ref_add_list; |
106 | 97 | ||
@@ -116,6 +107,14 @@ struct btrfs_delayed_ref_head { | |||
116 | int total_ref_mod; | 107 | int total_ref_mod; |
117 | 108 | ||
118 | /* | 109 | /* |
110 | * This is the current outstanding mod references for this bytenr. This | ||
111 | * is used with lookup_extent_info to get an accurate reference count | ||
112 | * for a bytenr, so it is adjusted as delayed refs are run so that any | ||
113 | * on disk reference count + ref_mod is accurate. | ||
114 | */ | ||
115 | int ref_mod; | ||
116 | |||
117 | /* | ||
119 | * For qgroup reserved space freeing. | 118 | * For qgroup reserved space freeing. |
120 | * | 119 | * |
121 | * ref_root and reserved will be recorded after | 120 | * ref_root and reserved will be recorded after |
@@ -234,15 +233,18 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | |||
234 | case BTRFS_SHARED_DATA_REF_KEY: | 233 | case BTRFS_SHARED_DATA_REF_KEY: |
235 | kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); | 234 | kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); |
236 | break; | 235 | break; |
237 | case 0: | ||
238 | kmem_cache_free(btrfs_delayed_ref_head_cachep, ref); | ||
239 | break; | ||
240 | default: | 236 | default: |
241 | BUG(); | 237 | BUG(); |
242 | } | 238 | } |
243 | } | 239 | } |
244 | } | 240 | } |
245 | 241 | ||
242 | static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head) | ||
243 | { | ||
244 | if (refcount_dec_and_test(&head->refs)) | ||
245 | kmem_cache_free(btrfs_delayed_ref_head_cachep, head); | ||
246 | } | ||
247 | |||
246 | int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | 248 | int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, |
247 | struct btrfs_trans_handle *trans, | 249 | struct btrfs_trans_handle *trans, |
248 | u64 bytenr, u64 num_bytes, u64 parent, | 250 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -283,35 +285,17 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, | |||
283 | u64 seq); | 285 | u64 seq); |
284 | 286 | ||
285 | /* | 287 | /* |
286 | * a node might live in a head or a regular ref, this lets you | ||
287 | * test for the proper type to use. | ||
288 | */ | ||
289 | static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) | ||
290 | { | ||
291 | return node->is_head; | ||
292 | } | ||
293 | |||
294 | /* | ||
295 | * helper functions to cast a node into its container | 288 | * helper functions to cast a node into its container |
296 | */ | 289 | */ |
297 | static inline struct btrfs_delayed_tree_ref * | 290 | static inline struct btrfs_delayed_tree_ref * |
298 | btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node) | 291 | btrfs_delayed_node_to_tree_ref(struct btrfs_delayed_ref_node *node) |
299 | { | 292 | { |
300 | WARN_ON(btrfs_delayed_ref_is_head(node)); | ||
301 | return container_of(node, struct btrfs_delayed_tree_ref, node); | 293 | return container_of(node, struct btrfs_delayed_tree_ref, node); |
302 | } | 294 | } |
303 | 295 | ||
304 | static inline struct btrfs_delayed_data_ref * | 296 | static inline struct btrfs_delayed_data_ref * |
305 | btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) | 297 | btrfs_delayed_node_to_data_ref(struct btrfs_delayed_ref_node *node) |
306 | { | 298 | { |
307 | WARN_ON(btrfs_delayed_ref_is_head(node)); | ||
308 | return container_of(node, struct btrfs_delayed_data_ref, node); | 299 | return container_of(node, struct btrfs_delayed_data_ref, node); |
309 | } | 300 | } |
310 | |||
311 | static inline struct btrfs_delayed_ref_head * | ||
312 | btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) | ||
313 | { | ||
314 | WARN_ON(!btrfs_delayed_ref_is_head(node)); | ||
315 | return container_of(node, struct btrfs_delayed_ref_head, node); | ||
316 | } | ||
317 | #endif | 301 | #endif |
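The surviving cast helpers work because btrfs_delayed_ref_node is embedded as the first member of the tree and data ref structs, and container_of() steps back from the member to the enclosing object; since heads no longer embed a node at all, the is_head checks and the head cast could simply be deleted. A minimal userspace illustration of the mechanism, with toy struct names and a simplified container_of (the kernel's adds type checking):

	#include <stddef.h>
	#include <stdio.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct node { int type; };
	struct tree_ref { struct node node; int root; };

	int main(void)
	{
		struct tree_ref ref = { { 1 }, 42 };
		struct node *n = &ref.node;

		/* Recover the enclosing tree_ref from its embedded node. */
		struct tree_ref *back = container_of(n, struct tree_ref, node);
		printf("root = %d\n", back->root);	/* 42 */
		return 0;
	}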
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index dfdab849037b..efce9a2fa9be 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -50,6 +50,8 @@ | |||
50 | #include "sysfs.h" | 50 | #include "sysfs.h" |
51 | #include "qgroup.h" | 51 | #include "qgroup.h" |
52 | #include "compression.h" | 52 | #include "compression.h" |
53 | #include "tree-checker.h" | ||
54 | #include "ref-verify.h" | ||
53 | 55 | ||
54 | #ifdef CONFIG_X86 | 56 | #ifdef CONFIG_X86 |
55 | #include <asm/cpufeature.h> | 57 | #include <asm/cpufeature.h> |
@@ -543,146 +545,6 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info, | |||
543 | return ret; | 545 | return ret; |
544 | } | 546 | } |
545 | 547 | ||
546 | #define CORRUPT(reason, eb, root, slot) \ | ||
547 | btrfs_crit(root->fs_info, \ | ||
548 | "corrupt %s, %s: block=%llu, root=%llu, slot=%d", \ | ||
549 | btrfs_header_level(eb) == 0 ? "leaf" : "node", \ | ||
550 | reason, btrfs_header_bytenr(eb), root->objectid, slot) | ||
551 | |||
552 | static noinline int check_leaf(struct btrfs_root *root, | ||
553 | struct extent_buffer *leaf) | ||
554 | { | ||
555 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
556 | struct btrfs_key key; | ||
557 | struct btrfs_key leaf_key; | ||
558 | u32 nritems = btrfs_header_nritems(leaf); | ||
559 | int slot; | ||
560 | |||
561 | /* | ||
562 | * Extent buffers from a relocation tree have a owner field that | ||
563 | * corresponds to the subvolume tree they are based on. So just from an | ||
564 | * extent buffer alone we can not find out what is the id of the | ||
565 | * corresponding subvolume tree, so we can not figure out if the extent | ||
566 | * buffer corresponds to the root of the relocation tree or not. So skip | ||
567 | * this check for relocation trees. | ||
568 | */ | ||
569 | if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { | ||
570 | struct btrfs_root *check_root; | ||
571 | |||
572 | key.objectid = btrfs_header_owner(leaf); | ||
573 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
574 | key.offset = (u64)-1; | ||
575 | |||
576 | check_root = btrfs_get_fs_root(fs_info, &key, false); | ||
577 | /* | ||
578 | * The only reason we also check NULL here is that during | ||
579 | * open_ctree() some roots have not yet been set up. | ||
580 | */ | ||
581 | if (!IS_ERR_OR_NULL(check_root)) { | ||
582 | struct extent_buffer *eb; | ||
583 | |||
584 | eb = btrfs_root_node(check_root); | ||
585 | /* if leaf is the root, then it's fine */ | ||
586 | if (leaf != eb) { | ||
587 | CORRUPT("non-root leaf's nritems is 0", | ||
588 | leaf, check_root, 0); | ||
589 | free_extent_buffer(eb); | ||
590 | return -EIO; | ||
591 | } | ||
592 | free_extent_buffer(eb); | ||
593 | } | ||
594 | return 0; | ||
595 | } | ||
596 | |||
597 | if (nritems == 0) | ||
598 | return 0; | ||
599 | |||
600 | /* Check the 0 item */ | ||
601 | if (btrfs_item_offset_nr(leaf, 0) + btrfs_item_size_nr(leaf, 0) != | ||
602 | BTRFS_LEAF_DATA_SIZE(fs_info)) { | ||
603 | CORRUPT("invalid item offset size pair", leaf, root, 0); | ||
604 | return -EIO; | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * Check to make sure each items keys are in the correct order and their | ||
609 | * offsets make sense. We only have to loop through nritems-1 because | ||
610 | * we check the current slot against the next slot, which verifies the | ||
611 | * next slot's offset+size makes sense and that the current slot's | ||
612 | * offset is correct. | ||
613 | */ | ||
614 | for (slot = 0; slot < nritems - 1; slot++) { | ||
615 | btrfs_item_key_to_cpu(leaf, &leaf_key, slot); | ||
616 | btrfs_item_key_to_cpu(leaf, &key, slot + 1); | ||
617 | |||
618 | /* Make sure the keys are in the right order */ | ||
619 | if (btrfs_comp_cpu_keys(&leaf_key, &key) >= 0) { | ||
620 | CORRUPT("bad key order", leaf, root, slot); | ||
621 | return -EIO; | ||
622 | } | ||
623 | |||
624 | /* | ||
625 | * Make sure the offsets and ends are right; remember that the | ||
626 | * item data starts at the end of the leaf and grows towards the | ||
627 | * front. | ||
628 | */ | ||
629 | if (btrfs_item_offset_nr(leaf, slot) != | ||
630 | btrfs_item_end_nr(leaf, slot + 1)) { | ||
631 | CORRUPT("slot offset bad", leaf, root, slot); | ||
632 | return -EIO; | ||
633 | } | ||
634 | |||
635 | /* | ||
636 | * Check to make sure that we don't point outside of the leaf, | ||
637 | * just in case all the items are consistent with each other, but | ||
638 | * all point outside of the leaf. | ||
639 | */ | ||
640 | if (btrfs_item_end_nr(leaf, slot) > | ||
641 | BTRFS_LEAF_DATA_SIZE(fs_info)) { | ||
642 | CORRUPT("slot end outside of leaf", leaf, root, slot); | ||
643 | return -EIO; | ||
644 | } | ||
645 | } | ||
646 | |||
647 | return 0; | ||
648 | } | ||
649 | |||
650 | static int check_node(struct btrfs_root *root, struct extent_buffer *node) | ||
651 | { | ||
652 | unsigned long nr = btrfs_header_nritems(node); | ||
653 | struct btrfs_key key, next_key; | ||
654 | int slot; | ||
655 | u64 bytenr; | ||
656 | int ret = 0; | ||
657 | |||
658 | if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { | ||
659 | btrfs_crit(root->fs_info, | ||
660 | "corrupt node: block %llu root %llu nritems %lu", | ||
661 | node->start, root->objectid, nr); | ||
662 | return -EIO; | ||
663 | } | ||
664 | |||
665 | for (slot = 0; slot < nr - 1; slot++) { | ||
666 | bytenr = btrfs_node_blockptr(node, slot); | ||
667 | btrfs_node_key_to_cpu(node, &key, slot); | ||
668 | btrfs_node_key_to_cpu(node, &next_key, slot + 1); | ||
669 | |||
670 | if (!bytenr) { | ||
671 | CORRUPT("invalid item slot", node, root, slot); | ||
672 | ret = -EIO; | ||
673 | goto out; | ||
674 | } | ||
675 | |||
676 | if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { | ||
677 | CORRUPT("bad key order", node, root, slot); | ||
678 | ret = -EIO; | ||
679 | goto out; | ||
680 | } | ||
681 | } | ||
682 | out: | ||
683 | return ret; | ||
684 | } | ||
685 | |||
686 | static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | 548 | static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, |
687 | u64 phy_offset, struct page *page, | 549 | u64 phy_offset, struct page *page, |
688 | u64 start, u64 end, int mirror) | 550 | u64 start, u64 end, int mirror) |
@@ -748,12 +610,12 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio, | |||
748 | * that we don't try and read the other copies of this block, just | 610 | * that we don't try and read the other copies of this block, just |
749 | * return -EIO. | 611 | * return -EIO. |
750 | */ | 612 | */ |
751 | if (found_level == 0 && check_leaf(root, eb)) { | 613 | if (found_level == 0 && btrfs_check_leaf(root, eb)) { |
752 | set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | 614 | set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); |
753 | ret = -EIO; | 615 | ret = -EIO; |
754 | } | 616 | } |
755 | 617 | ||
756 | if (found_level > 0 && check_node(root, eb)) | 618 | if (found_level > 0 && btrfs_check_node(root, eb)) |
757 | ret = -EIO; | 619 | ret = -EIO; |
758 | 620 | ||
759 | if (!ret) | 621 | if (!ret) |
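The bodies deleted above now live in the new tree-checker.c (see the #include added earlier in this file) as btrfs_check_leaf() and btrfs_check_node(). A simplified, self-contained model of the leaf invariants being enforced, with flat arrays standing in for the extent_buffer accessors:

#include <errno.h>

/* Model: item data is filled from the back of the leaf, so item 0 must
 * end exactly at the leaf data size, every item must start where its
 * successor ends, and keys must strictly ascend (keys reduced to one
 * integer here for brevity).
 */
static int check_leaf_layout(const unsigned long long *key,
                             const unsigned *item_start,
                             const unsigned *item_end,
                             int nritems, unsigned leaf_data_size)
{
        int slot;

        if (nritems && item_end[0] != leaf_data_size)
                return -EIO;            /* invalid item offset/size pair */
        for (slot = 0; slot + 1 < nritems; slot++) {
                if (key[slot] >= key[slot + 1])
                        return -EIO;    /* bad key order */
                if (item_start[slot] != item_end[slot + 1])
                        return -EIO;    /* gap or overlap in item data */
        }
        return 0;
}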
@@ -879,22 +741,9 @@ static void run_one_async_start(struct btrfs_work *work) | |||
879 | 741 | ||
880 | static void run_one_async_done(struct btrfs_work *work) | 742 | static void run_one_async_done(struct btrfs_work *work) |
881 | { | 743 | { |
882 | struct btrfs_fs_info *fs_info; | ||
883 | struct async_submit_bio *async; | 744 | struct async_submit_bio *async; |
884 | int limit; | ||
885 | 745 | ||
886 | async = container_of(work, struct async_submit_bio, work); | 746 | async = container_of(work, struct async_submit_bio, work); |
887 | fs_info = async->fs_info; | ||
888 | |||
889 | limit = btrfs_async_submit_limit(fs_info); | ||
890 | limit = limit * 2 / 3; | ||
891 | |||
892 | /* | ||
893 | * atomic_dec_return implies a barrier for waitqueue_active | ||
894 | */ | ||
895 | if (atomic_dec_return(&fs_info->nr_async_submits) < limit && | ||
896 | waitqueue_active(&fs_info->async_submit_wait)) | ||
897 | wake_up(&fs_info->async_submit_wait); | ||
898 | 747 | ||
899 | /* If an error occurred we just want to clean up the bio and move on */ | 748 | /* If an error occurred we just want to clean up the bio and move on */ |
900 | if (async->status) { | 749 | if (async->status) { |
@@ -942,19 +791,10 @@ blk_status_t btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct bio *bio, | |||
942 | 791 | ||
943 | async->status = 0; | 792 | async->status = 0; |
944 | 793 | ||
945 | atomic_inc(&fs_info->nr_async_submits); | ||
946 | |||
947 | if (op_is_sync(bio->bi_opf)) | 794 | if (op_is_sync(bio->bi_opf)) |
948 | btrfs_set_work_high_priority(&async->work); | 795 | btrfs_set_work_high_priority(&async->work); |
949 | 796 | ||
950 | btrfs_queue_work(fs_info->workers, &async->work); | 797 | btrfs_queue_work(fs_info->workers, &async->work); |
951 | |||
952 | while (atomic_read(&fs_info->async_submit_draining) && | ||
953 | atomic_read(&fs_info->nr_async_submits)) { | ||
954 | wait_event(fs_info->async_submit_wait, | ||
955 | (atomic_read(&fs_info->nr_async_submits) == 0)); | ||
956 | } | ||
957 | |||
958 | return 0; | 798 | return 0; |
959 | } | 799 | } |
960 | 800 | ||
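The submit path no longer throttles itself; the dropped nr_async_submits accounting followed the stock counter-plus-waitqueue back-pressure idiom, which core block layer batching has made redundant here. For reference, a sketch of that idiom with hypothetical names:

#include <linux/atomic.h>
#include <linux/wait.h>

static atomic_t nr_pending = ATOMIC_INIT(0);        /* hypothetical */
static DECLARE_WAIT_QUEUE_HEAD(submit_wait);

static void submit_done(int limit)
{
        /* atomic_dec_return() implies the barrier waitqueue_active()
         * needs, so no explicit smp_mb() is required here.
         */
        if (atomic_dec_return(&nr_pending) < limit &&
            waitqueue_active(&submit_wait))
                wake_up(&submit_wait);
}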
@@ -1005,9 +845,9 @@ static blk_status_t __btree_submit_bio_done(void *private_data, struct bio *bio, | |||
1005 | return ret; | 845 | return ret; |
1006 | } | 846 | } |
1007 | 847 | ||
1008 | static int check_async_write(unsigned long bio_flags) | 848 | static int check_async_write(struct btrfs_inode *bi) |
1009 | { | 849 | { |
1010 | if (bio_flags & EXTENT_BIO_TREE_LOG) | 850 | if (atomic_read(&bi->sync_writers)) |
1011 | return 0; | 851 | return 0; |
1012 | #ifdef CONFIG_X86 | 852 | #ifdef CONFIG_X86 |
1013 | if (static_cpu_has(X86_FEATURE_XMM4_2)) | 853 | if (static_cpu_has(X86_FEATURE_XMM4_2)) |
@@ -1022,7 +862,7 @@ static blk_status_t btree_submit_bio_hook(void *private_data, struct bio *bio, | |||
1022 | { | 862 | { |
1023 | struct inode *inode = private_data; | 863 | struct inode *inode = private_data; |
1024 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 864 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
1025 | int async = check_async_write(bio_flags); | 865 | int async = check_async_write(BTRFS_I(inode)); |
1026 | blk_status_t ret; | 866 | blk_status_t ret; |
1027 | 867 | ||
1028 | if (bio_op(bio) != REQ_OP_WRITE) { | 868 | if (bio_op(bio) != REQ_OP_WRITE) { |
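check_async_write() now keys off the inode's sync_writers count rather than the EXTENT_BIO_TREE_LOG bio flag. Restated as a pure function (names hypothetical), the decision being made is:

/* Checksum inline when a sync writer is waiting (do not add worker
 * latency) or the CPU has fast CRC32C (SSE4.2 on x86); otherwise defer
 * checksumming to the async workers.
 */
static int want_async_csum(int sync_writers, int have_fast_crc32c)
{
        if (sync_writers)
                return 0;
        if (have_fast_crc32c)
                return 0;
        return 1;
}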
@@ -2607,14 +2447,6 @@ int open_ctree(struct super_block *sb, | |||
2607 | goto fail_delalloc_bytes; | 2447 | goto fail_delalloc_bytes; |
2608 | } | 2448 | } |
2609 | 2449 | ||
2610 | fs_info->btree_inode = new_inode(sb); | ||
2611 | if (!fs_info->btree_inode) { | ||
2612 | err = -ENOMEM; | ||
2613 | goto fail_bio_counter; | ||
2614 | } | ||
2615 | |||
2616 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
2617 | |||
2618 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 2450 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
2619 | INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); | 2451 | INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC); |
2620 | INIT_LIST_HEAD(&fs_info->trans_list); | 2452 | INIT_LIST_HEAD(&fs_info->trans_list); |
@@ -2647,17 +2479,12 @@ int open_ctree(struct super_block *sb, | |||
2647 | btrfs_mapping_init(&fs_info->mapping_tree); | 2479 | btrfs_mapping_init(&fs_info->mapping_tree); |
2648 | btrfs_init_block_rsv(&fs_info->global_block_rsv, | 2480 | btrfs_init_block_rsv(&fs_info->global_block_rsv, |
2649 | BTRFS_BLOCK_RSV_GLOBAL); | 2481 | BTRFS_BLOCK_RSV_GLOBAL); |
2650 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv, | ||
2651 | BTRFS_BLOCK_RSV_DELALLOC); | ||
2652 | btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); | 2482 | btrfs_init_block_rsv(&fs_info->trans_block_rsv, BTRFS_BLOCK_RSV_TRANS); |
2653 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); | 2483 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv, BTRFS_BLOCK_RSV_CHUNK); |
2654 | btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); | 2484 | btrfs_init_block_rsv(&fs_info->empty_block_rsv, BTRFS_BLOCK_RSV_EMPTY); |
2655 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv, | 2485 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv, |
2656 | BTRFS_BLOCK_RSV_DELOPS); | 2486 | BTRFS_BLOCK_RSV_DELOPS); |
2657 | atomic_set(&fs_info->nr_async_submits, 0); | ||
2658 | atomic_set(&fs_info->async_delalloc_pages, 0); | 2487 | atomic_set(&fs_info->async_delalloc_pages, 0); |
2659 | atomic_set(&fs_info->async_submit_draining, 0); | ||
2660 | atomic_set(&fs_info->nr_async_bios, 0); | ||
2661 | atomic_set(&fs_info->defrag_running, 0); | 2488 | atomic_set(&fs_info->defrag_running, 0); |
2662 | atomic_set(&fs_info->qgroup_op_seq, 0); | 2489 | atomic_set(&fs_info->qgroup_op_seq, 0); |
2663 | atomic_set(&fs_info->reada_works_cnt, 0); | 2490 | atomic_set(&fs_info->reada_works_cnt, 0); |
@@ -2673,12 +2500,21 @@ int open_ctree(struct super_block *sb, | |||
2673 | /* readahead state */ | 2500 | /* readahead state */ |
2674 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); | 2501 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_DIRECT_RECLAIM); |
2675 | spin_lock_init(&fs_info->reada_lock); | 2502 | spin_lock_init(&fs_info->reada_lock); |
2503 | btrfs_init_ref_verify(fs_info); | ||
2676 | 2504 | ||
2677 | fs_info->thread_pool_size = min_t(unsigned long, | 2505 | fs_info->thread_pool_size = min_t(unsigned long, |
2678 | num_online_cpus() + 2, 8); | 2506 | num_online_cpus() + 2, 8); |
2679 | 2507 | ||
2680 | INIT_LIST_HEAD(&fs_info->ordered_roots); | 2508 | INIT_LIST_HEAD(&fs_info->ordered_roots); |
2681 | spin_lock_init(&fs_info->ordered_root_lock); | 2509 | spin_lock_init(&fs_info->ordered_root_lock); |
2510 | |||
2511 | fs_info->btree_inode = new_inode(sb); | ||
2512 | if (!fs_info->btree_inode) { | ||
2513 | err = -ENOMEM; | ||
2514 | goto fail_bio_counter; | ||
2515 | } | ||
2516 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
2517 | |||
2682 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), | 2518 | fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), |
2683 | GFP_KERNEL); | 2519 | GFP_KERNEL); |
2684 | if (!fs_info->delayed_root) { | 2520 | if (!fs_info->delayed_root) { |
@@ -2895,12 +2731,13 @@ int open_ctree(struct super_block *sb, | |||
2895 | sb->s_bdi->congested_fn = btrfs_congested_fn; | 2731 | sb->s_bdi->congested_fn = btrfs_congested_fn; |
2896 | sb->s_bdi->congested_data = fs_info; | 2732 | sb->s_bdi->congested_data = fs_info; |
2897 | sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; | 2733 | sb->s_bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK; |
2898 | sb->s_bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE; | 2734 | sb->s_bdi->ra_pages = VM_MAX_READAHEAD * SZ_1K / PAGE_SIZE; |
2899 | sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); | 2735 | sb->s_bdi->ra_pages *= btrfs_super_num_devices(disk_super); |
2900 | sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); | 2736 | sb->s_bdi->ra_pages = max(sb->s_bdi->ra_pages, SZ_4M / PAGE_SIZE); |
2901 | 2737 | ||
2902 | sb->s_blocksize = sectorsize; | 2738 | sb->s_blocksize = sectorsize; |
2903 | sb->s_blocksize_bits = blksize_bits(sectorsize); | 2739 | sb->s_blocksize_bits = blksize_bits(sectorsize); |
2740 | memcpy(&sb->s_uuid, fs_info->fsid, BTRFS_FSID_SIZE); | ||
2904 | 2741 | ||
2905 | mutex_lock(&fs_info->chunk_mutex); | 2742 | mutex_lock(&fs_info->chunk_mutex); |
2906 | ret = btrfs_read_sys_array(fs_info); | 2743 | ret = btrfs_read_sys_array(fs_info); |
@@ -3083,6 +2920,9 @@ retry_root_backup: | |||
3083 | if (ret) | 2920 | if (ret) |
3084 | goto fail_trans_kthread; | 2921 | goto fail_trans_kthread; |
3085 | 2922 | ||
2923 | if (btrfs_build_ref_tree(fs_info)) | ||
2924 | btrfs_err(fs_info, "couldn't build ref tree"); | ||
2925 | |||
3086 | /* do not make disk changes in broken FS or nologreplay is given */ | 2926 | /* do not make disk changes in broken FS or nologreplay is given */ |
3087 | if (btrfs_super_log_root(disk_super) != 0 && | 2927 | if (btrfs_super_log_root(disk_super) != 0 && |
3088 | !btrfs_test_opt(fs_info, NOLOGREPLAY)) { | 2928 | !btrfs_test_opt(fs_info, NOLOGREPLAY)) { |
@@ -3948,6 +3788,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) | |||
3948 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3788 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
3949 | 3789 | ||
3950 | btrfs_free_stripe_hash_table(fs_info); | 3790 | btrfs_free_stripe_hash_table(fs_info); |
3791 | btrfs_free_ref_cache(fs_info); | ||
3951 | 3792 | ||
3952 | __btrfs_free_block_rsv(root->orphan_block_rsv); | 3793 | __btrfs_free_block_rsv(root->orphan_block_rsv); |
3953 | root->orphan_block_rsv = NULL; | 3794 | root->orphan_block_rsv = NULL; |
@@ -4007,7 +3848,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
4007 | buf->len, | 3848 | buf->len, |
4008 | fs_info->dirty_metadata_batch); | 3849 | fs_info->dirty_metadata_batch); |
4009 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 3850 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
4010 | if (btrfs_header_level(buf) == 0 && check_leaf(root, buf)) { | 3851 | if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(root, buf)) { |
4011 | btrfs_print_leaf(buf); | 3852 | btrfs_print_leaf(buf); |
4012 | ASSERT(0); | 3853 | ASSERT(0); |
4013 | } | 3854 | } |
@@ -4272,26 +4113,28 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
4272 | 4113 | ||
4273 | while ((node = rb_first(&delayed_refs->href_root)) != NULL) { | 4114 | while ((node = rb_first(&delayed_refs->href_root)) != NULL) { |
4274 | struct btrfs_delayed_ref_head *head; | 4115 | struct btrfs_delayed_ref_head *head; |
4275 | struct btrfs_delayed_ref_node *tmp; | 4116 | struct rb_node *n; |
4276 | bool pin_bytes = false; | 4117 | bool pin_bytes = false; |
4277 | 4118 | ||
4278 | head = rb_entry(node, struct btrfs_delayed_ref_head, | 4119 | head = rb_entry(node, struct btrfs_delayed_ref_head, |
4279 | href_node); | 4120 | href_node); |
4280 | if (!mutex_trylock(&head->mutex)) { | 4121 | if (!mutex_trylock(&head->mutex)) { |
4281 | refcount_inc(&head->node.refs); | 4122 | refcount_inc(&head->refs); |
4282 | spin_unlock(&delayed_refs->lock); | 4123 | spin_unlock(&delayed_refs->lock); |
4283 | 4124 | ||
4284 | mutex_lock(&head->mutex); | 4125 | mutex_lock(&head->mutex); |
4285 | mutex_unlock(&head->mutex); | 4126 | mutex_unlock(&head->mutex); |
4286 | btrfs_put_delayed_ref(&head->node); | 4127 | btrfs_put_delayed_ref_head(head); |
4287 | spin_lock(&delayed_refs->lock); | 4128 | spin_lock(&delayed_refs->lock); |
4288 | continue; | 4129 | continue; |
4289 | } | 4130 | } |
4290 | spin_lock(&head->lock); | 4131 | spin_lock(&head->lock); |
4291 | list_for_each_entry_safe_reverse(ref, tmp, &head->ref_list, | 4132 | while ((n = rb_first(&head->ref_tree)) != NULL) { |
4292 | list) { | 4133 | ref = rb_entry(n, struct btrfs_delayed_ref_node, |
4134 | ref_node); | ||
4293 | ref->in_tree = 0; | 4135 | ref->in_tree = 0; |
4294 | list_del(&ref->list); | 4136 | rb_erase(&ref->ref_node, &head->ref_tree); |
4137 | RB_CLEAR_NODE(&ref->ref_node); | ||
4295 | if (!list_empty(&ref->add_list)) | 4138 | if (!list_empty(&ref->add_list)) |
4296 | list_del(&ref->add_list); | 4139 | list_del(&ref->add_list); |
4297 | atomic_dec(&delayed_refs->num_entries); | 4140 | atomic_dec(&delayed_refs->num_entries); |
@@ -4304,16 +4147,16 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
4304 | if (head->processing == 0) | 4147 | if (head->processing == 0) |
4305 | delayed_refs->num_heads_ready--; | 4148 | delayed_refs->num_heads_ready--; |
4306 | atomic_dec(&delayed_refs->num_entries); | 4149 | atomic_dec(&delayed_refs->num_entries); |
4307 | head->node.in_tree = 0; | ||
4308 | rb_erase(&head->href_node, &delayed_refs->href_root); | 4150 | rb_erase(&head->href_node, &delayed_refs->href_root); |
4151 | RB_CLEAR_NODE(&head->href_node); | ||
4309 | spin_unlock(&head->lock); | 4152 | spin_unlock(&head->lock); |
4310 | spin_unlock(&delayed_refs->lock); | 4153 | spin_unlock(&delayed_refs->lock); |
4311 | mutex_unlock(&head->mutex); | 4154 | mutex_unlock(&head->mutex); |
4312 | 4155 | ||
4313 | if (pin_bytes) | 4156 | if (pin_bytes) |
4314 | btrfs_pin_extent(fs_info, head->node.bytenr, | 4157 | btrfs_pin_extent(fs_info, head->bytenr, |
4315 | head->node.num_bytes, 1); | 4158 | head->num_bytes, 1); |
4316 | btrfs_put_delayed_ref(&head->node); | 4159 | btrfs_put_delayed_ref_head(head); |
4317 | cond_resched(); | 4160 | cond_resched(); |
4318 | spin_lock(&delayed_refs->lock); | 4161 | spin_lock(&delayed_refs->lock); |
4319 | } | 4162 | } |
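With per-head refs kept in head->ref_tree instead of a list, the teardown above uses the standard rb-tree drain idiom; RB_CLEAR_NODE() marks each node detached so later emptiness checks behave. Generic form, with a hypothetical entry type:

#include <linux/rbtree.h>

struct entry {                          /* hypothetical */
        struct rb_node rb;
};

static void drain_tree(struct rb_root *root)
{
        struct rb_node *n;

        while ((n = rb_first(root)) != NULL) {
                struct entry *e = rb_entry(n, struct entry, rb);

                rb_erase(&e->rb, root);
                RB_CLEAR_NODE(&e->rb);
                /* dispose of 'e' here */
        }
}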
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e2d7e86b51d1..673ac4e01dd0 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
27 | #include <linux/ratelimit.h> | 27 | #include <linux/ratelimit.h> |
28 | #include <linux/percpu_counter.h> | 28 | #include <linux/percpu_counter.h> |
29 | #include <linux/lockdep.h> | ||
29 | #include "hash.h" | 30 | #include "hash.h" |
30 | #include "tree-log.h" | 31 | #include "tree-log.h" |
31 | #include "disk-io.h" | 32 | #include "disk-io.h" |
@@ -38,6 +39,7 @@ | |||
38 | #include "math.h" | 39 | #include "math.h" |
39 | #include "sysfs.h" | 40 | #include "sysfs.h" |
40 | #include "qgroup.h" | 41 | #include "qgroup.h" |
42 | #include "ref-verify.h" | ||
41 | 43 | ||
42 | #undef SCRAMBLE_DELAYED_REFS | 44 | #undef SCRAMBLE_DELAYED_REFS |
43 | 45 | ||
@@ -61,9 +63,6 @@ enum { | |||
61 | CHUNK_ALLOC_FORCE = 2, | 63 | CHUNK_ALLOC_FORCE = 2, |
62 | }; | 64 | }; |
63 | 65 | ||
64 | static int update_block_group(struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_fs_info *fs_info, u64 bytenr, | ||
66 | u64 num_bytes, int alloc); | ||
67 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 66 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
68 | struct btrfs_fs_info *fs_info, | 67 | struct btrfs_fs_info *fs_info, |
69 | struct btrfs_delayed_ref_node *node, u64 parent, | 68 | struct btrfs_delayed_ref_node *node, u64 parent, |
@@ -91,17 +90,8 @@ static int find_next_key(struct btrfs_path *path, int level, | |||
91 | static void dump_space_info(struct btrfs_fs_info *fs_info, | 90 | static void dump_space_info(struct btrfs_fs_info *fs_info, |
92 | struct btrfs_space_info *info, u64 bytes, | 91 | struct btrfs_space_info *info, u64 bytes, |
93 | int dump_block_groups); | 92 | int dump_block_groups); |
94 | static int btrfs_add_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
95 | u64 ram_bytes, u64 num_bytes, int delalloc); | ||
96 | static int btrfs_free_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
97 | u64 num_bytes, int delalloc); | ||
98 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | 93 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, |
99 | u64 num_bytes); | 94 | u64 num_bytes); |
100 | static int __reserve_metadata_bytes(struct btrfs_fs_info *fs_info, | ||
101 | struct btrfs_space_info *space_info, | ||
102 | u64 orig_bytes, | ||
103 | enum btrfs_reserve_flush_enum flush, | ||
104 | bool system_chunk); | ||
105 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, | 95 | static void space_info_add_new_bytes(struct btrfs_fs_info *fs_info, |
106 | struct btrfs_space_info *space_info, | 96 | struct btrfs_space_info *space_info, |
107 | u64 num_bytes); | 97 | u64 num_bytes); |
@@ -652,7 +642,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
652 | cache->cached = BTRFS_CACHE_FAST; | 642 | cache->cached = BTRFS_CACHE_FAST; |
653 | spin_unlock(&cache->lock); | 643 | spin_unlock(&cache->lock); |
654 | 644 | ||
655 | if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { | 645 | if (btrfs_test_opt(fs_info, SPACE_CACHE)) { |
656 | mutex_lock(&caching_ctl->mutex); | 646 | mutex_lock(&caching_ctl->mutex); |
657 | ret = load_free_space_cache(fs_info, cache); | 647 | ret = load_free_space_cache(fs_info, cache); |
658 | 648 | ||
@@ -923,7 +913,7 @@ search_again: | |||
923 | head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); | 913 | head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); |
924 | if (head) { | 914 | if (head) { |
925 | if (!mutex_trylock(&head->mutex)) { | 915 | if (!mutex_trylock(&head->mutex)) { |
926 | refcount_inc(&head->node.refs); | 916 | refcount_inc(&head->refs); |
927 | spin_unlock(&delayed_refs->lock); | 917 | spin_unlock(&delayed_refs->lock); |
928 | 918 | ||
929 | btrfs_release_path(path); | 919 | btrfs_release_path(path); |
@@ -934,7 +924,7 @@ search_again: | |||
934 | */ | 924 | */ |
935 | mutex_lock(&head->mutex); | 925 | mutex_lock(&head->mutex); |
936 | mutex_unlock(&head->mutex); | 926 | mutex_unlock(&head->mutex); |
937 | btrfs_put_delayed_ref(&head->node); | 927 | btrfs_put_delayed_ref_head(head); |
938 | goto search_again; | 928 | goto search_again; |
939 | } | 929 | } |
940 | spin_lock(&head->lock); | 930 | spin_lock(&head->lock); |
@@ -943,7 +933,7 @@ search_again: | |||
943 | else | 933 | else |
944 | BUG_ON(num_refs == 0); | 934 | BUG_ON(num_refs == 0); |
945 | 935 | ||
946 | num_refs += head->node.ref_mod; | 936 | num_refs += head->ref_mod; |
947 | spin_unlock(&head->lock); | 937 | spin_unlock(&head->lock); |
948 | mutex_unlock(&head->mutex); | 938 | mutex_unlock(&head->mutex); |
949 | } | 939 | } |
@@ -2189,16 +2179,20 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr, | |||
2189 | 2179 | ||
2190 | /* Can return -ENOMEM */ | 2180 | /* Can return -ENOMEM */ |
2191 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 2181 | int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
2192 | struct btrfs_fs_info *fs_info, | 2182 | struct btrfs_root *root, |
2193 | u64 bytenr, u64 num_bytes, u64 parent, | 2183 | u64 bytenr, u64 num_bytes, u64 parent, |
2194 | u64 root_objectid, u64 owner, u64 offset) | 2184 | u64 root_objectid, u64 owner, u64 offset) |
2195 | { | 2185 | { |
2186 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
2196 | int old_ref_mod, new_ref_mod; | 2187 | int old_ref_mod, new_ref_mod; |
2197 | int ret; | 2188 | int ret; |
2198 | 2189 | ||
2199 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && | 2190 | BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && |
2200 | root_objectid == BTRFS_TREE_LOG_OBJECTID); | 2191 | root_objectid == BTRFS_TREE_LOG_OBJECTID); |
2201 | 2192 | ||
2193 | btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, root_objectid, | ||
2194 | owner, offset, BTRFS_ADD_DELAYED_REF); | ||
2195 | |||
2202 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 2196 | if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
2203 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, | 2197 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, |
2204 | num_bytes, parent, | 2198 | num_bytes, parent, |
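btrfs_inc_extent_ref() now takes the root so the new btrfs_ref_tree_mod() hook can record the modification for ref-verify. Debug hooks of this kind are typically compiled out via a static inline no-op so call sites stay unconditional; a sketch of that pattern with a hypothetical hook and Kconfig symbol:

#include <linux/types.h>

#ifdef CONFIG_MY_REF_DEBUG               /* hypothetical option */
int debug_record_ref_mod(u64 bytenr, u64 num_bytes, int action);
#else
static inline int debug_record_ref_mod(u64 bytenr, u64 num_bytes,
                                       int action)
{
        return 0;                        /* feature compiled out */
}
#endif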
@@ -2344,7 +2338,7 @@ static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, | |||
2344 | 2338 | ||
2345 | static int run_delayed_extent_op(struct btrfs_trans_handle *trans, | 2339 | static int run_delayed_extent_op(struct btrfs_trans_handle *trans, |
2346 | struct btrfs_fs_info *fs_info, | 2340 | struct btrfs_fs_info *fs_info, |
2347 | struct btrfs_delayed_ref_node *node, | 2341 | struct btrfs_delayed_ref_head *head, |
2348 | struct btrfs_delayed_extent_op *extent_op) | 2342 | struct btrfs_delayed_extent_op *extent_op) |
2349 | { | 2343 | { |
2350 | struct btrfs_key key; | 2344 | struct btrfs_key key; |
@@ -2366,14 +2360,14 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
2366 | if (!path) | 2360 | if (!path) |
2367 | return -ENOMEM; | 2361 | return -ENOMEM; |
2368 | 2362 | ||
2369 | key.objectid = node->bytenr; | 2363 | key.objectid = head->bytenr; |
2370 | 2364 | ||
2371 | if (metadata) { | 2365 | if (metadata) { |
2372 | key.type = BTRFS_METADATA_ITEM_KEY; | 2366 | key.type = BTRFS_METADATA_ITEM_KEY; |
2373 | key.offset = extent_op->level; | 2367 | key.offset = extent_op->level; |
2374 | } else { | 2368 | } else { |
2375 | key.type = BTRFS_EXTENT_ITEM_KEY; | 2369 | key.type = BTRFS_EXTENT_ITEM_KEY; |
2376 | key.offset = node->num_bytes; | 2370 | key.offset = head->num_bytes; |
2377 | } | 2371 | } |
2378 | 2372 | ||
2379 | again: | 2373 | again: |
@@ -2390,17 +2384,17 @@ again: | |||
2390 | path->slots[0]--; | 2384 | path->slots[0]--; |
2391 | btrfs_item_key_to_cpu(path->nodes[0], &key, | 2385 | btrfs_item_key_to_cpu(path->nodes[0], &key, |
2392 | path->slots[0]); | 2386 | path->slots[0]); |
2393 | if (key.objectid == node->bytenr && | 2387 | if (key.objectid == head->bytenr && |
2394 | key.type == BTRFS_EXTENT_ITEM_KEY && | 2388 | key.type == BTRFS_EXTENT_ITEM_KEY && |
2395 | key.offset == node->num_bytes) | 2389 | key.offset == head->num_bytes) |
2396 | ret = 0; | 2390 | ret = 0; |
2397 | } | 2391 | } |
2398 | if (ret > 0) { | 2392 | if (ret > 0) { |
2399 | btrfs_release_path(path); | 2393 | btrfs_release_path(path); |
2400 | metadata = 0; | 2394 | metadata = 0; |
2401 | 2395 | ||
2402 | key.objectid = node->bytenr; | 2396 | key.objectid = head->bytenr; |
2403 | key.offset = node->num_bytes; | 2397 | key.offset = head->num_bytes; |
2404 | key.type = BTRFS_EXTENT_ITEM_KEY; | 2398 | key.type = BTRFS_EXTENT_ITEM_KEY; |
2405 | goto again; | 2399 | goto again; |
2406 | } | 2400 | } |
@@ -2507,44 +2501,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
2507 | return 0; | 2501 | return 0; |
2508 | } | 2502 | } |
2509 | 2503 | ||
2510 | if (btrfs_delayed_ref_is_head(node)) { | ||
2511 | struct btrfs_delayed_ref_head *head; | ||
2512 | /* | ||
2513 | * we've hit the end of the chain and we were supposed | ||
2514 | * to insert this extent into the tree. But, it got | ||
2515 | * deleted before we ever needed to insert it, so all | ||
2516 | * we have to do is clean up the accounting | ||
2517 | */ | ||
2518 | BUG_ON(extent_op); | ||
2519 | head = btrfs_delayed_node_to_head(node); | ||
2520 | trace_run_delayed_ref_head(fs_info, node, head, node->action); | ||
2521 | |||
2522 | if (head->total_ref_mod < 0) { | ||
2523 | struct btrfs_block_group_cache *cache; | ||
2524 | |||
2525 | cache = btrfs_lookup_block_group(fs_info, node->bytenr); | ||
2526 | ASSERT(cache); | ||
2527 | percpu_counter_add(&cache->space_info->total_bytes_pinned, | ||
2528 | -node->num_bytes); | ||
2529 | btrfs_put_block_group(cache); | ||
2530 | } | ||
2531 | |||
2532 | if (insert_reserved) { | ||
2533 | btrfs_pin_extent(fs_info, node->bytenr, | ||
2534 | node->num_bytes, 1); | ||
2535 | if (head->is_data) { | ||
2536 | ret = btrfs_del_csums(trans, fs_info, | ||
2537 | node->bytenr, | ||
2538 | node->num_bytes); | ||
2539 | } | ||
2540 | } | ||
2541 | |||
2542 | /* Also free its reserved qgroup space */ | ||
2543 | btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root, | ||
2544 | head->qgroup_reserved); | ||
2545 | return ret; | ||
2546 | } | ||
2547 | |||
2548 | if (node->type == BTRFS_TREE_BLOCK_REF_KEY || | 2504 | if (node->type == BTRFS_TREE_BLOCK_REF_KEY || |
2549 | node->type == BTRFS_SHARED_BLOCK_REF_KEY) | 2505 | node->type == BTRFS_SHARED_BLOCK_REF_KEY) |
2550 | ret = run_delayed_tree_ref(trans, fs_info, node, extent_op, | 2506 | ret = run_delayed_tree_ref(trans, fs_info, node, extent_op, |
@@ -2563,7 +2519,7 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) | |||
2563 | { | 2519 | { |
2564 | struct btrfs_delayed_ref_node *ref; | 2520 | struct btrfs_delayed_ref_node *ref; |
2565 | 2521 | ||
2566 | if (list_empty(&head->ref_list)) | 2522 | if (RB_EMPTY_ROOT(&head->ref_tree)) |
2567 | return NULL; | 2523 | return NULL; |
2568 | 2524 | ||
2569 | /* | 2525 | /* |
@@ -2576,12 +2532,114 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) | |||
2576 | return list_first_entry(&head->ref_add_list, | 2532 | return list_first_entry(&head->ref_add_list, |
2577 | struct btrfs_delayed_ref_node, add_list); | 2533 | struct btrfs_delayed_ref_node, add_list); |
2578 | 2534 | ||
2579 | ref = list_first_entry(&head->ref_list, struct btrfs_delayed_ref_node, | 2535 | ref = rb_entry(rb_first(&head->ref_tree), |
2580 | list); | 2536 | struct btrfs_delayed_ref_node, ref_node); |
2581 | ASSERT(list_empty(&ref->add_list)); | 2537 | ASSERT(list_empty(&ref->add_list)); |
2582 | return ref; | 2538 | return ref; |
2583 | } | 2539 | } |
2584 | 2540 | ||
2541 | static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, | ||
2542 | struct btrfs_delayed_ref_head *head) | ||
2543 | { | ||
2544 | spin_lock(&delayed_refs->lock); | ||
2545 | head->processing = 0; | ||
2546 | delayed_refs->num_heads_ready++; | ||
2547 | spin_unlock(&delayed_refs->lock); | ||
2548 | btrfs_delayed_ref_unlock(head); | ||
2549 | } | ||
2550 | |||
2551 | static int cleanup_extent_op(struct btrfs_trans_handle *trans, | ||
2552 | struct btrfs_fs_info *fs_info, | ||
2553 | struct btrfs_delayed_ref_head *head) | ||
2554 | { | ||
2555 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | ||
2556 | int ret; | ||
2557 | |||
2558 | if (!extent_op) | ||
2559 | return 0; | ||
2560 | head->extent_op = NULL; | ||
2561 | if (head->must_insert_reserved) { | ||
2562 | btrfs_free_delayed_extent_op(extent_op); | ||
2563 | return 0; | ||
2564 | } | ||
2565 | spin_unlock(&head->lock); | ||
2566 | ret = run_delayed_extent_op(trans, fs_info, head, extent_op); | ||
2567 | btrfs_free_delayed_extent_op(extent_op); | ||
2568 | return ret ? ret : 1; | ||
2569 | } | ||
2570 | |||
2571 | static int cleanup_ref_head(struct btrfs_trans_handle *trans, | ||
2572 | struct btrfs_fs_info *fs_info, | ||
2573 | struct btrfs_delayed_ref_head *head) | ||
2574 | { | ||
2575 | struct btrfs_delayed_ref_root *delayed_refs; | ||
2576 | int ret; | ||
2577 | |||
2578 | delayed_refs = &trans->transaction->delayed_refs; | ||
2579 | |||
2580 | ret = cleanup_extent_op(trans, fs_info, head); | ||
2581 | if (ret < 0) { | ||
2582 | unselect_delayed_ref_head(delayed_refs, head); | ||
2583 | btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret); | ||
2584 | return ret; | ||
2585 | } else if (ret) { | ||
2586 | return ret; | ||
2587 | } | ||
2588 | |||
2589 | /* | ||
2590 | * Need to drop our head ref lock and re-acquire the delayed ref lock | ||
2591 | * and then re-check to make sure nobody got added. | ||
2592 | */ | ||
2593 | spin_unlock(&head->lock); | ||
2594 | spin_lock(&delayed_refs->lock); | ||
2595 | spin_lock(&head->lock); | ||
2596 | if (!RB_EMPTY_ROOT(&head->ref_tree) || head->extent_op) { | ||
2597 | spin_unlock(&head->lock); | ||
2598 | spin_unlock(&delayed_refs->lock); | ||
2599 | return 1; | ||
2600 | } | ||
2601 | delayed_refs->num_heads--; | ||
2602 | rb_erase(&head->href_node, &delayed_refs->href_root); | ||
2603 | RB_CLEAR_NODE(&head->href_node); | ||
2604 | spin_unlock(&delayed_refs->lock); | ||
2605 | spin_unlock(&head->lock); | ||
2606 | atomic_dec(&delayed_refs->num_entries); | ||
2607 | |||
2608 | trace_run_delayed_ref_head(fs_info, head, 0); | ||
2609 | |||
2610 | if (head->total_ref_mod < 0) { | ||
2611 | struct btrfs_block_group_cache *cache; | ||
2612 | |||
2613 | cache = btrfs_lookup_block_group(fs_info, head->bytenr); | ||
2614 | ASSERT(cache); | ||
2615 | percpu_counter_add(&cache->space_info->total_bytes_pinned, | ||
2616 | -head->num_bytes); | ||
2617 | btrfs_put_block_group(cache); | ||
2618 | |||
2619 | if (head->is_data) { | ||
2620 | spin_lock(&delayed_refs->lock); | ||
2621 | delayed_refs->pending_csums -= head->num_bytes; | ||
2622 | spin_unlock(&delayed_refs->lock); | ||
2623 | } | ||
2624 | } | ||
2625 | |||
2626 | if (head->must_insert_reserved) { | ||
2627 | btrfs_pin_extent(fs_info, head->bytenr, | ||
2628 | head->num_bytes, 1); | ||
2629 | if (head->is_data) { | ||
2630 | ret = btrfs_del_csums(trans, fs_info, head->bytenr, | ||
2631 | head->num_bytes); | ||
2632 | } | ||
2633 | } | ||
2634 | |||
2635 | /* Also free its reserved qgroup space */ | ||
2636 | btrfs_qgroup_free_delayed_ref(fs_info, head->qgroup_ref_root, | ||
2637 | head->qgroup_reserved); | ||
2638 | btrfs_delayed_ref_unlock(head); | ||
2639 | btrfs_put_delayed_ref_head(head); | ||
2640 | return 0; | ||
2641 | } | ||
2642 | |||
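cleanup_ref_head() returns 0 when the head is fully processed, 1 when it raced or had to drop locks so the caller must loop, and a negative errno on failure. Its unlock/relock sequence is the usual way to pair an inner lock with an outer one; generic form of that pattern:

#include <linux/spinlock.h>
#include <linux/types.h>

/* Drop the inner lock, take the outer, retake the inner, then
 * re-validate whatever could have changed while both were dropped.
 */
static bool lock_both_and_recheck(spinlock_t *outer, spinlock_t *inner,
                                  bool (*still_ok)(void *), void *obj)
{
        spin_unlock(inner);
        spin_lock(outer);
        spin_lock(inner);
        if (!still_ok(obj)) {
                spin_unlock(inner);
                spin_unlock(outer);
                return false;           /* raced; caller retries later */
        }
        return true;                    /* both locks held, state valid */
}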
2585 | /* | 2643 | /* |
2586 | * Returns 0 on success or if called with an already aborted transaction. | 2644 | * Returns 0 on success or if called with an already aborted transaction. |
2587 | * Returns -ENOMEM or -EIO on failure and will abort the transaction. | 2645 | * Returns -ENOMEM or -EIO on failure and will abort the transaction. |
@@ -2655,11 +2713,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2655 | if (ref && ref->seq && | 2713 | if (ref && ref->seq && |
2656 | btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { | 2714 | btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) { |
2657 | spin_unlock(&locked_ref->lock); | 2715 | spin_unlock(&locked_ref->lock); |
2658 | spin_lock(&delayed_refs->lock); | 2716 | unselect_delayed_ref_head(delayed_refs, locked_ref); |
2659 | locked_ref->processing = 0; | ||
2660 | delayed_refs->num_heads_ready++; | ||
2661 | spin_unlock(&delayed_refs->lock); | ||
2662 | btrfs_delayed_ref_unlock(locked_ref); | ||
2663 | locked_ref = NULL; | 2717 | locked_ref = NULL; |
2664 | cond_resched(); | 2718 | cond_resched(); |
2665 | count++; | 2719 | count++; |
@@ -2667,102 +2721,55 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2667 | } | 2721 | } |
2668 | 2722 | ||
2669 | /* | 2723 | /* |
2670 | * record the must insert reserved flag before we | 2724 | * We're done processing refs in this ref_head; clean everything
2671 | * drop the spin lock. | 2725 | * up and move on to the next ref_head. |
2672 | */ | 2726 | */ |
2673 | must_insert_reserved = locked_ref->must_insert_reserved; | ||
2674 | locked_ref->must_insert_reserved = 0; | ||
2675 | |||
2676 | extent_op = locked_ref->extent_op; | ||
2677 | locked_ref->extent_op = NULL; | ||
2678 | |||
2679 | if (!ref) { | 2727 | if (!ref) { |
2680 | 2728 | ret = cleanup_ref_head(trans, fs_info, locked_ref); | |
2681 | 2729 | if (ret > 0) {
2682 | /* All delayed refs have been processed. Go ahead | 2730 | /* We dropped our lock; we need to loop. */
2683 | * and send the head node to run_one_delayed_ref, | 2731 | ret = 0; |
2684 | * so that any accounting fixes can happen | ||
2685 | */ | ||
2686 | ref = &locked_ref->node; | ||
2687 | |||
2688 | if (extent_op && must_insert_reserved) { | ||
2689 | btrfs_free_delayed_extent_op(extent_op); | ||
2690 | extent_op = NULL; | ||
2691 | } | ||
2692 | |||
2693 | if (extent_op) { | ||
2694 | spin_unlock(&locked_ref->lock); | ||
2695 | ret = run_delayed_extent_op(trans, fs_info, | ||
2696 | ref, extent_op); | ||
2697 | btrfs_free_delayed_extent_op(extent_op); | ||
2698 | |||
2699 | if (ret) { | ||
2700 | /* | ||
2701 | * Need to reset must_insert_reserved if | ||
2702 | * there was an error so the abort stuff | ||
2703 | * can cleanup the reserved space | ||
2704 | * properly. | ||
2705 | */ | ||
2706 | if (must_insert_reserved) | ||
2707 | locked_ref->must_insert_reserved = 1; | ||
2708 | spin_lock(&delayed_refs->lock); | ||
2709 | locked_ref->processing = 0; | ||
2710 | delayed_refs->num_heads_ready++; | ||
2711 | spin_unlock(&delayed_refs->lock); | ||
2712 | btrfs_debug(fs_info, | ||
2713 | "run_delayed_extent_op returned %d", | ||
2714 | ret); | ||
2715 | btrfs_delayed_ref_unlock(locked_ref); | ||
2716 | return ret; | ||
2717 | } | ||
2718 | continue; | 2732 | continue; |
2733 | } else if (ret) { | ||
2734 | return ret; | ||
2719 | } | 2735 | } |
2736 | locked_ref = NULL; | ||
2737 | count++; | ||
2738 | continue; | ||
2739 | } | ||
2720 | 2740 | ||
2721 | /* | 2741 | actual_count++; |
2722 | * Need to drop our head ref lock and re-acquire the | 2742 | ref->in_tree = 0; |
2723 | * delayed ref lock and then re-check to make sure | 2743 | rb_erase(&ref->ref_node, &locked_ref->ref_tree); |
2724 | * nobody got added. | 2744 | RB_CLEAR_NODE(&ref->ref_node); |
2725 | */ | 2745 | if (!list_empty(&ref->add_list)) |
2726 | spin_unlock(&locked_ref->lock); | 2746 | list_del(&ref->add_list); |
2727 | spin_lock(&delayed_refs->lock); | 2747 | /* |
2727 | spin_lock(&delayed_refs->lock); | 2748 | * When we play the delayed ref, also correct the ref_mod on
2728 | spin_lock(&locked_ref->lock); | 2749 | * the head.
2730 | locked_ref->extent_op) { | 2750 | */ |
2731 | spin_unlock(&locked_ref->lock); | 2751 | switch (ref->action) { |
2732 | spin_unlock(&delayed_refs->lock); | 2752 | case BTRFS_ADD_DELAYED_REF: |
2733 | continue; | 2753 | case BTRFS_ADD_DELAYED_EXTENT: |
2734 | } | 2754 | locked_ref->ref_mod -= ref->ref_mod; |
2735 | ref->in_tree = 0; | 2755 | break; |
2736 | delayed_refs->num_heads--; | 2756 | case BTRFS_DROP_DELAYED_REF: |
2737 | rb_erase(&locked_ref->href_node, | 2757 | locked_ref->ref_mod += ref->ref_mod; |
2738 | &delayed_refs->href_root); | 2758 | break; |
2739 | spin_unlock(&delayed_refs->lock); | 2759 | default: |
2740 | } else { | 2760 | WARN_ON(1); |
2741 | actual_count++; | ||
2742 | ref->in_tree = 0; | ||
2743 | list_del(&ref->list); | ||
2744 | if (!list_empty(&ref->add_list)) | ||
2745 | list_del(&ref->add_list); | ||
2746 | } | 2761 | } |
2747 | atomic_dec(&delayed_refs->num_entries); | 2762 | atomic_dec(&delayed_refs->num_entries); |
2748 | 2763 | ||
2749 | if (!btrfs_delayed_ref_is_head(ref)) { | 2764 | /* |
2750 | /* | 2765 | * Record the must-insert_reserved flag before we drop the spin |
2751 | * when we play the delayed ref, also correct the | 2766 | * lock. |
2752 | * ref_mod on head | 2767 | */ |
2753 | */ | 2768 | must_insert_reserved = locked_ref->must_insert_reserved; |
2754 | switch (ref->action) { | 2769 | locked_ref->must_insert_reserved = 0; |
2755 | case BTRFS_ADD_DELAYED_REF: | 2770 | |
2756 | case BTRFS_ADD_DELAYED_EXTENT: | 2771 | extent_op = locked_ref->extent_op; |
2757 | locked_ref->node.ref_mod -= ref->ref_mod; | 2772 | locked_ref->extent_op = NULL; |
2758 | break; | ||
2759 | case BTRFS_DROP_DELAYED_REF: | ||
2760 | locked_ref->node.ref_mod += ref->ref_mod; | ||
2761 | break; | ||
2762 | default: | ||
2763 | WARN_ON(1); | ||
2764 | } | ||
2765 | } | ||
2766 | spin_unlock(&locked_ref->lock); | 2773 | spin_unlock(&locked_ref->lock); |
2767 | 2774 | ||
2768 | ret = run_one_delayed_ref(trans, fs_info, ref, extent_op, | 2775 | ret = run_one_delayed_ref(trans, fs_info, ref, extent_op, |
@@ -2770,33 +2777,13 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
2770 | 2777 | ||
2771 | btrfs_free_delayed_extent_op(extent_op); | 2778 | btrfs_free_delayed_extent_op(extent_op); |
2772 | if (ret) { | 2779 | if (ret) { |
2773 | spin_lock(&delayed_refs->lock); | 2780 | unselect_delayed_ref_head(delayed_refs, locked_ref); |
2774 | locked_ref->processing = 0; | ||
2775 | delayed_refs->num_heads_ready++; | ||
2776 | spin_unlock(&delayed_refs->lock); | ||
2777 | btrfs_delayed_ref_unlock(locked_ref); | ||
2778 | btrfs_put_delayed_ref(ref); | 2781 | btrfs_put_delayed_ref(ref); |
2779 | btrfs_debug(fs_info, "run_one_delayed_ref returned %d", | 2782 | btrfs_debug(fs_info, "run_one_delayed_ref returned %d", |
2780 | ret); | 2783 | ret); |
2781 | return ret; | 2784 | return ret; |
2782 | } | 2785 | } |
2783 | 2786 | ||
2784 | /* | ||
2785 | * If this node is a head, that means all the refs in this head | ||
2786 | * have been dealt with, and we will pick the next head to deal | ||
2787 | * with, so we must unlock the head and drop it from the cluster | ||
2788 | * list before we release it. | ||
2789 | */ | ||
2790 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2791 | if (locked_ref->is_data && | ||
2792 | locked_ref->total_ref_mod < 0) { | ||
2793 | spin_lock(&delayed_refs->lock); | ||
2794 | delayed_refs->pending_csums -= ref->num_bytes; | ||
2795 | spin_unlock(&delayed_refs->lock); | ||
2796 | } | ||
2797 | btrfs_delayed_ref_unlock(locked_ref); | ||
2798 | locked_ref = NULL; | ||
2799 | } | ||
2800 | btrfs_put_delayed_ref(ref); | 2787 | btrfs_put_delayed_ref(ref); |
2801 | count++; | 2788 | count++; |
2802 | cond_resched(); | 2789 | cond_resched(); |
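The switch above keeps locked_ref->ref_mod equal to the net modification still queued: playing an add removes its positive contribution, playing a drop removes its negative one. For example, a head queued with two adds and one drop starts at +1 and reaches 0 once all three are played. A tiny model of the bookkeeping:

enum { ADD_REF, DROP_REF };             /* hypothetical names */

static void play_ref(int *head_ref_mod, int action, int ref_mod)
{
        if (action == ADD_REF)
                *head_ref_mod -= ref_mod;  /* add is now applied */
        else
                *head_ref_mod += ref_mod;  /* drop is now applied */
}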
@@ -3100,33 +3087,16 @@ again: | |||
3100 | spin_unlock(&delayed_refs->lock); | 3087 | spin_unlock(&delayed_refs->lock); |
3101 | goto out; | 3088 | goto out; |
3102 | } | 3089 | } |
3090 | head = rb_entry(node, struct btrfs_delayed_ref_head, | ||
3091 | href_node); | ||
3092 | refcount_inc(&head->refs); | ||
3093 | spin_unlock(&delayed_refs->lock); | ||
3103 | 3094 | ||
3104 | while (node) { | 3095 | /* Mutex was contended, block until it's released and retry. */ |
3105 | head = rb_entry(node, struct btrfs_delayed_ref_head, | 3096 | mutex_lock(&head->mutex); |
3106 | href_node); | 3097 | mutex_unlock(&head->mutex); |
3107 | if (btrfs_delayed_ref_is_head(&head->node)) { | ||
3108 | struct btrfs_delayed_ref_node *ref; | ||
3109 | |||
3110 | ref = &head->node; | ||
3111 | refcount_inc(&ref->refs); | ||
3112 | |||
3113 | spin_unlock(&delayed_refs->lock); | ||
3114 | /* | ||
3115 | * Mutex was contended, block until it's | ||
3116 | * released and try again | ||
3117 | */ | ||
3118 | mutex_lock(&head->mutex); | ||
3119 | mutex_unlock(&head->mutex); | ||
3120 | 3098 | ||
3121 | btrfs_put_delayed_ref(ref); | 3099 | btrfs_put_delayed_ref_head(head); |
3122 | cond_resched(); | ||
3123 | goto again; | ||
3124 | } else { | ||
3125 | WARN_ON(1); | ||
3126 | } | ||
3127 | node = rb_next(node); | ||
3128 | } | ||
3129 | spin_unlock(&delayed_refs->lock); | ||
3130 | cond_resched(); | 3100 | cond_resched(); |
3131 | goto again; | 3101 | goto again; |
3132 | } | 3102 | } |
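Taking and immediately releasing a contended mutex, as above, is a cheap way to sleep until the current holder finishes; the refcount bumped beforehand keeps the head alive across the window where all locks are dropped. Generic form of the wait:

#include <linux/mutex.h>

static void wait_for_holder(struct mutex *m)
{
        mutex_lock(m);          /* sleeps until the holder releases it */
        mutex_unlock(m);        /* we only wanted to wait, not to hold */
}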
@@ -3169,6 +3139,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root, | |||
3169 | struct btrfs_delayed_data_ref *data_ref; | 3139 | struct btrfs_delayed_data_ref *data_ref; |
3170 | struct btrfs_delayed_ref_root *delayed_refs; | 3140 | struct btrfs_delayed_ref_root *delayed_refs; |
3171 | struct btrfs_transaction *cur_trans; | 3141 | struct btrfs_transaction *cur_trans; |
3142 | struct rb_node *node; | ||
3172 | int ret = 0; | 3143 | int ret = 0; |
3173 | 3144 | ||
3174 | cur_trans = root->fs_info->running_transaction; | 3145 | cur_trans = root->fs_info->running_transaction; |
@@ -3184,7 +3155,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root, | |||
3184 | } | 3155 | } |
3185 | 3156 | ||
3186 | if (!mutex_trylock(&head->mutex)) { | 3157 | if (!mutex_trylock(&head->mutex)) { |
3187 | refcount_inc(&head->node.refs); | 3158 | refcount_inc(&head->refs); |
3188 | spin_unlock(&delayed_refs->lock); | 3159 | spin_unlock(&delayed_refs->lock); |
3189 | 3160 | ||
3190 | btrfs_release_path(path); | 3161 | btrfs_release_path(path); |
@@ -3195,13 +3166,18 @@ static noinline int check_delayed_ref(struct btrfs_root *root, | |||
3195 | */ | 3166 | */ |
3196 | mutex_lock(&head->mutex); | 3167 | mutex_lock(&head->mutex); |
3197 | mutex_unlock(&head->mutex); | 3168 | mutex_unlock(&head->mutex); |
3198 | btrfs_put_delayed_ref(&head->node); | 3169 | btrfs_put_delayed_ref_head(head); |
3199 | return -EAGAIN; | 3170 | return -EAGAIN; |
3200 | } | 3171 | } |
3201 | spin_unlock(&delayed_refs->lock); | 3172 | spin_unlock(&delayed_refs->lock); |
3202 | 3173 | ||
3203 | spin_lock(&head->lock); | 3174 | spin_lock(&head->lock); |
3204 | list_for_each_entry(ref, &head->ref_list, list) { | 3175 | /* |
3176 | * XXX: We should replace this with a proper search function in the | ||
3177 | * future. | ||
3178 | */ | ||
3179 | for (node = rb_first(&head->ref_tree); node; node = rb_next(node)) { | ||
3180 | ref = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); | ||
3205 | /* If it's a shared ref we know a cross reference exists */ | 3181 | /* If it's a shared ref we know a cross reference exists */ |
3206 | if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { | 3182 | if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) { |
3207 | ret = 1; | 3183 | ret = 1; |
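The XXX above flags the linear walk as temporary: since the per-head refs are now rb-tree ordered (by the comp_refs() ordering introduced in this series), a lookup could do the textbook rb-tree descent instead. A sketch with a caller-supplied comparator:

#include <linux/rbtree.h>

static struct rb_node *ref_tree_search(struct rb_root *root,
                                       const void *key,
                                       int (*cmp)(const void *key,
                                                  const struct rb_node *n))
{
        struct rb_node *n = root->rb_node;

        while (n) {
                int c = cmp(key, n);

                if (c < 0)
                        n = n->rb_left;
                else if (c > 0)
                        n = n->rb_right;
                else
                        return n;       /* exact match */
        }
        return NULL;
}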
@@ -3351,7 +3327,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3351 | int level; | 3327 | int level; |
3352 | int ret = 0; | 3328 | int ret = 0; |
3353 | int (*process_func)(struct btrfs_trans_handle *, | 3329 | int (*process_func)(struct btrfs_trans_handle *, |
3354 | struct btrfs_fs_info *, | 3330 | struct btrfs_root *, |
3355 | u64, u64, u64, u64, u64, u64); | 3331 | u64, u64, u64, u64, u64, u64); |
3356 | 3332 | ||
3357 | 3333 | ||
@@ -3391,7 +3367,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3391 | 3367 | ||
3392 | num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); | 3368 | num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); |
3393 | key.offset -= btrfs_file_extent_offset(buf, fi); | 3369 | key.offset -= btrfs_file_extent_offset(buf, fi); |
3394 | ret = process_func(trans, fs_info, bytenr, num_bytes, | 3370 | ret = process_func(trans, root, bytenr, num_bytes, |
3395 | parent, ref_root, key.objectid, | 3371 | parent, ref_root, key.objectid, |
3396 | key.offset); | 3372 | key.offset); |
3397 | if (ret) | 3373 | if (ret) |
@@ -3399,7 +3375,7 @@ static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, | |||
3399 | } else { | 3375 | } else { |
3400 | bytenr = btrfs_node_blockptr(buf, i); | 3376 | bytenr = btrfs_node_blockptr(buf, i); |
3401 | num_bytes = fs_info->nodesize; | 3377 | num_bytes = fs_info->nodesize; |
3402 | ret = process_func(trans, fs_info, bytenr, num_bytes, | 3378 | ret = process_func(trans, root, bytenr, num_bytes, |
3403 | parent, ref_root, level - 1, 0); | 3379 | parent, ref_root, level - 1, 0); |
3404 | if (ret) | 3380 | if (ret) |
3405 | goto fail; | 3381 | goto fail; |
@@ -4843,7 +4819,6 @@ static inline u64 calc_reclaim_items_nr(struct btrfs_fs_info *fs_info, | |||
4843 | static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, | 4819 | static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, |
4844 | u64 orig, bool wait_ordered) | 4820 | u64 orig, bool wait_ordered) |
4845 | { | 4821 | { |
4846 | struct btrfs_block_rsv *block_rsv; | ||
4847 | struct btrfs_space_info *space_info; | 4822 | struct btrfs_space_info *space_info; |
4848 | struct btrfs_trans_handle *trans; | 4823 | struct btrfs_trans_handle *trans; |
4849 | u64 delalloc_bytes; | 4824 | u64 delalloc_bytes; |
@@ -4859,8 +4834,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info, u64 to_reclaim, | |||
4859 | to_reclaim = items * EXTENT_SIZE_PER_ITEM; | 4834 | to_reclaim = items * EXTENT_SIZE_PER_ITEM; |
4860 | 4835 | ||
4861 | trans = (struct btrfs_trans_handle *)current->journal_info; | 4836 | trans = (struct btrfs_trans_handle *)current->journal_info; |
4862 | block_rsv = &fs_info->delalloc_block_rsv; | 4837 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
4863 | space_info = block_rsv->space_info; | ||
4864 | 4838 | ||
4865 | delalloc_bytes = percpu_counter_sum_positive( | 4839 | delalloc_bytes = percpu_counter_sum_positive( |
4866 | &fs_info->delalloc_bytes); | 4840 | &fs_info->delalloc_bytes); |
@@ -4919,6 +4893,13 @@ skip_async: | |||
4919 | } | 4893 | } |
4920 | } | 4894 | } |
4921 | 4895 | ||
4896 | struct reserve_ticket { | ||
4897 | u64 bytes; | ||
4898 | int error; | ||
4899 | struct list_head list; | ||
4900 | wait_queue_head_t wait; | ||
4901 | }; | ||
4902 | |||
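reserve_ticket moves up so may_commit_transaction() below can peek at the head of the ticket queues. The assumed consumption pattern (the enqueue and wake-up sides live elsewhere in this file): the reserver sleeps until a flusher zeroes ->bytes or sets ->error.

#include <linux/list.h>
#include <linux/types.h>
#include <linux/wait.h>

struct ticket_sketch {                  /* mirrors reserve_ticket */
        u64 bytes;
        int error;
        struct list_head list;
        wait_queue_head_t wait;
};

static int wait_ticket(struct ticket_sketch *t)
{
        wait_event(t->wait, t->bytes == 0 || t->error);
        return t->error;
}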
4922 | /** | 4903 | /** |
4923 | * maybe_commit_transaction - possibly commit the transaction if it's ok to | 4904 |
4924 | * @root - the root we're allocating for | 4905 | * @root - the root we're allocating for |
@@ -4930,18 +4911,29 @@ skip_async: | |||
4930 | * will return -ENOSPC. | 4911 | * will return -ENOSPC. |
4931 | */ | 4912 | */ |
4932 | static int may_commit_transaction(struct btrfs_fs_info *fs_info, | 4913 | static int may_commit_transaction(struct btrfs_fs_info *fs_info, |
4933 | struct btrfs_space_info *space_info, | 4914 | struct btrfs_space_info *space_info) |
4934 | u64 bytes, int force) | ||
4935 | { | 4915 | { |
4916 | struct reserve_ticket *ticket = NULL; | ||
4936 | struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; | 4917 | struct btrfs_block_rsv *delayed_rsv = &fs_info->delayed_block_rsv; |
4937 | struct btrfs_trans_handle *trans; | 4918 | struct btrfs_trans_handle *trans; |
4919 | u64 bytes; | ||
4938 | 4920 | ||
4939 | trans = (struct btrfs_trans_handle *)current->journal_info; | 4921 | trans = (struct btrfs_trans_handle *)current->journal_info; |
4940 | if (trans) | 4922 | if (trans) |
4941 | return -EAGAIN; | 4923 | return -EAGAIN; |
4942 | 4924 | ||
4943 | if (force) | 4925 | spin_lock(&space_info->lock); |
4944 | goto commit; | 4926 | if (!list_empty(&space_info->priority_tickets)) |
4927 | ticket = list_first_entry(&space_info->priority_tickets, | ||
4928 | struct reserve_ticket, list); | ||
4929 | else if (!list_empty(&space_info->tickets)) | ||
4930 | ticket = list_first_entry(&space_info->tickets, | ||
4931 | struct reserve_ticket, list); | ||
4932 | bytes = (ticket) ? ticket->bytes : 0; | ||
4933 | spin_unlock(&space_info->lock); | ||
4934 | |||
4935 | if (!bytes) | ||
4936 | return 0; | ||
4945 | 4937 | ||
4946 | /* See if there is enough pinned space to make this reservation */ | 4938 | /* See if there is enough pinned space to make this reservation */ |
4947 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4939 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
@@ -4956,8 +4948,12 @@ static int may_commit_transaction(struct btrfs_fs_info *fs_info, | |||
4956 | return -ENOSPC; | 4948 | return -ENOSPC; |
4957 | 4949 | ||
4958 | spin_lock(&delayed_rsv->lock); | 4950 | spin_lock(&delayed_rsv->lock); |
4951 | if (delayed_rsv->size > bytes) | ||
4952 | bytes = 0; | ||
4953 | else | ||
4954 | bytes -= delayed_rsv->size; | ||
4959 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4955 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4960 | bytes - delayed_rsv->size) < 0) { | 4956 | bytes) < 0) { |
4961 | spin_unlock(&delayed_rsv->lock); | 4957 | spin_unlock(&delayed_rsv->lock); |
4962 | return -ENOSPC; | 4958 | return -ENOSPC; |
4963 | } | 4959 | } |
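With the force flag gone, the commit decision is sized by the first queued ticket. The delayed rsv is returned by a commit anyway, so only the remainder of the ask must be covered by pinned bytes: a 1 MiB ticket against a 256 KiB delayed rsv needs at least 768 KiB pinned. A simplified model of that check:

/* Saturating subtraction mirrors the code above: if the delayed rsv
 * already covers the whole ask, any commit is worthwhile.
 */
static int commit_would_satisfy(unsigned long long pinned,
                                unsigned long long ticket_bytes,
                                unsigned long long delayed_rsv_size)
{
        if (delayed_rsv_size > ticket_bytes)
                ticket_bytes = 0;
        else
                ticket_bytes -= delayed_rsv_size;
        return pinned >= ticket_bytes;
}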
@@ -4971,13 +4967,6 @@ commit: | |||
4971 | return btrfs_commit_transaction(trans); | 4967 | return btrfs_commit_transaction(trans); |
4972 | } | 4968 | } |
4973 | 4969 | ||
4974 | struct reserve_ticket { | ||
4975 | u64 bytes; | ||
4976 | int error; | ||
4977 | struct list_head list; | ||
4978 | wait_queue_head_t wait; | ||
4979 | }; | ||
4980 | |||
4981 | /* | 4970 | /* |
4982 | * Try to flush some data based on policy set by @state. This is only advisory | 4971 | * Try to flush some data based on policy set by @state. This is only advisory |
4983 | * and may fail for various reasons. The caller is supposed to examine the | 4972 | * and may fail for various reasons. The caller is supposed to examine the |
@@ -5027,8 +5016,7 @@ static void flush_space(struct btrfs_fs_info *fs_info, | |||
5027 | ret = 0; | 5016 | ret = 0; |
5028 | break; | 5017 | break; |
5029 | case COMMIT_TRANS: | 5018 | case COMMIT_TRANS: |
5030 | ret = may_commit_transaction(fs_info, space_info, | 5019 | ret = may_commit_transaction(fs_info, space_info); |
5031 | num_bytes, 0); | ||
5032 | break; | 5020 | break; |
5033 | default: | 5021 | default: |
5034 | ret = -ENOSPC; | 5022 | ret = -ENOSPC; |
@@ -5582,11 +5570,12 @@ again: | |||
5582 | } | 5570 | } |
5583 | } | 5571 | } |
5584 | 5572 | ||
5585 | static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | 5573 | static u64 block_rsv_release_bytes(struct btrfs_fs_info *fs_info, |
5586 | struct btrfs_block_rsv *block_rsv, | 5574 | struct btrfs_block_rsv *block_rsv, |
5587 | struct btrfs_block_rsv *dest, u64 num_bytes) | 5575 | struct btrfs_block_rsv *dest, u64 num_bytes) |
5588 | { | 5576 | { |
5589 | struct btrfs_space_info *space_info = block_rsv->space_info; | 5577 | struct btrfs_space_info *space_info = block_rsv->space_info; |
5578 | u64 ret; | ||
5590 | 5579 | ||
5591 | spin_lock(&block_rsv->lock); | 5580 | spin_lock(&block_rsv->lock); |
5592 | if (num_bytes == (u64)-1) | 5581 | if (num_bytes == (u64)-1) |
@@ -5601,6 +5590,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | |||
5601 | } | 5590 | } |
5602 | spin_unlock(&block_rsv->lock); | 5591 | spin_unlock(&block_rsv->lock); |
5603 | 5592 | ||
5593 | ret = num_bytes; | ||
5604 | if (num_bytes > 0) { | 5594 | if (num_bytes > 0) { |
5605 | if (dest) { | 5595 | if (dest) { |
5606 | spin_lock(&dest->lock); | 5596 | spin_lock(&dest->lock); |
@@ -5620,6 +5610,7 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info, | |||
5620 | space_info_add_old_bytes(fs_info, space_info, | 5610 | space_info_add_old_bytes(fs_info, space_info, |
5621 | num_bytes); | 5611 | num_bytes); |
5622 | } | 5612 | } |
5613 | return ret; | ||
5623 | } | 5614 | } |
5624 | 5615 | ||
5625 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, | 5616 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src, |
@@ -5643,6 +5634,15 @@ void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv, unsigned short type) | |||
5643 | rsv->type = type; | 5634 | rsv->type = type; |
5644 | } | 5635 | } |
5645 | 5636 | ||
5637 | void btrfs_init_metadata_block_rsv(struct btrfs_fs_info *fs_info, | ||
5638 | struct btrfs_block_rsv *rsv, | ||
5639 | unsigned short type) | ||
5640 | { | ||
5641 | btrfs_init_block_rsv(rsv, type); | ||
5642 | rsv->space_info = __find_space_info(fs_info, | ||
5643 | BTRFS_BLOCK_GROUP_METADATA); | ||
5644 | } | ||
5645 | |||
5646 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, | 5646 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, |
5647 | unsigned short type) | 5647 | unsigned short type) |
5648 | { | 5648 | { |
@@ -5652,9 +5652,7 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_fs_info *fs_info, | |||
5652 | if (!block_rsv) | 5652 | if (!block_rsv) |
5653 | return NULL; | 5653 | return NULL; |
5654 | 5654 | ||
5655 | btrfs_init_block_rsv(block_rsv, type); | 5655 | btrfs_init_metadata_block_rsv(fs_info, block_rsv, type); |
5656 | block_rsv->space_info = __find_space_info(fs_info, | ||
5657 | BTRFS_BLOCK_GROUP_METADATA); | ||
5658 | return block_rsv; | 5656 | return block_rsv; |
5659 | } | 5657 | } |
5660 | 5658 | ||
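btrfs_init_metadata_block_rsv() exists so an embedded rsv, such as the new per-inode one, can be pointed at the metadata space_info without a separate allocation. Assumed shape of such a call site (the actual per-inode init lands elsewhere in this series):

/* Sketch only: initialize an rsv embedded in the btrfs inode. */
static void init_inode_rsv(struct btrfs_fs_info *fs_info,
                           struct btrfs_inode *inode)
{
        btrfs_init_metadata_block_rsv(fs_info, &inode->block_rsv,
                                      BTRFS_BLOCK_RSV_DELALLOC);
}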
@@ -5737,6 +5735,66 @@ int btrfs_block_rsv_refill(struct btrfs_root *root, | |||
5737 | return ret; | 5735 | return ret; |
5738 | } | 5736 | } |
5739 | 5737 | ||
5738 | /** | ||
5739 | * btrfs_inode_rsv_refill - refill the inode block rsv. | ||
5740 | * @inode - the inode we are refilling. | ||
5741 | * @flush - the flushing restriction. | ||
5742 | * | ||
5743 | * Essentially the same as btrfs_block_rsv_refill, except it uses the | ||
5744 | * block_rsv->size as the minimum size. We'll either refill the missing amount | ||
5745 | * or return if we already have enough space. This will also handle the resreve | ||
5746 | * tracepoint for the reserved amount. | ||
5747 | */ | ||
5748 | int btrfs_inode_rsv_refill(struct btrfs_inode *inode, | ||
5749 | enum btrfs_reserve_flush_enum flush) | ||
5750 | { | ||
5751 | struct btrfs_root *root = inode->root; | ||
5752 | struct btrfs_block_rsv *block_rsv = &inode->block_rsv; | ||
5753 | u64 num_bytes = 0; | ||
5754 | int ret = -ENOSPC; | ||
5755 | |||
5756 | spin_lock(&block_rsv->lock); | ||
5757 | if (block_rsv->reserved < block_rsv->size) | ||
5758 | num_bytes = block_rsv->size - block_rsv->reserved; | ||
5759 | spin_unlock(&block_rsv->lock); | ||
5760 | |||
5761 | if (num_bytes == 0) | ||
5762 | return 0; | ||
5763 | |||
5764 | ret = reserve_metadata_bytes(root, block_rsv, num_bytes, flush); | ||
5765 | if (!ret) { | ||
5766 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
5767 | trace_btrfs_space_reservation(root->fs_info, "delalloc", | ||
5768 | btrfs_ino(inode), num_bytes, 1); | ||
5769 | } | ||
5770 | return ret; | ||
5771 | } | ||
5772 | |||
5773 | /** | ||
5774 | * btrfs_inode_rsv_release - release any excessive reservation. | ||
5775 | * @inode - the inode we need to release from. | ||
5776 | * | ||
5777 | * This is the same as btrfs_block_rsv_release, except that it handles the | ||
5778 | * tracepoint for the reservation. | ||
5779 | */ | ||
5780 | void btrfs_inode_rsv_release(struct btrfs_inode *inode) | ||
5781 | { | ||
5782 | struct btrfs_fs_info *fs_info = inode->root->fs_info; | ||
5783 | struct btrfs_block_rsv *global_rsv = &fs_info->global_block_rsv; | ||
5784 | struct btrfs_block_rsv *block_rsv = &inode->block_rsv; | ||
5785 | u64 released = 0; | ||
5786 | |||
5787 | /* | ||
5788 | * Since we statically set the block_rsv->size we just want to say we | ||
5789 | * are releasing 0 bytes, and then we'll just get the reservation over | ||
5790 | * the size free'd. | ||
5791 | */ | ||
5792 | released = block_rsv_release_bytes(fs_info, block_rsv, global_rsv, 0); | ||
5793 | if (released > 0) | ||
5794 | trace_btrfs_space_reservation(fs_info, "delalloc", | ||
5795 | btrfs_ino(inode), released, 0); | ||
5796 | } | ||
5797 | |||
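btrfs_inode_rsv_refill() and btrfs_inode_rsv_release() are the two halves of the new per-inode scheme: block_rsv->size is a computed target, refill tops 'reserved' up to it, and release (called with num_bytes == 0, per the comment above) sheds whatever sits above it. A self-contained sketch of that pairing, reusing the simplified struct from the earlier model:

    #include <stdint.h>
    #include <stdio.h>

    struct rsv { uint64_t size, reserved; };

    /* Top the reservation up to its target; in the kernel this is where
     * reserve_metadata_bytes() may flush space or fail with -ENOSPC. */
    static int rsv_refill(struct rsv *r)
    {
        if (r->reserved < r->size)
            r->reserved = r->size;  /* pretend the reservation succeeded */
        return 0;
    }

    /* Shed anything above the target, returning the freed amount. */
    static uint64_t rsv_release_excess(struct rsv *r)
    {
        uint64_t excess = r->reserved > r->size ? r->reserved - r->size : 0;

        r->reserved -= excess;
        return excess;
    }

    int main(void)
    {
        struct rsv r = { .size = 12288, .reserved = 0 };

        rsv_refill(&r);     /* reserve up to the computed size */
        r.size = 4096;      /* extents completed, target recomputed */
        printf("freed %llu\n", (unsigned long long)rsv_release_excess(&r));
        return 0;
    }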
5740 | void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, | 5798 | void btrfs_block_rsv_release(struct btrfs_fs_info *fs_info, |
5741 | struct btrfs_block_rsv *block_rsv, | 5799 | struct btrfs_block_rsv *block_rsv, |
5742 | u64 num_bytes) | 5800 | u64 num_bytes) |
@@ -5808,7 +5866,6 @@ static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
5808 | 5866 | ||
5809 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | 5867 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); |
5810 | fs_info->global_block_rsv.space_info = space_info; | 5868 | fs_info->global_block_rsv.space_info = space_info; |
5811 | fs_info->delalloc_block_rsv.space_info = space_info; | ||
5812 | fs_info->trans_block_rsv.space_info = space_info; | 5869 | fs_info->trans_block_rsv.space_info = space_info; |
5813 | fs_info->empty_block_rsv.space_info = space_info; | 5870 | fs_info->empty_block_rsv.space_info = space_info; |
5814 | fs_info->delayed_block_rsv.space_info = space_info; | 5871 | fs_info->delayed_block_rsv.space_info = space_info; |
@@ -5828,8 +5885,6 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
5828 | { | 5885 | { |
5829 | block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, | 5886 | block_rsv_release_bytes(fs_info, &fs_info->global_block_rsv, NULL, |
5830 | (u64)-1); | 5887 | (u64)-1); |
5831 | WARN_ON(fs_info->delalloc_block_rsv.size > 0); | ||
5832 | WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); | ||
5833 | WARN_ON(fs_info->trans_block_rsv.size > 0); | 5888 | WARN_ON(fs_info->trans_block_rsv.size > 0); |
5834 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | 5889 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); |
5835 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | 5890 | WARN_ON(fs_info->chunk_block_rsv.size > 0); |
@@ -5841,12 +5896,15 @@ static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | |||
5841 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 5896 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
5842 | struct btrfs_fs_info *fs_info) | 5897 | struct btrfs_fs_info *fs_info) |
5843 | { | 5898 | { |
5844 | if (!trans->block_rsv) | 5899 | if (!trans->block_rsv) { |
5900 | ASSERT(!trans->bytes_reserved); | ||
5845 | return; | 5901 | return; |
5902 | } | ||
5846 | 5903 | ||
5847 | if (!trans->bytes_reserved) | 5904 | if (!trans->bytes_reserved) |
5848 | return; | 5905 | return; |
5849 | 5906 | ||
5907 | ASSERT(trans->block_rsv == &fs_info->trans_block_rsv); | ||
5850 | trace_btrfs_space_reservation(fs_info, "transaction", | 5908 | trace_btrfs_space_reservation(fs_info, "transaction", |
5851 | trans->transid, trans->bytes_reserved, 0); | 5909 | trans->transid, trans->bytes_reserved, 0); |
5852 | btrfs_block_rsv_release(fs_info, trans->block_rsv, | 5910 | btrfs_block_rsv_release(fs_info, trans->block_rsv, |
@@ -5968,104 +6026,37 @@ void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info, | |||
5968 | btrfs_block_rsv_release(fs_info, rsv, (u64)-1); | 6026 | btrfs_block_rsv_release(fs_info, rsv, (u64)-1); |
5969 | } | 6027 | } |
5970 | 6028 | ||
5971 | /** | 6029 | static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, |
5972 | * drop_outstanding_extent - drop an outstanding extent | 6030 | struct btrfs_inode *inode) |
5973 | * @inode: the inode we're dropping the extent for | ||
5974 | * @num_bytes: the number of bytes we're releasing. | ||
5975 | * | ||
5976 | * This is called when we are freeing up an outstanding extent, either | ||
5977 | * after an error or after an extent is written. This will return the number of | ||
5978 | * reserved extents that need to be freed. This must be called with | ||
5979 | * BTRFS_I(inode)->lock held. | ||
5980 | */ | ||
5981 | static unsigned drop_outstanding_extent(struct btrfs_inode *inode, | ||
5982 | u64 num_bytes) | ||
5983 | { | ||
5984 | unsigned drop_inode_space = 0; | ||
5985 | unsigned dropped_extents = 0; | ||
5986 | unsigned num_extents; | ||
5987 | |||
5988 | num_extents = count_max_extents(num_bytes); | ||
5989 | ASSERT(num_extents); | ||
5990 | ASSERT(inode->outstanding_extents >= num_extents); | ||
5991 | inode->outstanding_extents -= num_extents; | ||
5992 | |||
5993 | if (inode->outstanding_extents == 0 && | ||
5994 | test_and_clear_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | ||
5995 | &inode->runtime_flags)) | ||
5996 | drop_inode_space = 1; | ||
5997 | |||
5998 | /* | ||
5999 | * If we have at least as many outstanding extents as reserved | ||
6000 | * extents, then we need to leave the reserved extents count alone. | ||
6001 | */ | ||
6002 | if (inode->outstanding_extents >= inode->reserved_extents) | ||
6003 | return drop_inode_space; | ||
6004 | |||
6005 | dropped_extents = inode->reserved_extents - inode->outstanding_extents; | ||
6006 | inode->reserved_extents -= dropped_extents; | ||
6007 | return dropped_extents + drop_inode_space; | ||
6008 | } | ||
6009 | |||
6010 | /** | ||
6011 | * calc_csum_metadata_size - return the amount of metadata space that must be | ||
6012 | * reserved/freed for the given bytes. | ||
6013 | * @inode: the inode we're manipulating | ||
6014 | * @num_bytes: the number of bytes in question | ||
6015 | * @reserve: 1 if we are reserving space, 0 if we are freeing space | ||
6016 | * | ||
6017 | * This adjusts the number of csum_bytes in the inode and then returns the | ||
6018 | * correct amount of metadata that must either be reserved or freed. We | ||
6019 | * calculate how many checksums we can fit into one leaf and then divide the | ||
6020 | * number of bytes that will need to be checksummed by this value to figure out | ||
6021 | * how many checksums will be required. If we are adding bytes then the number | ||
6022 | * may go up and we will return the number of additional bytes that must be | ||
6023 | * reserved. If it is going down we will return the number of bytes that must | ||
6024 | * be freed. | ||
6025 | * | ||
6026 | * This must be called with BTRFS_I(inode)->lock held. | ||
6027 | */ | ||
6028 | static u64 calc_csum_metadata_size(struct btrfs_inode *inode, u64 num_bytes, | ||
6029 | int reserve) | ||
6030 | { | 6031 | { |
6031 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); | 6032 | struct btrfs_block_rsv *block_rsv = &inode->block_rsv; |
6032 | u64 old_csums, num_csums; | 6033 | u64 reserve_size = 0; |
6033 | 6034 | u64 csum_leaves; | |
6034 | if (inode->flags & BTRFS_INODE_NODATASUM && inode->csum_bytes == 0) | 6035 | unsigned outstanding_extents; |
6035 | return 0; | ||
6036 | |||
6037 | old_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); | ||
6038 | if (reserve) | ||
6039 | inode->csum_bytes += num_bytes; | ||
6040 | else | ||
6041 | inode->csum_bytes -= num_bytes; | ||
6042 | num_csums = btrfs_csum_bytes_to_leaves(fs_info, inode->csum_bytes); | ||
6043 | |||
6044 | /* No change, no need to reserve more */ | ||
6045 | if (old_csums == num_csums) | ||
6046 | return 0; | ||
6047 | 6036 | ||
6048 | if (reserve) | 6037 | lockdep_assert_held(&inode->lock); |
6049 | return btrfs_calc_trans_metadata_size(fs_info, | 6038 | outstanding_extents = inode->outstanding_extents; |
6050 | num_csums - old_csums); | 6039 | if (outstanding_extents) |
6040 | reserve_size = btrfs_calc_trans_metadata_size(fs_info, | ||
6041 | outstanding_extents + 1); | ||
6042 | csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, | ||
6043 | inode->csum_bytes); | ||
6044 | reserve_size += btrfs_calc_trans_metadata_size(fs_info, | ||
6045 | csum_leaves); | ||
6051 | 6046 | ||
6052 | return btrfs_calc_trans_metadata_size(fs_info, old_csums - num_csums); | 6047 | spin_lock(&block_rsv->lock); |
6048 | block_rsv->size = reserve_size; | ||
6049 | spin_unlock(&block_rsv->lock); | ||
6053 | } | 6050 | } |
6054 | 6051 | ||
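btrfs_calculate_inode_block_rsv_size() replaces the old incremental csum/extent arithmetic with a single recomputed target: worst-case metadata for every outstanding extent plus one inode update, plus the leaves needed to hold the checksums for all bytes still awaiting csum insertion. A standalone model of that formula; the constants and the csums-per-leaf figure are illustrative, and calc_trans_metadata_size() mirrors the era's rough nodesize * BTRFS_MAX_LEVEL * 2 * items estimate:

    #include <stdint.h>
    #include <stdio.h>

    #define BTRFS_MAX_LEVEL 8

    /* Worst case: each item may dirty a full tree path in two trees. */
    static uint64_t calc_trans_metadata_size(uint64_t nodesize, unsigned items)
    {
        return nodesize * BTRFS_MAX_LEVEL * 2 * items;
    }

    /* Roughly how many leaves the pending checksums will occupy. */
    static uint64_t csum_bytes_to_leaves(uint64_t csum_bytes,
                                         uint32_t sectorsize,
                                         uint64_t csums_per_leaf)
    {
        uint64_t num_csums = csum_bytes / sectorsize;

        return (num_csums + csums_per_leaf - 1) / csums_per_leaf;
    }

    int main(void)
    {
        uint64_t nodesize = 16384, reserve_size = 0;
        uint32_t sectorsize = 4096;
        uint64_t csums_per_leaf = 500;  /* illustrative, depends on csum size */
        unsigned outstanding_extents = 3;
        uint64_t csum_bytes = 1024 * 1024;

        if (outstanding_extents)
            reserve_size = calc_trans_metadata_size(nodesize,
                                                    outstanding_extents + 1);
        reserve_size += calc_trans_metadata_size(nodesize,
                            csum_bytes_to_leaves(csum_bytes, sectorsize,
                                                 csums_per_leaf));
        printf("block_rsv->size = %llu\n", (unsigned long long)reserve_size);
        return 0;
    }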
6055 | int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) | 6052 | int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) |
6056 | { | 6053 | { |
6057 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); | 6054 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); |
6058 | struct btrfs_root *root = inode->root; | 6055 | struct btrfs_root *root = inode->root; |
6059 | struct btrfs_block_rsv *block_rsv = &fs_info->delalloc_block_rsv; | ||
6060 | u64 to_reserve = 0; | ||
6061 | u64 csum_bytes; | ||
6062 | unsigned nr_extents; | 6056 | unsigned nr_extents; |
6063 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; | 6057 | enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; |
6064 | int ret = 0; | 6058 | int ret = 0; |
6065 | bool delalloc_lock = true; | 6059 | bool delalloc_lock = true; |
6066 | u64 to_free = 0; | ||
6067 | unsigned dropped; | ||
6068 | bool release_extra = false; | ||
6069 | 6060 | ||
6070 | /* If we are a free space inode we need to not flush since we will be in | 6061 | /* If we are a free space inode we need to not flush since we will be in |
6071 | * the middle of a transaction commit. We also don't need the delalloc | 6062 | * the middle of a transaction commit. We also don't need the delalloc |
@@ -6091,19 +6082,12 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) | |||
6091 | 6082 | ||
6092 | num_bytes = ALIGN(num_bytes, fs_info->sectorsize); | 6083 | num_bytes = ALIGN(num_bytes, fs_info->sectorsize); |
6093 | 6084 | ||
6085 | /* Add our new extents and calculate the new rsv size. */ | ||
6094 | spin_lock(&inode->lock); | 6086 | spin_lock(&inode->lock); |
6095 | nr_extents = count_max_extents(num_bytes); | 6087 | nr_extents = count_max_extents(num_bytes); |
6096 | inode->outstanding_extents += nr_extents; | 6088 | btrfs_mod_outstanding_extents(inode, nr_extents); |
6097 | 6089 | inode->csum_bytes += num_bytes; | |
6098 | nr_extents = 0; | 6090 | btrfs_calculate_inode_block_rsv_size(fs_info, inode); |
6099 | if (inode->outstanding_extents > inode->reserved_extents) | ||
6100 | nr_extents += inode->outstanding_extents - | ||
6101 | inode->reserved_extents; | ||
6102 | |||
6103 | /* We always want to reserve a slot for updating the inode. */ | ||
6104 | to_reserve = btrfs_calc_trans_metadata_size(fs_info, nr_extents + 1); | ||
6105 | to_reserve += calc_csum_metadata_size(inode, num_bytes, 1); | ||
6106 | csum_bytes = inode->csum_bytes; | ||
6107 | spin_unlock(&inode->lock); | 6091 | spin_unlock(&inode->lock); |
6108 | 6092 | ||
6109 | if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { | 6093 | if (test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) { |
@@ -6113,92 +6097,26 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes) | |||
6113 | goto out_fail; | 6097 | goto out_fail; |
6114 | } | 6098 | } |
6115 | 6099 | ||
6116 | ret = btrfs_block_rsv_add(root, block_rsv, to_reserve, flush); | 6100 | ret = btrfs_inode_rsv_refill(inode, flush); |
6117 | if (unlikely(ret)) { | 6101 | if (unlikely(ret)) { |
6118 | btrfs_qgroup_free_meta(root, | 6102 | btrfs_qgroup_free_meta(root, |
6119 | nr_extents * fs_info->nodesize); | 6103 | nr_extents * fs_info->nodesize); |
6120 | goto out_fail; | 6104 | goto out_fail; |
6121 | } | 6105 | } |
6122 | 6106 | ||
6123 | spin_lock(&inode->lock); | ||
6124 | if (test_and_set_bit(BTRFS_INODE_DELALLOC_META_RESERVED, | ||
6125 | &inode->runtime_flags)) { | ||
6126 | to_reserve -= btrfs_calc_trans_metadata_size(fs_info, 1); | ||
6127 | release_extra = true; | ||
6128 | } | ||
6129 | inode->reserved_extents += nr_extents; | ||
6130 | spin_unlock(&inode->lock); | ||
6131 | |||
6132 | if (delalloc_lock) | 6107 | if (delalloc_lock) |
6133 | mutex_unlock(&inode->delalloc_mutex); | 6108 | mutex_unlock(&inode->delalloc_mutex); |
6134 | |||
6135 | if (to_reserve) | ||
6136 | trace_btrfs_space_reservation(fs_info, "delalloc", | ||
6137 | btrfs_ino(inode), to_reserve, 1); | ||
6138 | if (release_extra) | ||
6139 | btrfs_block_rsv_release(fs_info, block_rsv, | ||
6140 | btrfs_calc_trans_metadata_size(fs_info, 1)); | ||
6141 | return 0; | 6109 | return 0; |
6142 | 6110 | ||
6143 | out_fail: | 6111 | out_fail: |
6144 | spin_lock(&inode->lock); | 6112 | spin_lock(&inode->lock); |
6145 | dropped = drop_outstanding_extent(inode, num_bytes); | 6113 | nr_extents = count_max_extents(num_bytes); |
6146 | /* | 6114 | btrfs_mod_outstanding_extents(inode, -nr_extents); |
6147 | * If the inodes csum_bytes is the same as the original | 6115 | inode->csum_bytes -= num_bytes; |
6148 | * csum_bytes then we know we haven't raced with any free()ers | 6116 | btrfs_calculate_inode_block_rsv_size(fs_info, inode); |
6149 | * so we can just reduce our inodes csum bytes and carry on. | ||
6150 | */ | ||
6151 | if (inode->csum_bytes == csum_bytes) { | ||
6152 | calc_csum_metadata_size(inode, num_bytes, 0); | ||
6153 | } else { | ||
6154 | u64 orig_csum_bytes = inode->csum_bytes; | ||
6155 | u64 bytes; | ||
6156 | |||
6157 | /* | ||
6158 | * This is tricky, but first we need to figure out how much we | ||
6159 | * freed from any free-ers that occurred during this | ||
6160 | * reservation, so we reset ->csum_bytes to the csum_bytes | ||
6161 | * before we dropped our lock, and then call the free for the | ||
6162 | * number of bytes that were freed while we were trying our | ||
6163 | * reservation. | ||
6164 | */ | ||
6165 | bytes = csum_bytes - inode->csum_bytes; | ||
6166 | inode->csum_bytes = csum_bytes; | ||
6167 | to_free = calc_csum_metadata_size(inode, bytes, 0); | ||
6168 | |||
6169 | |||
6170 | /* | ||
6171 | * Now we need to see how much we would have freed had we not | ||
6172 | * been making this reservation and our ->csum_bytes were not | ||
6173 | * artificially inflated. | ||
6174 | */ | ||
6175 | inode->csum_bytes = csum_bytes - num_bytes; | ||
6176 | bytes = csum_bytes - orig_csum_bytes; | ||
6177 | bytes = calc_csum_metadata_size(inode, bytes, 0); | ||
6178 | |||
6179 | /* | ||
6180 | * Now reset ->csum_bytes to what it should be. If bytes is | ||
6181 | * more than to_free then we would have freed more space had we | ||
6182 | * not had an artificially high ->csum_bytes, so we need to free | ||
6183 | * the remainder. If bytes is the same or less then we don't | ||
6184 | * need to do anything, the other free-ers did the correct | ||
6185 | * thing. | ||
6186 | */ | ||
6187 | inode->csum_bytes = orig_csum_bytes - num_bytes; | ||
6188 | if (bytes > to_free) | ||
6189 | to_free = bytes - to_free; | ||
6190 | else | ||
6191 | to_free = 0; | ||
6192 | } | ||
6193 | spin_unlock(&inode->lock); | 6117 | spin_unlock(&inode->lock); |
6194 | if (dropped) | ||
6195 | to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); | ||
6196 | 6118 | ||
6197 | if (to_free) { | 6119 | btrfs_inode_rsv_release(inode); |
6198 | btrfs_block_rsv_release(fs_info, block_rsv, to_free); | ||
6199 | trace_btrfs_space_reservation(fs_info, "delalloc", | ||
6200 | btrfs_ino(inode), to_free, 0); | ||
6201 | } | ||
6202 | if (delalloc_lock) | 6120 | if (delalloc_lock) |
6203 | mutex_unlock(&inode->delalloc_mutex); | 6121 | mutex_unlock(&inode->delalloc_mutex); |
6204 | return ret; | 6122 | return ret; |
@@ -6206,36 +6124,55 @@ out_fail: | |||
6206 | 6124 | ||
6207 | /** | 6125 | /** |
6208 | * btrfs_delalloc_release_metadata - release a metadata reservation for an inode | 6126 | * btrfs_delalloc_release_metadata - release a metadata reservation for an inode |
6209 | * @inode: the inode to release the reservation for | 6127 | * @inode: the inode to release the reservation for. |
6210 | * @num_bytes: the number of bytes we're releasing | 6128 | * @num_bytes: the number of bytes we are releasing. |
6211 | * | 6129 | * |
6212 | * This will release the metadata reservation for an inode. This can be called | 6130 | * This will release the metadata reservation for an inode. This can be called |
6213 | * once we complete IO for a given set of bytes to release their metadata | 6131 | * once we complete IO for a given set of bytes to release their metadata |
6214 | * reservations. | 6132 | * reservations, or on error for the same reason. |
6215 | */ | 6133 | */ |
6216 | void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) | 6134 | void btrfs_delalloc_release_metadata(struct btrfs_inode *inode, u64 num_bytes) |
6217 | { | 6135 | { |
6218 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); | 6136 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); |
6219 | u64 to_free = 0; | ||
6220 | unsigned dropped; | ||
6221 | 6137 | ||
6222 | num_bytes = ALIGN(num_bytes, fs_info->sectorsize); | 6138 | num_bytes = ALIGN(num_bytes, fs_info->sectorsize); |
6223 | spin_lock(&inode->lock); | 6139 | spin_lock(&inode->lock); |
6224 | dropped = drop_outstanding_extent(inode, num_bytes); | 6140 | inode->csum_bytes -= num_bytes; |
6225 | 6141 | btrfs_calculate_inode_block_rsv_size(fs_info, inode); | |
6226 | if (num_bytes) | ||
6227 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
6228 | spin_unlock(&inode->lock); | 6142 | spin_unlock(&inode->lock); |
6229 | if (dropped > 0) | ||
6230 | to_free += btrfs_calc_trans_metadata_size(fs_info, dropped); | ||
6231 | 6143 | ||
6232 | if (btrfs_is_testing(fs_info)) | 6144 | if (btrfs_is_testing(fs_info)) |
6233 | return; | 6145 | return; |
6234 | 6146 | ||
6235 | trace_btrfs_space_reservation(fs_info, "delalloc", btrfs_ino(inode), | 6147 | btrfs_inode_rsv_release(inode); |
6236 | to_free, 0); | 6148 | } |
6149 | |||
6150 | /** | ||
6151 | * btrfs_delalloc_release_extents - release our outstanding_extents | ||
6152 | * @inode: the inode to balance the reservation for. | ||
6153 | * @num_bytes: the number of bytes we originally reserved | ||
6154 | * | ||
6155 | * When we reserve space we increase outstanding_extents for the extents we may | ||
6156 | * add. Once we've set the range as delalloc or created our ordered extents we | ||
6157 | * have outstanding_extents to track the real usage, so we use this to free our | ||
6158 | * temporarily tracked outstanding_extents. This _must_ be used in conjunction | ||
6159 | * with btrfs_delalloc_reserve_metadata. | ||
6160 | */ | ||
6161 | void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes) | ||
6162 | { | ||
6163 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); | ||
6164 | unsigned num_extents; | ||
6165 | |||
6166 | spin_lock(&inode->lock); | ||
6167 | num_extents = count_max_extents(num_bytes); | ||
6168 | btrfs_mod_outstanding_extents(inode, -num_extents); | ||
6169 | btrfs_calculate_inode_block_rsv_size(fs_info, inode); | ||
6170 | spin_unlock(&inode->lock); | ||
6171 | |||
6172 | if (btrfs_is_testing(fs_info)) | ||
6173 | return; | ||
6237 | 6174 | ||
6238 | btrfs_block_rsv_release(fs_info, &fs_info->delalloc_block_rsv, to_free); | 6175 | btrfs_inode_rsv_release(inode); |
6239 | } | 6176 | } |
6240 | 6177 | ||
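With btrfs_delalloc_release_extents() in place, the reservation lifecycle splits in two: the csum-byte half is dropped by btrfs_delalloc_release_metadata() when IO completes (or errors out), while the temporary outstanding_extents bump from reserve time is dropped by release_extents once the range is covered by delalloc bits or an ordered extent. A hedged sketch of a caller honoring the new pairing (do_the_write() is a hypothetical stand-in for the real work):

    ret = btrfs_delalloc_reserve_metadata(inode, len);
    if (ret)
        return ret;

    ret = do_the_write(inode, pos, len);    /* hypothetical */

    /* Always drop the temporary extent count taken at reserve time ... */
    btrfs_delalloc_release_extents(inode, len);
    /* ... and on failure also give back the metadata reservation. */
    if (ret)
        btrfs_delalloc_release_metadata(inode, len);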
6241 | /** | 6178 | /** |
@@ -6282,10 +6219,7 @@ int btrfs_delalloc_reserve_space(struct inode *inode, | |||
6282 | * @inode: inode we're releasing space for | 6219 | * @inode: inode we're releasing space for |
6283 | * @start: start position of the space already reserved | 6220 | * @start: start position of the space already reserved |
6284 | * @len: the len of the space already reserved | 6221 | * @len: the len of the space already reserved |
6285 | * | 6222 | * @release_bytes: the len of the space we consumed or didn't use |
6286 | * This must be matched with a call to btrfs_delalloc_reserve_space. This is | ||
6287 | * called in the case that we don't need the metadata AND data reservations | ||
6288 | * anymore. So if there is an error or we insert an inline extent. | ||
6289 | * | 6223 | * |
6290 | * This function will release the metadata space that was not used and will | 6224 | * This function will release the metadata space that was not used and will |
6291 | * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes | 6225 | * decrement ->delalloc_bytes and remove it from the fs_info delalloc_inodes |
@@ -6293,7 +6227,8 @@ int btrfs_delalloc_reserve_space(struct inode *inode, | |||
6293 | * Also it will handle the qgroup reserved space. | 6227 | * Also it will handle the qgroup reserved space. |
6294 | */ | 6228 | */ |
6295 | void btrfs_delalloc_release_space(struct inode *inode, | 6229 | void btrfs_delalloc_release_space(struct inode *inode, |
6296 | struct extent_changeset *reserved, u64 start, u64 len) | 6230 | struct extent_changeset *reserved, |
6231 | u64 start, u64 len) | ||
6297 | { | 6232 | { |
6298 | btrfs_delalloc_release_metadata(BTRFS_I(inode), len); | 6233 | btrfs_delalloc_release_metadata(BTRFS_I(inode), len); |
6299 | btrfs_free_reserved_data_space(inode, reserved, start, len); | 6234 | btrfs_free_reserved_data_space(inode, reserved, start, len); |
@@ -6958,7 +6893,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
6958 | BUG_ON(!is_data && refs_to_drop != 1); | 6893 | BUG_ON(!is_data && refs_to_drop != 1); |
6959 | 6894 | ||
6960 | if (is_data) | 6895 | if (is_data) |
6961 | skinny_metadata = 0; | 6896 | skinny_metadata = false; |
6962 | 6897 | ||
6963 | ret = lookup_extent_backref(trans, info, path, &iref, | 6898 | ret = lookup_extent_backref(trans, info, path, &iref, |
6964 | bytenr, num_bytes, parent, | 6899 | bytenr, num_bytes, parent, |
@@ -7213,7 +7148,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
7213 | goto out_delayed_unlock; | 7148 | goto out_delayed_unlock; |
7214 | 7149 | ||
7215 | spin_lock(&head->lock); | 7150 | spin_lock(&head->lock); |
7216 | if (!list_empty(&head->ref_list)) | 7151 | if (!RB_EMPTY_ROOT(&head->ref_tree)) |
7217 | goto out; | 7152 | goto out; |
7218 | 7153 | ||
7219 | if (head->extent_op) { | 7154 | if (head->extent_op) { |
@@ -7234,9 +7169,8 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
7234 | * at this point we have a head with no other entries. Go | 7169 | * at this point we have a head with no other entries. Go |
7235 | * ahead and process it. | 7170 | * ahead and process it. |
7236 | */ | 7171 | */ |
7237 | head->node.in_tree = 0; | ||
7238 | rb_erase(&head->href_node, &delayed_refs->href_root); | 7172 | rb_erase(&head->href_node, &delayed_refs->href_root); |
7239 | 7173 | RB_CLEAR_NODE(&head->href_node); | |
7240 | atomic_dec(&delayed_refs->num_entries); | 7174 | atomic_dec(&delayed_refs->num_entries); |
7241 | 7175 | ||
7242 | /* | 7176 | /* |
@@ -7255,7 +7189,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
7255 | ret = 1; | 7189 | ret = 1; |
7256 | 7190 | ||
7257 | mutex_unlock(&head->mutex); | 7191 | mutex_unlock(&head->mutex); |
7258 | btrfs_put_delayed_ref(&head->node); | 7192 | btrfs_put_delayed_ref_head(head); |
7259 | return ret; | 7193 | return ret; |
7260 | out: | 7194 | out: |
7261 | spin_unlock(&head->lock); | 7195 | spin_unlock(&head->lock); |
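check_ref_cleanup() shows the "track refs in a rb_tree" conversion in miniature: emptiness is now RB_EMPTY_ROOT() on head->ref_tree rather than list_empty(), and with the embedded node's in_tree flag gone, detaching the head pairs rb_erase() with RB_CLEAR_NODE() so later membership checks via RB_EMPTY_NODE() stay reliable. The detach idiom, isolated as an illustrative fragment:

    if (!RB_EMPTY_ROOT(&head->ref_tree))   /* was: !list_empty(&head->ref_list) */
        goto out;
    rb_erase(&head->href_node, &delayed_refs->href_root);
    RB_CLEAR_NODE(&head->href_node);       /* RB_EMPTY_NODE() is now true */
    atomic_dec(&delayed_refs->num_entries);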
@@ -7277,6 +7211,10 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | |||
7277 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | 7211 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { |
7278 | int old_ref_mod, new_ref_mod; | 7212 | int old_ref_mod, new_ref_mod; |
7279 | 7213 | ||
7214 | btrfs_ref_tree_mod(root, buf->start, buf->len, parent, | ||
7215 | root->root_key.objectid, | ||
7216 | btrfs_header_level(buf), 0, | ||
7217 | BTRFS_DROP_DELAYED_REF); | ||
7280 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, | 7218 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, |
7281 | buf->len, parent, | 7219 | buf->len, parent, |
7282 | root->root_key.objectid, | 7220 | root->root_key.objectid, |
@@ -7329,16 +7267,21 @@ out: | |||
7329 | 7267 | ||
7330 | /* Can return -ENOMEM */ | 7268 | /* Can return -ENOMEM */ |
7331 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 7269 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
7332 | struct btrfs_fs_info *fs_info, | 7270 | struct btrfs_root *root, |
7333 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, | 7271 | u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, |
7334 | u64 owner, u64 offset) | 7272 | u64 owner, u64 offset) |
7335 | { | 7273 | { |
7274 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
7336 | int old_ref_mod, new_ref_mod; | 7275 | int old_ref_mod, new_ref_mod; |
7337 | int ret; | 7276 | int ret; |
7338 | 7277 | ||
7339 | if (btrfs_is_testing(fs_info)) | 7278 | if (btrfs_is_testing(fs_info)) |
7340 | return 0; | 7279 | return 0; |
7341 | 7280 | ||
7281 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) | ||
7282 | btrfs_ref_tree_mod(root, bytenr, num_bytes, parent, | ||
7283 | root_objectid, owner, offset, | ||
7284 | BTRFS_DROP_DELAYED_REF); | ||
7342 | 7285 | ||
7343 | /* | 7286 | /* |
7344 | * tree log blocks never actually go into the extent allocation | 7287 | * tree log blocks never actually go into the extent allocation |
@@ -8306,17 +8249,22 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
8306 | } | 8249 | } |
8307 | 8250 | ||
8308 | int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | 8251 | int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, |
8309 | u64 root_objectid, u64 owner, | 8252 | struct btrfs_root *root, u64 owner, |
8310 | u64 offset, u64 ram_bytes, | 8253 | u64 offset, u64 ram_bytes, |
8311 | struct btrfs_key *ins) | 8254 | struct btrfs_key *ins) |
8312 | { | 8255 | { |
8313 | struct btrfs_fs_info *fs_info = trans->fs_info; | 8256 | struct btrfs_fs_info *fs_info = root->fs_info; |
8314 | int ret; | 8257 | int ret; |
8315 | 8258 | ||
8316 | BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); | 8259 | BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); |
8260 | |||
8261 | btrfs_ref_tree_mod(root, ins->objectid, ins->offset, 0, | ||
8262 | root->root_key.objectid, owner, offset, | ||
8263 | BTRFS_ADD_DELAYED_EXTENT); | ||
8317 | 8264 | ||
8318 | ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, | 8265 | ret = btrfs_add_delayed_data_ref(fs_info, trans, ins->objectid, |
8319 | ins->offset, 0, root_objectid, owner, | 8266 | ins->offset, 0, |
8267 | root->root_key.objectid, owner, | ||
8320 | offset, ram_bytes, | 8268 | offset, ram_bytes, |
8321 | BTRFS_ADD_DELAYED_EXTENT, NULL, NULL); | 8269 | BTRFS_ADD_DELAYED_EXTENT, NULL, NULL); |
8322 | return ret; | 8270 | return ret; |
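The btrfs_ref_tree_mod() calls sprinkled through these allocation and free paths feed the new ref-verify debugging machinery, which mirrors every delayed-ref mutation into its own tree and complains when extent reference accounting goes inconsistent. When the feature is compiled out, the hook presumably reduces to an inline stub along these lines, so the hot paths pay nothing (a sketch with an assumed signature, not the verbatim header):

    #ifndef CONFIG_BTRFS_FS_REF_VERIFY
    static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr,
                                         u64 num_bytes, u64 parent,
                                         u64 root_objectid, u64 owner,
                                         u64 offset, int action)
    {
        return 0;   /* no tracking configured in */
    }
    #endif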
@@ -8538,6 +8486,9 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, | |||
8538 | extent_op->is_data = false; | 8486 | extent_op->is_data = false; |
8539 | extent_op->level = level; | 8487 | extent_op->level = level; |
8540 | 8488 | ||
8489 | btrfs_ref_tree_mod(root, ins.objectid, ins.offset, parent, | ||
8490 | root_objectid, level, 0, | ||
8491 | BTRFS_ADD_DELAYED_EXTENT); | ||
8541 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid, | 8492 | ret = btrfs_add_delayed_tree_ref(fs_info, trans, ins.objectid, |
8542 | ins.offset, parent, | 8493 | ins.offset, parent, |
8543 | root_objectid, level, | 8494 | root_objectid, level, |
@@ -8894,7 +8845,7 @@ skip: | |||
8894 | ret); | 8845 | ret); |
8895 | } | 8846 | } |
8896 | } | 8847 | } |
8897 | ret = btrfs_free_extent(trans, fs_info, bytenr, blocksize, | 8848 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, |
8898 | parent, root->root_key.objectid, | 8849 | parent, root->root_key.objectid, |
8899 | level - 1, 0); | 8850 | level - 1, 0); |
8900 | if (ret) | 8851 | if (ret) |
@@ -9311,7 +9262,7 @@ out: | |||
9311 | * don't have it in the radix (like when we recover after a power fail | 9262 | * don't have it in the radix (like when we recover after a power fail |
9312 | * or unmount) so we don't leak memory. | 9263 | * or unmount) so we don't leak memory. |
9313 | */ | 9264 | */ |
9314 | if (!for_reloc && root_dropped == false) | 9265 | if (!for_reloc && !root_dropped) |
9315 | btrfs_add_dead_root(root); | 9266 | btrfs_add_dead_root(root); |
9316 | if (err && err != -EAGAIN) | 9267 | if (err && err != -EAGAIN) |
9317 | btrfs_handle_fs_error(fs_info, err, NULL); | 9268 | btrfs_handle_fs_error(fs_info, err, NULL); |
@@ -9968,9 +9919,9 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
9968 | return 0; | 9919 | return 0; |
9969 | } | 9920 | } |
9970 | 9921 | ||
9971 | static void __link_block_group(struct btrfs_space_info *space_info, | 9922 | static void link_block_group(struct btrfs_block_group_cache *cache) |
9972 | struct btrfs_block_group_cache *cache) | ||
9973 | { | 9923 | { |
9924 | struct btrfs_space_info *space_info = cache->space_info; | ||
9974 | int index = get_block_group_index(cache); | 9925 | int index = get_block_group_index(cache); |
9975 | bool first = false; | 9926 | bool first = false; |
9976 | 9927 | ||
@@ -10178,7 +10129,7 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info) | |||
10178 | 10129 | ||
10179 | cache->space_info = space_info; | 10130 | cache->space_info = space_info; |
10180 | 10131 | ||
10181 | __link_block_group(space_info, cache); | 10132 | link_block_group(cache); |
10182 | 10133 | ||
10183 | set_avail_alloc_bits(info, cache->flags); | 10134 | set_avail_alloc_bits(info, cache->flags); |
10184 | if (btrfs_chunk_readonly(info, cache->key.objectid)) { | 10135 | if (btrfs_chunk_readonly(info, cache->key.objectid)) { |
@@ -10337,7 +10288,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
10337 | cache->bytes_super, &cache->space_info); | 10288 | cache->bytes_super, &cache->space_info); |
10338 | update_global_block_rsv(fs_info); | 10289 | update_global_block_rsv(fs_info); |
10339 | 10290 | ||
10340 | __link_block_group(cache->space_info, cache); | 10291 | link_block_group(cache); |
10341 | 10292 | ||
10342 | list_add_tail(&cache->bg_list, &trans->new_bgs); | 10293 | list_add_tail(&cache->bg_list, &trans->new_bgs); |
10343 | 10294 | ||
@@ -10387,6 +10338,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
10387 | * remove it. | 10338 | * remove it. |
10388 | */ | 10339 | */ |
10389 | free_excluded_extents(fs_info, block_group); | 10340 | free_excluded_extents(fs_info, block_group); |
10341 | btrfs_free_ref_tree_range(fs_info, block_group->key.objectid, | ||
10342 | block_group->key.offset); | ||
10390 | 10343 | ||
10391 | memcpy(&key, &block_group->key, sizeof(key)); | 10344 | memcpy(&key, &block_group->key, sizeof(key)); |
10392 | index = get_block_group_index(block_group); | 10345 | index = get_block_group_index(block_group); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7fa50e12f18e..adbbc017191c 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -110,7 +110,6 @@ struct extent_page_data { | |||
110 | struct bio *bio; | 110 | struct bio *bio; |
111 | struct extent_io_tree *tree; | 111 | struct extent_io_tree *tree; |
112 | get_extent_t *get_extent; | 112 | get_extent_t *get_extent; |
113 | unsigned long bio_flags; | ||
114 | 113 | ||
115 | /* tells writepage not to lock the state bits for this range | 114 | /* tells writepage not to lock the state bits for this range |
116 | * it still does the unlocking | 115 | * it still does the unlocking |
@@ -2762,8 +2761,8 @@ static int merge_bio(struct extent_io_tree *tree, struct page *page, | |||
2762 | */ | 2761 | */ |
2763 | static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, | 2762 | static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, |
2764 | struct writeback_control *wbc, | 2763 | struct writeback_control *wbc, |
2765 | struct page *page, sector_t sector, | 2764 | struct page *page, u64 offset, |
2766 | size_t size, unsigned long offset, | 2765 | size_t size, unsigned long pg_offset, |
2767 | struct block_device *bdev, | 2766 | struct block_device *bdev, |
2768 | struct bio **bio_ret, | 2767 | struct bio **bio_ret, |
2769 | bio_end_io_t end_io_func, | 2768 | bio_end_io_t end_io_func, |
@@ -2777,6 +2776,7 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, | |||
2777 | int contig = 0; | 2776 | int contig = 0; |
2778 | int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; | 2777 | int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED; |
2779 | size_t page_size = min_t(size_t, size, PAGE_SIZE); | 2778 | size_t page_size = min_t(size_t, size, PAGE_SIZE); |
2779 | sector_t sector = offset >> 9; | ||
2780 | 2780 | ||
2781 | if (bio_ret && *bio_ret) { | 2781 | if (bio_ret && *bio_ret) { |
2782 | bio = *bio_ret; | 2782 | bio = *bio_ret; |
@@ -2787,8 +2787,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, | |||
2787 | 2787 | ||
2788 | if (prev_bio_flags != bio_flags || !contig || | 2788 | if (prev_bio_flags != bio_flags || !contig || |
2789 | force_bio_submit || | 2789 | force_bio_submit || |
2790 | merge_bio(tree, page, offset, page_size, bio, bio_flags) || | 2790 | merge_bio(tree, page, pg_offset, page_size, bio, bio_flags) || |
2791 | bio_add_page(bio, page, page_size, offset) < page_size) { | 2791 | bio_add_page(bio, page, page_size, pg_offset) < page_size) { |
2792 | ret = submit_one_bio(bio, mirror_num, prev_bio_flags); | 2792 | ret = submit_one_bio(bio, mirror_num, prev_bio_flags); |
2793 | if (ret < 0) { | 2793 | if (ret < 0) { |
2794 | *bio_ret = NULL; | 2794 | *bio_ret = NULL; |
@@ -2802,8 +2802,8 @@ static int submit_extent_page(unsigned int opf, struct extent_io_tree *tree, | |||
2802 | } | 2802 | } |
2803 | } | 2803 | } |
2804 | 2804 | ||
2805 | bio = btrfs_bio_alloc(bdev, (u64)sector << 9); | 2805 | bio = btrfs_bio_alloc(bdev, offset); |
2806 | bio_add_page(bio, page, page_size, offset); | 2806 | bio_add_page(bio, page, page_size, pg_offset); |
2807 | bio->bi_end_io = end_io_func; | 2807 | bio->bi_end_io = end_io_func; |
2808 | bio->bi_private = tree; | 2808 | bio->bi_private = tree; |
2809 | bio->bi_write_hint = page->mapping->host->i_write_hint; | 2809 | bio->bi_write_hint = page->mapping->host->i_write_hint; |
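submit_extent_page() now takes the byte offset on disk instead of a precomputed sector, deriving the 512-byte sector internally only where the bio layer needs it; that removes the >> 9 / << 9 round-trips at every call site. The conversion itself, as a trivial standalone check:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint64_t offset = 1 << 20;      /* byte offset, 512-byte aligned */
        uint64_t sector = offset >> 9;  /* what the bio layer wants */

        assert((sector << 9) == offset);    /* lossless for aligned IO */
        return 0;
    }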
@@ -2893,7 +2893,6 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2893 | u64 last_byte = i_size_read(inode); | 2893 | u64 last_byte = i_size_read(inode); |
2894 | u64 block_start; | 2894 | u64 block_start; |
2895 | u64 cur_end; | 2895 | u64 cur_end; |
2896 | sector_t sector; | ||
2897 | struct extent_map *em; | 2896 | struct extent_map *em; |
2898 | struct block_device *bdev; | 2897 | struct block_device *bdev; |
2899 | int ret = 0; | 2898 | int ret = 0; |
@@ -2929,6 +2928,7 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2929 | } | 2928 | } |
2930 | while (cur <= end) { | 2929 | while (cur <= end) { |
2931 | bool force_bio_submit = false; | 2930 | bool force_bio_submit = false; |
2931 | u64 offset; | ||
2932 | 2932 | ||
2933 | if (cur >= last_byte) { | 2933 | if (cur >= last_byte) { |
2934 | char *userpage; | 2934 | char *userpage; |
@@ -2968,9 +2968,9 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
2968 | iosize = ALIGN(iosize, blocksize); | 2968 | iosize = ALIGN(iosize, blocksize); |
2969 | if (this_bio_flag & EXTENT_BIO_COMPRESSED) { | 2969 | if (this_bio_flag & EXTENT_BIO_COMPRESSED) { |
2970 | disk_io_size = em->block_len; | 2970 | disk_io_size = em->block_len; |
2971 | sector = em->block_start >> 9; | 2971 | offset = em->block_start; |
2972 | } else { | 2972 | } else { |
2973 | sector = (em->block_start + extent_offset) >> 9; | 2973 | offset = em->block_start + extent_offset; |
2974 | disk_io_size = iosize; | 2974 | disk_io_size = iosize; |
2975 | } | 2975 | } |
2976 | bdev = em->bdev; | 2976 | bdev = em->bdev; |
@@ -3063,8 +3063,8 @@ static int __do_readpage(struct extent_io_tree *tree, | |||
3063 | } | 3063 | } |
3064 | 3064 | ||
3065 | ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL, | 3065 | ret = submit_extent_page(REQ_OP_READ | read_flags, tree, NULL, |
3066 | page, sector, disk_io_size, pg_offset, | 3066 | page, offset, disk_io_size, |
3067 | bdev, bio, | 3067 | pg_offset, bdev, bio, |
3068 | end_bio_extent_readpage, mirror_num, | 3068 | end_bio_extent_readpage, mirror_num, |
3069 | *bio_flags, | 3069 | *bio_flags, |
3070 | this_bio_flag, | 3070 | this_bio_flag, |
@@ -3325,7 +3325,6 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3325 | u64 extent_offset; | 3325 | u64 extent_offset; |
3326 | u64 block_start; | 3326 | u64 block_start; |
3327 | u64 iosize; | 3327 | u64 iosize; |
3328 | sector_t sector; | ||
3329 | struct extent_map *em; | 3328 | struct extent_map *em; |
3330 | struct block_device *bdev; | 3329 | struct block_device *bdev; |
3331 | size_t pg_offset = 0; | 3330 | size_t pg_offset = 0; |
@@ -3368,6 +3367,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3368 | 3367 | ||
3369 | while (cur <= end) { | 3368 | while (cur <= end) { |
3370 | u64 em_end; | 3369 | u64 em_end; |
3370 | u64 offset; | ||
3371 | 3371 | ||
3372 | if (cur >= i_size) { | 3372 | if (cur >= i_size) { |
3373 | if (tree->ops && tree->ops->writepage_end_io_hook) | 3373 | if (tree->ops && tree->ops->writepage_end_io_hook) |
@@ -3389,7 +3389,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3389 | BUG_ON(end < cur); | 3389 | BUG_ON(end < cur); |
3390 | iosize = min(em_end - cur, end - cur + 1); | 3390 | iosize = min(em_end - cur, end - cur + 1); |
3391 | iosize = ALIGN(iosize, blocksize); | 3391 | iosize = ALIGN(iosize, blocksize); |
3392 | sector = (em->block_start + extent_offset) >> 9; | 3392 | offset = em->block_start + extent_offset; |
3393 | bdev = em->bdev; | 3393 | bdev = em->bdev; |
3394 | block_start = em->block_start; | 3394 | block_start = em->block_start; |
3395 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); | 3395 | compressed = test_bit(EXTENT_FLAG_COMPRESSED, &em->flags); |
@@ -3432,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode, | |||
3432 | } | 3432 | } |
3433 | 3433 | ||
3434 | ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, | 3434 | ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, |
3435 | page, sector, iosize, pg_offset, | 3435 | page, offset, iosize, pg_offset, |
3436 | bdev, &epd->bio, | 3436 | bdev, &epd->bio, |
3437 | end_bio_extent_writepage, | 3437 | end_bio_extent_writepage, |
3438 | 0, 0, 0, false); | 3438 | 0, 0, 0, false); |
@@ -3716,7 +3716,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3716 | u64 offset = eb->start; | 3716 | u64 offset = eb->start; |
3717 | u32 nritems; | 3717 | u32 nritems; |
3718 | unsigned long i, num_pages; | 3718 | unsigned long i, num_pages; |
3719 | unsigned long bio_flags = 0; | ||
3720 | unsigned long start, end; | 3719 | unsigned long start, end; |
3721 | unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; | 3720 | unsigned int write_flags = wbc_to_write_flags(wbc) | REQ_META; |
3722 | int ret = 0; | 3721 | int ret = 0; |
@@ -3724,8 +3723,6 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3724 | clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); | 3723 | clear_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags); |
3725 | num_pages = num_extent_pages(eb->start, eb->len); | 3724 | num_pages = num_extent_pages(eb->start, eb->len); |
3726 | atomic_set(&eb->io_pages, num_pages); | 3725 | atomic_set(&eb->io_pages, num_pages); |
3727 | if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID) | ||
3728 | bio_flags = EXTENT_BIO_TREE_LOG; | ||
3729 | 3726 | ||
3730 | /* set btree blocks beyond nritems with 0 to avoid stale content. */ | 3727 | /* set btree blocks beyond nritems with 0 to avoid stale content. */ |
3731 | nritems = btrfs_header_nritems(eb); | 3728 | nritems = btrfs_header_nritems(eb); |
@@ -3749,11 +3746,10 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb, | |||
3749 | clear_page_dirty_for_io(p); | 3746 | clear_page_dirty_for_io(p); |
3750 | set_page_writeback(p); | 3747 | set_page_writeback(p); |
3751 | ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, | 3748 | ret = submit_extent_page(REQ_OP_WRITE | write_flags, tree, wbc, |
3752 | p, offset >> 9, PAGE_SIZE, 0, bdev, | 3749 | p, offset, PAGE_SIZE, 0, bdev, |
3753 | &epd->bio, | 3750 | &epd->bio, |
3754 | end_bio_extent_buffer_writepage, | 3751 | end_bio_extent_buffer_writepage, |
3755 | 0, epd->bio_flags, bio_flags, false); | 3752 | 0, 0, 0, false); |
3756 | epd->bio_flags = bio_flags; | ||
3757 | if (ret) { | 3753 | if (ret) { |
3758 | set_btree_ioerr(p); | 3754 | set_btree_ioerr(p); |
3759 | if (PageWriteback(p)) | 3755 | if (PageWriteback(p)) |
@@ -3790,7 +3786,6 @@ int btree_write_cache_pages(struct address_space *mapping, | |||
3790 | .tree = tree, | 3786 | .tree = tree, |
3791 | .extent_locked = 0, | 3787 | .extent_locked = 0, |
3792 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 3788 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
3793 | .bio_flags = 0, | ||
3794 | }; | 3789 | }; |
3795 | int ret = 0; | 3790 | int ret = 0; |
3796 | int done = 0; | 3791 | int done = 0; |
@@ -4063,7 +4058,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd) | |||
4063 | if (epd->bio) { | 4058 | if (epd->bio) { |
4064 | int ret; | 4059 | int ret; |
4065 | 4060 | ||
4066 | ret = submit_one_bio(epd->bio, 0, epd->bio_flags); | 4061 | ret = submit_one_bio(epd->bio, 0, 0); |
4067 | BUG_ON(ret < 0); /* -ENOMEM */ | 4062 | BUG_ON(ret < 0); /* -ENOMEM */ |
4068 | epd->bio = NULL; | 4063 | epd->bio = NULL; |
4069 | } | 4064 | } |
@@ -4086,7 +4081,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, | |||
4086 | .get_extent = get_extent, | 4081 | .get_extent = get_extent, |
4087 | .extent_locked = 0, | 4082 | .extent_locked = 0, |
4088 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 4083 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
4089 | .bio_flags = 0, | ||
4090 | }; | 4084 | }; |
4091 | 4085 | ||
4092 | ret = __extent_writepage(page, wbc, &epd); | 4086 | ret = __extent_writepage(page, wbc, &epd); |
@@ -4111,7 +4105,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, | |||
4111 | .get_extent = get_extent, | 4105 | .get_extent = get_extent, |
4112 | .extent_locked = 1, | 4106 | .extent_locked = 1, |
4113 | .sync_io = mode == WB_SYNC_ALL, | 4107 | .sync_io = mode == WB_SYNC_ALL, |
4114 | .bio_flags = 0, | ||
4115 | }; | 4108 | }; |
4116 | struct writeback_control wbc_writepages = { | 4109 | struct writeback_control wbc_writepages = { |
4117 | .sync_mode = mode, | 4110 | .sync_mode = mode, |
@@ -4151,7 +4144,6 @@ int extent_writepages(struct extent_io_tree *tree, | |||
4151 | .get_extent = get_extent, | 4144 | .get_extent = get_extent, |
4152 | .extent_locked = 0, | 4145 | .extent_locked = 0, |
4153 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, | 4146 | .sync_io = wbc->sync_mode == WB_SYNC_ALL, |
4154 | .bio_flags = 0, | ||
4155 | }; | 4147 | }; |
4156 | 4148 | ||
4157 | ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd, | 4149 | ret = extent_write_cache_pages(mapping, wbc, __extent_writepage, &epd, |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index e5535bbe6953..4a8861379d3e 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -34,7 +34,6 @@ | |||
34 | * type for this bio | 34 | * type for this bio |
35 | */ | 35 | */ |
36 | #define EXTENT_BIO_COMPRESSED 1 | 36 | #define EXTENT_BIO_COMPRESSED 1 |
37 | #define EXTENT_BIO_TREE_LOG 2 | ||
38 | #define EXTENT_BIO_FLAG_SHIFT 16 | 37 | #define EXTENT_BIO_FLAG_SHIFT 16 |
39 | 38 | ||
40 | /* these are bit numbers for test/set bit */ | 39 | /* these are bit numbers for test/set bit */ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index aafcc785f840..f80254d82f40 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -856,7 +856,7 @@ next_slot: | |||
856 | btrfs_mark_buffer_dirty(leaf); | 856 | btrfs_mark_buffer_dirty(leaf); |
857 | 857 | ||
858 | if (update_refs && disk_bytenr > 0) { | 858 | if (update_refs && disk_bytenr > 0) { |
859 | ret = btrfs_inc_extent_ref(trans, fs_info, | 859 | ret = btrfs_inc_extent_ref(trans, root, |
860 | disk_bytenr, num_bytes, 0, | 860 | disk_bytenr, num_bytes, 0, |
861 | root->root_key.objectid, | 861 | root->root_key.objectid, |
862 | new_key.objectid, | 862 | new_key.objectid, |
@@ -940,7 +940,7 @@ delete_extent_item: | |||
940 | extent_end = ALIGN(extent_end, | 940 | extent_end = ALIGN(extent_end, |
941 | fs_info->sectorsize); | 941 | fs_info->sectorsize); |
942 | } else if (update_refs && disk_bytenr > 0) { | 942 | } else if (update_refs && disk_bytenr > 0) { |
943 | ret = btrfs_free_extent(trans, fs_info, | 943 | ret = btrfs_free_extent(trans, root, |
944 | disk_bytenr, num_bytes, 0, | 944 | disk_bytenr, num_bytes, 0, |
945 | root->root_key.objectid, | 945 | root->root_key.objectid, |
946 | key.objectid, key.offset - | 946 | key.objectid, key.offset - |
@@ -1234,7 +1234,7 @@ again: | |||
1234 | extent_end - split); | 1234 | extent_end - split); |
1235 | btrfs_mark_buffer_dirty(leaf); | 1235 | btrfs_mark_buffer_dirty(leaf); |
1236 | 1236 | ||
1237 | ret = btrfs_inc_extent_ref(trans, fs_info, bytenr, num_bytes, | 1237 | ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, |
1238 | 0, root->root_key.objectid, | 1238 | 0, root->root_key.objectid, |
1239 | ino, orig_offset); | 1239 | ino, orig_offset); |
1240 | if (ret) { | 1240 | if (ret) { |
@@ -1268,7 +1268,7 @@ again: | |||
1268 | extent_end = other_end; | 1268 | extent_end = other_end; |
1269 | del_slot = path->slots[0] + 1; | 1269 | del_slot = path->slots[0] + 1; |
1270 | del_nr++; | 1270 | del_nr++; |
1271 | ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes, | 1271 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
1272 | 0, root->root_key.objectid, | 1272 | 0, root->root_key.objectid, |
1273 | ino, orig_offset); | 1273 | ino, orig_offset); |
1274 | if (ret) { | 1274 | if (ret) { |
@@ -1288,7 +1288,7 @@ again: | |||
1288 | key.offset = other_start; | 1288 | key.offset = other_start; |
1289 | del_slot = path->slots[0]; | 1289 | del_slot = path->slots[0]; |
1290 | del_nr++; | 1290 | del_nr++; |
1291 | ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes, | 1291 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
1292 | 0, root->root_key.objectid, | 1292 | 0, root->root_key.objectid, |
1293 | ino, orig_offset); | 1293 | ino, orig_offset); |
1294 | if (ret) { | 1294 | if (ret) { |
@@ -1590,7 +1590,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1590 | int ret = 0; | 1590 | int ret = 0; |
1591 | bool only_release_metadata = false; | 1591 | bool only_release_metadata = false; |
1592 | bool force_page_uptodate = false; | 1592 | bool force_page_uptodate = false; |
1593 | bool need_unlock; | ||
1594 | 1593 | ||
1595 | nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE), | 1594 | nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE), |
1596 | PAGE_SIZE / (sizeof(struct page *))); | 1595 | PAGE_SIZE / (sizeof(struct page *))); |
@@ -1613,6 +1612,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1613 | size_t copied; | 1612 | size_t copied; |
1614 | size_t dirty_sectors; | 1613 | size_t dirty_sectors; |
1615 | size_t num_sectors; | 1614 | size_t num_sectors; |
1615 | int extents_locked; | ||
1616 | 1616 | ||
1617 | WARN_ON(num_pages > nrptrs); | 1617 | WARN_ON(num_pages > nrptrs); |
1618 | 1618 | ||
@@ -1656,6 +1656,7 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1656 | } | 1656 | } |
1657 | } | 1657 | } |
1658 | 1658 | ||
1659 | WARN_ON(reserve_bytes == 0); | ||
1659 | ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), | 1660 | ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode), |
1660 | reserve_bytes); | 1661 | reserve_bytes); |
1661 | if (ret) { | 1662 | if (ret) { |
@@ -1669,7 +1670,6 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1669 | } | 1670 | } |
1670 | 1671 | ||
1671 | release_bytes = reserve_bytes; | 1672 | release_bytes = reserve_bytes; |
1672 | need_unlock = false; | ||
1673 | again: | 1673 | again: |
1674 | /* | 1674 | /* |
1675 | * This is going to setup the pages array with the number of | 1675 | * This is going to setup the pages array with the number of |
@@ -1679,19 +1679,23 @@ again: | |||
1679 | ret = prepare_pages(inode, pages, num_pages, | 1679 | ret = prepare_pages(inode, pages, num_pages, |
1680 | pos, write_bytes, | 1680 | pos, write_bytes, |
1681 | force_page_uptodate); | 1681 | force_page_uptodate); |
1682 | if (ret) | 1682 | if (ret) { |
1683 | btrfs_delalloc_release_extents(BTRFS_I(inode), | ||
1684 | reserve_bytes); | ||
1683 | break; | 1685 | break; |
1686 | } | ||
1684 | 1687 | ||
1685 | ret = lock_and_cleanup_extent_if_need(BTRFS_I(inode), pages, | 1688 | extents_locked = lock_and_cleanup_extent_if_need( |
1689 | BTRFS_I(inode), pages, | ||
1686 | num_pages, pos, write_bytes, &lockstart, | 1690 | num_pages, pos, write_bytes, &lockstart, |
1687 | &lockend, &cached_state); | 1691 | &lockend, &cached_state); |
1688 | if (ret < 0) { | 1692 | if (extents_locked < 0) { |
1689 | if (ret == -EAGAIN) | 1693 | if (extents_locked == -EAGAIN) |
1690 | goto again; | 1694 | goto again; |
1695 | btrfs_delalloc_release_extents(BTRFS_I(inode), | ||
1696 | reserve_bytes); | ||
1697 | ret = extents_locked; | ||
1691 | break; | 1698 | break; |
1692 | } else if (ret > 0) { | ||
1693 | need_unlock = true; | ||
1694 | ret = 0; | ||
1695 | } | 1699 | } |
1696 | 1700 | ||
1697 | copied = btrfs_copy_from_user(pos, write_bytes, pages, i); | 1701 | copied = btrfs_copy_from_user(pos, write_bytes, pages, i); |
@@ -1718,23 +1722,10 @@ again: | |||
1718 | PAGE_SIZE); | 1722 | PAGE_SIZE); |
1719 | } | 1723 | } |
1720 | 1724 | ||
1721 | /* | ||
1722 | * If we had a short copy we need to release the excess delalloc | ||
1723 | * bytes we reserved. We need to increment outstanding_extents | ||
1724 | * because btrfs_delalloc_release_space and | ||
1725 | * btrfs_delalloc_release_metadata will decrement it, but | ||
1726 | * we still have an outstanding extent for the chunk we actually | ||
1727 | * managed to copy. | ||
1728 | */ | ||
1729 | if (num_sectors > dirty_sectors) { | 1725 | if (num_sectors > dirty_sectors) { |
1730 | /* release everything except the sectors we dirtied */ | 1726 | /* release everything except the sectors we dirtied */ |
1731 | release_bytes -= dirty_sectors << | 1727 | release_bytes -= dirty_sectors << |
1732 | fs_info->sb->s_blocksize_bits; | 1728 | fs_info->sb->s_blocksize_bits; |
1733 | if (copied > 0) { | ||
1734 | spin_lock(&BTRFS_I(inode)->lock); | ||
1735 | BTRFS_I(inode)->outstanding_extents++; | ||
1736 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1737 | } | ||
1738 | if (only_release_metadata) { | 1729 | if (only_release_metadata) { |
1739 | btrfs_delalloc_release_metadata(BTRFS_I(inode), | 1730 | btrfs_delalloc_release_metadata(BTRFS_I(inode), |
1740 | release_bytes); | 1731 | release_bytes); |
@@ -1756,10 +1747,11 @@ again: | |||
1756 | if (copied > 0) | 1747 | if (copied > 0) |
1757 | ret = btrfs_dirty_pages(inode, pages, dirty_pages, | 1748 | ret = btrfs_dirty_pages(inode, pages, dirty_pages, |
1758 | pos, copied, NULL); | 1749 | pos, copied, NULL); |
1759 | if (need_unlock) | 1750 | if (extents_locked) |
1760 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1751 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
1761 | lockstart, lockend, &cached_state, | 1752 | lockstart, lockend, &cached_state, |
1762 | GFP_NOFS); | 1753 | GFP_NOFS); |
1754 | btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes); | ||
1763 | if (ret) { | 1755 | if (ret) { |
1764 | btrfs_drop_pages(pages, num_pages); | 1756 | btrfs_drop_pages(pages, num_pages); |
1765 | break; | 1757 | break; |
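The __btrfs_buffered_write() rework folds the old need_unlock flag into the tri-state return of lock_and_cleanup_extent_if_need() (negative on error, 0 when nothing was locked, positive when the range is locked) and guarantees btrfs_delalloc_release_extents() runs exactly once per iteration, on the error exits as well as the normal path. The resulting error-handling shape, condensed from the hunks above:

    extents_locked = lock_and_cleanup_extent_if_need(/* ... */);
    if (extents_locked < 0) {
        if (extents_locked == -EAGAIN)
            goto again;
        /* drop the temporary extent count before bailing out */
        btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
        ret = extents_locked;
        break;
    }
    /* ... copy from user, dirty and account the pages ... */
    if (extents_locked)
        unlock_extent_cached(/* ... */);
    btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);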
@@ -2046,7 +2038,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
2046 | struct btrfs_trans_handle *trans; | 2038 | struct btrfs_trans_handle *trans; |
2047 | struct btrfs_log_ctx ctx; | 2039 | struct btrfs_log_ctx ctx; |
2048 | int ret = 0, err; | 2040 | int ret = 0, err; |
2049 | bool full_sync = 0; | 2041 | bool full_sync = false; |
2050 | u64 len; | 2042 | u64 len; |
2051 | 2043 | ||
2052 | /* | 2044 | /* |
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 684f12247db7..fe5e0324dca9 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c | |||
@@ -1286,12 +1286,8 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans, | |||
1286 | struct btrfs_block_group_cache *block_group, | 1286 | struct btrfs_block_group_cache *block_group, |
1287 | struct btrfs_path *path) | 1287 | struct btrfs_path *path) |
1288 | { | 1288 | { |
1289 | u64 start, end; | ||
1290 | int ret; | 1289 | int ret; |
1291 | 1290 | ||
1292 | start = block_group->key.objectid; | ||
1293 | end = block_group->key.objectid + block_group->key.offset; | ||
1294 | |||
1295 | block_group->needs_free_space = 0; | 1291 | block_group->needs_free_space = 0; |
1296 | 1292 | ||
1297 | ret = add_new_free_space_info(trans, fs_info, block_group, path); | 1293 | ret = add_new_free_space_info(trans, fs_info, block_group, path); |
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index d02019747d00..022b19336fee 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c | |||
@@ -500,11 +500,12 @@ again: | |||
500 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, | 500 | ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, prealloc, |
501 | prealloc, prealloc, &alloc_hint); | 501 | prealloc, prealloc, &alloc_hint); |
502 | if (ret) { | 502 | if (ret) { |
503 | btrfs_delalloc_release_metadata(BTRFS_I(inode), prealloc); | 503 | btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); |
504 | goto out_put; | 504 | goto out_put; |
505 | } | 505 | } |
506 | 506 | ||
507 | ret = btrfs_write_out_ino_cache(root, trans, path, inode); | 507 | ret = btrfs_write_out_ino_cache(root, trans, path, inode); |
508 | btrfs_delalloc_release_extents(BTRFS_I(inode), prealloc); | ||
508 | out_put: | 509 | out_put: |
509 | iput(inode); | 510 | iput(inode); |
510 | out_release: | 511 | out_release: |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d94e3f68b9b1..b93fe05a39c7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/blkdev.h> | 42 | #include <linux/blkdev.h> |
43 | #include <linux/posix_acl_xattr.h> | 43 | #include <linux/posix_acl_xattr.h> |
44 | #include <linux/uio.h> | 44 | #include <linux/uio.h> |
45 | #include <linux/magic.h> | ||
45 | #include "ctree.h" | 46 | #include "ctree.h" |
46 | #include "disk-io.h" | 47 | #include "disk-io.h" |
47 | #include "transaction.h" | 48 | #include "transaction.h" |
@@ -67,7 +68,6 @@ struct btrfs_iget_args { | |||
67 | }; | 68 | }; |
68 | 69 | ||
69 | struct btrfs_dio_data { | 70 | struct btrfs_dio_data { |
70 | u64 outstanding_extents; | ||
71 | u64 reserve; | 71 | u64 reserve; |
72 | u64 unsubmitted_oe_range_start; | 72 | u64 unsubmitted_oe_range_start; |
73 | u64 unsubmitted_oe_range_end; | 73 | u64 unsubmitted_oe_range_end; |
@@ -316,7 +316,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, | |||
316 | btrfs_free_path(path); | 316 | btrfs_free_path(path); |
317 | return PTR_ERR(trans); | 317 | return PTR_ERR(trans); |
318 | } | 318 | } |
319 | trans->block_rsv = &fs_info->delalloc_block_rsv; | 319 | trans->block_rsv = &BTRFS_I(inode)->block_rsv; |
320 | 320 | ||
321 | if (compressed_size && compressed_pages) | 321 | if (compressed_size && compressed_pages) |
322 | extent_item_size = btrfs_file_extent_calc_inline_size( | 322 | extent_item_size = btrfs_file_extent_calc_inline_size( |
@@ -348,7 +348,6 @@ static noinline int cow_file_range_inline(struct btrfs_root *root, | |||
348 | } | 348 | } |
349 | 349 | ||
350 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); | 350 | set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); |
351 | btrfs_delalloc_release_metadata(BTRFS_I(inode), end + 1 - start); | ||
352 | btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); | 351 | btrfs_drop_extent_cache(BTRFS_I(inode), start, aligned_end - 1, 0); |
353 | out: | 352 | out: |
354 | /* | 353 | /* |
@@ -458,7 +457,6 @@ static noinline void compress_file_range(struct inode *inode, | |||
458 | { | 457 | { |
459 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 458 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
460 | struct btrfs_root *root = BTRFS_I(inode)->root; | 459 | struct btrfs_root *root = BTRFS_I(inode)->root; |
461 | u64 num_bytes; | ||
462 | u64 blocksize = fs_info->sectorsize; | 460 | u64 blocksize = fs_info->sectorsize; |
463 | u64 actual_end; | 461 | u64 actual_end; |
464 | u64 isize = i_size_read(inode); | 462 | u64 isize = i_size_read(inode); |
@@ -508,8 +506,6 @@ again: | |||
508 | 506 | ||
509 | total_compressed = min_t(unsigned long, total_compressed, | 507 | total_compressed = min_t(unsigned long, total_compressed, |
510 | BTRFS_MAX_UNCOMPRESSED); | 508 | BTRFS_MAX_UNCOMPRESSED); |
511 | num_bytes = ALIGN(end - start + 1, blocksize); | ||
512 | num_bytes = max(blocksize, num_bytes); | ||
513 | total_in = 0; | 509 | total_in = 0; |
514 | ret = 0; | 510 | ret = 0; |
515 | 511 | ||
@@ -542,7 +538,10 @@ again: | |||
542 | */ | 538 | */ |
543 | extent_range_clear_dirty_for_io(inode, start, end); | 539 | extent_range_clear_dirty_for_io(inode, start, end); |
544 | redirty = 1; | 540 | redirty = 1; |
545 | ret = btrfs_compress_pages(compress_type, | 541 | |
542 | /* Compression level is applied here and only here */ | ||
543 | ret = btrfs_compress_pages( | ||
544 | compress_type | (fs_info->compress_level << 4), | ||
546 | inode->i_mapping, start, | 545 | inode->i_mapping, start, |
547 | pages, | 546 | pages, |
548 | &nr_pages, | 547 | &nr_pages, |
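
The compress_file_range hunk threads the configured zlib level into btrfs_compress_pages by packing it into the upper bits of the type argument: compress_type | (fs_info->compress_level << 4). A self-contained sketch of that encoding; the 4-bit mask is inferred from the shift in the diff, not quoted from a btrfs header:

#include <assert.h>
#include <stdio.h>

#define TYPE_MASK   0x0F   /* low nibble: algorithm */
#define LEVEL_SHIFT 4      /* high bits: level */

static unsigned pack(unsigned type, unsigned level)
{
    return type | (level << LEVEL_SHIFT);
}

static unsigned unpack_type(unsigned v)  { return v & TYPE_MASK; }
static unsigned unpack_level(unsigned v) { return v >> LEVEL_SHIFT; }

int main(void)
{
    unsigned v = pack(1 /* say, zlib */, 9);   /* -o compress=zlib:9 */

    assert(unpack_type(v) == 1);
    assert(unpack_level(v) == 9);
    printf("packed=0x%x type=%u level=%u\n", v, unpack_type(v),
           unpack_level(v));
    return 0;
}
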
@@ -570,7 +569,7 @@ again: | |||
570 | cont: | 569 | cont: |
571 | if (start == 0) { | 570 | if (start == 0) { |
572 | /* lets try to make an inline extent */ | 571 | /* lets try to make an inline extent */ |
573 | if (ret || total_in < (actual_end - start)) { | 572 | if (ret || total_in < actual_end) { |
574 | /* we didn't compress the entire range, try | 573 | /* we didn't compress the entire range, try |
575 | * to make an uncompressed inline extent. | 574 | * to make an uncompressed inline extent. |
576 | */ | 575 | */ |
@@ -584,16 +583,21 @@ cont: | |||
584 | } | 583 | } |
585 | if (ret <= 0) { | 584 | if (ret <= 0) { |
586 | unsigned long clear_flags = EXTENT_DELALLOC | | 585 | unsigned long clear_flags = EXTENT_DELALLOC | |
587 | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG; | 586 | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | |
587 | EXTENT_DO_ACCOUNTING; | ||
588 | unsigned long page_error_op; | 588 | unsigned long page_error_op; |
589 | 589 | ||
590 | clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0; | ||
591 | page_error_op = ret < 0 ? PAGE_SET_ERROR : 0; | 590 | page_error_op = ret < 0 ? PAGE_SET_ERROR : 0; |
592 | 591 | ||
593 | /* | 592 | /* |
594 | * inline extent creation worked or returned error, | 593 | * inline extent creation worked or returned error, |
595 | * we don't need to create any more async work items. | 594 | * we don't need to create any more async work items. |
596 | * Unlock and free up our temp pages. | 595 | * Unlock and free up our temp pages. |
596 | * | ||
597 | * We use DO_ACCOUNTING here because we need the | ||
598 | * delalloc_release_metadata to be done _after_ we drop | ||
599 | * our outstanding extent for clearing delalloc for this | ||
600 | * range. | ||
597 | */ | 601 | */ |
598 | extent_clear_unlock_delalloc(inode, start, end, end, | 602 | extent_clear_unlock_delalloc(inode, start, end, end, |
599 | NULL, clear_flags, | 603 | NULL, clear_flags, |
@@ -602,10 +606,6 @@ cont: | |||
602 | PAGE_SET_WRITEBACK | | 606 | PAGE_SET_WRITEBACK | |
603 | page_error_op | | 607 | page_error_op | |
604 | PAGE_END_WRITEBACK); | 608 | PAGE_END_WRITEBACK); |
605 | if (ret == 0) | ||
606 | btrfs_free_reserved_data_space_noquota(inode, | ||
607 | start, | ||
608 | end - start + 1); | ||
609 | goto free_pages_out; | 609 | goto free_pages_out; |
610 | } | 610 | } |
611 | } | 611 | } |
@@ -625,7 +625,6 @@ cont: | |||
625 | */ | 625 | */ |
626 | total_in = ALIGN(total_in, PAGE_SIZE); | 626 | total_in = ALIGN(total_in, PAGE_SIZE); |
627 | if (total_compressed + blocksize <= total_in) { | 627 | if (total_compressed + blocksize <= total_in) { |
628 | num_bytes = total_in; | ||
629 | *num_added += 1; | 628 | *num_added += 1; |
630 | 629 | ||
631 | /* | 630 | /* |
@@ -633,12 +632,12 @@ cont: | |||
633 | * allocation on disk for these compressed pages, and | 632 | * allocation on disk for these compressed pages, and |
634 | * will submit them to the elevator. | 633 | * will submit them to the elevator. |
635 | */ | 634 | */ |
636 | add_async_extent(async_cow, start, num_bytes, | 635 | add_async_extent(async_cow, start, total_in, |
637 | total_compressed, pages, nr_pages, | 636 | total_compressed, pages, nr_pages, |
638 | compress_type); | 637 | compress_type); |
639 | 638 | ||
640 | if (start + num_bytes < end) { | 639 | if (start + total_in < end) { |
641 | start += num_bytes; | 640 | start += total_in; |
642 | pages = NULL; | 641 | pages = NULL; |
643 | cond_resched(); | 642 | cond_resched(); |
644 | goto again; | 643 | goto again; |
@@ -982,15 +981,19 @@ static noinline int cow_file_range(struct inode *inode, | |||
982 | ret = cow_file_range_inline(root, inode, start, end, 0, | 981 | ret = cow_file_range_inline(root, inode, start, end, 0, |
983 | BTRFS_COMPRESS_NONE, NULL); | 982 | BTRFS_COMPRESS_NONE, NULL); |
984 | if (ret == 0) { | 983 | if (ret == 0) { |
984 | /* | ||
985 | * We use DO_ACCOUNTING here because we need the | ||
986 | * delalloc_release_metadata to be run _after_ we drop | ||
987 | * our outstanding extent for clearing delalloc for this | ||
988 | * range. | ||
989 | */ | ||
985 | extent_clear_unlock_delalloc(inode, start, end, | 990 | extent_clear_unlock_delalloc(inode, start, end, |
986 | delalloc_end, NULL, | 991 | delalloc_end, NULL, |
987 | EXTENT_LOCKED | EXTENT_DELALLOC | | 992 | EXTENT_LOCKED | EXTENT_DELALLOC | |
988 | EXTENT_DELALLOC_NEW | | 993 | EXTENT_DELALLOC_NEW | EXTENT_DEFRAG | |
989 | EXTENT_DEFRAG, PAGE_UNLOCK | | 994 | EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | |
990 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | | 995 | PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK | |
991 | PAGE_END_WRITEBACK); | 996 | PAGE_END_WRITEBACK); |
992 | btrfs_free_reserved_data_space_noquota(inode, start, | ||
993 | end - start + 1); | ||
994 | *nr_written = *nr_written + | 997 | *nr_written = *nr_written + |
995 | (end - start + PAGE_SIZE) / PAGE_SIZE; | 998 | (end - start + PAGE_SIZE) / PAGE_SIZE; |
996 | *page_started = 1; | 999 | *page_started = 1; |
@@ -1226,13 +1229,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
1226 | 1229 | ||
1227 | btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); | 1230 | btrfs_queue_work(fs_info->delalloc_workers, &async_cow->work); |
1228 | 1231 | ||
1229 | while (atomic_read(&fs_info->async_submit_draining) && | ||
1230 | atomic_read(&fs_info->async_delalloc_pages)) { | ||
1231 | wait_event(fs_info->async_submit_wait, | ||
1232 | (atomic_read(&fs_info->async_delalloc_pages) == | ||
1233 | 0)); | ||
1234 | } | ||
1235 | |||
1236 | *nr_written += nr_pages; | 1232 | *nr_written += nr_pages; |
1237 | start = cur_end + 1; | 1233 | start = cur_end + 1; |
1238 | } | 1234 | } |
@@ -1635,7 +1631,7 @@ static void btrfs_split_extent_hook(void *private_data, | |||
1635 | } | 1631 | } |
1636 | 1632 | ||
1637 | spin_lock(&BTRFS_I(inode)->lock); | 1633 | spin_lock(&BTRFS_I(inode)->lock); |
1638 | BTRFS_I(inode)->outstanding_extents++; | 1634 | btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); |
1639 | spin_unlock(&BTRFS_I(inode)->lock); | 1635 | spin_unlock(&BTRFS_I(inode)->lock); |
1640 | } | 1636 | } |
1641 | 1637 | ||
@@ -1665,7 +1661,7 @@ static void btrfs_merge_extent_hook(void *private_data, | |||
1665 | /* we're not bigger than the max, unreserve the space and go */ | 1661 | /* we're not bigger than the max, unreserve the space and go */ |
1666 | if (new_size <= BTRFS_MAX_EXTENT_SIZE) { | 1662 | if (new_size <= BTRFS_MAX_EXTENT_SIZE) { |
1667 | spin_lock(&BTRFS_I(inode)->lock); | 1663 | spin_lock(&BTRFS_I(inode)->lock); |
1668 | BTRFS_I(inode)->outstanding_extents--; | 1664 | btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); |
1669 | spin_unlock(&BTRFS_I(inode)->lock); | 1665 | spin_unlock(&BTRFS_I(inode)->lock); |
1670 | return; | 1666 | return; |
1671 | } | 1667 | } |
@@ -1696,7 +1692,7 @@ static void btrfs_merge_extent_hook(void *private_data, | |||
1696 | return; | 1692 | return; |
1697 | 1693 | ||
1698 | spin_lock(&BTRFS_I(inode)->lock); | 1694 | spin_lock(&BTRFS_I(inode)->lock); |
1699 | BTRFS_I(inode)->outstanding_extents--; | 1695 | btrfs_mod_outstanding_extents(BTRFS_I(inode), -1); |
1700 | spin_unlock(&BTRFS_I(inode)->lock); | 1696 | spin_unlock(&BTRFS_I(inode)->lock); |
1701 | } | 1697 | } |
1702 | 1698 | ||
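
Both the split and merge hooks now go through btrfs_mod_outstanding_extents rather than touching the counter directly, which gives a single place to instrument (the merge log mentions tracepoints for these mods) and to enforce the never-negative invariant. A userspace analogue, with a mutex standing in for the inode spinlock:

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static long outstanding_extents;

/* One funnel for every update: the invariant and any tracing live
 * here instead of at a dozen ++/-- sites. */
static void mod_outstanding_extents(long mod)
{
    outstanding_extents += mod;
    assert(outstanding_extents >= 0);   /* catches a double release */
}

int main(void)
{
    pthread_mutex_lock(&lock);
    mod_outstanding_extents(1);    /* split: one extent becomes two */
    mod_outstanding_extents(-1);   /* merge: two become one */
    pthread_mutex_unlock(&lock);
    printf("outstanding=%ld\n", outstanding_extents);
    return 0;
}
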
@@ -1766,15 +1762,12 @@ static void btrfs_set_bit_hook(void *private_data, | |||
1766 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1762 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1767 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1763 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1768 | u64 len = state->end + 1 - state->start; | 1764 | u64 len = state->end + 1 - state->start; |
1765 | u32 num_extents = count_max_extents(len); | ||
1769 | bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); | 1766 | bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode)); |
1770 | 1767 | ||
1771 | if (*bits & EXTENT_FIRST_DELALLOC) { | 1768 | spin_lock(&BTRFS_I(inode)->lock); |
1772 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1769 | btrfs_mod_outstanding_extents(BTRFS_I(inode), num_extents); |
1773 | } else { | 1770 | spin_unlock(&BTRFS_I(inode)->lock); |
1774 | spin_lock(&BTRFS_I(inode)->lock); | ||
1775 | BTRFS_I(inode)->outstanding_extents++; | ||
1776 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1777 | } | ||
1778 | 1771 | ||
1779 | /* For sanity tests */ | 1772 | /* For sanity tests */ |
1780 | if (btrfs_is_testing(fs_info)) | 1773 | if (btrfs_is_testing(fs_info)) |
@@ -1828,13 +1821,9 @@ static void btrfs_clear_bit_hook(void *private_data, | |||
1828 | struct btrfs_root *root = inode->root; | 1821 | struct btrfs_root *root = inode->root; |
1829 | bool do_list = !btrfs_is_free_space_inode(inode); | 1822 | bool do_list = !btrfs_is_free_space_inode(inode); |
1830 | 1823 | ||
1831 | if (*bits & EXTENT_FIRST_DELALLOC) { | 1824 | spin_lock(&inode->lock); |
1832 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1825 | btrfs_mod_outstanding_extents(inode, -num_extents); |
1833 | } else if (!(*bits & EXTENT_CLEAR_META_RESV)) { | 1826 | spin_unlock(&inode->lock); |
1834 | spin_lock(&inode->lock); | ||
1835 | inode->outstanding_extents -= num_extents; | ||
1836 | spin_unlock(&inode->lock); | ||
1837 | } | ||
1838 | 1827 | ||
1839 | /* | 1828 | /* |
1840 | * We don't reserve metadata space for space cache inodes so we | 1829 | * We don't reserve metadata space for space cache inodes so we |
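
The set-bit hook now sizes its modification with count_max_extents(len) instead of a hardcoded +1, since a large delalloc range may be split into several maximum-sized extents. To the best of my reading this is a round-up division by the largest extent btrfs will write; the 128M constant below is an assumption about BTRFS_MAX_EXTENT_SIZE:

#include <stdint.h>
#include <stdio.h>

#define MAX_EXTENT_SIZE (128ULL * 1024 * 1024)   /* assumed value */

static uint32_t count_max_extents(uint64_t len)
{
    return (uint32_t)((len + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE);
}

int main(void)
{
    printf("%u\n", (unsigned)count_max_extents(1));                   /* 1 */
    printf("%u\n", (unsigned)count_max_extents(MAX_EXTENT_SIZE));     /* 1 */
    printf("%u\n", (unsigned)count_max_extents(MAX_EXTENT_SIZE + 1)); /* 2 */
    return 0;
}
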
@@ -2105,6 +2094,7 @@ again: | |||
2105 | 0); | 2094 | 0); |
2106 | ClearPageChecked(page); | 2095 | ClearPageChecked(page); |
2107 | set_page_dirty(page); | 2096 | set_page_dirty(page); |
2097 | btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); | ||
2108 | out: | 2098 | out: |
2109 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, | 2099 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end, |
2110 | &cached_state, GFP_NOFS); | 2100 | &cached_state, GFP_NOFS); |
@@ -2229,8 +2219,9 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
2229 | if (ret < 0) | 2219 | if (ret < 0) |
2230 | goto out; | 2220 | goto out; |
2231 | qg_released = ret; | 2221 | qg_released = ret; |
2232 | ret = btrfs_alloc_reserved_file_extent(trans, root->root_key.objectid, | 2222 | ret = btrfs_alloc_reserved_file_extent(trans, root, |
2233 | btrfs_ino(BTRFS_I(inode)), file_pos, qg_released, &ins); | 2223 | btrfs_ino(BTRFS_I(inode)), |
2224 | file_pos, qg_released, &ins); | ||
2234 | out: | 2225 | out: |
2235 | btrfs_free_path(path); | 2226 | btrfs_free_path(path); |
2236 | 2227 | ||
@@ -2464,7 +2455,7 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path, | |||
2464 | ret = iterate_inodes_from_logical(old->bytenr + | 2455 | ret = iterate_inodes_from_logical(old->bytenr + |
2465 | old->extent_offset, fs_info, | 2456 | old->extent_offset, fs_info, |
2466 | path, record_one_backref, | 2457 | path, record_one_backref, |
2467 | old); | 2458 | old, false); |
2468 | if (ret < 0 && ret != -ENOENT) | 2459 | if (ret < 0 && ret != -ENOENT) |
2469 | return false; | 2460 | return false; |
2470 | 2461 | ||
@@ -2682,7 +2673,7 @@ again: | |||
2682 | inode_add_bytes(inode, len); | 2673 | inode_add_bytes(inode, len); |
2683 | btrfs_release_path(path); | 2674 | btrfs_release_path(path); |
2684 | 2675 | ||
2685 | ret = btrfs_inc_extent_ref(trans, fs_info, new->bytenr, | 2676 | ret = btrfs_inc_extent_ref(trans, root, new->bytenr, |
2686 | new->disk_len, 0, | 2677 | new->disk_len, 0, |
2687 | backref->root_id, backref->inum, | 2678 | backref->root_id, backref->inum, |
2688 | new->file_pos); /* start - extent_offset */ | 2679 | new->file_pos); /* start - extent_offset */ |
@@ -2964,7 +2955,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
2964 | trans = NULL; | 2955 | trans = NULL; |
2965 | goto out; | 2956 | goto out; |
2966 | } | 2957 | } |
2967 | trans->block_rsv = &fs_info->delalloc_block_rsv; | 2958 | trans->block_rsv = &BTRFS_I(inode)->block_rsv; |
2968 | ret = btrfs_update_inode_fallback(trans, root, inode); | 2959 | ret = btrfs_update_inode_fallback(trans, root, inode); |
2969 | if (ret) /* -ENOMEM or corruption */ | 2960 | if (ret) /* -ENOMEM or corruption */ |
2970 | btrfs_abort_transaction(trans, ret); | 2961 | btrfs_abort_transaction(trans, ret); |
@@ -3000,7 +2991,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) | |||
3000 | goto out; | 2991 | goto out; |
3001 | } | 2992 | } |
3002 | 2993 | ||
3003 | trans->block_rsv = &fs_info->delalloc_block_rsv; | 2994 | trans->block_rsv = &BTRFS_I(inode)->block_rsv; |
3004 | 2995 | ||
3005 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 2996 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
3006 | compress_type = ordered_extent->compress_type; | 2997 | compress_type = ordered_extent->compress_type; |
@@ -3058,9 +3049,6 @@ out: | |||
3058 | 0, &cached_state, GFP_NOFS); | 3049 | 0, &cached_state, GFP_NOFS); |
3059 | } | 3050 | } |
3060 | 3051 | ||
3061 | if (root != fs_info->tree_root) | ||
3062 | btrfs_delalloc_release_metadata(BTRFS_I(inode), | ||
3063 | ordered_extent->len); | ||
3064 | if (trans) | 3052 | if (trans) |
3065 | btrfs_end_transaction(trans); | 3053 | btrfs_end_transaction(trans); |
3066 | 3054 | ||
@@ -4372,47 +4360,11 @@ static int truncate_space_check(struct btrfs_trans_handle *trans, | |||
4372 | 4360 | ||
4373 | } | 4361 | } |
4374 | 4362 | ||
4375 | static int truncate_inline_extent(struct inode *inode, | 4363 | /* |
4376 | struct btrfs_path *path, | 4364 | * Return this if we need to call truncate_block for the last bit of the |
4377 | struct btrfs_key *found_key, | 4365 | * truncate. |
4378 | const u64 item_end, | 4366 | */ |
4379 | const u64 new_size) | 4367 | #define NEED_TRUNCATE_BLOCK 1 |
4380 | { | ||
4381 | struct extent_buffer *leaf = path->nodes[0]; | ||
4382 | int slot = path->slots[0]; | ||
4383 | struct btrfs_file_extent_item *fi; | ||
4384 | u32 size = (u32)(new_size - found_key->offset); | ||
4385 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4386 | |||
4387 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
4388 | |||
4389 | if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) { | ||
4390 | loff_t offset = new_size; | ||
4391 | loff_t page_end = ALIGN(offset, PAGE_SIZE); | ||
4392 | |||
4393 | /* | ||
4394 | * Zero out the remaining of the last page of our inline extent, | ||
4395 | * instead of directly truncating our inline extent here - that | ||
4396 | * would be much more complex (decompressing all the data, then | ||
4397 | * compressing the truncated data, which might be bigger than | ||
4398 | * the size of the inline extent, resize the extent, etc). | ||
4399 | * We release the path because to get the page we might need to | ||
4400 | * read the extent item from disk (data not in the page cache). | ||
4401 | */ | ||
4402 | btrfs_release_path(path); | ||
4403 | return btrfs_truncate_block(inode, offset, page_end - offset, | ||
4404 | 0); | ||
4405 | } | ||
4406 | |||
4407 | btrfs_set_file_extent_ram_bytes(leaf, fi, size); | ||
4408 | size = btrfs_file_extent_calc_inline_size(size); | ||
4409 | btrfs_truncate_item(root->fs_info, path, size, 1); | ||
4410 | |||
4411 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) | ||
4412 | inode_sub_bytes(inode, item_end + 1 - new_size); | ||
4413 | |||
4414 | return 0; | ||
4415 | } | ||
4416 | 4368 | ||
4417 | /* | 4369 | /* |
4418 | * this can truncate away extent items, csum items and directory items. | 4370 | * this can truncate away extent items, csum items and directory items. |
@@ -4451,9 +4403,9 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
4451 | int err = 0; | 4403 | int err = 0; |
4452 | u64 ino = btrfs_ino(BTRFS_I(inode)); | 4404 | u64 ino = btrfs_ino(BTRFS_I(inode)); |
4453 | u64 bytes_deleted = 0; | 4405 | u64 bytes_deleted = 0; |
4454 | bool be_nice = 0; | 4406 | bool be_nice = false; |
4455 | bool should_throttle = 0; | 4407 | bool should_throttle = false; |
4456 | bool should_end = 0; | 4408 | bool should_end = false; |
4457 | 4409 | ||
4458 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); | 4410 | BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); |
4459 | 4411 | ||
@@ -4463,7 +4415,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
4463 | */ | 4415 | */ |
4464 | if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && | 4416 | if (!btrfs_is_free_space_inode(BTRFS_I(inode)) && |
4465 | test_bit(BTRFS_ROOT_REF_COWS, &root->state)) | 4417 | test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
4466 | be_nice = 1; | 4418 | be_nice = true; |
4467 | 4419 | ||
4468 | path = btrfs_alloc_path(); | 4420 | path = btrfs_alloc_path(); |
4469 | if (!path) | 4421 | if (!path) |
@@ -4573,11 +4525,6 @@ search_again: | |||
4573 | if (found_type != BTRFS_EXTENT_DATA_KEY) | 4525 | if (found_type != BTRFS_EXTENT_DATA_KEY) |
4574 | goto delete; | 4526 | goto delete; |
4575 | 4527 | ||
4576 | if (del_item) | ||
4577 | last_size = found_key.offset; | ||
4578 | else | ||
4579 | last_size = new_size; | ||
4580 | |||
4581 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { | 4528 | if (extent_type != BTRFS_FILE_EXTENT_INLINE) { |
4582 | u64 num_dec; | 4529 | u64 num_dec; |
4583 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); | 4530 | extent_start = btrfs_file_extent_disk_bytenr(leaf, fi); |
@@ -4619,40 +4566,30 @@ search_again: | |||
4619 | */ | 4566 | */ |
4620 | if (!del_item && | 4567 | if (!del_item && |
4621 | btrfs_file_extent_encryption(leaf, fi) == 0 && | 4568 | btrfs_file_extent_encryption(leaf, fi) == 0 && |
4622 | btrfs_file_extent_other_encoding(leaf, fi) == 0) { | 4569 | btrfs_file_extent_other_encoding(leaf, fi) == 0 && |
4623 | 4570 | btrfs_file_extent_compression(leaf, fi) == 0) { | |
4571 | u32 size = (u32)(new_size - found_key.offset); | ||
4572 | |||
4573 | btrfs_set_file_extent_ram_bytes(leaf, fi, size); | ||
4574 | size = btrfs_file_extent_calc_inline_size(size); | ||
4575 | btrfs_truncate_item(root->fs_info, path, size, 1); | ||
4576 | } else if (!del_item) { | ||
4624 | /* | 4577 | /* |
4625 | * Need to release path in order to truncate a | 4578 | * We have to bail so the last_size is set to |
4626 | * compressed extent. So delete any accumulated | 4579 | * just before this extent. |
4627 | * extent items so far. | ||
4628 | */ | 4580 | */ |
4629 | if (btrfs_file_extent_compression(leaf, fi) != | 4581 | err = NEED_TRUNCATE_BLOCK; |
4630 | BTRFS_COMPRESS_NONE && pending_del_nr) { | 4582 | break; |
4631 | err = btrfs_del_items(trans, root, path, | 4583 | } |
4632 | pending_del_slot, | ||
4633 | pending_del_nr); | ||
4634 | if (err) { | ||
4635 | btrfs_abort_transaction(trans, | ||
4636 | err); | ||
4637 | goto error; | ||
4638 | } | ||
4639 | pending_del_nr = 0; | ||
4640 | } | ||
4641 | 4584 | ||
4642 | err = truncate_inline_extent(inode, path, | 4585 | if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) |
4643 | &found_key, | ||
4644 | item_end, | ||
4645 | new_size); | ||
4646 | if (err) { | ||
4647 | btrfs_abort_transaction(trans, err); | ||
4648 | goto error; | ||
4649 | } | ||
4650 | } else if (test_bit(BTRFS_ROOT_REF_COWS, | ||
4651 | &root->state)) { | ||
4652 | inode_sub_bytes(inode, item_end + 1 - new_size); | 4586 | inode_sub_bytes(inode, item_end + 1 - new_size); |
4653 | } | ||
4654 | } | 4587 | } |
4655 | delete: | 4588 | delete: |
4589 | if (del_item) | ||
4590 | last_size = found_key.offset; | ||
4591 | else | ||
4592 | last_size = new_size; | ||
4656 | if (del_item) { | 4593 | if (del_item) { |
4657 | if (!pending_del_nr) { | 4594 | if (!pending_del_nr) { |
4658 | /* no pending yet, add ourselves */ | 4595 | /* no pending yet, add ourselves */ |
@@ -4669,14 +4606,14 @@ delete: | |||
4669 | } else { | 4606 | } else { |
4670 | break; | 4607 | break; |
4671 | } | 4608 | } |
4672 | should_throttle = 0; | 4609 | should_throttle = false; |
4673 | 4610 | ||
4674 | if (found_extent && | 4611 | if (found_extent && |
4675 | (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || | 4612 | (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || |
4676 | root == fs_info->tree_root)) { | 4613 | root == fs_info->tree_root)) { |
4677 | btrfs_set_path_blocking(path); | 4614 | btrfs_set_path_blocking(path); |
4678 | bytes_deleted += extent_num_bytes; | 4615 | bytes_deleted += extent_num_bytes; |
4679 | ret = btrfs_free_extent(trans, fs_info, extent_start, | 4616 | ret = btrfs_free_extent(trans, root, extent_start, |
4680 | extent_num_bytes, 0, | 4617 | extent_num_bytes, 0, |
4681 | btrfs_header_owner(leaf), | 4618 | btrfs_header_owner(leaf), |
4682 | ino, extent_offset); | 4619 | ino, extent_offset); |
@@ -4688,11 +4625,11 @@ delete: | |||
4688 | if (be_nice) { | 4625 | if (be_nice) { |
4689 | if (truncate_space_check(trans, root, | 4626 | if (truncate_space_check(trans, root, |
4690 | extent_num_bytes)) { | 4627 | extent_num_bytes)) { |
4691 | should_end = 1; | 4628 | should_end = true; |
4692 | } | 4629 | } |
4693 | if (btrfs_should_throttle_delayed_refs(trans, | 4630 | if (btrfs_should_throttle_delayed_refs(trans, |
4694 | fs_info)) | 4631 | fs_info)) |
4695 | should_throttle = 1; | 4632 | should_throttle = true; |
4696 | } | 4633 | } |
4697 | } | 4634 | } |
4698 | 4635 | ||
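
The truncate hunks replace truncate_inline_extent with the NEED_TRUNCATE_BLOCK sentinel: instead of releasing the path and zeroing the tail of a compressed inline extent while still inside the transaction, btrfs_truncate_inode_items bails out and lets the caller finish. The convention, negative for errors, zero for done, a positive sentinel for "caller must finish up", in a compact sketch:

#include <stdio.h>

#define NEED_TRUNCATE_BLOCK 1   /* positive: distinct from -errno and 0 */

/* Stand-in for btrfs_truncate_inode_items' new contract. */
static int truncate_items(int hits_inline_tail)
{
    if (hits_inline_tail)
        return NEED_TRUNCATE_BLOCK;   /* bail; caller finishes up */
    return 0;
}

int main(void)
{
    int ret = truncate_items(1);

    if (ret < 0)
        fprintf(stderr, "error %d\n", ret);
    else if (ret == NEED_TRUNCATE_BLOCK)
        puts("end transaction, zero the tail block, restart");
    else
        puts("done");
    return 0;
}
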
@@ -4801,8 +4738,11 @@ int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len, | |||
4801 | (!len || ((len & (blocksize - 1)) == 0))) | 4738 | (!len || ((len & (blocksize - 1)) == 0))) |
4802 | goto out; | 4739 | goto out; |
4803 | 4740 | ||
4741 | block_start = round_down(from, blocksize); | ||
4742 | block_end = block_start + blocksize - 1; | ||
4743 | |||
4804 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, | 4744 | ret = btrfs_delalloc_reserve_space(inode, &data_reserved, |
4805 | round_down(from, blocksize), blocksize); | 4745 | block_start, blocksize); |
4806 | if (ret) | 4746 | if (ret) |
4807 | goto out; | 4747 | goto out; |
4808 | 4748 | ||
@@ -4810,15 +4750,12 @@ again: | |||
4810 | page = find_or_create_page(mapping, index, mask); | 4750 | page = find_or_create_page(mapping, index, mask); |
4811 | if (!page) { | 4751 | if (!page) { |
4812 | btrfs_delalloc_release_space(inode, data_reserved, | 4752 | btrfs_delalloc_release_space(inode, data_reserved, |
4813 | round_down(from, blocksize), | 4753 | block_start, blocksize); |
4814 | blocksize); | 4754 | btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); |
4815 | ret = -ENOMEM; | 4755 | ret = -ENOMEM; |
4816 | goto out; | 4756 | goto out; |
4817 | } | 4757 | } |
4818 | 4758 | ||
4819 | block_start = round_down(from, blocksize); | ||
4820 | block_end = block_start + blocksize - 1; | ||
4821 | |||
4822 | if (!PageUptodate(page)) { | 4759 | if (!PageUptodate(page)) { |
4823 | ret = btrfs_readpage(NULL, page); | 4760 | ret = btrfs_readpage(NULL, page); |
4824 | lock_page(page); | 4761 | lock_page(page); |
@@ -4883,6 +4820,7 @@ out_unlock: | |||
4883 | if (ret) | 4820 | if (ret) |
4884 | btrfs_delalloc_release_space(inode, data_reserved, block_start, | 4821 | btrfs_delalloc_release_space(inode, data_reserved, block_start, |
4885 | blocksize); | 4822 | blocksize); |
4823 | btrfs_delalloc_release_extents(BTRFS_I(inode), blocksize); | ||
4886 | unlock_page(page); | 4824 | unlock_page(page); |
4887 | put_page(page); | 4825 | put_page(page); |
4888 | out: | 4826 | out: |
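
The btrfs_truncate_block hunks compute block_start and block_end once, before the reservation, so the reserve, the error-path release, and the page work all agree on the same rounded offset. For a power-of-two blocksize the rounding is a plain mask; a small worked example:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* round_down to a power-of-two alignment is a mask, matching what
 * the kernel macro computes for such alignments. */
static uint64_t round_down_p2(uint64_t x, uint64_t align)
{
    assert((align & (align - 1)) == 0);
    return x & ~(align - 1);
}

int main(void)
{
    uint64_t from = 6000, blocksize = 4096;
    uint64_t block_start = round_down_p2(from, blocksize);
    uint64_t block_end = block_start + blocksize - 1;

    /* 6000 rounds down to 4096; the block is [4096, 8191]. */
    printf("block_start=%llu block_end=%llu\n",
           (unsigned long long)block_start,
           (unsigned long long)block_end);
    return 0;
}
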
@@ -7797,33 +7735,6 @@ static struct extent_map *create_io_em(struct inode *inode, u64 start, u64 len, | |||
7797 | return em; | 7735 | return em; |
7798 | } | 7736 | } |
7799 | 7737 | ||
7800 | static void adjust_dio_outstanding_extents(struct inode *inode, | ||
7801 | struct btrfs_dio_data *dio_data, | ||
7802 | const u64 len) | ||
7803 | { | ||
7804 | unsigned num_extents = count_max_extents(len); | ||
7805 | |||
7806 | /* | ||
7807 | * If we have an outstanding_extents count still set then we're | ||
7808 | * within our reservation, otherwise we need to adjust our inode | ||
7809 | * counter appropriately. | ||
7810 | */ | ||
7811 | if (dio_data->outstanding_extents >= num_extents) { | ||
7812 | dio_data->outstanding_extents -= num_extents; | ||
7813 | } else { | ||
7814 | /* | ||
7815 | * If dio write length has been split due to no large enough | ||
7816 | * contiguous space, we need to compensate our inode counter | ||
7817 | * appropriately. | ||
7818 | */ | ||
7819 | u64 num_needed = num_extents - dio_data->outstanding_extents; | ||
7820 | |||
7821 | spin_lock(&BTRFS_I(inode)->lock); | ||
7822 | BTRFS_I(inode)->outstanding_extents += num_needed; | ||
7823 | spin_unlock(&BTRFS_I(inode)->lock); | ||
7824 | } | ||
7825 | } | ||
7826 | |||
7827 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | 7738 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, |
7828 | struct buffer_head *bh_result, int create) | 7739 | struct buffer_head *bh_result, int create) |
7829 | { | 7740 | { |
@@ -7985,7 +7896,6 @@ unlock: | |||
7985 | if (!dio_data->overwrite && start + len > i_size_read(inode)) | 7896 | if (!dio_data->overwrite && start + len > i_size_read(inode)) |
7986 | i_size_write(inode, start + len); | 7897 | i_size_write(inode, start + len); |
7987 | 7898 | ||
7988 | adjust_dio_outstanding_extents(inode, dio_data, len); | ||
7989 | WARN_ON(dio_data->reserve < len); | 7899 | WARN_ON(dio_data->reserve < len); |
7990 | dio_data->reserve -= len; | 7900 | dio_data->reserve -= len; |
7991 | dio_data->unsubmitted_oe_range_end = start + len; | 7901 | dio_data->unsubmitted_oe_range_end = start + len; |
@@ -8015,14 +7925,6 @@ unlock_err: | |||
8015 | err: | 7925 | err: |
8016 | if (dio_data) | 7926 | if (dio_data) |
8017 | current->journal_info = dio_data; | 7927 | current->journal_info = dio_data; |
8018 | /* | ||
8019 | * Compensate the delalloc release we do in btrfs_direct_IO() when we | ||
8020 | * write less data then expected, so that we don't underflow our inode's | ||
8021 | * outstanding extents counter. | ||
8022 | */ | ||
8023 | if (create && dio_data) | ||
8024 | adjust_dio_outstanding_extents(inode, dio_data, len); | ||
8025 | |||
8026 | return ret; | 7928 | return ret; |
8027 | } | 7929 | } |
8028 | 7930 | ||
@@ -8495,7 +8397,7 @@ static void btrfs_end_dio_bio(struct bio *bio) | |||
8495 | if (dip->errors) { | 8397 | if (dip->errors) { |
8496 | bio_io_error(dip->orig_bio); | 8398 | bio_io_error(dip->orig_bio); |
8497 | } else { | 8399 | } else { |
8498 | dip->dio_bio->bi_status = 0; | 8400 | dip->dio_bio->bi_status = BLK_STS_OK; |
8499 | bio_endio(dip->orig_bio); | 8401 | bio_endio(dip->orig_bio); |
8500 | } | 8402 | } |
8501 | out: | 8403 | out: |
@@ -8577,7 +8479,7 @@ __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, u64 file_offset, | |||
8577 | goto err; | 8479 | goto err; |
8578 | } | 8480 | } |
8579 | map: | 8481 | map: |
8580 | ret = btrfs_map_bio(fs_info, bio, 0, async_submit); | 8482 | ret = btrfs_map_bio(fs_info, bio, 0, 0); |
8581 | err: | 8483 | err: |
8582 | bio_put(bio); | 8484 | bio_put(bio); |
8583 | return ret; | 8485 | return ret; |
@@ -8786,7 +8688,6 @@ free_ordered: | |||
8786 | } | 8688 | } |
8787 | 8689 | ||
8788 | static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, | 8690 | static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info, |
8789 | struct kiocb *iocb, | ||
8790 | const struct iov_iter *iter, loff_t offset) | 8691 | const struct iov_iter *iter, loff_t offset) |
8791 | { | 8692 | { |
8792 | int seg; | 8693 | int seg; |
@@ -8833,7 +8734,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8833 | bool relock = false; | 8734 | bool relock = false; |
8834 | ssize_t ret; | 8735 | ssize_t ret; |
8835 | 8736 | ||
8836 | if (check_direct_IO(fs_info, iocb, iter, offset)) | 8737 | if (check_direct_IO(fs_info, iter, offset)) |
8837 | return 0; | 8738 | return 0; |
8838 | 8739 | ||
8839 | inode_dio_begin(inode); | 8740 | inode_dio_begin(inode); |
@@ -8868,7 +8769,6 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8868 | offset, count); | 8769 | offset, count); |
8869 | if (ret) | 8770 | if (ret) |
8870 | goto out; | 8771 | goto out; |
8871 | dio_data.outstanding_extents = count_max_extents(count); | ||
8872 | 8772 | ||
8873 | /* | 8773 | /* |
8874 | * We need to know how many extents we reserved so that we can | 8774 | * We need to know how many extents we reserved so that we can |
@@ -8915,6 +8815,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) | |||
8915 | } else if (ret >= 0 && (size_t)ret < count) | 8815 | } else if (ret >= 0 && (size_t)ret < count) |
8916 | btrfs_delalloc_release_space(inode, data_reserved, | 8816 | btrfs_delalloc_release_space(inode, data_reserved, |
8917 | offset, count - (size_t)ret); | 8817 | offset, count - (size_t)ret); |
8818 | btrfs_delalloc_release_extents(BTRFS_I(inode), count); | ||
8918 | } | 8819 | } |
8919 | out: | 8820 | out: |
8920 | if (wakeup) | 8821 | if (wakeup) |
@@ -9232,9 +9133,6 @@ again: | |||
9232 | fs_info->sectorsize); | 9133 | fs_info->sectorsize); |
9233 | if (reserved_space < PAGE_SIZE) { | 9134 | if (reserved_space < PAGE_SIZE) { |
9234 | end = page_start + reserved_space - 1; | 9135 | end = page_start + reserved_space - 1; |
9235 | spin_lock(&BTRFS_I(inode)->lock); | ||
9236 | BTRFS_I(inode)->outstanding_extents++; | ||
9237 | spin_unlock(&BTRFS_I(inode)->lock); | ||
9238 | btrfs_delalloc_release_space(inode, data_reserved, | 9136 | btrfs_delalloc_release_space(inode, data_reserved, |
9239 | page_start, PAGE_SIZE - reserved_space); | 9137 | page_start, PAGE_SIZE - reserved_space); |
9240 | } | 9138 | } |
@@ -9286,12 +9184,14 @@ again: | |||
9286 | 9184 | ||
9287 | out_unlock: | 9185 | out_unlock: |
9288 | if (!ret) { | 9186 | if (!ret) { |
9187 | btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); | ||
9289 | sb_end_pagefault(inode->i_sb); | 9188 | sb_end_pagefault(inode->i_sb); |
9290 | extent_changeset_free(data_reserved); | 9189 | extent_changeset_free(data_reserved); |
9291 | return VM_FAULT_LOCKED; | 9190 | return VM_FAULT_LOCKED; |
9292 | } | 9191 | } |
9293 | unlock_page(page); | 9192 | unlock_page(page); |
9294 | out: | 9193 | out: |
9194 | btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); | ||
9295 | btrfs_delalloc_release_space(inode, data_reserved, page_start, | 9195 | btrfs_delalloc_release_space(inode, data_reserved, page_start, |
9296 | reserved_space); | 9196 | reserved_space); |
9297 | out_noreserve: | 9197 | out_noreserve: |
@@ -9387,12 +9287,12 @@ static int btrfs_truncate(struct inode *inode) | |||
9387 | ret = btrfs_truncate_inode_items(trans, root, inode, | 9287 | ret = btrfs_truncate_inode_items(trans, root, inode, |
9388 | inode->i_size, | 9288 | inode->i_size, |
9389 | BTRFS_EXTENT_DATA_KEY); | 9289 | BTRFS_EXTENT_DATA_KEY); |
9290 | trans->block_rsv = &fs_info->trans_block_rsv; | ||
9390 | if (ret != -ENOSPC && ret != -EAGAIN) { | 9291 | if (ret != -ENOSPC && ret != -EAGAIN) { |
9391 | err = ret; | 9292 | err = ret; |
9392 | break; | 9293 | break; |
9393 | } | 9294 | } |
9394 | 9295 | ||
9395 | trans->block_rsv = &fs_info->trans_block_rsv; | ||
9396 | ret = btrfs_update_inode(trans, root, inode); | 9296 | ret = btrfs_update_inode(trans, root, inode); |
9397 | if (ret) { | 9297 | if (ret) { |
9398 | err = ret; | 9298 | err = ret; |
@@ -9416,6 +9316,27 @@ static int btrfs_truncate(struct inode *inode) | |||
9416 | trans->block_rsv = rsv; | 9316 | trans->block_rsv = rsv; |
9417 | } | 9317 | } |
9418 | 9318 | ||
9319 | /* | ||
9320 | * We can't call btrfs_truncate_block inside a trans handle as we could | ||
9321 | * deadlock with freeze, if we got NEED_TRUNCATE_BLOCK then we know | ||
9322 | * we've truncated everything except the last little bit, and can do | ||
9323 | * btrfs_truncate_block and then update the disk_i_size. | ||
9324 | */ | ||
9325 | if (ret == NEED_TRUNCATE_BLOCK) { | ||
9326 | btrfs_end_transaction(trans); | ||
9327 | btrfs_btree_balance_dirty(fs_info); | ||
9328 | |||
9329 | ret = btrfs_truncate_block(inode, inode->i_size, 0, 0); | ||
9330 | if (ret) | ||
9331 | goto out; | ||
9332 | trans = btrfs_start_transaction(root, 1); | ||
9333 | if (IS_ERR(trans)) { | ||
9334 | ret = PTR_ERR(trans); | ||
9335 | goto out; | ||
9336 | } | ||
9337 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
9338 | } | ||
9339 | |||
9419 | if (ret == 0 && inode->i_nlink > 0) { | 9340 | if (ret == 0 && inode->i_nlink > 0) { |
9420 | trans->block_rsv = root->orphan_block_rsv; | 9341 | trans->block_rsv = root->orphan_block_rsv; |
9421 | ret = btrfs_orphan_del(trans, BTRFS_I(inode)); | 9342 | ret = btrfs_orphan_del(trans, BTRFS_I(inode)); |
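
The new NEED_TRUNCATE_BLOCK handling in btrfs_truncate ends the transaction before calling btrfs_truncate_block, because (per the comment in the hunk) that call can block while a filesystem freeze is waiting for open transactions to drain. A toy illustration of the ordering, with mutexes standing in for the two resources; this shows the shape, not the kernel locking:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t trans = PTHREAD_MUTEX_INITIALIZER;   /* "transaction" */
static pthread_mutex_t writeback = PTHREAD_MUTEX_INITIALIZER;

static void truncate_tail_block(void)
{
    pthread_mutex_lock(&writeback);   /* may block, like truncate_block */
    pthread_mutex_unlock(&writeback);
}

int main(void)
{
    pthread_mutex_lock(&trans);       /* transaction open */
    pthread_mutex_unlock(&trans);     /* end it FIRST ... */
    truncate_tail_block();            /* ... then do the blocking work */
    pthread_mutex_lock(&trans);       /* restart to update disk_i_size */
    puts("btrfs_ordered_update_i_size(...)");
    pthread_mutex_unlock(&trans);
    return 0;
}
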
@@ -9480,6 +9401,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | |||
9480 | 9401 | ||
9481 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 9402 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
9482 | { | 9403 | { |
9404 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
9483 | struct btrfs_inode *ei; | 9405 | struct btrfs_inode *ei; |
9484 | struct inode *inode; | 9406 | struct inode *inode; |
9485 | 9407 | ||
@@ -9506,8 +9428,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
9506 | 9428 | ||
9507 | spin_lock_init(&ei->lock); | 9429 | spin_lock_init(&ei->lock); |
9508 | ei->outstanding_extents = 0; | 9430 | ei->outstanding_extents = 0; |
9509 | ei->reserved_extents = 0; | 9431 | if (sb->s_magic != BTRFS_TEST_MAGIC) |
9510 | 9432 | btrfs_init_metadata_block_rsv(fs_info, &ei->block_rsv, | |
9433 | BTRFS_BLOCK_RSV_DELALLOC); | ||
9511 | ei->runtime_flags = 0; | 9434 | ei->runtime_flags = 0; |
9512 | ei->prop_compress = BTRFS_COMPRESS_NONE; | 9435 | ei->prop_compress = BTRFS_COMPRESS_NONE; |
9513 | ei->defrag_compress = BTRFS_COMPRESS_NONE; | 9436 | ei->defrag_compress = BTRFS_COMPRESS_NONE; |
@@ -9557,8 +9480,9 @@ void btrfs_destroy_inode(struct inode *inode) | |||
9557 | 9480 | ||
9558 | WARN_ON(!hlist_empty(&inode->i_dentry)); | 9481 | WARN_ON(!hlist_empty(&inode->i_dentry)); |
9559 | WARN_ON(inode->i_data.nrpages); | 9482 | WARN_ON(inode->i_data.nrpages); |
9483 | WARN_ON(BTRFS_I(inode)->block_rsv.reserved); | ||
9484 | WARN_ON(BTRFS_I(inode)->block_rsv.size); | ||
9560 | WARN_ON(BTRFS_I(inode)->outstanding_extents); | 9485 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
9561 | WARN_ON(BTRFS_I(inode)->reserved_extents); | ||
9562 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); | 9486 | WARN_ON(BTRFS_I(inode)->delalloc_bytes); |
9563 | WARN_ON(BTRFS_I(inode)->new_delalloc_bytes); | 9487 | WARN_ON(BTRFS_I(inode)->new_delalloc_bytes); |
9564 | WARN_ON(BTRFS_I(inode)->csum_bytes); | 9488 | WARN_ON(BTRFS_I(inode)->csum_bytes); |
@@ -10337,19 +10261,6 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
10337 | ret = __start_delalloc_inodes(root, delay_iput, -1); | 10261 | ret = __start_delalloc_inodes(root, delay_iput, -1); |
10338 | if (ret > 0) | 10262 | if (ret > 0) |
10339 | ret = 0; | 10263 | ret = 0; |
10340 | /* | ||
10341 | * the filemap_flush will queue IO into the worker threads, but | ||
10342 | * we have to make sure the IO is actually started and that | ||
10343 | * ordered extents get created before we return | ||
10344 | */ | ||
10345 | atomic_inc(&fs_info->async_submit_draining); | ||
10346 | while (atomic_read(&fs_info->nr_async_submits) || | ||
10347 | atomic_read(&fs_info->async_delalloc_pages)) { | ||
10348 | wait_event(fs_info->async_submit_wait, | ||
10349 | (atomic_read(&fs_info->nr_async_submits) == 0 && | ||
10350 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | ||
10351 | } | ||
10352 | atomic_dec(&fs_info->async_submit_draining); | ||
10353 | return ret; | 10264 | return ret; |
10354 | } | 10265 | } |
10355 | 10266 | ||
@@ -10391,14 +10302,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, | |||
10391 | spin_unlock(&fs_info->delalloc_root_lock); | 10302 | spin_unlock(&fs_info->delalloc_root_lock); |
10392 | 10303 | ||
10393 | ret = 0; | 10304 | ret = 0; |
10394 | atomic_inc(&fs_info->async_submit_draining); | ||
10395 | while (atomic_read(&fs_info->nr_async_submits) || | ||
10396 | atomic_read(&fs_info->async_delalloc_pages)) { | ||
10397 | wait_event(fs_info->async_submit_wait, | ||
10398 | (atomic_read(&fs_info->nr_async_submits) == 0 && | ||
10399 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | ||
10400 | } | ||
10401 | atomic_dec(&fs_info->async_submit_draining); | ||
10402 | out: | 10305 | out: |
10403 | if (!list_empty_careful(&splice)) { | 10306 | if (!list_empty_careful(&splice)) { |
10404 | spin_lock(&fs_info->delalloc_root_lock); | 10307 | spin_lock(&fs_info->delalloc_root_lock); |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 6c7a49faf4e0..fd172a93d11a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -86,6 +86,19 @@ struct btrfs_ioctl_received_subvol_args_32 { | |||
86 | struct btrfs_ioctl_received_subvol_args_32) | 86 | struct btrfs_ioctl_received_subvol_args_32) |
87 | #endif | 87 | #endif |
88 | 88 | ||
89 | #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) | ||
90 | struct btrfs_ioctl_send_args_32 { | ||
91 | __s64 send_fd; /* in */ | ||
92 | __u64 clone_sources_count; /* in */ | ||
93 | compat_uptr_t clone_sources; /* in */ | ||
94 | __u64 parent_root; /* in */ | ||
95 | __u64 flags; /* in */ | ||
96 | __u64 reserved[4]; /* in */ | ||
97 | } __attribute__ ((__packed__)); | ||
98 | |||
99 | #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \ | ||
100 | struct btrfs_ioctl_send_args_32) | ||
101 | #endif | ||
89 | 102 | ||
90 | static int btrfs_clone(struct inode *src, struct inode *inode, | 103 | static int btrfs_clone(struct inode *src, struct inode *inode, |
91 | u64 off, u64 olen, u64 olen_aligned, u64 destoff, | 104 | u64 off, u64 olen, u64 olen_aligned, u64 destoff, |
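
The ioctl hunk introduces a packed 32-bit mirror of btrfs_ioctl_send_args because the clone_sources pointer changes size between ABIs, which changes both the struct layout and the computed _IOW number. Compiled on a 64-bit host, the sketch below shows the two layouts diverging (field types mirror the diff; the sizes are whatever the compiler reports):

#include <stdint.h>
#include <stdio.h>

struct send_args_native {
    int64_t  send_fd;
    uint64_t clone_sources_count;
    uint64_t *clone_sources;          /* 8 bytes on a 64-bit build */
    uint64_t parent_root;
    uint64_t flags;
    uint64_t reserved[4];
};

struct send_args_32 {
    int64_t  send_fd;
    uint64_t clone_sources_count;
    uint32_t clone_sources;           /* compat_uptr_t: always 4 bytes */
    uint64_t parent_root;
    uint64_t flags;
    uint64_t reserved[4];
} __attribute__((__packed__));

int main(void)
{
    printf("native layout: %zu bytes\n", sizeof(struct send_args_native));
    printf("compat layout: %zu bytes\n", sizeof(struct send_args_32));
    return 0;
}
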
@@ -609,23 +622,6 @@ fail_free: | |||
609 | return ret; | 622 | return ret; |
610 | } | 623 | } |
611 | 624 | ||
612 | static void btrfs_wait_for_no_snapshotting_writes(struct btrfs_root *root) | ||
613 | { | ||
614 | s64 writers; | ||
615 | DEFINE_WAIT(wait); | ||
616 | |||
617 | do { | ||
618 | prepare_to_wait(&root->subv_writers->wait, &wait, | ||
619 | TASK_UNINTERRUPTIBLE); | ||
620 | |||
621 | writers = percpu_counter_sum(&root->subv_writers->counter); | ||
622 | if (writers) | ||
623 | schedule(); | ||
624 | |||
625 | finish_wait(&root->subv_writers->wait, &wait); | ||
626 | } while (writers); | ||
627 | } | ||
628 | |||
629 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, | 625 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, |
630 | struct dentry *dentry, | 626 | struct dentry *dentry, |
631 | u64 *async_transid, bool readonly, | 627 | u64 *async_transid, bool readonly, |
@@ -654,7 +650,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
654 | 650 | ||
655 | atomic_inc(&root->will_be_snapshotted); | 651 | atomic_inc(&root->will_be_snapshotted); |
656 | smp_mb__after_atomic(); | 652 | smp_mb__after_atomic(); |
657 | btrfs_wait_for_no_snapshotting_writes(root); | 653 | /* wait for no snapshot writes */ |
654 | wait_event(root->subv_writers->wait, | ||
655 | percpu_counter_sum(&root->subv_writers->counter) == 0); | ||
658 | 656 | ||
659 | ret = btrfs_start_delalloc_inodes(root, 0); | 657 | ret = btrfs_start_delalloc_inodes(root, 0); |
660 | if (ret) | 658 | if (ret) |
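
create_snapshot replaces the open-coded prepare_to_wait/schedule loop with a single wait_event on the predicate percpu_counter_sum(...) == 0. wait_event is morally a condition-variable loop that rechecks its predicate after every wakeup; a userspace analogue:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static long writers;   /* plays percpu_counter_sum(...) */

static void wait_for_no_snapshotting_writes(void)
{
    pthread_mutex_lock(&lock);
    while (writers != 0)                  /* the wait_event predicate */
        pthread_cond_wait(&cond, &lock);  /* recheck after each wakeup */
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    writers = 0;
    wait_for_no_snapshotting_writes();    /* returns immediately here */
    puts("no snapshot writes in flight");
    return 0;
}
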
@@ -1219,6 +1217,7 @@ again: | |||
1219 | unlock_page(pages[i]); | 1217 | unlock_page(pages[i]); |
1220 | put_page(pages[i]); | 1218 | put_page(pages[i]); |
1221 | } | 1219 | } |
1220 | btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); | ||
1222 | extent_changeset_free(data_reserved); | 1221 | extent_changeset_free(data_reserved); |
1223 | return i_done; | 1222 | return i_done; |
1224 | out: | 1223 | out: |
@@ -1229,6 +1228,7 @@ out: | |||
1229 | btrfs_delalloc_release_space(inode, data_reserved, | 1228 | btrfs_delalloc_release_space(inode, data_reserved, |
1230 | start_index << PAGE_SHIFT, | 1229 | start_index << PAGE_SHIFT, |
1231 | page_cnt << PAGE_SHIFT); | 1230 | page_cnt << PAGE_SHIFT); |
1231 | btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); | ||
1232 | extent_changeset_free(data_reserved); | 1232 | extent_changeset_free(data_reserved); |
1233 | return ret; | 1233 | return ret; |
1234 | 1234 | ||
@@ -1420,21 +1420,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1420 | filemap_flush(inode->i_mapping); | 1420 | filemap_flush(inode->i_mapping); |
1421 | } | 1421 | } |
1422 | 1422 | ||
1423 | if (do_compress) { | ||
1424 | /* the filemap_flush will queue IO into the worker threads, but | ||
1425 | * we have to make sure the IO is actually started and that | ||
1426 | * ordered extents get created before we return | ||
1427 | */ | ||
1428 | atomic_inc(&fs_info->async_submit_draining); | ||
1429 | while (atomic_read(&fs_info->nr_async_submits) || | ||
1430 | atomic_read(&fs_info->async_delalloc_pages)) { | ||
1431 | wait_event(fs_info->async_submit_wait, | ||
1432 | (atomic_read(&fs_info->nr_async_submits) == 0 && | ||
1433 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | ||
1434 | } | ||
1435 | atomic_dec(&fs_info->async_submit_draining); | ||
1436 | } | ||
1437 | |||
1438 | if (range->compress_type == BTRFS_COMPRESS_LZO) { | 1423 | if (range->compress_type == BTRFS_COMPRESS_LZO) { |
1439 | btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); | 1424 | btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); |
1440 | } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { | 1425 | } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { |
@@ -1842,8 +1827,13 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file, | |||
1842 | 1827 | ||
1843 | ret = btrfs_update_root(trans, fs_info->tree_root, | 1828 | ret = btrfs_update_root(trans, fs_info->tree_root, |
1844 | &root->root_key, &root->root_item); | 1829 | &root->root_key, &root->root_item); |
1830 | if (ret < 0) { | ||
1831 | btrfs_end_transaction(trans); | ||
1832 | goto out_reset; | ||
1833 | } | ||
1834 | |||
1835 | ret = btrfs_commit_transaction(trans); | ||
1845 | 1836 | ||
1846 | btrfs_commit_transaction(trans); | ||
1847 | out_reset: | 1837 | out_reset: |
1848 | if (ret) | 1838 | if (ret) |
1849 | btrfs_set_root_flags(&root->root_item, root_flags); | 1839 | btrfs_set_root_flags(&root->root_item, root_flags); |
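
The subvol_setflags fix stops committing a transaction whose btrfs_update_root already failed, and starts propagating the commit's own return value instead of discarding it. The control flow, with stand-in functions (-5 plays the role of an -EIO-style failure):

#include <stdio.h>

static int update_root(int fail) { return fail ? -5 : 0; }
static int commit_transaction(void) { return 0; }
static void end_transaction(void) { }

static int set_subvol_flags(int fail)
{
    int ret = update_root(fail);

    if (ret < 0) {
        end_transaction();        /* never commit a failed update */
        return ret;
    }
    return commit_transaction();  /* and don't discard this result */
}

int main(void)
{
    printf("%d %d\n", set_subvol_flags(0), set_subvol_flags(1)); /* 0 -5 */
    return 0;
}
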
@@ -2179,7 +2169,7 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file, | |||
2179 | 2169 | ||
2180 | inode = file_inode(file); | 2170 | inode = file_inode(file); |
2181 | ret = search_ioctl(inode, &args.key, &buf_size, | 2171 | ret = search_ioctl(inode, &args.key, &buf_size, |
2182 | (char *)(&uarg->buf[0])); | 2172 | (char __user *)(&uarg->buf[0])); |
2183 | if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) | 2173 | if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) |
2184 | ret = -EFAULT; | 2174 | ret = -EFAULT; |
2185 | else if (ret == -EOVERFLOW && | 2175 | else if (ret == -EOVERFLOW && |
@@ -3706,7 +3696,7 @@ process_slot: | |||
3706 | if (disko) { | 3696 | if (disko) { |
3707 | inode_add_bytes(inode, datal); | 3697 | inode_add_bytes(inode, datal); |
3708 | ret = btrfs_inc_extent_ref(trans, | 3698 | ret = btrfs_inc_extent_ref(trans, |
3709 | fs_info, | 3699 | root, |
3710 | disko, diskl, 0, | 3700 | disko, diskl, 0, |
3711 | root->root_key.objectid, | 3701 | root->root_key.objectid, |
3712 | btrfs_ino(BTRFS_I(inode)), | 3702 | btrfs_ino(BTRFS_I(inode)), |
@@ -4129,10 +4119,12 @@ static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info, | |||
4129 | struct btrfs_ioctl_space_info *dest_orig; | 4119 | struct btrfs_ioctl_space_info *dest_orig; |
4130 | struct btrfs_ioctl_space_info __user *user_dest; | 4120 | struct btrfs_ioctl_space_info __user *user_dest; |
4131 | struct btrfs_space_info *info; | 4121 | struct btrfs_space_info *info; |
4132 | u64 types[] = {BTRFS_BLOCK_GROUP_DATA, | 4122 | static const u64 types[] = { |
4133 | BTRFS_BLOCK_GROUP_SYSTEM, | 4123 | BTRFS_BLOCK_GROUP_DATA, |
4134 | BTRFS_BLOCK_GROUP_METADATA, | 4124 | BTRFS_BLOCK_GROUP_SYSTEM, |
4135 | BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; | 4125 | BTRFS_BLOCK_GROUP_METADATA, |
4126 | BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA | ||
4127 | }; | ||
4136 | int num_types = 4; | 4128 | int num_types = 4; |
4137 | int alloc_size; | 4129 | int alloc_size; |
4138 | int ret = 0; | 4130 | int ret = 0; |
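
Marking the block-group types table static const moves it into read-only data, so the function no longer rebuilds the four-element array on the stack at every call. An equivalent sketch with placeholder flag values, not the BTRFS_BLOCK_GROUP_* constants:

#include <stdint.h>
#include <stdio.h>

#define GRP_DATA   (1ULL << 0)
#define GRP_SYSTEM (1ULL << 1)
#define GRP_META   (1ULL << 2)

/* static const: one read-only copy, no per-call stack initialization. */
static const uint64_t types[] = {
    GRP_DATA,
    GRP_SYSTEM,
    GRP_META,
    GRP_DATA | GRP_META,
};

int main(void)
{
    for (size_t i = 0; i < sizeof(types) / sizeof(types[0]); i++)
        printf("types[%zu] = 0x%llx\n", i, (unsigned long long)types[i]);
    return 0;
}
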
@@ -4504,8 +4496,8 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | |||
4504 | ipath->fspath->val[i] = rel_ptr; | 4496 | ipath->fspath->val[i] = rel_ptr; |
4505 | } | 4497 | } |
4506 | 4498 | ||
4507 | ret = copy_to_user((void *)(unsigned long)ipa->fspath, | 4499 | ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, |
4508 | (void *)(unsigned long)ipath->fspath, size); | 4500 | ipath->fspath, size); |
4509 | if (ret) { | 4501 | if (ret) { |
4510 | ret = -EFAULT; | 4502 | ret = -EFAULT; |
4511 | goto out; | 4503 | goto out; |
@@ -4540,13 +4532,14 @@ static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) | |||
4540 | } | 4532 | } |
4541 | 4533 | ||
4542 | static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, | 4534 | static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, |
4543 | void __user *arg) | 4535 | void __user *arg, int version) |
4544 | { | 4536 | { |
4545 | int ret = 0; | 4537 | int ret = 0; |
4546 | int size; | 4538 | int size; |
4547 | struct btrfs_ioctl_logical_ino_args *loi; | 4539 | struct btrfs_ioctl_logical_ino_args *loi; |
4548 | struct btrfs_data_container *inodes = NULL; | 4540 | struct btrfs_data_container *inodes = NULL; |
4549 | struct btrfs_path *path = NULL; | 4541 | struct btrfs_path *path = NULL; |
4542 | bool ignore_offset; | ||
4550 | 4543 | ||
4551 | if (!capable(CAP_SYS_ADMIN)) | 4544 | if (!capable(CAP_SYS_ADMIN)) |
4552 | return -EPERM; | 4545 | return -EPERM; |
@@ -4555,13 +4548,30 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, | |||
4555 | if (IS_ERR(loi)) | 4548 | if (IS_ERR(loi)) |
4556 | return PTR_ERR(loi); | 4549 | return PTR_ERR(loi); |
4557 | 4550 | ||
4551 | if (version == 1) { | ||
4552 | ignore_offset = false; | ||
4553 | size = min_t(u32, loi->size, SZ_64K); | ||
4554 | } else { | ||
4555 | /* All reserved bits must be 0 for now */ | ||
4556 | if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) { | ||
4557 | ret = -EINVAL; | ||
4558 | goto out_loi; | ||
4559 | } | ||
4560 | /* Only accept flags we have defined so far */ | ||
4561 | if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) { | ||
4562 | ret = -EINVAL; | ||
4563 | goto out_loi; | ||
4564 | } | ||
4565 | ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; | ||
4566 | size = min_t(u32, loi->size, SZ_16M); | ||
4567 | } | ||
4568 | |||
4558 | path = btrfs_alloc_path(); | 4569 | path = btrfs_alloc_path(); |
4559 | if (!path) { | 4570 | if (!path) { |
4560 | ret = -ENOMEM; | 4571 | ret = -ENOMEM; |
4561 | goto out; | 4572 | goto out; |
4562 | } | 4573 | } |
4563 | 4574 | ||
4564 | size = min_t(u32, loi->size, SZ_64K); | ||
4565 | inodes = init_data_container(size); | 4575 | inodes = init_data_container(size); |
4566 | if (IS_ERR(inodes)) { | 4576 | if (IS_ERR(inodes)) { |
4567 | ret = PTR_ERR(inodes); | 4577 | ret = PTR_ERR(inodes); |
@@ -4570,20 +4580,21 @@ static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, | |||
4570 | } | 4580 | } |
4571 | 4581 | ||
4572 | ret = iterate_inodes_from_logical(loi->logical, fs_info, path, | 4582 | ret = iterate_inodes_from_logical(loi->logical, fs_info, path, |
4573 | build_ino_list, inodes); | 4583 | build_ino_list, inodes, ignore_offset); |
4574 | if (ret == -EINVAL) | 4584 | if (ret == -EINVAL) |
4575 | ret = -ENOENT; | 4585 | ret = -ENOENT; |
4576 | if (ret < 0) | 4586 | if (ret < 0) |
4577 | goto out; | 4587 | goto out; |
4578 | 4588 | ||
4579 | ret = copy_to_user((void *)(unsigned long)loi->inodes, | 4589 | ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes, |
4580 | (void *)(unsigned long)inodes, size); | 4590 | size); |
4581 | if (ret) | 4591 | if (ret) |
4582 | ret = -EFAULT; | 4592 | ret = -EFAULT; |
4583 | 4593 | ||
4584 | out: | 4594 | out: |
4585 | btrfs_free_path(path); | 4595 | btrfs_free_path(path); |
4586 | kvfree(inodes); | 4596 | kvfree(inodes); |
4597 | out_loi: | ||
4587 | kfree(loi); | 4598 | kfree(loi); |
4588 | 4599 | ||
4589 | return ret; | 4600 | return ret; |
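
The LOGICAL_INO_V2 path validates its inputs defensively: every reserved byte must be zero (checked with memchr_inv) and any flag outside the defined set is rejected, so the fields can gain meaning in later kernels without ambiguity. A userspace check in the same spirit; the flag's bit position matches the only flag the diff defines, but treat it as an assumption:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define IGNORE_OFFSET (1ULL << 0)   /* assumed bit for the one v2 flag */

struct loi_args {
    uint64_t flags;
    uint64_t reserved[3];
};

static int validate_v2(const struct loi_args *a)
{
    static const uint64_t zero[3];

    if (memcmp(a->reserved, zero, sizeof(zero)))
        return -22;                 /* -EINVAL: reserved bytes in use */
    if (a->flags & ~IGNORE_OFFSET)
        return -22;                 /* -EINVAL: flag we don't know */
    return 0;
}

int main(void)
{
    struct loi_args ok = { .flags = IGNORE_OFFSET };
    struct loi_args bad = { .flags = 1ULL << 7 };

    printf("%d %d\n", validate_v2(&ok), validate_v2(&bad)); /* 0 -22 */
    return 0;
}
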
@@ -5160,15 +5171,11 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file, | |||
5160 | root->root_key.objectid); | 5171 | root->root_key.objectid); |
5161 | if (ret < 0 && ret != -EEXIST) { | 5172 | if (ret < 0 && ret != -EEXIST) { |
5162 | btrfs_abort_transaction(trans, ret); | 5173 | btrfs_abort_transaction(trans, ret); |
5174 | btrfs_end_transaction(trans); | ||
5163 | goto out; | 5175 | goto out; |
5164 | } | 5176 | } |
5165 | } | 5177 | } |
5166 | ret = btrfs_commit_transaction(trans); | 5178 | ret = btrfs_commit_transaction(trans); |
5167 | if (ret < 0) { | ||
5168 | btrfs_abort_transaction(trans, ret); | ||
5169 | goto out; | ||
5170 | } | ||
5171 | |||
5172 | out: | 5179 | out: |
5173 | up_write(&fs_info->subvol_sem); | 5180 | up_write(&fs_info->subvol_sem); |
5174 | mnt_drop_write_file(file); | 5181 | mnt_drop_write_file(file); |
@@ -5490,6 +5497,41 @@ out_drop_write: | |||
5490 | return ret; | 5497 | return ret; |
5491 | } | 5498 | } |
5492 | 5499 | ||
5500 | static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat) | ||
5501 | { | ||
5502 | struct btrfs_ioctl_send_args *arg; | ||
5503 | int ret; | ||
5504 | |||
5505 | if (compat) { | ||
5506 | #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) | ||
5507 | struct btrfs_ioctl_send_args_32 args32; | ||
5508 | |||
5509 | ret = copy_from_user(&args32, argp, sizeof(args32)); | ||
5510 | if (ret) | ||
5511 | return -EFAULT; | ||
5512 | arg = kzalloc(sizeof(*arg), GFP_KERNEL); | ||
5513 | if (!arg) | ||
5514 | return -ENOMEM; | ||
5515 | arg->send_fd = args32.send_fd; | ||
5516 | arg->clone_sources_count = args32.clone_sources_count; | ||
5517 | arg->clone_sources = compat_ptr(args32.clone_sources); | ||
5518 | arg->parent_root = args32.parent_root; | ||
5519 | arg->flags = args32.flags; | ||
5520 | memcpy(arg->reserved, args32.reserved, | ||
5521 | sizeof(args32.reserved)); | ||
5522 | #else | ||
5523 | return -ENOTTY; | ||
5524 | #endif | ||
5525 | } else { | ||
5526 | arg = memdup_user(argp, sizeof(*arg)); | ||
5527 | if (IS_ERR(arg)) | ||
5528 | return PTR_ERR(arg); | ||
5529 | } | ||
5530 | ret = btrfs_ioctl_send(file, arg); | ||
5531 | kfree(arg); | ||
5532 | return ret; | ||
5533 | } | ||
5534 | |||
5493 | long btrfs_ioctl(struct file *file, unsigned int | 5535 | long btrfs_ioctl(struct file *file, unsigned int |
5494 | cmd, unsigned long arg) | 5536 | cmd, unsigned long arg) |
5495 | { | 5537 | { |
@@ -5554,7 +5596,9 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
5554 | case BTRFS_IOC_INO_PATHS: | 5596 | case BTRFS_IOC_INO_PATHS: |
5555 | return btrfs_ioctl_ino_to_path(root, argp); | 5597 | return btrfs_ioctl_ino_to_path(root, argp); |
5556 | case BTRFS_IOC_LOGICAL_INO: | 5598 | case BTRFS_IOC_LOGICAL_INO: |
5557 | return btrfs_ioctl_logical_to_ino(fs_info, argp); | 5599 | return btrfs_ioctl_logical_to_ino(fs_info, argp, 1); |
5600 | case BTRFS_IOC_LOGICAL_INO_V2: | ||
5601 | return btrfs_ioctl_logical_to_ino(fs_info, argp, 2); | ||
5558 | case BTRFS_IOC_SPACE_INFO: | 5602 | case BTRFS_IOC_SPACE_INFO: |
5559 | return btrfs_ioctl_space_info(fs_info, argp); | 5603 | return btrfs_ioctl_space_info(fs_info, argp); |
5560 | case BTRFS_IOC_SYNC: { | 5604 | case BTRFS_IOC_SYNC: { |
@@ -5595,7 +5639,11 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
5595 | return btrfs_ioctl_set_received_subvol_32(file, argp); | 5639 | return btrfs_ioctl_set_received_subvol_32(file, argp); |
5596 | #endif | 5640 | #endif |
5597 | case BTRFS_IOC_SEND: | 5641 | case BTRFS_IOC_SEND: |
5598 | return btrfs_ioctl_send(file, argp); | 5642 | return _btrfs_ioctl_send(file, argp, false); |
5643 | #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) | ||
5644 | case BTRFS_IOC_SEND_32: | ||
5645 | return _btrfs_ioctl_send(file, argp, true); | ||
5646 | #endif | ||
5599 | case BTRFS_IOC_GET_DEV_STATS: | 5647 | case BTRFS_IOC_GET_DEV_STATS: |
5600 | return btrfs_ioctl_get_dev_stats(fs_info, argp); | 5648 | return btrfs_ioctl_get_dev_stats(fs_info, argp); |
5601 | case BTRFS_IOC_QUOTA_CTL: | 5649 | case BTRFS_IOC_QUOTA_CTL: |
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c index d433e75d489a..6c7f18cd3b61 100644 --- a/fs/btrfs/lzo.c +++ b/fs/btrfs/lzo.c | |||
@@ -430,10 +430,15 @@ out: | |||
430 | return ret; | 430 | return ret; |
431 | } | 431 | } |
432 | 432 | ||
433 | static void lzo_set_level(struct list_head *ws, unsigned int type) | ||
434 | { | ||
435 | } | ||
436 | |||
433 | const struct btrfs_compress_op btrfs_lzo_compress = { | 437 | const struct btrfs_compress_op btrfs_lzo_compress = { |
434 | .alloc_workspace = lzo_alloc_workspace, | 438 | .alloc_workspace = lzo_alloc_workspace, |
435 | .free_workspace = lzo_free_workspace, | 439 | .free_workspace = lzo_free_workspace, |
436 | .compress_pages = lzo_compress_pages, | 440 | .compress_pages = lzo_compress_pages, |
437 | .decompress_bio = lzo_decompress_bio, | 441 | .decompress_bio = lzo_decompress_bio, |
438 | .decompress = lzo_decompress, | 442 | .decompress = lzo_decompress, |
443 | .set_level = lzo_set_level, | ||
439 | }; | 444 | }; |
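Annotation: lzo has no tunable compression levels, so its new .set_level hook is deliberately a no-op; the hook exists so that zlib can honour the new compress=zlib:N mount syntax. A hedged sketch of what a levelled algorithm's hook might look like - the names, the level encoding, and the default are assumptions, not the btrfs zlib code:

	struct demo_workspace {
		struct list_head list;
		unsigned int level;
	};

	static void demo_set_level(struct list_head *ws, unsigned int type)
	{
		struct demo_workspace *workspace =
			list_entry(ws, struct demo_workspace, list);
		unsigned int level = (type & 0xF0) >> 4;	/* assumed encoding */

		if (level > 9)
			level = 9;
		workspace->level = level ? level : 3;	/* assumed default */
	}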
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a3aca495e33e..5b311aeddcc8 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -242,6 +242,15 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
242 | } | 242 | } |
243 | spin_unlock(&root->ordered_extent_lock); | 243 | spin_unlock(&root->ordered_extent_lock); |
244 | 244 | ||
245 | /* | ||
245 | /* | ||
246 | * We don't need count_max_extents here; we can assume that all of | ||
247 | * that work has been done at higher layers, so this is truly the | ||
248 | * smallest the extent is going to get. | ||
249 | */ | ||
250 | spin_lock(&BTRFS_I(inode)->lock); | ||
251 | btrfs_mod_outstanding_extents(BTRFS_I(inode), 1); | ||
252 | spin_unlock(&BTRFS_I(inode)->lock); | ||
253 | |||
245 | return 0; | 254 | return 0; |
246 | } | 255 | } |
247 | 256 | ||
@@ -591,11 +600,19 @@ void btrfs_remove_ordered_extent(struct inode *inode, | |||
591 | { | 600 | { |
592 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); | 601 | struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); |
593 | struct btrfs_ordered_inode_tree *tree; | 602 | struct btrfs_ordered_inode_tree *tree; |
594 | struct btrfs_root *root = BTRFS_I(inode)->root; | 603 | struct btrfs_inode *btrfs_inode = BTRFS_I(inode); |
604 | struct btrfs_root *root = btrfs_inode->root; | ||
595 | struct rb_node *node; | 605 | struct rb_node *node; |
596 | bool dec_pending_ordered = false; | 606 | bool dec_pending_ordered = false; |
597 | 607 | ||
598 | tree = &BTRFS_I(inode)->ordered_tree; | 608 | /* This is paired with btrfs_add_ordered_extent. */ |
609 | spin_lock(&btrfs_inode->lock); | ||
610 | btrfs_mod_outstanding_extents(btrfs_inode, -1); | ||
611 | spin_unlock(&btrfs_inode->lock); | ||
612 | if (root != fs_info->tree_root) | ||
613 | btrfs_delalloc_release_metadata(btrfs_inode, entry->len); | ||
614 | |||
615 | tree = &btrfs_inode->ordered_tree; | ||
599 | spin_lock_irq(&tree->lock); | 616 | spin_lock_irq(&tree->lock); |
600 | node = &entry->rb_node; | 617 | node = &entry->rb_node; |
601 | rb_erase(node, &tree->tree); | 618 | rb_erase(node, &tree->tree); |
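Annotation: the +1 in __btrfs_add_ordered_extent and the -1 here bracket the ordered extent's lifetime, keeping the inode's outstanding-extent count balanced regardless of how the extent is split or merged in between; the metadata reservation for the range is then dropped at removal for everything except the tree root. The invariant in miniature, illustrative only, reusing the helpers visible in this hunk and assuming inode is a struct btrfs_inode *:

	/* creation */
	spin_lock(&inode->lock);
	btrfs_mod_outstanding_extents(inode, 1);	/* ordered extent born */
	spin_unlock(&inode->lock);

	/* ... IO completes; higher layers never let the count go negative ... */

	/* teardown, paired exactly once with the creation above */
	spin_lock(&inode->lock);
	btrfs_mod_outstanding_extents(inode, -1);	/* ordered extent gone */
	spin_unlock(&inode->lock);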
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e172d4843eae..168fd03ca3ac 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1441,7 +1441,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_fs_info *fs_info, | |||
1441 | u64 bytenr = qrecord->bytenr; | 1441 | u64 bytenr = qrecord->bytenr; |
1442 | int ret; | 1442 | int ret; |
1443 | 1443 | ||
1444 | ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root); | 1444 | ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &old_root, false); |
1445 | if (ret < 0) | 1445 | if (ret < 0) |
1446 | return ret; | 1446 | return ret; |
1447 | 1447 | ||
@@ -2031,7 +2031,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, | |||
2031 | /* Search commit root to find old_roots */ | 2031 | /* Search commit root to find old_roots */ |
2032 | ret = btrfs_find_all_roots(NULL, fs_info, | 2032 | ret = btrfs_find_all_roots(NULL, fs_info, |
2033 | record->bytenr, 0, | 2033 | record->bytenr, 0, |
2034 | &record->old_roots); | 2034 | &record->old_roots, false); |
2035 | if (ret < 0) | 2035 | if (ret < 0) |
2036 | goto cleanup; | 2036 | goto cleanup; |
2037 | } | 2037 | } |
@@ -2042,7 +2042,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans, | |||
2042 | * root. It's safe inside commit_transaction(). | 2042 | * root. It's safe inside commit_transaction(). |
2043 | */ | 2043 | */ |
2044 | ret = btrfs_find_all_roots(trans, fs_info, | 2044 | ret = btrfs_find_all_roots(trans, fs_info, |
2045 | record->bytenr, SEQ_LAST, &new_roots); | 2045 | record->bytenr, SEQ_LAST, &new_roots, false); |
2046 | if (ret < 0) | 2046 | if (ret < 0) |
2047 | goto cleanup; | 2047 | goto cleanup; |
2048 | if (qgroup_to_skip) { | 2048 | if (qgroup_to_skip) { |
@@ -2570,7 +2570,7 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path, | |||
2570 | num_bytes = found.offset; | 2570 | num_bytes = found.offset; |
2571 | 2571 | ||
2572 | ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, | 2572 | ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, |
2573 | &roots); | 2573 | &roots, false); |
2574 | if (ret < 0) | 2574 | if (ret < 0) |
2575 | goto out; | 2575 | goto out; |
2576 | /* For rescan, just pass old_roots as NULL */ | 2576 | /* For rescan, just pass old_roots as NULL */ |
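Annotation: every btrfs_find_all_roots() caller in the qgroup code passes false for the new trailing boolean. Reading it as an ignore-offset toggle (the name is assumed from the LOGICAL_INO_V2 work elsewhere in this merge): skipping offset checks finds every reference to a shared uncompressed extent faster, but qgroup accounting needs exact per-offset resolution, hence false everywhere:

	/* qgroups opt out of the shortcut; exactness beats speed here */
	ret = btrfs_find_all_roots(NULL, fs_info, bytenr, 0, &roots,
				   false /* assumed: do not ignore offsets */);
	if (ret < 0)
		return ret;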
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 24a62224b24b..a7f79254ecca 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -1326,6 +1326,9 @@ write_data: | |||
1326 | 1326 | ||
1327 | cleanup: | 1327 | cleanup: |
1328 | rbio_orig_end_io(rbio, BLK_STS_IOERR); | 1328 | rbio_orig_end_io(rbio, BLK_STS_IOERR); |
1329 | |||
1330 | while ((bio = bio_list_pop(&bio_list))) | ||
1331 | bio_put(bio); | ||
1329 | } | 1332 | } |
1330 | 1333 | ||
1331 | /* | 1334 | /* |
@@ -1582,6 +1585,10 @@ static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) | |||
1582 | 1585 | ||
1583 | cleanup: | 1586 | cleanup: |
1584 | rbio_orig_end_io(rbio, BLK_STS_IOERR); | 1587 | rbio_orig_end_io(rbio, BLK_STS_IOERR); |
1588 | |||
1589 | while ((bio = bio_list_pop(&bio_list))) | ||
1590 | bio_put(bio); | ||
1591 | |||
1585 | return -EIO; | 1592 | return -EIO; |
1586 | 1593 | ||
1587 | finish: | 1594 | finish: |
@@ -2107,6 +2114,10 @@ cleanup: | |||
2107 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD || | 2114 | if (rbio->operation == BTRFS_RBIO_READ_REBUILD || |
2108 | rbio->operation == BTRFS_RBIO_REBUILD_MISSING) | 2115 | rbio->operation == BTRFS_RBIO_REBUILD_MISSING) |
2109 | rbio_orig_end_io(rbio, BLK_STS_IOERR); | 2116 | rbio_orig_end_io(rbio, BLK_STS_IOERR); |
2117 | |||
2118 | while ((bio = bio_list_pop(&bio_list))) | ||
2119 | bio_put(bio); | ||
2120 | |||
2110 | return -EIO; | 2121 | return -EIO; |
2111 | } | 2122 | } |
2112 | 2123 | ||
@@ -2231,12 +2242,18 @@ raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, | |||
2231 | ASSERT(!bio->bi_iter.bi_size); | 2242 | ASSERT(!bio->bi_iter.bi_size); |
2232 | rbio->operation = BTRFS_RBIO_PARITY_SCRUB; | 2243 | rbio->operation = BTRFS_RBIO_PARITY_SCRUB; |
2233 | 2244 | ||
2234 | for (i = 0; i < rbio->real_stripes; i++) { | 2245 | /* |
2246 | * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted | ||
2247 | * to the end position, so this search can start from the first parity | ||
2248 | * stripe. | ||
2249 | */ | ||
2250 | for (i = rbio->nr_data; i < rbio->real_stripes; i++) { | ||
2235 | if (bbio->stripes[i].dev == scrub_dev) { | 2251 | if (bbio->stripes[i].dev == scrub_dev) { |
2236 | rbio->scrubp = i; | 2252 | rbio->scrubp = i; |
2237 | break; | 2253 | break; |
2238 | } | 2254 | } |
2239 | } | 2255 | } |
2256 | ASSERT(i < rbio->real_stripes); | ||
2240 | 2257 | ||
2241 | /* Now we just support the sectorsize equals to page size */ | 2258 | /* Now we just support the sectorsize equals to page size */ |
2242 | ASSERT(fs_info->sectorsize == PAGE_SIZE); | 2259 | ASSERT(fs_info->sectorsize == PAGE_SIZE); |
@@ -2454,6 +2471,9 @@ submit_write: | |||
2454 | 2471 | ||
2455 | cleanup: | 2472 | cleanup: |
2456 | rbio_orig_end_io(rbio, BLK_STS_IOERR); | 2473 | rbio_orig_end_io(rbio, BLK_STS_IOERR); |
2474 | |||
2475 | while ((bio = bio_list_pop(&bio_list))) | ||
2476 | bio_put(bio); | ||
2457 | } | 2477 | } |
2458 | 2478 | ||
2459 | static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) | 2479 | static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) |
@@ -2563,12 +2583,12 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) | |||
2563 | int stripe; | 2583 | int stripe; |
2564 | struct bio *bio; | 2584 | struct bio *bio; |
2565 | 2585 | ||
2586 | bio_list_init(&bio_list); | ||
2587 | |||
2566 | ret = alloc_rbio_essential_pages(rbio); | 2588 | ret = alloc_rbio_essential_pages(rbio); |
2567 | if (ret) | 2589 | if (ret) |
2568 | goto cleanup; | 2590 | goto cleanup; |
2569 | 2591 | ||
2570 | bio_list_init(&bio_list); | ||
2571 | |||
2572 | atomic_set(&rbio->error, 0); | 2592 | atomic_set(&rbio->error, 0); |
2573 | /* | 2593 | /* |
2574 | * build a list of bios to read all the missing parts of this | 2594 | * build a list of bios to read all the missing parts of this |
@@ -2636,6 +2656,10 @@ static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) | |||
2636 | 2656 | ||
2637 | cleanup: | 2657 | cleanup: |
2638 | rbio_orig_end_io(rbio, BLK_STS_IOERR); | 2658 | rbio_orig_end_io(rbio, BLK_STS_IOERR); |
2659 | |||
2660 | while ((bio = bio_list_pop(&bio_list))) | ||
2661 | bio_put(bio); | ||
2662 | |||
2639 | return; | 2663 | return; |
2640 | 2664 | ||
2641 | finish: | 2665 | finish: |
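Annotation: all of the raid56 error paths gain the same two lines: drain the locally built bio_list and drop each bio's reference. Ending the parent rbio with BLK_STS_IOERR only completes the original request; bios allocated for the stripe but never submitted would otherwise leak. Moving bio_list_init() ahead of the first goto cleanup in raid56_parity_scrub_stripe() keeps the drain safe even on the earliest failure. Factored into a helper, the pattern is (hypothetical helper name; bio_list_pop and bio_put are used exactly as in the hunks above):

	static void demo_drain_unsubmitted(struct bio_list *bio_list)
	{
		struct bio *bio;

		/* put the reference taken when each bio was allocated */
		while ((bio = bio_list_pop(bio_list)))
			bio_put(bio);
	}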
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c new file mode 100644 index 000000000000..34878699d363 --- /dev/null +++ b/fs/btrfs/ref-verify.c | |||
@@ -0,0 +1,1031 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Facebook. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/stacktrace.h> | ||
21 | #include "ctree.h" | ||
22 | #include "disk-io.h" | ||
23 | #include "locking.h" | ||
24 | #include "delayed-ref.h" | ||
25 | #include "ref-verify.h" | ||
26 | |||
27 | /* | ||
28 | * Used to keep track of the roots and number of refs each root has for a given | ||
29 | * bytenr. This just tracks the number of direct references, no shared | ||
30 | * references. | ||
31 | */ | ||
32 | struct root_entry { | ||
33 | u64 root_objectid; | ||
34 | u64 num_refs; | ||
35 | struct rb_node node; | ||
36 | }; | ||
37 | |||
38 | /* | ||
39 | * These are meant to represent what should exist in the extent tree; they can | ||
40 | * be used to verify that the extent tree is consistent, as they should all | ||
41 | * match what the extent tree says. | ||
42 | */ | ||
43 | struct ref_entry { | ||
44 | u64 root_objectid; | ||
45 | u64 parent; | ||
46 | u64 owner; | ||
47 | u64 offset; | ||
48 | u64 num_refs; | ||
49 | struct rb_node node; | ||
50 | }; | ||
51 | |||
52 | #define MAX_TRACE 16 | ||
53 | |||
54 | /* | ||
55 | * Whenever we add/remove a reference we record the action. The action maps | ||
56 | * back to the delayed ref action. We hold the ref we are changing in the | ||
57 | * action so we can account for the history properly, and we record the root we | ||
58 | * were called with since it could be different from ref_root. We also store | ||
59 | * stack traces because that's how I roll. | ||
60 | */ | ||
61 | struct ref_action { | ||
62 | int action; | ||
63 | u64 root; | ||
64 | struct ref_entry ref; | ||
65 | struct list_head list; | ||
66 | unsigned long trace[MAX_TRACE]; | ||
67 | unsigned int trace_len; | ||
68 | }; | ||
69 | |||
70 | /* | ||
71 | * One of these for every block we reference, it holds the roots and references | ||
72 | * to it as well as all of the ref actions that have occurred to it. We never | ||
73 | * free it until we unmount the file system in order to make sure re-allocations | ||
74 | * are happening properly. | ||
75 | */ | ||
76 | struct block_entry { | ||
77 | u64 bytenr; | ||
78 | u64 len; | ||
79 | u64 num_refs; | ||
80 | int metadata; | ||
81 | int from_disk; | ||
82 | struct rb_root roots; | ||
83 | struct rb_root refs; | ||
84 | struct rb_node node; | ||
85 | struct list_head actions; | ||
86 | }; | ||
87 | |||
88 | static struct block_entry *insert_block_entry(struct rb_root *root, | ||
89 | struct block_entry *be) | ||
90 | { | ||
91 | struct rb_node **p = &root->rb_node; | ||
92 | struct rb_node *parent_node = NULL; | ||
93 | struct block_entry *entry; | ||
94 | |||
95 | while (*p) { | ||
96 | parent_node = *p; | ||
97 | entry = rb_entry(parent_node, struct block_entry, node); | ||
98 | if (entry->bytenr > be->bytenr) | ||
99 | p = &(*p)->rb_left; | ||
100 | else if (entry->bytenr < be->bytenr) | ||
101 | p = &(*p)->rb_right; | ||
102 | else | ||
103 | return entry; | ||
104 | } | ||
105 | |||
106 | rb_link_node(&be->node, parent_node, p); | ||
107 | rb_insert_color(&be->node, root); | ||
108 | return NULL; | ||
109 | } | ||
110 | |||
111 | static struct block_entry *lookup_block_entry(struct rb_root *root, u64 bytenr) | ||
112 | { | ||
113 | struct rb_node *n; | ||
114 | struct block_entry *entry = NULL; | ||
115 | |||
116 | n = root->rb_node; | ||
117 | while (n) { | ||
118 | entry = rb_entry(n, struct block_entry, node); | ||
119 | if (entry->bytenr < bytenr) | ||
120 | n = n->rb_right; | ||
121 | else if (entry->bytenr > bytenr) | ||
122 | n = n->rb_left; | ||
123 | else | ||
124 | return entry; | ||
125 | } | ||
126 | return NULL; | ||
127 | } | ||
128 | |||
129 | static struct root_entry *insert_root_entry(struct rb_root *root, | ||
130 | struct root_entry *re) | ||
131 | { | ||
132 | struct rb_node **p = &root->rb_node; | ||
133 | struct rb_node *parent_node = NULL; | ||
134 | struct root_entry *entry; | ||
135 | |||
136 | while (*p) { | ||
137 | parent_node = *p; | ||
138 | entry = rb_entry(parent_node, struct root_entry, node); | ||
139 | if (entry->root_objectid > re->root_objectid) | ||
140 | p = &(*p)->rb_left; | ||
141 | else if (entry->root_objectid < re->root_objectid) | ||
142 | p = &(*p)->rb_right; | ||
143 | else | ||
144 | return entry; | ||
145 | } | ||
146 | |||
147 | rb_link_node(&re->node, parent_node, p); | ||
148 | rb_insert_color(&re->node, root); | ||
149 | return NULL; | ||
150 | |||
151 | } | ||
152 | |||
153 | static int comp_refs(struct ref_entry *ref1, struct ref_entry *ref2) | ||
154 | { | ||
155 | if (ref1->root_objectid < ref2->root_objectid) | ||
156 | return -1; | ||
157 | if (ref1->root_objectid > ref2->root_objectid) | ||
158 | return 1; | ||
159 | if (ref1->parent < ref2->parent) | ||
160 | return -1; | ||
161 | if (ref1->parent > ref2->parent) | ||
162 | return 1; | ||
163 | if (ref1->owner < ref2->owner) | ||
164 | return -1; | ||
165 | if (ref1->owner > ref2->owner) | ||
166 | return 1; | ||
167 | if (ref1->offset < ref2->offset) | ||
168 | return -1; | ||
169 | if (ref1->offset > ref2->offset) | ||
170 | return 1; | ||
171 | return 0; | ||
172 | } | ||
173 | |||
174 | static struct ref_entry *insert_ref_entry(struct rb_root *root, | ||
175 | struct ref_entry *ref) | ||
176 | { | ||
177 | struct rb_node **p = &root->rb_node; | ||
178 | struct rb_node *parent_node = NULL; | ||
179 | struct ref_entry *entry; | ||
180 | int cmp; | ||
181 | |||
182 | while (*p) { | ||
183 | parent_node = *p; | ||
184 | entry = rb_entry(parent_node, struct ref_entry, node); | ||
185 | cmp = comp_refs(entry, ref); | ||
186 | if (cmp > 0) | ||
187 | p = &(*p)->rb_left; | ||
188 | else if (cmp < 0) | ||
189 | p = &(*p)->rb_right; | ||
190 | else | ||
191 | return entry; | ||
192 | } | ||
193 | |||
194 | rb_link_node(&ref->node, parent_node, p); | ||
195 | rb_insert_color(&ref->node, root); | ||
196 | return NULL; | ||
197 | |||
198 | } | ||
199 | |||
200 | static struct root_entry *lookup_root_entry(struct rb_root *root, u64 objectid) | ||
201 | { | ||
202 | struct rb_node *n; | ||
203 | struct root_entry *entry = NULL; | ||
204 | |||
205 | n = root->rb_node; | ||
206 | while (n) { | ||
207 | entry = rb_entry(n, struct root_entry, node); | ||
208 | if (entry->root_objectid < objectid) | ||
209 | n = n->rb_right; | ||
210 | else if (entry->root_objectid > objectid) | ||
211 | n = n->rb_left; | ||
212 | else | ||
213 | return entry; | ||
214 | } | ||
215 | return NULL; | ||
216 | } | ||
217 | |||
218 | #ifdef CONFIG_STACKTRACE | ||
219 | static void __save_stack_trace(struct ref_action *ra) | ||
220 | { | ||
221 | struct stack_trace stack_trace; | ||
222 | |||
223 | stack_trace.max_entries = MAX_TRACE; | ||
224 | stack_trace.nr_entries = 0; | ||
225 | stack_trace.entries = ra->trace; | ||
226 | stack_trace.skip = 2; | ||
227 | save_stack_trace(&stack_trace); | ||
228 | ra->trace_len = stack_trace.nr_entries; | ||
229 | } | ||
230 | |||
231 | static void __print_stack_trace(struct btrfs_fs_info *fs_info, | ||
232 | struct ref_action *ra) | ||
233 | { | ||
234 | struct stack_trace trace; | ||
235 | |||
236 | if (ra->trace_len == 0) { | ||
237 | btrfs_err(fs_info, " ref-verify: no stacktrace"); | ||
238 | return; | ||
239 | } | ||
240 | trace.nr_entries = ra->trace_len; | ||
241 | trace.entries = ra->trace; | ||
242 | print_stack_trace(&trace, 2); | ||
243 | } | ||
244 | #else | ||
245 | static inline void __save_stack_trace(struct ref_action *ra) | ||
246 | { | ||
247 | } | ||
248 | |||
249 | static inline void __print_stack_trace(struct btrfs_fs_info *fs_info, | ||
250 | struct ref_action *ra) | ||
251 | { | ||
252 | btrfs_err(fs_info, " ref-verify: no stacktrace support"); | ||
253 | } | ||
254 | #endif | ||
255 | |||
256 | static void free_block_entry(struct block_entry *be) | ||
257 | { | ||
258 | struct root_entry *re; | ||
259 | struct ref_entry *ref; | ||
260 | struct ref_action *ra; | ||
261 | struct rb_node *n; | ||
262 | |||
263 | while ((n = rb_first(&be->roots))) { | ||
264 | re = rb_entry(n, struct root_entry, node); | ||
265 | rb_erase(&re->node, &be->roots); | ||
266 | kfree(re); | ||
267 | } | ||
268 | |||
269 | while ((n = rb_first(&be->refs))) { | ||
270 | ref = rb_entry(n, struct ref_entry, node); | ||
271 | rb_erase(&ref->node, &be->refs); | ||
272 | kfree(ref); | ||
273 | } | ||
274 | |||
275 | while (!list_empty(&be->actions)) { | ||
276 | ra = list_first_entry(&be->actions, struct ref_action, | ||
277 | list); | ||
278 | list_del(&ra->list); | ||
279 | kfree(ra); | ||
280 | } | ||
281 | kfree(be); | ||
282 | } | ||
283 | |||
284 | static struct block_entry *add_block_entry(struct btrfs_fs_info *fs_info, | ||
285 | u64 bytenr, u64 len, | ||
286 | u64 root_objectid) | ||
287 | { | ||
288 | struct block_entry *be = NULL, *exist; | ||
289 | struct root_entry *re = NULL; | ||
290 | |||
291 | re = kzalloc(sizeof(struct root_entry), GFP_KERNEL); | ||
292 | be = kzalloc(sizeof(struct block_entry), GFP_KERNEL); | ||
293 | if (!be || !re) { | ||
294 | kfree(re); | ||
295 | kfree(be); | ||
296 | return ERR_PTR(-ENOMEM); | ||
297 | } | ||
298 | be->bytenr = bytenr; | ||
299 | be->len = len; | ||
300 | |||
301 | re->root_objectid = root_objectid; | ||
302 | re->num_refs = 0; | ||
303 | |||
304 | spin_lock(&fs_info->ref_verify_lock); | ||
305 | exist = insert_block_entry(&fs_info->block_tree, be); | ||
306 | if (exist) { | ||
307 | if (root_objectid) { | ||
308 | struct root_entry *exist_re; | ||
309 | |||
310 | exist_re = insert_root_entry(&exist->roots, re); | ||
311 | if (exist_re) | ||
312 | kfree(re); | ||
313 | } | ||
314 | kfree(be); | ||
315 | return exist; | ||
316 | } | ||
317 | |||
318 | be->num_refs = 0; | ||
319 | be->metadata = 0; | ||
320 | be->from_disk = 0; | ||
321 | be->roots = RB_ROOT; | ||
322 | be->refs = RB_ROOT; | ||
323 | INIT_LIST_HEAD(&be->actions); | ||
324 | if (root_objectid) | ||
325 | insert_root_entry(&be->roots, re); | ||
326 | else | ||
327 | kfree(re); | ||
328 | return be; | ||
329 | } | ||
330 | |||
331 | static int add_tree_block(struct btrfs_fs_info *fs_info, u64 ref_root, | ||
332 | u64 parent, u64 bytenr, int level) | ||
333 | { | ||
334 | struct block_entry *be; | ||
335 | struct root_entry *re; | ||
336 | struct ref_entry *ref = NULL, *exist; | ||
337 | |||
338 | ref = kmalloc(sizeof(struct ref_entry), GFP_KERNEL); | ||
339 | if (!ref) | ||
340 | return -ENOMEM; | ||
341 | |||
342 | if (parent) | ||
343 | ref->root_objectid = 0; | ||
344 | else | ||
345 | ref->root_objectid = ref_root; | ||
346 | ref->parent = parent; | ||
347 | ref->owner = level; | ||
348 | ref->offset = 0; | ||
349 | ref->num_refs = 1; | ||
350 | |||
351 | be = add_block_entry(fs_info, bytenr, fs_info->nodesize, ref_root); | ||
352 | if (IS_ERR(be)) { | ||
353 | kfree(ref); | ||
354 | return PTR_ERR(be); | ||
355 | } | ||
356 | be->num_refs++; | ||
357 | be->from_disk = 1; | ||
358 | be->metadata = 1; | ||
359 | |||
360 | if (!parent) { | ||
361 | ASSERT(ref_root); | ||
362 | re = lookup_root_entry(&be->roots, ref_root); | ||
363 | ASSERT(re); | ||
364 | re->num_refs++; | ||
365 | } | ||
366 | exist = insert_ref_entry(&be->refs, ref); | ||
367 | if (exist) { | ||
368 | exist->num_refs++; | ||
369 | kfree(ref); | ||
370 | } | ||
371 | spin_unlock(&fs_info->ref_verify_lock); | ||
372 | |||
373 | return 0; | ||
374 | } | ||
375 | |||
376 | static int add_shared_data_ref(struct btrfs_fs_info *fs_info, | ||
377 | u64 parent, u32 num_refs, u64 bytenr, | ||
378 | u64 num_bytes) | ||
379 | { | ||
380 | struct block_entry *be; | ||
381 | struct ref_entry *ref; | ||
382 | |||
383 | ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL); | ||
384 | if (!ref) | ||
385 | return -ENOMEM; | ||
386 | be = add_block_entry(fs_info, bytenr, num_bytes, 0); | ||
387 | if (IS_ERR(be)) { | ||
388 | kfree(ref); | ||
389 | return PTR_ERR(be); | ||
390 | } | ||
391 | be->num_refs += num_refs; | ||
392 | |||
393 | ref->parent = parent; | ||
394 | ref->num_refs = num_refs; | ||
395 | if (insert_ref_entry(&be->refs, ref)) { | ||
396 | spin_unlock(&fs_info->ref_verify_lock); | ||
397 | btrfs_err(fs_info, "existing shared ref when reading from disk?"); | ||
398 | kfree(ref); | ||
399 | return -EINVAL; | ||
400 | } | ||
401 | spin_unlock(&fs_info->ref_verify_lock); | ||
402 | return 0; | ||
403 | } | ||
404 | |||
405 | static int add_extent_data_ref(struct btrfs_fs_info *fs_info, | ||
406 | struct extent_buffer *leaf, | ||
407 | struct btrfs_extent_data_ref *dref, | ||
408 | u64 bytenr, u64 num_bytes) | ||
409 | { | ||
410 | struct block_entry *be; | ||
411 | struct ref_entry *ref; | ||
412 | struct root_entry *re; | ||
413 | u64 ref_root = btrfs_extent_data_ref_root(leaf, dref); | ||
414 | u64 owner = btrfs_extent_data_ref_objectid(leaf, dref); | ||
415 | u64 offset = btrfs_extent_data_ref_offset(leaf, dref); | ||
416 | u32 num_refs = btrfs_extent_data_ref_count(leaf, dref); | ||
417 | |||
418 | ref = kzalloc(sizeof(struct ref_entry), GFP_KERNEL); | ||
419 | if (!ref) | ||
420 | return -ENOMEM; | ||
421 | be = add_block_entry(fs_info, bytenr, num_bytes, ref_root); | ||
422 | if (IS_ERR(be)) { | ||
423 | kfree(ref); | ||
424 | return PTR_ERR(be); | ||
425 | } | ||
426 | be->num_refs += num_refs; | ||
427 | |||
428 | ref->parent = 0; | ||
429 | ref->owner = owner; | ||
430 | ref->root_objectid = ref_root; | ||
431 | ref->offset = offset; | ||
432 | ref->num_refs = num_refs; | ||
433 | if (insert_ref_entry(&be->refs, ref)) { | ||
434 | spin_unlock(&fs_info->ref_verify_lock); | ||
435 | btrfs_err(fs_info, "existing ref when reading from disk?"); | ||
436 | kfree(ref); | ||
437 | return -EINVAL; | ||
438 | } | ||
439 | |||
440 | re = lookup_root_entry(&be->roots, ref_root); | ||
441 | if (!re) { | ||
442 | spin_unlock(&fs_info->ref_verify_lock); | ||
443 | btrfs_err(fs_info, "missing root in new block entry?"); | ||
444 | return -EINVAL; | ||
445 | } | ||
446 | re->num_refs += num_refs; | ||
447 | spin_unlock(&fs_info->ref_verify_lock); | ||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | static int process_extent_item(struct btrfs_fs_info *fs_info, | ||
452 | struct btrfs_path *path, struct btrfs_key *key, | ||
453 | int slot, int *tree_block_level) | ||
454 | { | ||
455 | struct btrfs_extent_item *ei; | ||
456 | struct btrfs_extent_inline_ref *iref; | ||
457 | struct btrfs_extent_data_ref *dref; | ||
458 | struct btrfs_shared_data_ref *sref; | ||
459 | struct extent_buffer *leaf = path->nodes[0]; | ||
460 | u32 item_size = btrfs_item_size_nr(leaf, slot); | ||
461 | unsigned long end, ptr; | ||
462 | u64 offset, flags, count; | ||
463 | int type, ret = 0; | ||
464 | |||
465 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); | ||
466 | flags = btrfs_extent_flags(leaf, ei); | ||
467 | |||
468 | if ((key->type == BTRFS_EXTENT_ITEM_KEY) && | ||
469 | flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
470 | struct btrfs_tree_block_info *info; | ||
471 | |||
472 | info = (struct btrfs_tree_block_info *)(ei + 1); | ||
473 | *tree_block_level = btrfs_tree_block_level(leaf, info); | ||
474 | iref = (struct btrfs_extent_inline_ref *)(info + 1); | ||
475 | } else { | ||
476 | if (key->type == BTRFS_METADATA_ITEM_KEY) | ||
477 | *tree_block_level = key->offset; | ||
478 | iref = (struct btrfs_extent_inline_ref *)(ei + 1); | ||
479 | } | ||
480 | |||
481 | ptr = (unsigned long)iref; | ||
482 | end = (unsigned long)ei + item_size; | ||
483 | while (ptr < end) { | ||
484 | iref = (struct btrfs_extent_inline_ref *)ptr; | ||
485 | type = btrfs_extent_inline_ref_type(leaf, iref); | ||
486 | offset = btrfs_extent_inline_ref_offset(leaf, iref); | ||
487 | switch (type) { | ||
488 | case BTRFS_TREE_BLOCK_REF_KEY: | ||
489 | ret = add_tree_block(fs_info, offset, 0, key->objectid, | ||
490 | *tree_block_level); | ||
491 | break; | ||
492 | case BTRFS_SHARED_BLOCK_REF_KEY: | ||
493 | ret = add_tree_block(fs_info, 0, offset, key->objectid, | ||
494 | *tree_block_level); | ||
495 | break; | ||
496 | case BTRFS_EXTENT_DATA_REF_KEY: | ||
497 | dref = (struct btrfs_extent_data_ref *)(&iref->offset); | ||
498 | ret = add_extent_data_ref(fs_info, leaf, dref, | ||
499 | key->objectid, key->offset); | ||
500 | break; | ||
501 | case BTRFS_SHARED_DATA_REF_KEY: | ||
502 | sref = (struct btrfs_shared_data_ref *)(iref + 1); | ||
503 | count = btrfs_shared_data_ref_count(leaf, sref); | ||
504 | ret = add_shared_data_ref(fs_info, offset, count, | ||
505 | key->objectid, key->offset); | ||
506 | break; | ||
507 | default: | ||
508 | btrfs_err(fs_info, "invalid key type in iref"); | ||
509 | ret = -EINVAL; | ||
510 | break; | ||
511 | } | ||
512 | if (ret) | ||
513 | break; | ||
514 | ptr += btrfs_extent_inline_ref_size(type); | ||
515 | } | ||
516 | return ret; | ||
517 | } | ||
518 | |||
519 | static int process_leaf(struct btrfs_root *root, | ||
520 | struct btrfs_path *path, u64 *bytenr, u64 *num_bytes) | ||
521 | { | ||
522 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
523 | struct extent_buffer *leaf = path->nodes[0]; | ||
524 | struct btrfs_extent_data_ref *dref; | ||
525 | struct btrfs_shared_data_ref *sref; | ||
526 | u32 count; | ||
527 | int i = 0, tree_block_level = 0, ret = 0; | ||
528 | struct btrfs_key key; | ||
529 | int nritems = btrfs_header_nritems(leaf); | ||
530 | |||
531 | for (i = 0; i < nritems; i++) { | ||
532 | btrfs_item_key_to_cpu(leaf, &key, i); | ||
533 | switch (key.type) { | ||
534 | case BTRFS_EXTENT_ITEM_KEY: | ||
535 | *num_bytes = key.offset; /* fall through */ | ||
536 | case BTRFS_METADATA_ITEM_KEY: | ||
537 | *bytenr = key.objectid; | ||
538 | ret = process_extent_item(fs_info, path, &key, i, | ||
539 | &tree_block_level); | ||
540 | break; | ||
541 | case BTRFS_TREE_BLOCK_REF_KEY: | ||
542 | ret = add_tree_block(fs_info, key.offset, 0, | ||
543 | key.objectid, tree_block_level); | ||
544 | break; | ||
545 | case BTRFS_SHARED_BLOCK_REF_KEY: | ||
546 | ret = add_tree_block(fs_info, 0, key.offset, | ||
547 | key.objectid, tree_block_level); | ||
548 | break; | ||
549 | case BTRFS_EXTENT_DATA_REF_KEY: | ||
550 | dref = btrfs_item_ptr(leaf, i, | ||
551 | struct btrfs_extent_data_ref); | ||
552 | ret = add_extent_data_ref(fs_info, leaf, dref, *bytenr, | ||
553 | *num_bytes); | ||
554 | break; | ||
555 | case BTRFS_SHARED_DATA_REF_KEY: | ||
556 | sref = btrfs_item_ptr(leaf, i, | ||
557 | struct btrfs_shared_data_ref); | ||
558 | count = btrfs_shared_data_ref_count(leaf, sref); | ||
559 | ret = add_shared_data_ref(fs_info, key.offset, count, | ||
560 | *bytenr, *num_bytes); | ||
561 | break; | ||
562 | default: | ||
563 | break; | ||
564 | } | ||
565 | if (ret) | ||
566 | break; | ||
567 | } | ||
568 | return ret; | ||
569 | } | ||
570 | |||
571 | /* Walk down to the leaf from the given level */ | ||
572 | static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path, | ||
573 | int level, u64 *bytenr, u64 *num_bytes) | ||
574 | { | ||
575 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
576 | struct extent_buffer *eb; | ||
577 | u64 block_bytenr, gen; | ||
578 | int ret = 0; | ||
579 | |||
580 | while (level >= 0) { | ||
581 | if (level) { | ||
582 | block_bytenr = btrfs_node_blockptr(path->nodes[level], | ||
583 | path->slots[level]); | ||
584 | gen = btrfs_node_ptr_generation(path->nodes[level], | ||
585 | path->slots[level]); | ||
586 | eb = read_tree_block(fs_info, block_bytenr, gen); | ||
587 | if (IS_ERR(eb)) | ||
588 | return PTR_ERR(eb); | ||
589 | if (!extent_buffer_uptodate(eb)) { | ||
590 | free_extent_buffer(eb); | ||
591 | return -EIO; | ||
592 | } | ||
593 | btrfs_tree_read_lock(eb); | ||
594 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | ||
595 | path->nodes[level - 1] = eb; | ||
596 | path->slots[level - 1] = 0; | ||
597 | path->locks[level - 1] = BTRFS_READ_LOCK_BLOCKING; | ||
598 | } else { | ||
599 | ret = process_leaf(root, path, bytenr, num_bytes); | ||
600 | if (ret) | ||
601 | break; | ||
602 | } | ||
603 | level--; | ||
604 | } | ||
605 | return ret; | ||
606 | } | ||
607 | |||
608 | /* Walk up to the next node that needs to be processed */ | ||
609 | static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path, | ||
610 | int *level) | ||
611 | { | ||
612 | int l; | ||
613 | |||
614 | for (l = 0; l < BTRFS_MAX_LEVEL; l++) { | ||
615 | if (!path->nodes[l]) | ||
616 | continue; | ||
617 | if (l) { | ||
618 | path->slots[l]++; | ||
619 | if (path->slots[l] < | ||
620 | btrfs_header_nritems(path->nodes[l])) { | ||
621 | *level = l; | ||
622 | return 0; | ||
623 | } | ||
624 | } | ||
625 | btrfs_tree_unlock_rw(path->nodes[l], path->locks[l]); | ||
626 | free_extent_buffer(path->nodes[l]); | ||
627 | path->nodes[l] = NULL; | ||
628 | path->slots[l] = 0; | ||
629 | path->locks[l] = 0; | ||
630 | } | ||
631 | |||
632 | return 1; | ||
633 | } | ||
634 | |||
635 | static void dump_ref_action(struct btrfs_fs_info *fs_info, | ||
636 | struct ref_action *ra) | ||
637 | { | ||
638 | btrfs_err(fs_info, | ||
639 | " Ref action %d, root %llu, ref_root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu", | ||
640 | ra->action, ra->root, ra->ref.root_objectid, ra->ref.parent, | ||
641 | ra->ref.owner, ra->ref.offset, ra->ref.num_refs); | ||
642 | __print_stack_trace(fs_info, ra); | ||
643 | } | ||
644 | |||
645 | /* | ||
646 | * Dumps all the information from the block entry to printk; it's going to be | ||
647 | * awesome. | ||
648 | */ | ||
649 | static void dump_block_entry(struct btrfs_fs_info *fs_info, | ||
650 | struct block_entry *be) | ||
651 | { | ||
652 | struct ref_entry *ref; | ||
653 | struct root_entry *re; | ||
654 | struct ref_action *ra; | ||
655 | struct rb_node *n; | ||
656 | |||
657 | btrfs_err(fs_info, | ||
658 | "dumping block entry [%llu %llu], num_refs %llu, metadata %d, from disk %d", | ||
659 | be->bytenr, be->len, be->num_refs, be->metadata, | ||
660 | be->from_disk); | ||
661 | |||
662 | for (n = rb_first(&be->refs); n; n = rb_next(n)) { | ||
663 | ref = rb_entry(n, struct ref_entry, node); | ||
664 | btrfs_err(fs_info, | ||
665 | " ref root %llu, parent %llu, owner %llu, offset %llu, num_refs %llu", | ||
666 | ref->root_objectid, ref->parent, ref->owner, | ||
667 | ref->offset, ref->num_refs); | ||
668 | } | ||
669 | |||
670 | for (n = rb_first(&be->roots); n; n = rb_next(n)) { | ||
671 | re = rb_entry(n, struct root_entry, node); | ||
672 | btrfs_err(fs_info, " root entry %llu, num_refs %llu", | ||
673 | re->root_objectid, re->num_refs); | ||
674 | } | ||
675 | |||
676 | list_for_each_entry(ra, &be->actions, list) | ||
677 | dump_ref_action(fs_info, ra); | ||
678 | } | ||
679 | |||
680 | /* | ||
681 | * btrfs_ref_tree_mod: called when we modify a ref for a bytenr | ||
682 | * @root: the root we are making this modification from. | ||
683 | * @bytenr: the bytenr we are modifying. | ||
684 | * @num_bytes: number of bytes. | ||
685 | * @parent: the parent bytenr. | ||
686 | * @ref_root: the original root owner of the bytenr. | ||
687 | * @owner: level in the case of metadata, inode in the case of data. | ||
688 | * @offset: 0 for metadata, file offset for data. | ||
689 | * @action: the action that we are doing, this is the same as the delayed ref | ||
690 | * action. | ||
691 | * | ||
692 | * This will add an action item to the given bytenr and do sanity checks to make | ||
693 | * sure we haven't messed something up. If we are making a new allocation and | ||
694 | * this block entry has history we will delete all previous actions as long as | ||
695 | * our sanity checks pass as they are no longer needed. | ||
696 | */ | ||
697 | int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, | ||
698 | u64 parent, u64 ref_root, u64 owner, u64 offset, | ||
699 | int action) | ||
700 | { | ||
701 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
702 | struct ref_entry *ref = NULL, *exist; | ||
703 | struct ref_action *ra = NULL; | ||
704 | struct block_entry *be = NULL; | ||
705 | struct root_entry *re = NULL; | ||
706 | int ret = 0; | ||
707 | bool metadata = owner < BTRFS_FIRST_FREE_OBJECTID; | ||
708 | |||
709 | if (!btrfs_test_opt(root->fs_info, REF_VERIFY)) | ||
710 | return 0; | ||
711 | |||
712 | ref = kzalloc(sizeof(struct ref_entry), GFP_NOFS); | ||
713 | ra = kmalloc(sizeof(struct ref_action), GFP_NOFS); | ||
714 | if (!ra || !ref) { | ||
715 | kfree(ref); | ||
716 | kfree(ra); | ||
717 | ret = -ENOMEM; | ||
718 | goto out; | ||
719 | } | ||
720 | |||
721 | if (parent) { | ||
722 | ref->parent = parent; | ||
723 | } else { | ||
724 | ref->root_objectid = ref_root; | ||
725 | ref->owner = owner; | ||
726 | ref->offset = offset; | ||
727 | } | ||
728 | ref->num_refs = (action == BTRFS_DROP_DELAYED_REF) ? -1 : 1; | ||
729 | |||
730 | memcpy(&ra->ref, ref, sizeof(struct ref_entry)); | ||
731 | /* | ||
732 | * Save the extra info from the delayed ref in the ref action to make it | ||
733 | * easier to figure out what is happening. The real ref's we add to the | ||
734 | * ref tree need to reflect what we save on disk so it matches any | ||
735 | * on-disk refs we pre-loaded. | ||
736 | */ | ||
737 | ra->ref.owner = owner; | ||
738 | ra->ref.offset = offset; | ||
739 | ra->ref.root_objectid = ref_root; | ||
740 | __save_stack_trace(ra); | ||
741 | |||
742 | INIT_LIST_HEAD(&ra->list); | ||
743 | ra->action = action; | ||
744 | ra->root = root->objectid; | ||
745 | |||
746 | /* | ||
747 | * This is an allocation, preallocate the block_entry in case we haven't | ||
748 | * used it before. | ||
749 | */ | ||
750 | ret = -EINVAL; | ||
751 | if (action == BTRFS_ADD_DELAYED_EXTENT) { | ||
752 | /* | ||
753 | * For subvol_create we'll just pass in whatever the parent root | ||
754 | * is and the new root objectid, so let's not treat the passed | ||
755 | * in root as if it really has a ref for this bytenr. | ||
756 | */ | ||
757 | be = add_block_entry(root->fs_info, bytenr, num_bytes, ref_root); | ||
758 | if (IS_ERR(be)) { | ||
759 | kfree(ra); | ||
760 | ret = PTR_ERR(be); | ||
761 | goto out; | ||
762 | } | ||
763 | be->num_refs++; | ||
764 | if (metadata) | ||
765 | be->metadata = 1; | ||
766 | |||
767 | if (be->num_refs != 1) { | ||
768 | btrfs_err(fs_info, | ||
769 | "re-allocated a block that still has references to it!"); | ||
770 | dump_block_entry(fs_info, be); | ||
771 | dump_ref_action(fs_info, ra); | ||
772 | goto out_unlock; | ||
773 | } | ||
774 | |||
775 | while (!list_empty(&be->actions)) { | ||
776 | struct ref_action *tmp; | ||
777 | |||
778 | tmp = list_first_entry(&be->actions, struct ref_action, | ||
779 | list); | ||
780 | list_del(&tmp->list); | ||
781 | kfree(tmp); | ||
782 | } | ||
783 | } else { | ||
784 | struct root_entry *tmp; | ||
785 | |||
786 | if (!parent) { | ||
787 | re = kmalloc(sizeof(struct root_entry), GFP_NOFS); | ||
788 | if (!re) { | ||
789 | kfree(ref); | ||
790 | kfree(ra); | ||
791 | ret = -ENOMEM; | ||
792 | goto out; | ||
793 | } | ||
794 | /* | ||
795 | * This is the root that is modifying us, so it's the | ||
796 | * one we want to lookup below when we modify the | ||
797 | * re->num_refs. | ||
798 | */ | ||
799 | ref_root = root->objectid; | ||
800 | re->root_objectid = root->objectid; | ||
801 | re->num_refs = 0; | ||
802 | } | ||
803 | |||
804 | spin_lock(&root->fs_info->ref_verify_lock); | ||
805 | be = lookup_block_entry(&root->fs_info->block_tree, bytenr); | ||
806 | if (!be) { | ||
807 | btrfs_err(fs_info, | ||
808 | "trying to do action %d to bytenr %llu num_bytes %llu but there is no existing entry!", | ||
809 | action, (unsigned long long)bytenr, | ||
810 | (unsigned long long)num_bytes); | ||
811 | dump_ref_action(fs_info, ra); | ||
812 | kfree(ref); | ||
813 | kfree(ra); | ||
814 | goto out_unlock; | ||
815 | } | ||
816 | |||
817 | if (!parent) { | ||
818 | tmp = insert_root_entry(&be->roots, re); | ||
819 | if (tmp) { | ||
820 | kfree(re); | ||
821 | re = tmp; | ||
822 | } | ||
823 | } | ||
824 | } | ||
825 | |||
826 | exist = insert_ref_entry(&be->refs, ref); | ||
827 | if (exist) { | ||
828 | if (action == BTRFS_DROP_DELAYED_REF) { | ||
829 | if (exist->num_refs == 0) { | ||
830 | btrfs_err(fs_info, | ||
831 | "dropping a ref for a existing root that doesn't have a ref on the block"); | ||
832 | dump_block_entry(fs_info, be); | ||
833 | dump_ref_action(fs_info, ra); | ||
834 | kfree(ra); | ||
835 | goto out_unlock; | ||
836 | } | ||
837 | exist->num_refs--; | ||
838 | if (exist->num_refs == 0) { | ||
839 | rb_erase(&exist->node, &be->refs); | ||
840 | kfree(exist); | ||
841 | } | ||
842 | } else if (!be->metadata) { | ||
843 | exist->num_refs++; | ||
844 | } else { | ||
845 | btrfs_err(fs_info, | ||
846 | "attempting to add another ref for an existing ref on a tree block"); | ||
847 | dump_block_entry(fs_info, be); | ||
848 | dump_ref_action(fs_info, ra); | ||
849 | kfree(ra); | ||
850 | goto out_unlock; | ||
851 | } | ||
852 | kfree(ref); | ||
853 | } else { | ||
854 | if (action == BTRFS_DROP_DELAYED_REF) { | ||
855 | btrfs_err(fs_info, | ||
856 | "dropping a ref for a root that doesn't have a ref on the block"); | ||
857 | dump_block_entry(fs_info, be); | ||
858 | dump_ref_action(fs_info, ra); | ||
859 | kfree(ra); | ||
860 | goto out_unlock; | ||
861 | } | ||
862 | } | ||
863 | |||
864 | if (!parent && !re) { | ||
865 | re = lookup_root_entry(&be->roots, ref_root); | ||
866 | if (!re) { | ||
867 | /* | ||
868 | * This shouldn't happen because we will add our re | ||
869 | * above when we lookup the be with !parent, but just in | ||
870 | * case catch this case so we don't panic because I | ||
871 | * didn't think of some other corner case. | ||
872 | */ | ||
873 | btrfs_err(fs_info, "failed to find root %llu for %llu", | ||
874 | root->objectid, be->bytenr); | ||
875 | dump_block_entry(fs_info, be); | ||
876 | dump_ref_action(fs_info, ra); | ||
877 | kfree(ra); | ||
878 | goto out_unlock; | ||
879 | } | ||
880 | } | ||
881 | if (action == BTRFS_DROP_DELAYED_REF) { | ||
882 | if (re) | ||
883 | re->num_refs--; | ||
884 | be->num_refs--; | ||
885 | } else if (action == BTRFS_ADD_DELAYED_REF) { | ||
886 | be->num_refs++; | ||
887 | if (re) | ||
888 | re->num_refs++; | ||
889 | } | ||
890 | list_add_tail(&ra->list, &be->actions); | ||
891 | ret = 0; | ||
892 | out_unlock: | ||
893 | spin_unlock(&root->fs_info->ref_verify_lock); | ||
894 | out: | ||
895 | if (ret) | ||
896 | btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); | ||
897 | return ret; | ||
898 | } | ||
899 | |||
900 | /* Free up the ref cache */ | ||
901 | void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info) | ||
902 | { | ||
903 | struct block_entry *be; | ||
904 | struct rb_node *n; | ||
905 | |||
906 | if (!btrfs_test_opt(fs_info, REF_VERIFY)) | ||
907 | return; | ||
908 | |||
909 | spin_lock(&fs_info->ref_verify_lock); | ||
910 | while ((n = rb_first(&fs_info->block_tree))) { | ||
911 | be = rb_entry(n, struct block_entry, node); | ||
912 | rb_erase(&be->node, &fs_info->block_tree); | ||
913 | free_block_entry(be); | ||
914 | cond_resched_lock(&fs_info->ref_verify_lock); | ||
915 | } | ||
916 | spin_unlock(&fs_info->ref_verify_lock); | ||
917 | } | ||
918 | |||
919 | void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, | ||
920 | u64 len) | ||
921 | { | ||
922 | struct block_entry *be = NULL, *entry; | ||
923 | struct rb_node *n; | ||
924 | |||
925 | if (!btrfs_test_opt(fs_info, REF_VERIFY)) | ||
926 | return; | ||
927 | |||
928 | spin_lock(&fs_info->ref_verify_lock); | ||
929 | n = fs_info->block_tree.rb_node; | ||
930 | while (n) { | ||
931 | entry = rb_entry(n, struct block_entry, node); | ||
932 | if (entry->bytenr < start) { | ||
933 | n = n->rb_right; | ||
934 | } else if (entry->bytenr > start) { | ||
935 | n = n->rb_left; | ||
936 | } else { | ||
937 | be = entry; | ||
938 | break; | ||
939 | } | ||
940 | /* We want to get as close to start as possible */ | ||
941 | if (be == NULL || | ||
942 | (entry->bytenr < start && be->bytenr > start) || | ||
943 | (entry->bytenr < start && entry->bytenr > be->bytenr)) | ||
944 | be = entry; | ||
945 | } | ||
946 | |||
947 | /* | ||
948 | * Could have an empty block group, maybe have something to check for | ||
949 | * this case to verify we were actually empty? | ||
950 | */ | ||
951 | if (!be) { | ||
952 | spin_unlock(&fs_info->ref_verify_lock); | ||
953 | return; | ||
954 | } | ||
955 | |||
956 | n = &be->node; | ||
957 | while (n) { | ||
958 | be = rb_entry(n, struct block_entry, node); | ||
959 | n = rb_next(n); | ||
960 | if (be->bytenr < start && be->bytenr + be->len > start) { | ||
961 | btrfs_err(fs_info, | ||
962 | "block entry overlaps a block group [%llu,%llu]!", | ||
963 | start, len); | ||
964 | dump_block_entry(fs_info, be); | ||
965 | continue; | ||
966 | } | ||
967 | if (be->bytenr < start) | ||
968 | continue; | ||
969 | if (be->bytenr >= start + len) | ||
970 | break; | ||
971 | if (be->bytenr + be->len > start + len) { | ||
972 | btrfs_err(fs_info, | ||
973 | "block entry overlaps a block group [%llu,%llu]!", | ||
974 | start, len); | ||
975 | dump_block_entry(fs_info, be); | ||
976 | } | ||
977 | rb_erase(&be->node, &fs_info->block_tree); | ||
978 | free_block_entry(be); | ||
979 | } | ||
980 | spin_unlock(&fs_info->ref_verify_lock); | ||
981 | } | ||
982 | |||
983 | /* Walk down all roots and build the ref tree, meant to be called at mount */ | ||
984 | int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) | ||
985 | { | ||
986 | struct btrfs_path *path; | ||
987 | struct btrfs_root *root; | ||
988 | struct extent_buffer *eb; | ||
989 | u64 bytenr = 0, num_bytes = 0; | ||
990 | int ret, level; | ||
991 | |||
992 | if (!btrfs_test_opt(fs_info, REF_VERIFY)) | ||
993 | return 0; | ||
994 | |||
995 | path = btrfs_alloc_path(); | ||
996 | if (!path) | ||
997 | return -ENOMEM; | ||
998 | |||
999 | eb = btrfs_read_lock_root_node(fs_info->extent_root); | ||
1000 | btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); | ||
1001 | level = btrfs_header_level(eb); | ||
1002 | path->nodes[level] = eb; | ||
1003 | path->slots[level] = 0; | ||
1004 | path->locks[level] = BTRFS_READ_LOCK_BLOCKING; | ||
1005 | |||
1006 | while (1) { | ||
1007 | /* | ||
1008 | * We have to keep track of the bytenr/num_bytes we last hit | ||
1009 | * because we could have run out of space for an inline ref, and | ||
1010 | * would have had to add a ref key item which may appear on a | ||
1011 | * different leaf from the original extent item. | ||
1012 | */ | ||
1013 | ret = walk_down_tree(fs_info->extent_root, path, level, | ||
1014 | &bytenr, &num_bytes); | ||
1015 | if (ret) | ||
1016 | break; | ||
1017 | ret = walk_up_tree(root, path, &level); | ||
1018 | if (ret < 0) | ||
1019 | break; | ||
1020 | if (ret > 0) { | ||
1021 | ret = 0; | ||
1022 | break; | ||
1023 | } | ||
1024 | } | ||
1025 | if (ret) { | ||
1026 | btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY); | ||
1027 | btrfs_free_ref_cache(fs_info); | ||
1028 | } | ||
1029 | btrfs_free_path(path); | ||
1030 | return ret; | ||
1031 | } | ||
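Annotation: ref-verify keeps one rb-tree of block_entry keyed by bytenr; each entry in turn owns an rb-tree of root_entry, an rb-tree of ref_entry, and a list of ref_action history, all guarded by fs_info->ref_verify_lock. The same descend-compare-link idiom appears three times in the file; in miniature (condensed from the insert_* helpers above, assuming root is a struct rb_root * and new is the candidate entry):

	struct rb_node **p = &root->rb_node, *parent = NULL;
	struct block_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct block_entry, node);
		if (entry->bytenr > new->bytenr)
			p = &(*p)->rb_left;
		else if (entry->bytenr < new->bytenr)
			p = &(*p)->rb_right;
		else
			return entry;	/* duplicate: caller frees its copy */
	}
	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
	return NULL;			/* inserted */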
diff --git a/fs/btrfs/ref-verify.h b/fs/btrfs/ref-verify.h new file mode 100644 index 000000000000..3bf02ce0e1e2 --- /dev/null +++ b/fs/btrfs/ref-verify.h | |||
@@ -0,0 +1,62 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Facebook. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | #ifndef __REF_VERIFY__ | ||
19 | #define __REF_VERIFY__ | ||
20 | |||
21 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY | ||
22 | int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info); | ||
23 | void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info); | ||
24 | int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, u64 num_bytes, | ||
25 | u64 parent, u64 ref_root, u64 owner, u64 offset, | ||
26 | int action); | ||
27 | void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, u64 start, | ||
28 | u64 len); | ||
29 | |||
30 | static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info) | ||
31 | { | ||
32 | spin_lock_init(&fs_info->ref_verify_lock); | ||
33 | fs_info->block_tree = RB_ROOT; | ||
34 | } | ||
35 | #else | ||
36 | static inline int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static inline void btrfs_free_ref_cache(struct btrfs_fs_info *fs_info) | ||
42 | { | ||
43 | } | ||
44 | |||
45 | static inline int btrfs_ref_tree_mod(struct btrfs_root *root, u64 bytenr, | ||
46 | u64 num_bytes, u64 parent, u64 ref_root, | ||
47 | u64 owner, u64 offset, int action) | ||
48 | { | ||
49 | return 0; | ||
50 | } | ||
51 | |||
52 | static inline void btrfs_free_ref_tree_range(struct btrfs_fs_info *fs_info, | ||
53 | u64 start, u64 len) | ||
54 | { | ||
55 | } | ||
56 | |||
57 | static inline void btrfs_init_ref_verify(struct btrfs_fs_info *fs_info) | ||
58 | { | ||
59 | } | ||
60 | |||
61 | #endif /* CONFIG_BTRFS_FS_REF_VERIFY */ | ||
62 | #endif /* __REF_VERIFY__ */ | ||
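Annotation: the header follows the usual Kconfig stub pattern: with CONFIG_BTRFS_FS_REF_VERIFY off, every entry point collapses to an empty static inline that the compiler elides, so call sites never need an #ifdef of their own. An illustrative call site (the arguments here are placeholders; BTRFS_ADD_DELAYED_REF is the real action constant used in ref-verify.c):

	/* compiles identically with the feature on or off */
	ret = btrfs_ref_tree_mod(root, bytenr, num_bytes, parent,
				 ref_root, owner, offset,
				 BTRFS_ADD_DELAYED_REF);
	if (ret)	/* the disabled stub always returns 0 */
		return ret;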
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 9841faef08ea..4cf2eb67eba6 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -1742,7 +1742,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1742 | dirty = 1; | 1742 | dirty = 1; |
1743 | 1743 | ||
1744 | key.offset -= btrfs_file_extent_offset(leaf, fi); | 1744 | key.offset -= btrfs_file_extent_offset(leaf, fi); |
1745 | ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr, | 1745 | ret = btrfs_inc_extent_ref(trans, root, new_bytenr, |
1746 | num_bytes, parent, | 1746 | num_bytes, parent, |
1747 | btrfs_header_owner(leaf), | 1747 | btrfs_header_owner(leaf), |
1748 | key.objectid, key.offset); | 1748 | key.objectid, key.offset); |
@@ -1751,7 +1751,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1751 | break; | 1751 | break; |
1752 | } | 1752 | } |
1753 | 1753 | ||
1754 | ret = btrfs_free_extent(trans, fs_info, bytenr, num_bytes, | 1754 | ret = btrfs_free_extent(trans, root, bytenr, num_bytes, |
1755 | parent, btrfs_header_owner(leaf), | 1755 | parent, btrfs_header_owner(leaf), |
1756 | key.objectid, key.offset); | 1756 | key.objectid, key.offset); |
1757 | if (ret) { | 1757 | if (ret) { |
@@ -1952,21 +1952,21 @@ again: | |||
1952 | path->slots[level], old_ptr_gen); | 1952 | path->slots[level], old_ptr_gen); |
1953 | btrfs_mark_buffer_dirty(path->nodes[level]); | 1953 | btrfs_mark_buffer_dirty(path->nodes[level]); |
1954 | 1954 | ||
1955 | ret = btrfs_inc_extent_ref(trans, fs_info, old_bytenr, | 1955 | ret = btrfs_inc_extent_ref(trans, src, old_bytenr, |
1956 | blocksize, path->nodes[level]->start, | 1956 | blocksize, path->nodes[level]->start, |
1957 | src->root_key.objectid, level - 1, 0); | 1957 | src->root_key.objectid, level - 1, 0); |
1958 | BUG_ON(ret); | 1958 | BUG_ON(ret); |
1959 | ret = btrfs_inc_extent_ref(trans, fs_info, new_bytenr, | 1959 | ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, |
1960 | blocksize, 0, dest->root_key.objectid, | 1960 | blocksize, 0, dest->root_key.objectid, |
1961 | level - 1, 0); | 1961 | level - 1, 0); |
1962 | BUG_ON(ret); | 1962 | BUG_ON(ret); |
1963 | 1963 | ||
1964 | ret = btrfs_free_extent(trans, fs_info, new_bytenr, blocksize, | 1964 | ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, |
1965 | path->nodes[level]->start, | 1965 | path->nodes[level]->start, |
1966 | src->root_key.objectid, level - 1, 0); | 1966 | src->root_key.objectid, level - 1, 0); |
1967 | BUG_ON(ret); | 1967 | BUG_ON(ret); |
1968 | 1968 | ||
1969 | ret = btrfs_free_extent(trans, fs_info, old_bytenr, blocksize, | 1969 | ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, |
1970 | 0, dest->root_key.objectid, level - 1, | 1970 | 0, dest->root_key.objectid, level - 1, |
1971 | 0); | 1971 | 0); |
1972 | BUG_ON(ret); | 1972 | BUG_ON(ret); |
@@ -2808,7 +2808,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2808 | trans->transid); | 2808 | trans->transid); |
2809 | btrfs_mark_buffer_dirty(upper->eb); | 2809 | btrfs_mark_buffer_dirty(upper->eb); |
2810 | 2810 | ||
2811 | ret = btrfs_inc_extent_ref(trans, root->fs_info, | 2811 | ret = btrfs_inc_extent_ref(trans, root, |
2812 | node->eb->start, blocksize, | 2812 | node->eb->start, blocksize, |
2813 | upper->eb->start, | 2813 | upper->eb->start, |
2814 | btrfs_header_owner(upper->eb), | 2814 | btrfs_header_owner(upper->eb), |
@@ -3246,6 +3246,8 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
3246 | put_page(page); | 3246 | put_page(page); |
3247 | btrfs_delalloc_release_metadata(BTRFS_I(inode), | 3247 | btrfs_delalloc_release_metadata(BTRFS_I(inode), |
3248 | PAGE_SIZE); | 3248 | PAGE_SIZE); |
3249 | btrfs_delalloc_release_extents(BTRFS_I(inode), | ||
3250 | PAGE_SIZE); | ||
3249 | ret = -EIO; | 3251 | ret = -EIO; |
3250 | goto out; | 3252 | goto out; |
3251 | } | 3253 | } |
@@ -3275,6 +3277,7 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
3275 | put_page(page); | 3277 | put_page(page); |
3276 | 3278 | ||
3277 | index++; | 3279 | index++; |
3280 | btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE); | ||
3278 | balance_dirty_pages_ratelimited(inode->i_mapping); | 3281 | balance_dirty_pages_ratelimited(inode->i_mapping); |
3279 | btrfs_throttle(fs_info); | 3282 | btrfs_throttle(fs_info); |
3280 | } | 3283 | } |
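Annotation: two threads of this merge meet in relocation.c. First, btrfs_inc_extent_ref() and btrfs_free_extent() now take the acting root instead of fs_info - fs_info is still one hop away via root->fs_info, and the root is exactly what ref-verify wants to attribute the change to. Second, every page handled by relocate_file_extent_cluster() must now release its outstanding-extent count: on the error path both halves of the reservation are dropped, while on the success path only the extent count is, since the ordered-extent machinery releases the bytes later (see the ordered-data.c hunk above). In isolation:

	/* error path: give back both halves of the per-page reservation */
	btrfs_delalloc_release_metadata(BTRFS_I(inode), PAGE_SIZE);
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);

	/* success path: only the extent count; the reserved bytes are
	 * released when the ordered extent finishes */
	btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);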
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 95bcc3cce78f..3338407ef0f0 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -226,10 +226,6 @@ int btrfs_find_orphan_roots(struct btrfs_fs_info *fs_info) | |||
226 | struct btrfs_root *root; | 226 | struct btrfs_root *root; |
227 | int err = 0; | 227 | int err = 0; |
228 | int ret; | 228 | int ret; |
229 | bool can_recover = true; | ||
230 | |||
231 | if (sb_rdonly(fs_info->sb)) | ||
232 | can_recover = false; | ||
233 | 229 | ||
234 | path = btrfs_alloc_path(); | 230 | path = btrfs_alloc_path(); |
235 | if (!path) | 231 | if (!path) |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index e3f6c49e5c4d..b2f871d80982 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -231,7 +231,7 @@ struct scrub_warning { | |||
231 | struct btrfs_path *path; | 231 | struct btrfs_path *path; |
232 | u64 extent_item_size; | 232 | u64 extent_item_size; |
233 | const char *errstr; | 233 | const char *errstr; |
234 | sector_t sector; | 234 | u64 physical; |
235 | u64 logical; | 235 | u64 logical; |
236 | struct btrfs_device *dev; | 236 | struct btrfs_device *dev; |
237 | }; | 237 | }; |
@@ -797,10 +797,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | |||
797 | */ | 797 | */ |
798 | for (i = 0; i < ipath->fspath->elem_cnt; ++i) | 798 | for (i = 0; i < ipath->fspath->elem_cnt; ++i) |
799 | btrfs_warn_in_rcu(fs_info, | 799 | btrfs_warn_in_rcu(fs_info, |
800 | "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)", | 800 | "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu, length %llu, links %u (path: %s)", |
801 | swarn->errstr, swarn->logical, | 801 | swarn->errstr, swarn->logical, |
802 | rcu_str_deref(swarn->dev->name), | 802 | rcu_str_deref(swarn->dev->name), |
803 | (unsigned long long)swarn->sector, | 803 | swarn->physical, |
804 | root, inum, offset, | 804 | root, inum, offset, |
805 | min(isize - offset, (u64)PAGE_SIZE), nlink, | 805 | min(isize - offset, (u64)PAGE_SIZE), nlink, |
806 | (char *)(unsigned long)ipath->fspath->val[i]); | 806 | (char *)(unsigned long)ipath->fspath->val[i]); |
@@ -810,10 +810,10 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, | |||
810 | 810 | ||
811 | err: | 811 | err: |
812 | btrfs_warn_in_rcu(fs_info, | 812 | btrfs_warn_in_rcu(fs_info, |
813 | "%s at logical %llu on dev %s, sector %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", | 813 | "%s at logical %llu on dev %s, physical %llu, root %llu, inode %llu, offset %llu: path resolving failed with ret=%d", |
814 | swarn->errstr, swarn->logical, | 814 | swarn->errstr, swarn->logical, |
815 | rcu_str_deref(swarn->dev->name), | 815 | rcu_str_deref(swarn->dev->name), |
816 | (unsigned long long)swarn->sector, | 816 | swarn->physical, |
817 | root, inum, offset, ret); | 817 | root, inum, offset, ret); |
818 | 818 | ||
819 | free_ipath(ipath); | 819 | free_ipath(ipath); |
@@ -845,7 +845,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
845 | if (!path) | 845 | if (!path) |
846 | return; | 846 | return; |
847 | 847 | ||
848 | swarn.sector = (sblock->pagev[0]->physical) >> 9; | 848 | swarn.physical = sblock->pagev[0]->physical; |
849 | swarn.logical = sblock->pagev[0]->logical; | 849 | swarn.logical = sblock->pagev[0]->logical; |
850 | swarn.errstr = errstr; | 850 | swarn.errstr = errstr; |
851 | swarn.dev = NULL; | 851 | swarn.dev = NULL; |
@@ -868,10 +868,10 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
868 | item_size, &ref_root, | 868 | item_size, &ref_root, |
869 | &ref_level); | 869 | &ref_level); |
870 | btrfs_warn_in_rcu(fs_info, | 870 | btrfs_warn_in_rcu(fs_info, |
871 | "%s at logical %llu on dev %s, sector %llu: metadata %s (level %d) in tree %llu", | 871 | "%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu", |
872 | errstr, swarn.logical, | 872 | errstr, swarn.logical, |
873 | rcu_str_deref(dev->name), | 873 | rcu_str_deref(dev->name), |
874 | (unsigned long long)swarn.sector, | 874 | swarn.physical, |
875 | ref_level ? "node" : "leaf", | 875 | ref_level ? "node" : "leaf", |
876 | ret < 0 ? -1 : ref_level, | 876 | ret < 0 ? -1 : ref_level, |
877 | ret < 0 ? -1 : ref_root); | 877 | ret < 0 ? -1 : ref_root); |
@@ -883,7 +883,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) | |||
883 | swarn.dev = dev; | 883 | swarn.dev = dev; |
884 | iterate_extent_inodes(fs_info, found_key.objectid, | 884 | iterate_extent_inodes(fs_info, found_key.objectid, |
885 | extent_item_pos, 1, | 885 | extent_item_pos, 1, |
886 | scrub_print_warning_inode, &swarn); | 886 | scrub_print_warning_inode, &swarn, false); |
887 | } | 887 | } |
888 | 888 | ||
889 | out: | 889 | out: |
@@ -1047,7 +1047,7 @@ static void scrub_fixup_nodatasum(struct btrfs_work *work) | |||
1047 | * can be found. | 1047 | * can be found. |
1048 | */ | 1048 | */ |
1049 | ret = iterate_inodes_from_logical(fixup->logical, fs_info, path, | 1049 | ret = iterate_inodes_from_logical(fixup->logical, fs_info, path, |
1050 | scrub_fixup_readpage, fixup); | 1050 | scrub_fixup_readpage, fixup, false); |
1051 | if (ret < 0) { | 1051 | if (ret < 0) { |
1052 | uncorrectable = 1; | 1052 | uncorrectable = 1; |
1053 | goto out; | 1053 | goto out; |
@@ -4390,7 +4390,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work) | |||
4390 | } | 4390 | } |
4391 | 4391 | ||
4392 | ret = iterate_inodes_from_logical(logical, fs_info, path, | 4392 | ret = iterate_inodes_from_logical(logical, fs_info, path, |
4393 | record_inode_for_nocow, nocow_ctx); | 4393 | record_inode_for_nocow, nocow_ctx, false); |
4394 | if (ret != 0 && ret != -ENOENT) { | 4394 | if (ret != 0 && ret != -ENOENT) { |
4395 | btrfs_warn(fs_info, | 4395 | btrfs_warn(fs_info, |
4396 | "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d", | 4396 | "iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d", |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 8fd195cfe81b..c10e4c70f02d 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/radix-tree.h> | 26 | #include <linux/radix-tree.h> |
27 | #include <linux/vmalloc.h> | 27 | #include <linux/vmalloc.h> |
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | #include <linux/compat.h> | ||
29 | 30 | ||
30 | #include "send.h" | 31 | #include "send.h" |
31 | #include "backref.h" | 32 | #include "backref.h" |
@@ -992,7 +993,6 @@ typedef int (*iterate_dir_item_t)(int num, struct btrfs_key *di_key, | |||
992 | * path must point to the dir item when called. | 993 | * path must point to the dir item when called. |
993 | */ | 994 | */ |
994 | static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | 995 | static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, |
995 | struct btrfs_key *found_key, | ||
996 | iterate_dir_item_t iterate, void *ctx) | 996 | iterate_dir_item_t iterate, void *ctx) |
997 | { | 997 | { |
998 | int ret = 0; | 998 | int ret = 0; |
@@ -1271,12 +1271,6 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) | |||
1271 | */ | 1271 | */ |
1272 | if (ino >= bctx->cur_objectid) | 1272 | if (ino >= bctx->cur_objectid) |
1273 | return 0; | 1273 | return 0; |
1274 | #if 0 | ||
1275 | if (ino > bctx->cur_objectid) | ||
1276 | return 0; | ||
1277 | if (offset + bctx->extent_len > bctx->cur_offset) | ||
1278 | return 0; | ||
1279 | #endif | ||
1280 | } | 1274 | } |
1281 | 1275 | ||
1282 | bctx->found++; | 1276 | bctx->found++; |
@@ -1429,7 +1423,7 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1429 | extent_item_pos = 0; | 1423 | extent_item_pos = 0; |
1430 | ret = iterate_extent_inodes(fs_info, found_key.objectid, | 1424 | ret = iterate_extent_inodes(fs_info, found_key.objectid, |
1431 | extent_item_pos, 1, __iterate_backrefs, | 1425 | extent_item_pos, 1, __iterate_backrefs, |
1432 | backref_ctx); | 1426 | backref_ctx, false); |
1433 | 1427 | ||
1434 | if (ret < 0) | 1428 | if (ret < 0) |
1435 | goto out; | 1429 | goto out; |
@@ -4106,8 +4100,8 @@ out: | |||
4106 | return ret; | 4100 | return ret; |
4107 | } | 4101 | } |
4108 | 4102 | ||
4109 | static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, | 4103 | static int record_ref(struct btrfs_root *root, u64 dir, struct fs_path *name, |
4110 | struct fs_path *name, void *ctx, struct list_head *refs) | 4104 | void *ctx, struct list_head *refs) |
4111 | { | 4105 | { |
4112 | int ret = 0; | 4106 | int ret = 0; |
4113 | struct send_ctx *sctx = ctx; | 4107 | struct send_ctx *sctx = ctx; |
@@ -4143,8 +4137,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
4143 | void *ctx) | 4137 | void *ctx) |
4144 | { | 4138 | { |
4145 | struct send_ctx *sctx = ctx; | 4139 | struct send_ctx *sctx = ctx; |
4146 | return record_ref(sctx->send_root, num, dir, index, name, | 4140 | return record_ref(sctx->send_root, dir, name, ctx, &sctx->new_refs); |
4147 | ctx, &sctx->new_refs); | ||
4148 | } | 4141 | } |
4149 | 4142 | ||
4150 | 4143 | ||
@@ -4153,8 +4146,8 @@ static int __record_deleted_ref(int num, u64 dir, int index, | |||
4153 | void *ctx) | 4146 | void *ctx) |
4154 | { | 4147 | { |
4155 | struct send_ctx *sctx = ctx; | 4148 | struct send_ctx *sctx = ctx; |
4156 | return record_ref(sctx->parent_root, num, dir, index, name, | 4149 | return record_ref(sctx->parent_root, dir, name, ctx, |
4157 | ctx, &sctx->deleted_refs); | 4150 | &sctx->deleted_refs); |
4158 | } | 4151 | } |
4159 | 4152 | ||
4160 | static int record_new_ref(struct send_ctx *sctx) | 4153 | static int record_new_ref(struct send_ctx *sctx) |
@@ -4498,7 +4491,7 @@ static int process_new_xattr(struct send_ctx *sctx) | |||
4498 | int ret = 0; | 4491 | int ret = 0; |
4499 | 4492 | ||
4500 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, | 4493 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, |
4501 | sctx->cmp_key, __process_new_xattr, sctx); | 4494 | __process_new_xattr, sctx); |
4502 | 4495 | ||
4503 | return ret; | 4496 | return ret; |
4504 | } | 4497 | } |
@@ -4506,7 +4499,7 @@ static int process_new_xattr(struct send_ctx *sctx) | |||
4506 | static int process_deleted_xattr(struct send_ctx *sctx) | 4499 | static int process_deleted_xattr(struct send_ctx *sctx) |
4507 | { | 4500 | { |
4508 | return iterate_dir_item(sctx->parent_root, sctx->right_path, | 4501 | return iterate_dir_item(sctx->parent_root, sctx->right_path, |
4509 | sctx->cmp_key, __process_deleted_xattr, sctx); | 4502 | __process_deleted_xattr, sctx); |
4510 | } | 4503 | } |
4511 | 4504 | ||
4512 | struct find_xattr_ctx { | 4505 | struct find_xattr_ctx { |
@@ -4551,7 +4544,7 @@ static int find_xattr(struct btrfs_root *root, | |||
4551 | ctx.found_data = NULL; | 4544 | ctx.found_data = NULL; |
4552 | ctx.found_data_len = 0; | 4545 | ctx.found_data_len = 0; |
4553 | 4546 | ||
4554 | ret = iterate_dir_item(root, path, key, __find_xattr, &ctx); | 4547 | ret = iterate_dir_item(root, path, __find_xattr, &ctx); |
4555 | if (ret < 0) | 4548 | if (ret < 0) |
4556 | return ret; | 4549 | return ret; |
4557 | 4550 | ||
@@ -4621,11 +4614,11 @@ static int process_changed_xattr(struct send_ctx *sctx) | |||
4621 | int ret = 0; | 4614 | int ret = 0; |
4622 | 4615 | ||
4623 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, | 4616 | ret = iterate_dir_item(sctx->send_root, sctx->left_path, |
4624 | sctx->cmp_key, __process_changed_new_xattr, sctx); | 4617 | __process_changed_new_xattr, sctx); |
4625 | if (ret < 0) | 4618 | if (ret < 0) |
4626 | goto out; | 4619 | goto out; |
4627 | ret = iterate_dir_item(sctx->parent_root, sctx->right_path, | 4620 | ret = iterate_dir_item(sctx->parent_root, sctx->right_path, |
4628 | sctx->cmp_key, __process_changed_deleted_xattr, sctx); | 4621 | __process_changed_deleted_xattr, sctx); |
4629 | 4622 | ||
4630 | out: | 4623 | out: |
4631 | return ret; | 4624 | return ret; |
@@ -4675,8 +4668,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
4675 | goto out; | 4668 | goto out; |
4676 | } | 4669 | } |
4677 | 4670 | ||
4678 | ret = iterate_dir_item(root, path, &found_key, | 4671 | ret = iterate_dir_item(root, path, __process_new_xattr, sctx); |
4679 | __process_new_xattr, sctx); | ||
4680 | if (ret < 0) | 4672 | if (ret < 0) |
4681 | goto out; | 4673 | goto out; |
4682 | 4674 | ||
@@ -4723,16 +4715,27 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) | |||
4723 | /* initial readahead */ | 4715 | /* initial readahead */ |
4724 | memset(&sctx->ra, 0, sizeof(struct file_ra_state)); | 4716 | memset(&sctx->ra, 0, sizeof(struct file_ra_state)); |
4725 | file_ra_state_init(&sctx->ra, inode->i_mapping); | 4717 | file_ra_state_init(&sctx->ra, inode->i_mapping); |
4726 | page_cache_sync_readahead(inode->i_mapping, &sctx->ra, NULL, index, | ||
4727 | last_index - index + 1); | ||
4728 | 4718 | ||
4729 | while (index <= last_index) { | 4719 | while (index <= last_index) { |
4730 | unsigned cur_len = min_t(unsigned, len, | 4720 | unsigned cur_len = min_t(unsigned, len, |
4731 | PAGE_SIZE - pg_offset); | 4721 | PAGE_SIZE - pg_offset); |
4732 | page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL); | 4722 | |
4723 | page = find_lock_page(inode->i_mapping, index); | ||
4733 | if (!page) { | 4724 | if (!page) { |
4734 | ret = -ENOMEM; | 4725 | page_cache_sync_readahead(inode->i_mapping, &sctx->ra, |
4735 | break; | 4726 | NULL, index, last_index + 1 - index); |
4727 | |||
4728 | page = find_or_create_page(inode->i_mapping, index, | ||
4729 | GFP_KERNEL); | ||
4730 | if (!page) { | ||
4731 | ret = -ENOMEM; | ||
4732 | break; | ||
4733 | } | ||
4734 | } | ||
4735 | |||
4736 | if (PageReadahead(page)) { | ||
4737 | page_cache_async_readahead(inode->i_mapping, &sctx->ra, | ||
4738 | NULL, page, index, last_index + 1 - index); | ||
4736 | } | 4739 | } |
4737 | 4740 | ||
4738 | if (!PageUptodate(page)) { | 4741 | if (!PageUptodate(page)) { |
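The readahead rework above replaces the single up-front page_cache_sync_readahead() call with on-demand kicks: a synchronous kick on a page-cache miss, and an asynchronous kick whenever a PageReadahead marker page is hit. A minimal sketch of that pattern, pulled out of the loop for clarity (the helper name get_send_page is hypothetical):

	static struct page *get_send_page(struct address_space *mapping,
					  struct file_ra_state *ra,
					  pgoff_t index, pgoff_t last_index)
	{
		struct page *page = find_lock_page(mapping, index);

		if (!page) {
			/* cache miss: read ahead synchronously, then allocate */
			page_cache_sync_readahead(mapping, ra, NULL, index,
						  last_index + 1 - index);
			page = find_or_create_page(mapping, index, GFP_KERNEL);
			if (!page)
				return NULL;	/* caller maps this to -ENOMEM */
		}
		/* marker page: keep the readahead window moving */
		if (PageReadahead(page))
			page_cache_async_readahead(mapping, ra, NULL, page,
						   index, last_index + 1 - index);
		return page;
	}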
@@ -6162,9 +6165,7 @@ out: | |||
6162 | * Updates compare related fields in sctx and simply forwards to the actual | 6165 | * Updates compare related fields in sctx and simply forwards to the actual |
6163 | * changed_xxx functions. | 6166 | * changed_xxx functions. |
6164 | */ | 6167 | */ |
6165 | static int changed_cb(struct btrfs_root *left_root, | 6168 | static int changed_cb(struct btrfs_path *left_path, |
6166 | struct btrfs_root *right_root, | ||
6167 | struct btrfs_path *left_path, | ||
6168 | struct btrfs_path *right_path, | 6169 | struct btrfs_path *right_path, |
6169 | struct btrfs_key *key, | 6170 | struct btrfs_key *key, |
6170 | enum btrfs_compare_tree_result result, | 6171 | enum btrfs_compare_tree_result result, |
@@ -6246,8 +6247,8 @@ static int full_send_tree(struct send_ctx *sctx) | |||
6246 | slot = path->slots[0]; | 6247 | slot = path->slots[0]; |
6247 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 6248 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
6248 | 6249 | ||
6249 | ret = changed_cb(send_root, NULL, path, NULL, | 6250 | ret = changed_cb(path, NULL, &found_key, |
6250 | &found_key, BTRFS_COMPARE_TREE_NEW, sctx); | 6251 | BTRFS_COMPARE_TREE_NEW, sctx); |
6251 | if (ret < 0) | 6252 | if (ret < 0) |
6252 | goto out; | 6253 | goto out; |
6253 | 6254 | ||
@@ -6365,13 +6366,12 @@ static void btrfs_root_dec_send_in_progress(struct btrfs_root* root) | |||
6365 | spin_unlock(&root->root_item_lock); | 6366 | spin_unlock(&root->root_item_lock); |
6366 | } | 6367 | } |
6367 | 6368 | ||
6368 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | 6369 | long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) |
6369 | { | 6370 | { |
6370 | int ret = 0; | 6371 | int ret = 0; |
6371 | struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root; | 6372 | struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root; |
6372 | struct btrfs_fs_info *fs_info = send_root->fs_info; | 6373 | struct btrfs_fs_info *fs_info = send_root->fs_info; |
6373 | struct btrfs_root *clone_root; | 6374 | struct btrfs_root *clone_root; |
6374 | struct btrfs_ioctl_send_args *arg = NULL; | ||
6375 | struct btrfs_key key; | 6375 | struct btrfs_key key; |
6376 | struct send_ctx *sctx = NULL; | 6376 | struct send_ctx *sctx = NULL; |
6377 | u32 i; | 6377 | u32 i; |
@@ -6407,13 +6407,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
6407 | goto out; | 6407 | goto out; |
6408 | } | 6408 | } |
6409 | 6409 | ||
6410 | arg = memdup_user(arg_, sizeof(*arg)); | ||
6411 | if (IS_ERR(arg)) { | ||
6412 | ret = PTR_ERR(arg); | ||
6413 | arg = NULL; | ||
6414 | goto out; | ||
6415 | } | ||
6416 | |||
6417 | /* | 6410 | /* |
6418 | * Check that we don't overflow at later allocations, we request | 6411 | * Check that we don't overflow at later allocations, we request |
6419 | * clone_sources_count + 1 items, and compare to unsigned long inside | 6412 | * clone_sources_count + 1 items, and compare to unsigned long inside |
@@ -6654,7 +6647,6 @@ out: | |||
6654 | if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) | 6647 | if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) |
6655 | btrfs_root_dec_send_in_progress(sctx->parent_root); | 6648 | btrfs_root_dec_send_in_progress(sctx->parent_root); |
6656 | 6649 | ||
6657 | kfree(arg); | ||
6658 | kvfree(clone_sources_tmp); | 6650 | kvfree(clone_sources_tmp); |
6659 | 6651 | ||
6660 | if (sctx) { | 6652 | if (sctx) { |
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 02e00166c4da..3aa4bc55754f 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h | |||
@@ -130,5 +130,5 @@ enum { | |||
130 | #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1) | 130 | #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1) |
131 | 131 | ||
132 | #ifdef __KERNEL__ | 132 | #ifdef __KERNEL__ |
133 | long btrfs_ioctl_send(struct file *mnt_file, void __user *arg); | 133 | long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg); |
134 | #endif | 134 | #endif |
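btrfs_ioctl_send() now receives a kernel copy of the args, so the memdup_user() moves out to the ioctl dispatcher — which is what makes a 32-bit compat translation possible ("fix send ioctl on 32bit with 64bit kernel" in the merge log). A hedged sketch of the caller-side pattern this enables; the compat struct layout and helper name are assumptions, not shown in these hunks:

	struct btrfs_ioctl_send_args_32 {
		__s64 send_fd;
		__u64 clone_sources_count;
		compat_uptr_t clone_sources;	/* 32-bit user pointer */
		__u64 parent_root;
		__u64 flags;
		__u64 reserved[4];
	} __attribute__ ((__packed__));

	static long _btrfs_ioctl_send(struct file *file, void __user *argp,
				      bool compat)
	{
		struct btrfs_ioctl_send_args *arg;
		long ret;

		if (compat) {
			struct btrfs_ioctl_send_args_32 args32;

			if (copy_from_user(&args32, argp, sizeof(args32)))
				return -EFAULT;
			arg = kzalloc(sizeof(*arg), GFP_KERNEL);
			if (!arg)
				return -ENOMEM;
			/* widen the one pointer-sized member */
			arg->send_fd = args32.send_fd;
			arg->clone_sources_count = args32.clone_sources_count;
			arg->clone_sources = compat_ptr(args32.clone_sources);
			arg->parent_root = args32.parent_root;
			arg->flags = args32.flags;
		} else {
			arg = memdup_user(argp, sizeof(*arg));
			if (IS_ERR(arg))
				return PTR_ERR(arg);
		}
		ret = btrfs_ioctl_send(file, arg);
		kfree(arg);
		return ret;
	}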
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 161694b66038..65af029559b5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -202,7 +202,6 @@ static struct ratelimit_state printk_limits[] = { | |||
202 | 202 | ||
203 | void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) | 203 | void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) |
204 | { | 204 | { |
205 | struct super_block *sb = fs_info->sb; | ||
206 | char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0"; | 205 | char lvl[PRINTK_MAX_SINGLE_HEADER_LEN + 1] = "\0"; |
207 | struct va_format vaf; | 206 | struct va_format vaf; |
208 | va_list args; | 207 | va_list args; |
@@ -228,7 +227,8 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...) | |||
228 | vaf.va = &args; | 227 | vaf.va = &args; |
229 | 228 | ||
230 | if (__ratelimit(ratelimit)) | 229 | if (__ratelimit(ratelimit)) |
231 | printk("%sBTRFS %s (device %s): %pV\n", lvl, type, sb->s_id, &vaf); | 230 | printk("%sBTRFS %s (device %s): %pV\n", lvl, type, |
231 | fs_info ? fs_info->sb->s_id : "<unknown>", &vaf); | ||
232 | 232 | ||
233 | va_end(args); | 233 | va_end(args); |
234 | } | 234 | } |
@@ -292,7 +292,7 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, | |||
292 | vaf.va = &args; | 292 | vaf.va = &args; |
293 | 293 | ||
294 | errstr = btrfs_decode_error(errno); | 294 | errstr = btrfs_decode_error(errno); |
295 | if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)) | 295 | if (fs_info && (btrfs_test_opt(fs_info, PANIC_ON_FATAL_ERROR))) |
296 | panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n", | 296 | panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (errno=%d %s)\n", |
297 | s_id, function, line, &vaf, errno, errstr); | 297 | s_id, function, line, &vaf, errno, errstr); |
298 | 298 | ||
@@ -326,6 +326,9 @@ enum { | |||
326 | #ifdef CONFIG_BTRFS_DEBUG | 326 | #ifdef CONFIG_BTRFS_DEBUG |
327 | Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, | 327 | Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, |
328 | #endif | 328 | #endif |
329 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY | ||
330 | Opt_ref_verify, | ||
331 | #endif | ||
329 | Opt_err, | 332 | Opt_err, |
330 | }; | 333 | }; |
331 | 334 | ||
@@ -387,6 +390,9 @@ static const match_table_t tokens = { | |||
387 | {Opt_fragment_metadata, "fragment=metadata"}, | 390 | {Opt_fragment_metadata, "fragment=metadata"}, |
388 | {Opt_fragment_all, "fragment=all"}, | 391 | {Opt_fragment_all, "fragment=all"}, |
389 | #endif | 392 | #endif |
393 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY | ||
394 | {Opt_ref_verify, "ref_verify"}, | ||
395 | #endif | ||
390 | {Opt_err, NULL}, | 396 | {Opt_err, NULL}, |
391 | }; | 397 | }; |
392 | 398 | ||
@@ -502,6 +508,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, | |||
502 | strncmp(args[0].from, "zlib", 4) == 0) { | 508 | strncmp(args[0].from, "zlib", 4) == 0) { |
503 | compress_type = "zlib"; | 509 | compress_type = "zlib"; |
504 | info->compress_type = BTRFS_COMPRESS_ZLIB; | 510 | info->compress_type = BTRFS_COMPRESS_ZLIB; |
511 | info->compress_level = | ||
512 | btrfs_compress_str2level(args[0].from); | ||
505 | btrfs_set_opt(info->mount_opt, COMPRESS); | 513 | btrfs_set_opt(info->mount_opt, COMPRESS); |
506 | btrfs_clear_opt(info->mount_opt, NODATACOW); | 514 | btrfs_clear_opt(info->mount_opt, NODATACOW); |
507 | btrfs_clear_opt(info->mount_opt, NODATASUM); | 515 | btrfs_clear_opt(info->mount_opt, NODATASUM); |
@@ -549,9 +557,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, | |||
549 | compress_force != saved_compress_force)) || | 557 | compress_force != saved_compress_force)) || |
550 | (!btrfs_test_opt(info, COMPRESS) && | 558 | (!btrfs_test_opt(info, COMPRESS) && |
551 | no_compress == 1)) { | 559 | no_compress == 1)) { |
552 | btrfs_info(info, "%s %s compression", | 560 | btrfs_info(info, "%s %s compression, level %d", |
553 | (compress_force) ? "force" : "use", | 561 | (compress_force) ? "force" : "use", |
554 | compress_type); | 562 | compress_type, info->compress_level); |
555 | } | 563 | } |
556 | compress_force = false; | 564 | compress_force = false; |
557 | break; | 565 | break; |
@@ -825,6 +833,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, | |||
825 | btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); | 833 | btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); |
826 | break; | 834 | break; |
827 | #endif | 835 | #endif |
836 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY | ||
837 | case Opt_ref_verify: | ||
838 | btrfs_info(info, "doing ref verification"); | ||
839 | btrfs_set_opt(info->mount_opt, REF_VERIFY); | ||
840 | break; | ||
841 | #endif | ||
828 | case Opt_err: | 842 | case Opt_err: |
829 | btrfs_info(info, "unrecognized mount option '%s'", p); | 843 | btrfs_info(info, "unrecognized mount option '%s'", p); |
830 | ret = -EINVAL; | 844 | ret = -EINVAL; |
@@ -1205,8 +1219,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
1205 | * happens. The pending operations are delayed to the | 1219 | * happens. The pending operations are delayed to the |
1206 | * next commit after thawing. | 1220 | * next commit after thawing. |
1207 | */ | 1221 | */ |
1208 | if (__sb_start_write(sb, SB_FREEZE_WRITE, false)) | 1222 | if (sb_start_write_trylock(sb)) |
1209 | __sb_end_write(sb, SB_FREEZE_WRITE); | 1223 | sb_end_write(sb); |
1210 | else | 1224 | else |
1211 | return 0; | 1225 | return 0; |
1212 | trans = btrfs_start_transaction(root, 0); | 1226 | trans = btrfs_start_transaction(root, 0); |
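The replaced pair above open-coded what the generic helpers now express: take a temporary SB_FREEZE_WRITE reference purely to probe whether the filesystem is frozen, and skip the sync if it is. Roughly, under that reading:

	/* returns false while the fs is frozen; a sketch, not the btrfs code */
	static bool sb_is_writable(struct super_block *sb)
	{
		if (!sb_start_write_trylock(sb))
			return false;
		sb_end_write(sb);	/* drop the probe reference */
		return true;
	}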
@@ -1246,6 +1260,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1246 | seq_printf(seq, ",compress-force=%s", compress_type); | 1260 | seq_printf(seq, ",compress-force=%s", compress_type); |
1247 | else | 1261 | else |
1248 | seq_printf(seq, ",compress=%s", compress_type); | 1262 | seq_printf(seq, ",compress=%s", compress_type); |
1263 | if (info->compress_level) | ||
1264 | seq_printf(seq, ":%d", info->compress_level); | ||
1249 | } | 1265 | } |
1250 | if (btrfs_test_opt(info, NOSSD)) | 1266 | if (btrfs_test_opt(info, NOSSD)) |
1251 | seq_puts(seq, ",nossd"); | 1267 | seq_puts(seq, ",nossd"); |
@@ -1305,6 +1321,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) | |||
1305 | if (btrfs_test_opt(info, FRAGMENT_METADATA)) | 1321 | if (btrfs_test_opt(info, FRAGMENT_METADATA)) |
1306 | seq_puts(seq, ",fragment=metadata"); | 1322 | seq_puts(seq, ",fragment=metadata"); |
1307 | #endif | 1323 | #endif |
1324 | if (btrfs_test_opt(info, REF_VERIFY)) | ||
1325 | seq_puts(seq, ",ref_verify"); | ||
1308 | seq_printf(seq, ",subvolid=%llu", | 1326 | seq_printf(seq, ",subvolid=%llu", |
1309 | BTRFS_I(d_inode(dentry))->root->root_key.objectid); | 1327 | BTRFS_I(d_inode(dentry))->root->root_key.objectid); |
1310 | seq_puts(seq, ",subvol="); | 1328 | seq_puts(seq, ",subvol="); |
@@ -2112,7 +2130,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
2112 | * succeed even if the Avail is zero. But this is better than the other | 2130 | * succeed even if the Avail is zero. But this is better than the other |
2113 | * way around. | 2131 | * way around. |
2114 | */ | 2132 | */ |
2115 | thresh = 4 * 1024 * 1024; | 2133 | thresh = SZ_4M; |
2116 | 2134 | ||
2117 | if (!mixed && total_free_meta - thresh < block_rsv->size) | 2135 | if (!mixed && total_free_meta - thresh < block_rsv->size) |
2118 | buf->f_bavail = 0; | 2136 | buf->f_bavail = 0; |
@@ -2319,6 +2337,9 @@ static void btrfs_print_mod_info(void) | |||
2319 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2337 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
2320 | ", integrity-checker=on" | 2338 | ", integrity-checker=on" |
2321 | #endif | 2339 | #endif |
2340 | #ifdef CONFIG_BTRFS_FS_REF_VERIFY | ||
2341 | ", ref-verify=on" | ||
2342 | #endif | ||
2322 | "\n", | 2343 | "\n", |
2323 | btrfs_crc32c_impl()); | 2344 | btrfs_crc32c_impl()); |
2324 | } | 2345 | } |
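Taken together, the super.c hunks wire the new ref-verify debugging feature end to end: a Kconfig-gated option token, a mount-time message, show_options output, and a module banner. With CONFIG_BTRFS_FS_REF_VERIFY=y it should be enabled per mount, e.g. mount -o ref_verify /dev/sdb /mnt, after which /proc/mounts reports ,ref_verify alongside the other options.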
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 883881b16c86..a28bba801264 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -247,7 +247,7 @@ static ssize_t global_rsv_size_show(struct kobject *kobj, | |||
247 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 247 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
248 | return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf); | 248 | return btrfs_show_u64(&block_rsv->size, &block_rsv->lock, buf); |
249 | } | 249 | } |
250 | BTRFS_ATTR(global_rsv_size, global_rsv_size_show); | 250 | BTRFS_ATTR(allocation, global_rsv_size, global_rsv_size_show); |
251 | 251 | ||
252 | static ssize_t global_rsv_reserved_show(struct kobject *kobj, | 252 | static ssize_t global_rsv_reserved_show(struct kobject *kobj, |
253 | struct kobj_attribute *a, char *buf) | 253 | struct kobj_attribute *a, char *buf) |
@@ -256,15 +256,15 @@ static ssize_t global_rsv_reserved_show(struct kobject *kobj, | |||
256 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | 256 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; |
257 | return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf); | 257 | return btrfs_show_u64(&block_rsv->reserved, &block_rsv->lock, buf); |
258 | } | 258 | } |
259 | BTRFS_ATTR(global_rsv_reserved, global_rsv_reserved_show); | 259 | BTRFS_ATTR(allocation, global_rsv_reserved, global_rsv_reserved_show); |
260 | 260 | ||
261 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) | 261 | #define to_space_info(_kobj) container_of(_kobj, struct btrfs_space_info, kobj) |
262 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) | 262 | #define to_raid_kobj(_kobj) container_of(_kobj, struct raid_kobject, kobj) |
263 | 263 | ||
264 | static ssize_t raid_bytes_show(struct kobject *kobj, | 264 | static ssize_t raid_bytes_show(struct kobject *kobj, |
265 | struct kobj_attribute *attr, char *buf); | 265 | struct kobj_attribute *attr, char *buf); |
266 | BTRFS_RAID_ATTR(total_bytes, raid_bytes_show); | 266 | BTRFS_ATTR(raid, total_bytes, raid_bytes_show); |
267 | BTRFS_RAID_ATTR(used_bytes, raid_bytes_show); | 267 | BTRFS_ATTR(raid, used_bytes, raid_bytes_show); |
268 | 268 | ||
269 | static ssize_t raid_bytes_show(struct kobject *kobj, | 269 | static ssize_t raid_bytes_show(struct kobject *kobj, |
270 | struct kobj_attribute *attr, char *buf) | 270 | struct kobj_attribute *attr, char *buf) |
@@ -277,7 +277,7 @@ static ssize_t raid_bytes_show(struct kobject *kobj, | |||
277 | 277 | ||
278 | down_read(&sinfo->groups_sem); | 278 | down_read(&sinfo->groups_sem); |
279 | list_for_each_entry(block_group, &sinfo->block_groups[index], list) { | 279 | list_for_each_entry(block_group, &sinfo->block_groups[index], list) { |
280 | if (&attr->attr == BTRFS_RAID_ATTR_PTR(total_bytes)) | 280 | if (&attr->attr == BTRFS_ATTR_PTR(raid, total_bytes)) |
281 | val += block_group->key.offset; | 281 | val += block_group->key.offset; |
282 | else | 282 | else |
283 | val += btrfs_block_group_used(&block_group->item); | 283 | val += btrfs_block_group_used(&block_group->item); |
@@ -287,8 +287,8 @@ static ssize_t raid_bytes_show(struct kobject *kobj, | |||
287 | } | 287 | } |
288 | 288 | ||
289 | static struct attribute *raid_attributes[] = { | 289 | static struct attribute *raid_attributes[] = { |
290 | BTRFS_RAID_ATTR_PTR(total_bytes), | 290 | BTRFS_ATTR_PTR(raid, total_bytes), |
291 | BTRFS_RAID_ATTR_PTR(used_bytes), | 291 | BTRFS_ATTR_PTR(raid, used_bytes), |
292 | NULL | 292 | NULL |
293 | }; | 293 | }; |
294 | 294 | ||
@@ -311,7 +311,7 @@ static ssize_t btrfs_space_info_show_##field(struct kobject *kobj, \ | |||
311 | struct btrfs_space_info *sinfo = to_space_info(kobj); \ | 311 | struct btrfs_space_info *sinfo = to_space_info(kobj); \ |
312 | return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \ | 312 | return btrfs_show_u64(&sinfo->field, &sinfo->lock, buf); \ |
313 | } \ | 313 | } \ |
314 | BTRFS_ATTR(field, btrfs_space_info_show_##field) | 314 | BTRFS_ATTR(space_info, field, btrfs_space_info_show_##field) |
315 | 315 | ||
316 | static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj, | 316 | static ssize_t btrfs_space_info_show_total_bytes_pinned(struct kobject *kobj, |
317 | struct kobj_attribute *a, | 317 | struct kobj_attribute *a, |
@@ -331,19 +331,20 @@ SPACE_INFO_ATTR(bytes_may_use); | |||
331 | SPACE_INFO_ATTR(bytes_readonly); | 331 | SPACE_INFO_ATTR(bytes_readonly); |
332 | SPACE_INFO_ATTR(disk_used); | 332 | SPACE_INFO_ATTR(disk_used); |
333 | SPACE_INFO_ATTR(disk_total); | 333 | SPACE_INFO_ATTR(disk_total); |
334 | BTRFS_ATTR(total_bytes_pinned, btrfs_space_info_show_total_bytes_pinned); | 334 | BTRFS_ATTR(space_info, total_bytes_pinned, |
335 | btrfs_space_info_show_total_bytes_pinned); | ||
335 | 336 | ||
336 | static struct attribute *space_info_attrs[] = { | 337 | static struct attribute *space_info_attrs[] = { |
337 | BTRFS_ATTR_PTR(flags), | 338 | BTRFS_ATTR_PTR(space_info, flags), |
338 | BTRFS_ATTR_PTR(total_bytes), | 339 | BTRFS_ATTR_PTR(space_info, total_bytes), |
339 | BTRFS_ATTR_PTR(bytes_used), | 340 | BTRFS_ATTR_PTR(space_info, bytes_used), |
340 | BTRFS_ATTR_PTR(bytes_pinned), | 341 | BTRFS_ATTR_PTR(space_info, bytes_pinned), |
341 | BTRFS_ATTR_PTR(bytes_reserved), | 342 | BTRFS_ATTR_PTR(space_info, bytes_reserved), |
342 | BTRFS_ATTR_PTR(bytes_may_use), | 343 | BTRFS_ATTR_PTR(space_info, bytes_may_use), |
343 | BTRFS_ATTR_PTR(bytes_readonly), | 344 | BTRFS_ATTR_PTR(space_info, bytes_readonly), |
344 | BTRFS_ATTR_PTR(disk_used), | 345 | BTRFS_ATTR_PTR(space_info, disk_used), |
345 | BTRFS_ATTR_PTR(disk_total), | 346 | BTRFS_ATTR_PTR(space_info, disk_total), |
346 | BTRFS_ATTR_PTR(total_bytes_pinned), | 347 | BTRFS_ATTR_PTR(space_info, total_bytes_pinned), |
347 | NULL, | 348 | NULL, |
348 | }; | 349 | }; |
349 | 350 | ||
@@ -361,8 +362,8 @@ struct kobj_type space_info_ktype = { | |||
361 | }; | 362 | }; |
362 | 363 | ||
363 | static const struct attribute *allocation_attrs[] = { | 364 | static const struct attribute *allocation_attrs[] = { |
364 | BTRFS_ATTR_PTR(global_rsv_reserved), | 365 | BTRFS_ATTR_PTR(allocation, global_rsv_reserved), |
365 | BTRFS_ATTR_PTR(global_rsv_size), | 366 | BTRFS_ATTR_PTR(allocation, global_rsv_size), |
366 | NULL, | 367 | NULL, |
367 | }; | 368 | }; |
368 | 369 | ||
@@ -415,7 +416,7 @@ static ssize_t btrfs_label_store(struct kobject *kobj, | |||
415 | 416 | ||
416 | return len; | 417 | return len; |
417 | } | 418 | } |
418 | BTRFS_ATTR_RW(label, btrfs_label_show, btrfs_label_store); | 419 | BTRFS_ATTR_RW(, label, btrfs_label_show, btrfs_label_store); |
419 | 420 | ||
420 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, | 421 | static ssize_t btrfs_nodesize_show(struct kobject *kobj, |
421 | struct kobj_attribute *a, char *buf) | 422 | struct kobj_attribute *a, char *buf) |
@@ -425,7 +426,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj, | |||
425 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); | 426 | return snprintf(buf, PAGE_SIZE, "%u\n", fs_info->super_copy->nodesize); |
426 | } | 427 | } |
427 | 428 | ||
428 | BTRFS_ATTR(nodesize, btrfs_nodesize_show); | 429 | BTRFS_ATTR(, nodesize, btrfs_nodesize_show); |
429 | 430 | ||
430 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | 431 | static ssize_t btrfs_sectorsize_show(struct kobject *kobj, |
431 | struct kobj_attribute *a, char *buf) | 432 | struct kobj_attribute *a, char *buf) |
@@ -436,7 +437,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj, | |||
436 | fs_info->super_copy->sectorsize); | 437 | fs_info->super_copy->sectorsize); |
437 | } | 438 | } |
438 | 439 | ||
439 | BTRFS_ATTR(sectorsize, btrfs_sectorsize_show); | 440 | BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show); |
440 | 441 | ||
441 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | 442 | static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, |
442 | struct kobj_attribute *a, char *buf) | 443 | struct kobj_attribute *a, char *buf) |
@@ -447,7 +448,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj, | |||
447 | fs_info->super_copy->sectorsize); | 448 | fs_info->super_copy->sectorsize); |
448 | } | 449 | } |
449 | 450 | ||
450 | BTRFS_ATTR(clone_alignment, btrfs_clone_alignment_show); | 451 | BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show); |
451 | 452 | ||
452 | static ssize_t quota_override_show(struct kobject *kobj, | 453 | static ssize_t quota_override_show(struct kobject *kobj, |
453 | struct kobj_attribute *a, char *buf) | 454 | struct kobj_attribute *a, char *buf) |
@@ -487,14 +488,14 @@ static ssize_t quota_override_store(struct kobject *kobj, | |||
487 | return len; | 488 | return len; |
488 | } | 489 | } |
489 | 490 | ||
490 | BTRFS_ATTR_RW(quota_override, quota_override_show, quota_override_store); | 491 | BTRFS_ATTR_RW(, quota_override, quota_override_show, quota_override_store); |
491 | 492 | ||
492 | static const struct attribute *btrfs_attrs[] = { | 493 | static const struct attribute *btrfs_attrs[] = { |
493 | BTRFS_ATTR_PTR(label), | 494 | BTRFS_ATTR_PTR(, label), |
494 | BTRFS_ATTR_PTR(nodesize), | 495 | BTRFS_ATTR_PTR(, nodesize), |
495 | BTRFS_ATTR_PTR(sectorsize), | 496 | BTRFS_ATTR_PTR(, sectorsize), |
496 | BTRFS_ATTR_PTR(clone_alignment), | 497 | BTRFS_ATTR_PTR(, clone_alignment), |
497 | BTRFS_ATTR_PTR(quota_override), | 498 | BTRFS_ATTR_PTR(, quota_override), |
498 | NULL, | 499 | NULL, |
499 | }; | 500 | }; |
500 | 501 | ||
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index 4cb908305e5d..80457f31c29f 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
@@ -21,21 +21,16 @@ enum btrfs_feature_set { | |||
21 | .store = _store, \ | 21 | .store = _store, \ |
22 | } | 22 | } |
23 | 23 | ||
24 | #define BTRFS_ATTR_RW(_name, _show, _store) \ | 24 | #define BTRFS_ATTR_RW(_prefix, _name, _show, _store) \ |
25 | static struct kobj_attribute btrfs_attr_##_name = \ | 25 | static struct kobj_attribute btrfs_attr_##_prefix##_##_name = \ |
26 | __INIT_KOBJ_ATTR(_name, 0644, _show, _store) | 26 | __INIT_KOBJ_ATTR(_name, 0644, _show, _store) |
27 | 27 | ||
28 | #define BTRFS_ATTR(_name, _show) \ | 28 | #define BTRFS_ATTR(_prefix, _name, _show) \ |
29 | static struct kobj_attribute btrfs_attr_##_name = \ | 29 | static struct kobj_attribute btrfs_attr_##_prefix##_##_name = \ |
30 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) | 30 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) |
31 | 31 | ||
32 | #define BTRFS_ATTR_PTR(_name) (&btrfs_attr_##_name.attr) | 32 | #define BTRFS_ATTR_PTR(_prefix, _name) \ |
33 | 33 | (&btrfs_attr_##_prefix##_##_name.attr) | |
34 | #define BTRFS_RAID_ATTR(_name, _show) \ | ||
35 | static struct kobj_attribute btrfs_raid_attr_##_name = \ | ||
36 | __INIT_KOBJ_ATTR(_name, 0444, _show, NULL) | ||
37 | |||
38 | #define BTRFS_RAID_ATTR_PTR(_name) (&btrfs_raid_attr_##_name.attr) | ||
39 | 34 | ||
40 | 35 | ||
41 | struct btrfs_feature_attr { | 36 | struct btrfs_feature_attr { |
@@ -44,15 +39,16 @@ struct btrfs_feature_attr { | |||
44 | u64 feature_bit; | 39 | u64 feature_bit; |
45 | }; | 40 | }; |
46 | 41 | ||
47 | #define BTRFS_FEAT_ATTR(_name, _feature_set, _prefix, _feature_bit) \ | 42 | #define BTRFS_FEAT_ATTR(_name, _feature_set, _feature_prefix, _feature_bit) \ |
48 | static struct btrfs_feature_attr btrfs_attr_##_name = { \ | 43 | static struct btrfs_feature_attr btrfs_attr_features_##_name = { \ |
49 | .kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \ | 44 | .kobj_attr = __INIT_KOBJ_ATTR(_name, S_IRUGO, \ |
50 | btrfs_feature_attr_show, \ | 45 | btrfs_feature_attr_show, \ |
51 | btrfs_feature_attr_store), \ | 46 | btrfs_feature_attr_store), \ |
52 | .feature_set = _feature_set, \ | 47 | .feature_set = _feature_set, \ |
53 | .feature_bit = _prefix ##_## _feature_bit, \ | 48 | .feature_bit = _feature_prefix ##_## _feature_bit, \ |
54 | } | 49 | } |
55 | #define BTRFS_FEAT_ATTR_PTR(_name) (&btrfs_attr_##_name.kobj_attr.attr) | 50 | #define BTRFS_FEAT_ATTR_PTR(_name) \ |
51 | (&btrfs_attr_features_##_name.kobj_attr.attr) | ||
56 | 52 | ||
57 | #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ | 53 | #define BTRFS_FEAT_ATTR_COMPAT(name, feature) \ |
58 | BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) | 54 | BTRFS_FEAT_ATTR(name, FEAT_COMPAT, BTRFS_FEATURE_COMPAT, feature) |
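Under the reworked definitions above, the attribute variable is namespaced by the kobject group, which is what lets raid and space_info both define a total_bytes attribute without colliding. A worked expansion, following the macros as shown:

	BTRFS_ATTR(raid, total_bytes, raid_bytes_show);

	/* expands to */
	static struct kobj_attribute btrfs_attr_raid_total_bytes =
		__INIT_KOBJ_ATTR(total_bytes, 0444, raid_bytes_show, NULL);

	/* and BTRFS_ATTR_PTR(raid, total_bytes) yields
	 * &btrfs_attr_raid_total_bytes.attr; the sysfs file is still
	 * named "total_bytes". */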
diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 1458bb0ea124..8444a018cca2 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c | |||
@@ -500,7 +500,8 @@ static int run_test(test_func_t test_func, int bitmaps, u32 sectorsize, | |||
500 | path = btrfs_alloc_path(); | 500 | path = btrfs_alloc_path(); |
501 | if (!path) { | 501 | if (!path) { |
502 | test_msg("Couldn't allocate path\n"); | 502 | test_msg("Couldn't allocate path\n"); |
503 | return -ENOMEM; | 503 | ret = -ENOMEM; |
504 | goto out; | ||
504 | } | 505 | } |
505 | 506 | ||
506 | ret = add_block_group_free_space(&trans, root->fs_info, cache); | 507 | ret = add_block_group_free_space(&trans, root->fs_info, cache); |
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 8c91d03cc82d..f797642c013d 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c | |||
@@ -770,7 +770,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize) | |||
770 | offset = em->start + em->len; | 770 | offset = em->start + em->len; |
771 | free_extent_map(em); | 771 | free_extent_map(em); |
772 | 772 | ||
773 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, 4096 * 1024, 0); | 773 | em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, offset, SZ_4M, 0); |
774 | if (IS_ERR(em)) { | 774 | if (IS_ERR(em)) { |
775 | test_msg("Got an error when we shouldn't have\n"); | 775 | test_msg("Got an error when we shouldn't have\n"); |
776 | goto out; | 776 | goto out; |
@@ -968,7 +968,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
968 | btrfs_test_inode_set_ops(inode); | 968 | btrfs_test_inode_set_ops(inode); |
969 | 969 | ||
970 | /* [BTRFS_MAX_EXTENT_SIZE] */ | 970 | /* [BTRFS_MAX_EXTENT_SIZE] */ |
971 | BTRFS_I(inode)->outstanding_extents++; | ||
972 | ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, | 971 | ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1, |
973 | NULL, 0); | 972 | NULL, 0); |
974 | if (ret) { | 973 | if (ret) { |
@@ -983,7 +982,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
983 | } | 982 | } |
984 | 983 | ||
985 | /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ | 984 | /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ |
986 | BTRFS_I(inode)->outstanding_extents++; | ||
987 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, | 985 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE, |
988 | BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, | 986 | BTRFS_MAX_EXTENT_SIZE + sectorsize - 1, |
989 | NULL, 0); | 987 | NULL, 0); |
@@ -1003,7 +1001,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
1003 | BTRFS_MAX_EXTENT_SIZE >> 1, | 1001 | BTRFS_MAX_EXTENT_SIZE >> 1, |
1004 | (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, | 1002 | (BTRFS_MAX_EXTENT_SIZE >> 1) + sectorsize - 1, |
1005 | EXTENT_DELALLOC | EXTENT_DIRTY | | 1003 | EXTENT_DELALLOC | EXTENT_DIRTY | |
1006 | EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0, | 1004 | EXTENT_UPTODATE, 0, 0, |
1007 | NULL, GFP_KERNEL); | 1005 | NULL, GFP_KERNEL); |
1008 | if (ret) { | 1006 | if (ret) { |
1009 | test_msg("clear_extent_bit returned %d\n", ret); | 1007 | test_msg("clear_extent_bit returned %d\n", ret); |
@@ -1017,7 +1015,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
1017 | } | 1015 | } |
1018 | 1016 | ||
1019 | /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ | 1017 | /* [BTRFS_MAX_EXTENT_SIZE][sectorsize] */ |
1020 | BTRFS_I(inode)->outstanding_extents++; | ||
1021 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, | 1018 | ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1, |
1022 | (BTRFS_MAX_EXTENT_SIZE >> 1) | 1019 | (BTRFS_MAX_EXTENT_SIZE >> 1) |
1023 | + sectorsize - 1, | 1020 | + sectorsize - 1, |
@@ -1035,12 +1032,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
1035 | 1032 | ||
1036 | /* | 1033 | /* |
1037 | * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize] | 1034 | * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize HOLE][BTRFS_MAX_EXTENT_SIZE+sectorsize] |
1038 | * | ||
1039 | * I'm artificially adding 2 to outstanding_extents because in the | ||
1040 | * buffered IO case we'd add things up as we go, but I don't feel like | ||
1041 | * doing that here, this isn't the interesting case we want to test. | ||
1042 | */ | 1035 | */ |
1043 | BTRFS_I(inode)->outstanding_extents += 2; | ||
1044 | ret = btrfs_set_extent_delalloc(inode, | 1036 | ret = btrfs_set_extent_delalloc(inode, |
1045 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, | 1037 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize, |
1046 | (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, | 1038 | (BTRFS_MAX_EXTENT_SIZE << 1) + 3 * sectorsize - 1, |
@@ -1059,7 +1051,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
1059 | /* | 1051 | /* |
1060 | * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize] | 1052 | * [BTRFS_MAX_EXTENT_SIZE+sectorsize][sectorsize][BTRFS_MAX_EXTENT_SIZE+sectorsize] |
1061 | */ | 1053 | */ |
1062 | BTRFS_I(inode)->outstanding_extents++; | ||
1063 | ret = btrfs_set_extent_delalloc(inode, | 1054 | ret = btrfs_set_extent_delalloc(inode, |
1064 | BTRFS_MAX_EXTENT_SIZE + sectorsize, | 1055 | BTRFS_MAX_EXTENT_SIZE + sectorsize, |
1065 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); | 1056 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); |
@@ -1079,7 +1070,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
1079 | BTRFS_MAX_EXTENT_SIZE + sectorsize, | 1070 | BTRFS_MAX_EXTENT_SIZE + sectorsize, |
1080 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, | 1071 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, |
1081 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1072 | EXTENT_DIRTY | EXTENT_DELALLOC | |
1082 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | 1073 | EXTENT_UPTODATE, 0, 0, |
1083 | NULL, GFP_KERNEL); | 1074 | NULL, GFP_KERNEL); |
1084 | if (ret) { | 1075 | if (ret) { |
1085 | test_msg("clear_extent_bit returned %d\n", ret); | 1076 | test_msg("clear_extent_bit returned %d\n", ret); |
@@ -1096,7 +1087,6 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
1096 | * Refill the hole again just for good measure, because I thought it | 1087 | * Refill the hole again just for good measure, because I thought it |
1097 | * might fail and I'd rather satisfy my paranoia at this point. | 1088 | * might fail and I'd rather satisfy my paranoia at this point. |
1098 | */ | 1089 | */ |
1099 | BTRFS_I(inode)->outstanding_extents++; | ||
1100 | ret = btrfs_set_extent_delalloc(inode, | 1090 | ret = btrfs_set_extent_delalloc(inode, |
1101 | BTRFS_MAX_EXTENT_SIZE + sectorsize, | 1091 | BTRFS_MAX_EXTENT_SIZE + sectorsize, |
1102 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); | 1092 | BTRFS_MAX_EXTENT_SIZE + 2 * sectorsize - 1, NULL, 0); |
@@ -1114,7 +1104,7 @@ static int test_extent_accounting(u32 sectorsize, u32 nodesize) | |||
1114 | /* Empty */ | 1104 | /* Empty */ |
1115 | ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, | 1105 | ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, |
1116 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1106 | EXTENT_DIRTY | EXTENT_DELALLOC | |
1117 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | 1107 | EXTENT_UPTODATE, 0, 0, |
1118 | NULL, GFP_KERNEL); | 1108 | NULL, GFP_KERNEL); |
1119 | if (ret) { | 1109 | if (ret) { |
1120 | test_msg("clear_extent_bit returned %d\n", ret); | 1110 | test_msg("clear_extent_bit returned %d\n", ret); |
@@ -1131,7 +1121,7 @@ out: | |||
1131 | if (ret) | 1121 | if (ret) |
1132 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, | 1122 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1, |
1133 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1123 | EXTENT_DIRTY | EXTENT_DELALLOC | |
1134 | EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0, | 1124 | EXTENT_UPTODATE, 0, 0, |
1135 | NULL, GFP_KERNEL); | 1125 | NULL, GFP_KERNEL); |
1136 | iput(inode); | 1126 | iput(inode); |
1137 | btrfs_free_dummy_root(root); | 1127 | btrfs_free_dummy_root(root); |
diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 0f4ce970d195..90204b166643 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c | |||
@@ -240,7 +240,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, | |||
240 | * we can only call btrfs_qgroup_account_extent() directly to test | 240 | * we can only call btrfs_qgroup_account_extent() directly to test |
241 | * quota. | 241 | * quota. |
242 | */ | 242 | */ |
243 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); | 243 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, |
244 | false); | ||
244 | if (ret) { | 245 | if (ret) { |
245 | ulist_free(old_roots); | 246 | ulist_free(old_roots); |
246 | test_msg("Couldn't find old roots: %d\n", ret); | 247 | test_msg("Couldn't find old roots: %d\n", ret); |
@@ -252,7 +253,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, | |||
252 | if (ret) | 253 | if (ret) |
253 | return ret; | 254 | return ret; |
254 | 255 | ||
255 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); | 256 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, |
257 | false); | ||
256 | if (ret) { | 258 | if (ret) { |
257 | ulist_free(old_roots); | 259 | ulist_free(old_roots); |
258 | ulist_free(new_roots); | 260 | ulist_free(new_roots); |
@@ -275,7 +277,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, | |||
275 | old_roots = NULL; | 277 | old_roots = NULL; |
276 | new_roots = NULL; | 278 | new_roots = NULL; |
277 | 279 | ||
278 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); | 280 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, |
281 | false); | ||
279 | if (ret) { | 282 | if (ret) { |
280 | ulist_free(old_roots); | 283 | ulist_free(old_roots); |
281 | test_msg("Couldn't find old roots: %d\n", ret); | 284 | test_msg("Couldn't find old roots: %d\n", ret); |
@@ -286,7 +289,8 @@ static int test_no_shared_qgroup(struct btrfs_root *root, | |||
286 | if (ret) | 289 | if (ret) |
287 | return -EINVAL; | 290 | return -EINVAL; |
288 | 291 | ||
289 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); | 292 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, |
293 | false); | ||
290 | if (ret) { | 294 | if (ret) { |
291 | ulist_free(old_roots); | 295 | ulist_free(old_roots); |
292 | ulist_free(new_roots); | 296 | ulist_free(new_roots); |
@@ -337,7 +341,8 @@ static int test_multiple_refs(struct btrfs_root *root, | |||
337 | return ret; | 341 | return ret; |
338 | } | 342 | } |
339 | 343 | ||
340 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); | 344 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, |
345 | false); | ||
341 | if (ret) { | 346 | if (ret) { |
342 | ulist_free(old_roots); | 347 | ulist_free(old_roots); |
343 | test_msg("Couldn't find old roots: %d\n", ret); | 348 | test_msg("Couldn't find old roots: %d\n", ret); |
@@ -349,7 +354,8 @@ static int test_multiple_refs(struct btrfs_root *root, | |||
349 | if (ret) | 354 | if (ret) |
350 | return ret; | 355 | return ret; |
351 | 356 | ||
352 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); | 357 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, |
358 | false); | ||
353 | if (ret) { | 359 | if (ret) { |
354 | ulist_free(old_roots); | 360 | ulist_free(old_roots); |
355 | ulist_free(new_roots); | 361 | ulist_free(new_roots); |
@@ -370,7 +376,8 @@ static int test_multiple_refs(struct btrfs_root *root, | |||
370 | return -EINVAL; | 376 | return -EINVAL; |
371 | } | 377 | } |
372 | 378 | ||
373 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); | 379 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, |
380 | false); | ||
374 | if (ret) { | 381 | if (ret) { |
375 | ulist_free(old_roots); | 382 | ulist_free(old_roots); |
376 | test_msg("Couldn't find old roots: %d\n", ret); | 383 | test_msg("Couldn't find old roots: %d\n", ret); |
@@ -382,7 +389,8 @@ static int test_multiple_refs(struct btrfs_root *root, | |||
382 | if (ret) | 389 | if (ret) |
383 | return ret; | 390 | return ret; |
384 | 391 | ||
385 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); | 392 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, |
393 | false); | ||
386 | if (ret) { | 394 | if (ret) { |
387 | ulist_free(old_roots); | 395 | ulist_free(old_roots); |
388 | ulist_free(new_roots); | 396 | ulist_free(new_roots); |
@@ -409,7 +417,8 @@ static int test_multiple_refs(struct btrfs_root *root, | |||
409 | return -EINVAL; | 417 | return -EINVAL; |
410 | } | 418 | } |
411 | 419 | ||
412 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots); | 420 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, |
421 | false); | ||
413 | if (ret) { | 422 | if (ret) { |
414 | ulist_free(old_roots); | 423 | ulist_free(old_roots); |
415 | test_msg("Couldn't find old roots: %d\n", ret); | 424 | test_msg("Couldn't find old roots: %d\n", ret); |
@@ -421,7 +430,8 @@ static int test_multiple_refs(struct btrfs_root *root, | |||
421 | if (ret) | 430 | if (ret) |
422 | return ret; | 431 | return ret; |
423 | 432 | ||
424 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots); | 433 | ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, |
434 | false); | ||
425 | if (ret) { | 435 | if (ret) { |
426 | ulist_free(old_roots); | 436 | ulist_free(old_roots); |
427 | ulist_free(new_roots); | 437 | ulist_free(new_roots); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f615d59b0489..5a8c2649af2f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -797,8 +797,7 @@ static int should_end_transaction(struct btrfs_trans_handle *trans) | |||
797 | { | 797 | { |
798 | struct btrfs_fs_info *fs_info = trans->fs_info; | 798 | struct btrfs_fs_info *fs_info = trans->fs_info; |
799 | 799 | ||
800 | if (fs_info->global_block_rsv.space_info->full && | 800 | if (btrfs_check_space_for_delayed_refs(trans, fs_info)) |
801 | btrfs_check_space_for_delayed_refs(trans, fs_info)) | ||
802 | return 1; | 801 | return 1; |
803 | 802 | ||
804 | return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5); | 803 | return !!btrfs_block_rsv_check(&fs_info->global_block_rsv, 5); |
@@ -950,6 +949,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info, | |||
950 | u64 start = 0; | 949 | u64 start = 0; |
951 | u64 end; | 950 | u64 end; |
952 | 951 | ||
952 | atomic_inc(&BTRFS_I(fs_info->btree_inode)->sync_writers); | ||
953 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, | 953 | while (!find_first_extent_bit(dirty_pages, start, &start, &end, |
954 | mark, &cached_state)) { | 954 | mark, &cached_state)) { |
955 | bool wait_writeback = false; | 955 | bool wait_writeback = false; |
@@ -985,6 +985,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info, | |||
985 | cond_resched(); | 985 | cond_resched(); |
986 | start = end + 1; | 986 | start = end + 1; |
987 | } | 987 | } |
988 | atomic_dec(&BTRFS_I(fs_info->btree_inode)->sync_writers); | ||
988 | return werr; | 989 | return werr; |
989 | } | 990 | } |
990 | 991 | ||
@@ -1915,8 +1916,17 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
1915 | 1916 | ||
1916 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1917 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
1917 | { | 1918 | { |
1919 | /* | ||
1920 | * We use writeback_inodes_sb here because if we used | ||
1921 | * btrfs_start_delalloc_roots we would deadlock with fs freeze. | ||
1922 | * We are currently holding the fs freeze lock; if we do an async flush | ||
1923 | * we'll do btrfs_join_transaction() and deadlock because we need to | ||
1924 | * wait for the fs freeze lock. Using the direct flushing we benefit | ||
1925 | * from already being in a transaction and our join_transaction doesn't | ||
1926 | * have to re-take the fs freeze lock. | ||
1927 | */ | ||
1918 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) | 1928 | if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) |
1919 | return btrfs_start_delalloc_roots(fs_info, 1, -1); | 1929 | writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); |
1920 | return 0; | 1930 | return 0; |
1921 | } | 1931 | } |
1922 | 1932 | ||
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c new file mode 100644 index 000000000000..114fc5f0ecc5 --- /dev/null +++ b/fs/btrfs/tree-checker.c | |||
@@ -0,0 +1,425 @@ | |||
1 | /* | ||
2 | * Copyright (C) Qu Wenruo 2017. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * The module is used to catch unexpected/corrupted tree block data. | ||
19 | * Such corruption can be caused either by a fuzzed image or by bugs. | ||
20 | * | ||
21 | * The objective is to do leaf/node validation checks when a tree block is | ||
22 | * read from disk, and to check *every* possible member, so other code | ||
23 | * won't need to check them again. | ||
24 | * | ||
25 | * Due to the potential for unwanted damage, every checker needs to be | ||
26 | * carefully reviewed so that it does not prevent the mount of valid images. | ||
27 | */ | ||
28 | |||
29 | #include "ctree.h" | ||
30 | #include "tree-checker.h" | ||
31 | #include "disk-io.h" | ||
32 | #include "compression.h" | ||
33 | |||
34 | /* | ||
35 | * Error message should follow the following format: | ||
36 | * corrupt <type>: <identifier>, <reason>[, <bad_value>] | ||
37 | * | ||
38 | * @type: leaf or node | ||
39 | * @identifier: the necessary info to locate the leaf/node. | ||
40 | * It's recommended to decode key.objectid/offset if it's | ||
41 | * meaningful. | ||
42 | * @reason: describe the error | ||
43 | * @bad_value: optional, it's recommended to output the bad value and its | ||
44 | * expected value (range). | ||
45 | * | ||
46 | * Since comma is used to separate the components, only space is allowed | ||
47 | * inside each component. | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * Append generic "corrupt leaf/node root=%llu block=%llu slot=%d: " to @fmt. | ||
52 | * Allows callers to customize the output. | ||
53 | */ | ||
54 | __printf(4, 5) | ||
55 | static void generic_err(const struct btrfs_root *root, | ||
56 | const struct extent_buffer *eb, int slot, | ||
57 | const char *fmt, ...) | ||
58 | { | ||
59 | struct va_format vaf; | ||
60 | va_list args; | ||
61 | |||
62 | va_start(args, fmt); | ||
63 | |||
64 | vaf.fmt = fmt; | ||
65 | vaf.va = &args; | ||
66 | |||
67 | btrfs_crit(root->fs_info, | ||
68 | "corrupt %s: root=%llu block=%llu slot=%d, %pV", | ||
69 | btrfs_header_level(eb) == 0 ? "leaf" : "node", | ||
70 | root->objectid, btrfs_header_bytenr(eb), slot, &vaf); | ||
71 | va_end(args); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Customized reporter for extent data item, since its key objectid and | ||
76 | * offset has its own meaning. | ||
77 | */ | ||
78 | __printf(4, 5) | ||
79 | static void file_extent_err(const struct btrfs_root *root, | ||
80 | const struct extent_buffer *eb, int slot, | ||
81 | const char *fmt, ...) | ||
82 | { | ||
83 | struct btrfs_key key; | ||
84 | struct va_format vaf; | ||
85 | va_list args; | ||
86 | |||
87 | btrfs_item_key_to_cpu(eb, &key, slot); | ||
88 | va_start(args, fmt); | ||
89 | |||
90 | vaf.fmt = fmt; | ||
91 | vaf.va = &args; | ||
92 | |||
93 | btrfs_crit(root->fs_info, | ||
94 | "corrupt %s: root=%llu block=%llu slot=%d ino=%llu file_offset=%llu, %pV", | ||
95 | btrfs_header_level(eb) == 0 ? "leaf" : "node", root->objectid, | ||
96 | btrfs_header_bytenr(eb), slot, key.objectid, key.offset, &vaf); | ||
97 | va_end(args); | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * Return 0 if the btrfs_file_extent_##name is aligned to @alignment | ||
102 | * Else return 1 | ||
103 | */ | ||
104 | #define CHECK_FE_ALIGNED(root, leaf, slot, fi, name, alignment) \ | ||
105 | ({ \ | ||
106 | if (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))) \ | ||
107 | file_extent_err((root), (leaf), (slot), \ | ||
108 | "invalid %s for file extent, have %llu, should be aligned to %u", \ | ||
109 | (#name), btrfs_file_extent_##name((leaf), (fi)), \ | ||
110 | (alignment)); \ | ||
111 | (!IS_ALIGNED(btrfs_file_extent_##name((leaf), (fi)), (alignment))); \ | ||
112 | }) | ||
113 | |||
114 | static int check_extent_data_item(struct btrfs_root *root, | ||
115 | struct extent_buffer *leaf, | ||
116 | struct btrfs_key *key, int slot) | ||
117 | { | ||
118 | struct btrfs_file_extent_item *fi; | ||
119 | u32 sectorsize = root->fs_info->sectorsize; | ||
120 | u32 item_size = btrfs_item_size_nr(leaf, slot); | ||
121 | |||
122 | if (!IS_ALIGNED(key->offset, sectorsize)) { | ||
123 | file_extent_err(root, leaf, slot, | ||
124 | "unaligned file_offset for file extent, have %llu should be aligned to %u", | ||
125 | key->offset, sectorsize); | ||
126 | return -EUCLEAN; | ||
127 | } | ||
128 | |||
129 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
130 | |||
131 | if (btrfs_file_extent_type(leaf, fi) > BTRFS_FILE_EXTENT_TYPES) { | ||
132 | file_extent_err(root, leaf, slot, | ||
133 | "invalid type for file extent, have %u expect range [0, %u]", | ||
134 | btrfs_file_extent_type(leaf, fi), | ||
135 | BTRFS_FILE_EXTENT_TYPES); | ||
136 | return -EUCLEAN; | ||
137 | } | ||
138 | |||
139 | /* | ||
140 | * Support for new compression/encryption must introduce an incompat flag, | ||
141 | * and must be caught in open_ctree(). | ||
142 | */ | ||
143 | if (btrfs_file_extent_compression(leaf, fi) > BTRFS_COMPRESS_TYPES) { | ||
144 | file_extent_err(root, leaf, slot, | ||
145 | "invalid compression for file extent, have %u expect range [0, %u]", | ||
146 | btrfs_file_extent_compression(leaf, fi), | ||
147 | BTRFS_COMPRESS_TYPES); | ||
148 | return -EUCLEAN; | ||
149 | } | ||
150 | if (btrfs_file_extent_encryption(leaf, fi)) { | ||
151 | file_extent_err(root, leaf, slot, | ||
152 | "invalid encryption for file extent, have %u expect 0", | ||
153 | btrfs_file_extent_encryption(leaf, fi)); | ||
154 | return -EUCLEAN; | ||
155 | } | ||
156 | if (btrfs_file_extent_type(leaf, fi) == BTRFS_FILE_EXTENT_INLINE) { | ||
157 | /* Inline extent must have 0 as key offset */ | ||
158 | if (key->offset) { | ||
159 | file_extent_err(root, leaf, slot, | ||
160 | "invalid file_offset for inline file extent, have %llu expect 0", | ||
161 | key->offset); | ||
162 | return -EUCLEAN; | ||
163 | } | ||
164 | |||
165 | /* Compressed inline extent has no on-disk size, skip it */ | ||
166 | if (btrfs_file_extent_compression(leaf, fi) != | ||
167 | BTRFS_COMPRESS_NONE) | ||
168 | return 0; | ||
169 | |||
170 | /* Uncompressed inline extent size must match item size */ | ||
171 | if (item_size != BTRFS_FILE_EXTENT_INLINE_DATA_START + | ||
172 | btrfs_file_extent_ram_bytes(leaf, fi)) { | ||
173 | file_extent_err(root, leaf, slot, | ||
174 | "invalid ram_bytes for uncompressed inline extent, have %u expect %llu", | ||
175 | item_size, BTRFS_FILE_EXTENT_INLINE_DATA_START + | ||
176 | btrfs_file_extent_ram_bytes(leaf, fi)); | ||
177 | return -EUCLEAN; | ||
178 | } | ||
179 | return 0; | ||
180 | } | ||
181 | |||
182 | /* Regular or preallocated extent has fixed item size */ | ||
183 | if (item_size != sizeof(*fi)) { | ||
184 | file_extent_err(root, leaf, slot, | ||
185 | "invalid item size for reg/prealloc file extent, have %u expect %zu", | ||
186 | item_size, sizeof(*fi)); | ||
187 | return -EUCLEAN; | ||
188 | } | ||
189 | if (CHECK_FE_ALIGNED(root, leaf, slot, fi, ram_bytes, sectorsize) || | ||
190 | CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_bytenr, sectorsize) || | ||
191 | CHECK_FE_ALIGNED(root, leaf, slot, fi, disk_num_bytes, sectorsize) || | ||
192 | CHECK_FE_ALIGNED(root, leaf, slot, fi, offset, sectorsize) || | ||
193 | CHECK_FE_ALIGNED(root, leaf, slot, fi, num_bytes, sectorsize)) | ||
194 | return -EUCLEAN; | ||
195 | return 0; | ||
196 | } | ||
197 | |||
198 | static int check_csum_item(struct btrfs_root *root, struct extent_buffer *leaf, | ||
199 | struct btrfs_key *key, int slot) | ||
200 | { | ||
201 | u32 sectorsize = root->fs_info->sectorsize; | ||
202 | u32 csumsize = btrfs_super_csum_size(root->fs_info->super_copy); | ||
203 | |||
204 | if (key->objectid != BTRFS_EXTENT_CSUM_OBJECTID) { | ||
205 | generic_err(root, leaf, slot, | ||
206 | "invalid key objectid for csum item, have %llu expect %llu", | ||
207 | key->objectid, BTRFS_EXTENT_CSUM_OBJECTID); | ||
208 | return -EUCLEAN; | ||
209 | } | ||
210 | if (!IS_ALIGNED(key->offset, sectorsize)) { | ||
211 | generic_err(root, leaf, slot, | ||
212 | "unaligned key offset for csum item, have %llu should be aligned to %u", | ||
213 | key->offset, sectorsize); | ||
214 | return -EUCLEAN; | ||
215 | } | ||
216 | if (!IS_ALIGNED(btrfs_item_size_nr(leaf, slot), csumsize)) { | ||
217 | generic_err(root, leaf, slot, | ||
218 | "unaligned item size for csum item, have %u should be aligned to %u", | ||
219 | btrfs_item_size_nr(leaf, slot), csumsize); | ||
220 | return -EUCLEAN; | ||
221 | } | ||
222 | return 0; | ||
223 | } | ||
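Since check_csum_item() requires the item size to be a multiple of the checksum size, the number of checksummed sectors falls straight out of the arithmetic: each csum covers one sector, so one item covers item_size / csumsize sectors. A small standalone illustration, assuming the crc32c checksum size of 4 bytes and a 4 KiB sector (both values are assumptions, not read from a filesystem):

#include <stdio.h>

int main(void)
{
	unsigned int sectorsize = 4096;	/* assumed sector size */
	unsigned int csumsize = 4;	/* crc32c: 4 bytes per sector */
	unsigned int item_size = 256;	/* hypothetical csum item size */

	if (item_size % csumsize) {
		fprintf(stderr, "unaligned item size for csum item\n");
		return 1;	/* -EUCLEAN in the kernel */
	}
	printf("csum item covers %u sectors (%u bytes of data)\n",
	       item_size / csumsize,
	       (item_size / csumsize) * sectorsize);
	return 0;
}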
224 | |||
225 | /* | ||
226 | * Common entry point to dispatch the item-specific validation. | ||
227 | */ | ||
228 | static int check_leaf_item(struct btrfs_root *root, | ||
229 | struct extent_buffer *leaf, | ||
230 | struct btrfs_key *key, int slot) | ||
231 | { | ||
232 | int ret = 0; | ||
233 | |||
234 | switch (key->type) { | ||
235 | case BTRFS_EXTENT_DATA_KEY: | ||
236 | ret = check_extent_data_item(root, leaf, key, slot); | ||
237 | break; | ||
238 | case BTRFS_EXTENT_CSUM_KEY: | ||
239 | ret = check_csum_item(root, leaf, key, slot); | ||
240 | break; | ||
241 | } | ||
242 | return ret; | ||
243 | } | ||
244 | |||
245 | int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf) | ||
246 | { | ||
247 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
248 | /* No valid key has type 0, so all valid keys compare greater than this one */ | ||
249 | struct btrfs_key prev_key = {0, 0, 0}; | ||
250 | struct btrfs_key key; | ||
251 | u32 nritems = btrfs_header_nritems(leaf); | ||
252 | int slot; | ||
253 | |||
254 | /* | ||
255 | * Extent buffers from a relocation tree have an owner field that | ||
256 | * corresponds to the subvolume tree they are based on. From the | ||
257 | * extent buffer alone we therefore cannot find the id of the | ||
258 | * corresponding subvolume tree, nor can we tell whether the extent | ||
259 | * buffer corresponds to the root of the relocation tree or not. So | ||
260 | * skip this check for relocation trees. | ||
261 | */ | ||
262 | if (nritems == 0 && !btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_RELOC)) { | ||
263 | struct btrfs_root *check_root; | ||
264 | |||
265 | key.objectid = btrfs_header_owner(leaf); | ||
266 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
267 | key.offset = (u64)-1; | ||
268 | |||
269 | check_root = btrfs_get_fs_root(fs_info, &key, false); | ||
270 | /* | ||
271 | * The only reason we also check NULL here is that during | ||
272 | * open_ctree() some roots have not yet been set up. | ||
273 | */ | ||
274 | if (!IS_ERR_OR_NULL(check_root)) { | ||
275 | struct extent_buffer *eb; | ||
276 | |||
277 | eb = btrfs_root_node(check_root); | ||
278 | /* if leaf is the root, then it's fine */ | ||
279 | if (leaf != eb) { | ||
280 | generic_err(check_root, leaf, 0, | ||
281 | "invalid nritems, have %u should not be 0 for non-root leaf", | ||
282 | nritems); | ||
283 | free_extent_buffer(eb); | ||
284 | return -EUCLEAN; | ||
285 | } | ||
286 | free_extent_buffer(eb); | ||
287 | } | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | if (nritems == 0) | ||
292 | return 0; | ||
293 | |||
294 | /* | ||
295 | * Check the following things to make sure this is a good leaf, and | ||
296 | * leaf users won't need to bother with similar sanity checks: | ||
297 | * | ||
298 | * 1) key ordering | ||
299 | * 2) item offset and size | ||
300 | * No overlap, no hole, all inside the leaf. | ||
301 | * 3) item content | ||
302 | * If possible, do comprehensive sanity check. | ||
303 | * NOTE: All checks must only rely on the item data itself. | ||
304 | */ | ||
305 | for (slot = 0; slot < nritems; slot++) { | ||
306 | u32 item_end_expected; | ||
307 | int ret; | ||
308 | |||
309 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
310 | |||
311 | /* Make sure the keys are in the right order */ | ||
312 | if (btrfs_comp_cpu_keys(&prev_key, &key) >= 0) { | ||
313 | generic_err(root, leaf, slot, | ||
314 | "bad key order, prev (%llu %u %llu) current (%llu %u %llu)", | ||
315 | prev_key.objectid, prev_key.type, | ||
316 | prev_key.offset, key.objectid, key.type, | ||
317 | key.offset); | ||
318 | return -EUCLEAN; | ||
319 | } | ||
320 | |||
321 | /* | ||
322 | * Make sure the offsets and ends are right; remember that the | ||
323 | * item data starts at the end of the leaf and grows towards the | ||
324 | * front. | ||
325 | */ | ||
326 | if (slot == 0) | ||
327 | item_end_expected = BTRFS_LEAF_DATA_SIZE(fs_info); | ||
328 | else | ||
329 | item_end_expected = btrfs_item_offset_nr(leaf, | ||
330 | slot - 1); | ||
331 | if (btrfs_item_end_nr(leaf, slot) != item_end_expected) { | ||
332 | generic_err(root, leaf, slot, | ||
333 | "unexpected item end, have %u expect %u", | ||
334 | btrfs_item_end_nr(leaf, slot), | ||
335 | item_end_expected); | ||
336 | return -EUCLEAN; | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * Check to make sure that we don't point outside of the leaf, | ||
341 | * just in case all the items are consistent with each other, but | ||
342 | * all point outside of the leaf. | ||
343 | */ | ||
344 | if (btrfs_item_end_nr(leaf, slot) > | ||
345 | BTRFS_LEAF_DATA_SIZE(fs_info)) { | ||
346 | generic_err(root, leaf, slot, | ||
347 | "slot end outside of leaf, have %u expect range [0, %u]", | ||
348 | btrfs_item_end_nr(leaf, slot), | ||
349 | BTRFS_LEAF_DATA_SIZE(fs_info)); | ||
350 | return -EUCLEAN; | ||
351 | } | ||
352 | |||
353 | /* Also check that the struct btrfs_item does not overlap its item data. */ | ||
354 | if (btrfs_item_nr_offset(slot) + sizeof(struct btrfs_item) > | ||
355 | btrfs_item_ptr_offset(leaf, slot)) { | ||
356 | generic_err(root, leaf, slot, | ||
357 | "slot overlaps with its data, item end %lu data start %lu", | ||
358 | btrfs_item_nr_offset(slot) + | ||
359 | sizeof(struct btrfs_item), | ||
360 | btrfs_item_ptr_offset(leaf, slot)); | ||
361 | return -EUCLEAN; | ||
362 | } | ||
363 | |||
364 | /* Check if the item size and content meet other criteria */ | ||
365 | ret = check_leaf_item(root, leaf, &key, slot); | ||
366 | if (ret < 0) | ||
367 | return ret; | ||
368 | |||
369 | prev_key.objectid = key.objectid; | ||
370 | prev_key.type = key.type; | ||
371 | prev_key.offset = key.offset; | ||
372 | } | ||
373 | |||
374 | return 0; | ||
375 | } | ||
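The offset/end checks in btrfs_check_leaf() encode the leaf layout: struct btrfs_item headers grow forward from the leaf header while item data is packed back-to-front from the end of the data area, so slot 0's data must end at BTRFS_LEAF_DATA_SIZE and every later slot's data must end exactly where the previous slot's data starts. A self-contained sketch of that invariant with fabricated offsets (the data-area size is arbitrary):

#include <stdio.h>

struct item { unsigned int offset, size; };	/* simplified btrfs_item */

int main(void)
{
	unsigned int data_size = 16283;	/* stand-in for BTRFS_LEAF_DATA_SIZE */
	/* Data packed back to front: slot 0 ends at data_size. */
	struct item items[] = { {16183, 100}, {16083, 100}, {15983, 100} };
	unsigned int nritems = 3, slot;

	for (slot = 0; slot < nritems; slot++) {
		unsigned int end = items[slot].offset + items[slot].size;
		unsigned int expect = slot ? items[slot - 1].offset : data_size;

		if (end != expect || end > data_size) {
			fprintf(stderr,
				"slot %u: unexpected item end, have %u expect %u\n",
				slot, end, expect);
			return 1;	/* -EUCLEAN in the kernel */
		}
	}
	printf("leaf layout consistent\n");
	return 0;
}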
376 | |||
377 | int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node) | ||
378 | { | ||
379 | unsigned long nr = btrfs_header_nritems(node); | ||
380 | struct btrfs_key key, next_key; | ||
381 | int slot; | ||
382 | u64 bytenr; | ||
383 | int ret = 0; | ||
384 | |||
385 | if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)) { | ||
386 | btrfs_crit(root->fs_info, | ||
387 | "corrupt node: root=%llu block=%llu, nritems too %s, have %lu expect range [1,%u]", | ||
388 | root->objectid, node->start, | ||
389 | nr == 0 ? "small" : "large", nr, | ||
390 | BTRFS_NODEPTRS_PER_BLOCK(root->fs_info)); | ||
391 | return -EUCLEAN; | ||
392 | } | ||
393 | |||
394 | for (slot = 0; slot < nr - 1; slot++) { | ||
395 | bytenr = btrfs_node_blockptr(node, slot); | ||
396 | btrfs_node_key_to_cpu(node, &key, slot); | ||
397 | btrfs_node_key_to_cpu(node, &next_key, slot + 1); | ||
398 | |||
399 | if (!bytenr) { | ||
400 | generic_err(root, node, slot, | ||
401 | "invalid NULL node pointer"); | ||
402 | ret = -EUCLEAN; | ||
403 | goto out; | ||
404 | } | ||
405 | if (!IS_ALIGNED(bytenr, root->fs_info->sectorsize)) { | ||
406 | generic_err(root, node, slot, | ||
407 | "unaligned pointer, have %llu should be aligned to %u", | ||
408 | bytenr, root->fs_info->sectorsize); | ||
409 | ret = -EUCLEAN; | ||
410 | goto out; | ||
411 | } | ||
412 | |||
413 | if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) { | ||
414 | generic_err(root, node, slot, | ||
415 | "bad key order, current (%llu %u %llu) next (%llu %u %llu)", | ||
416 | key.objectid, key.type, key.offset, | ||
417 | next_key.objectid, next_key.type, | ||
418 | next_key.offset); | ||
419 | ret = -EUCLEAN; | ||
420 | goto out; | ||
421 | } | ||
422 | } | ||
423 | out: | ||
424 | return ret; | ||
425 | } | ||
diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h new file mode 100644 index 000000000000..96c486e95d70 --- /dev/null +++ b/fs/btrfs/tree-checker.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Copyright (C) Qu Wenruo 2017. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program. | ||
15 | */ | ||
16 | |||
17 | #ifndef __BTRFS_TREE_CHECKER__ | ||
18 | #define __BTRFS_TREE_CHECKER__ | ||
19 | |||
20 | #include "ctree.h" | ||
21 | #include "extent_io.h" | ||
22 | |||
23 | int btrfs_check_leaf(struct btrfs_root *root, struct extent_buffer *leaf); | ||
24 | int btrfs_check_node(struct btrfs_root *root, struct extent_buffer *node); | ||
25 | |||
26 | #endif | ||
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c800d067fcbf..aa7c71cff575 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -717,7 +717,7 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans, | |||
717 | ret = btrfs_lookup_data_extent(fs_info, ins.objectid, | 717 | ret = btrfs_lookup_data_extent(fs_info, ins.objectid, |
718 | ins.offset); | 718 | ins.offset); |
719 | if (ret == 0) { | 719 | if (ret == 0) { |
720 | ret = btrfs_inc_extent_ref(trans, fs_info, | 720 | ret = btrfs_inc_extent_ref(trans, root, |
721 | ins.objectid, ins.offset, | 721 | ins.objectid, ins.offset, |
722 | 0, root->root_key.objectid, | 722 | 0, root->root_key.objectid, |
723 | key->objectid, offset); | 723 | key->objectid, offset); |
@@ -2699,34 +2699,36 @@ static void wait_log_commit(struct btrfs_root *root, int transid) | |||
2699 | * so we know that if ours is more than 2 older than the | 2699 | * so we know that if ours is more than 2 older than the |
2700 | * current transaction, we're done | 2700 | * current transaction, we're done |
2701 | */ | 2701 | */ |
2702 | do { | 2702 | for (;;) { |
2703 | prepare_to_wait(&root->log_commit_wait[index], | 2703 | prepare_to_wait(&root->log_commit_wait[index], |
2704 | &wait, TASK_UNINTERRUPTIBLE); | 2704 | &wait, TASK_UNINTERRUPTIBLE); |
2705 | mutex_unlock(&root->log_mutex); | ||
2706 | 2705 | ||
2707 | if (root->log_transid_committed < transid && | 2706 | if (!(root->log_transid_committed < transid && |
2708 | atomic_read(&root->log_commit[index])) | 2707 | atomic_read(&root->log_commit[index]))) |
2709 | schedule(); | 2708 | break; |
2710 | 2709 | ||
2711 | finish_wait(&root->log_commit_wait[index], &wait); | 2710 | mutex_unlock(&root->log_mutex); |
2711 | schedule(); | ||
2712 | mutex_lock(&root->log_mutex); | 2712 | mutex_lock(&root->log_mutex); |
2713 | } while (root->log_transid_committed < transid && | 2713 | } |
2714 | atomic_read(&root->log_commit[index])); | 2714 | finish_wait(&root->log_commit_wait[index], &wait); |
2715 | } | 2715 | } |
2716 | 2716 | ||
2717 | static void wait_for_writer(struct btrfs_root *root) | 2717 | static void wait_for_writer(struct btrfs_root *root) |
2718 | { | 2718 | { |
2719 | DEFINE_WAIT(wait); | 2719 | DEFINE_WAIT(wait); |
2720 | 2720 | ||
2721 | while (atomic_read(&root->log_writers)) { | 2721 | for (;;) { |
2722 | prepare_to_wait(&root->log_writer_wait, | 2722 | prepare_to_wait(&root->log_writer_wait, &wait, |
2723 | &wait, TASK_UNINTERRUPTIBLE); | 2723 | TASK_UNINTERRUPTIBLE); |
2724 | if (!atomic_read(&root->log_writers)) | ||
2725 | break; | ||
2726 | |||
2724 | mutex_unlock(&root->log_mutex); | 2727 | mutex_unlock(&root->log_mutex); |
2725 | if (atomic_read(&root->log_writers)) | 2728 | schedule(); |
2726 | schedule(); | ||
2727 | finish_wait(&root->log_writer_wait, &wait); | ||
2728 | mutex_lock(&root->log_mutex); | 2729 | mutex_lock(&root->log_mutex); |
2729 | } | 2730 | } |
2731 | finish_wait(&root->log_writer_wait, &wait); | ||
2730 | } | 2732 | } |
2731 | 2733 | ||
2732 | static inline void btrfs_remove_log_ctx(struct btrfs_root *root, | 2734 | static inline void btrfs_remove_log_ctx(struct btrfs_root *root, |
@@ -4645,7 +4647,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4645 | struct btrfs_key min_key; | 4647 | struct btrfs_key min_key; |
4646 | struct btrfs_key max_key; | 4648 | struct btrfs_key max_key; |
4647 | struct btrfs_root *log = root->log_root; | 4649 | struct btrfs_root *log = root->log_root; |
4648 | struct extent_buffer *src = NULL; | ||
4649 | LIST_HEAD(logged_list); | 4650 | LIST_HEAD(logged_list); |
4650 | u64 last_extent = 0; | 4651 | u64 last_extent = 0; |
4651 | int err = 0; | 4652 | int err = 0; |
@@ -4888,7 +4889,6 @@ again: | |||
4888 | goto next_slot; | 4889 | goto next_slot; |
4889 | } | 4890 | } |
4890 | 4891 | ||
4891 | src = path->nodes[0]; | ||
4892 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { | 4892 | if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { |
4893 | ins_nr++; | 4893 | ins_nr++; |
4894 | goto next_slot; | 4894 | goto next_slot; |
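The wait_log_commit()/wait_for_writer() rework above follows the standard kernel wait-loop shape: register on the waitqueue with prepare_to_wait(), re-check the condition, break while still registered, and only then drop log_mutex and schedule(), so a wakeup arriving between the check and the sleep cannot be lost. As a rough userspace analogue (not the kernel code), the same lock-protected condition can be waited on with a pthread condition variable, where pthread_cond_wait() plays the role of the unlock/schedule/relock sequence; all names below are illustrative:

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t log_writer_wait = PTHREAD_COND_INITIALIZER;
static int log_writers = 1;		/* protected by log_mutex */

/* Called with log_mutex held, like wait_for_writer() in tree-log.c. */
static void wait_for_writer(void)
{
	/* Re-check the condition on every wakeup; pthread_cond_wait()
	 * atomically releases the mutex, sleeps and reacquires it. */
	while (log_writers)
		pthread_cond_wait(&log_writer_wait, &log_mutex);
}

static void *writer(void *arg)
{
	(void)arg;
	sleep(1);			/* pretend to write to the log */
	pthread_mutex_lock(&log_mutex);
	log_writers--;
	pthread_cond_broadcast(&log_writer_wait);
	pthread_mutex_unlock(&log_mutex);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, writer, NULL);
	pthread_mutex_lock(&log_mutex);
	wait_for_writer();
	pthread_mutex_unlock(&log_mutex);
	pthread_join(t, NULL);
	printf("no more log writers\n");
	return 0;
}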
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index b39737568c22..f1ecb938ba4d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -360,7 +360,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) | |||
360 | int again = 0; | 360 | int again = 0; |
361 | unsigned long num_run; | 361 | unsigned long num_run; |
362 | unsigned long batch_run = 0; | 362 | unsigned long batch_run = 0; |
363 | unsigned long limit; | ||
364 | unsigned long last_waited = 0; | 363 | unsigned long last_waited = 0; |
365 | int force_reg = 0; | 364 | int force_reg = 0; |
366 | int sync_pending = 0; | 365 | int sync_pending = 0; |
@@ -375,8 +374,6 @@ static noinline void run_scheduled_bios(struct btrfs_device *device) | |||
375 | blk_start_plug(&plug); | 374 | blk_start_plug(&plug); |
376 | 375 | ||
377 | bdi = device->bdev->bd_bdi; | 376 | bdi = device->bdev->bd_bdi; |
378 | limit = btrfs_async_submit_limit(fs_info); | ||
379 | limit = limit * 2 / 3; | ||
380 | 377 | ||
381 | loop: | 378 | loop: |
382 | spin_lock(&device->io_lock); | 379 | spin_lock(&device->io_lock); |
@@ -443,13 +440,6 @@ loop_lock: | |||
443 | pending = pending->bi_next; | 440 | pending = pending->bi_next; |
444 | cur->bi_next = NULL; | 441 | cur->bi_next = NULL; |
445 | 442 | ||
446 | /* | ||
447 | * atomic_dec_return implies a barrier for waitqueue_active | ||
448 | */ | ||
449 | if (atomic_dec_return(&fs_info->nr_async_bios) < limit && | ||
450 | waitqueue_active(&fs_info->async_submit_wait)) | ||
451 | wake_up(&fs_info->async_submit_wait); | ||
452 | |||
453 | BUG_ON(atomic_read(&cur->__bi_cnt) == 0); | 443 | BUG_ON(atomic_read(&cur->__bi_cnt) == 0); |
454 | 444 | ||
455 | /* | 445 | /* |
@@ -517,12 +507,6 @@ loop_lock: | |||
517 | &device->work); | 507 | &device->work); |
518 | goto done; | 508 | goto done; |
519 | } | 509 | } |
520 | /* unplug every 64 requests just for good measure */ | ||
521 | if (batch_run % 64 == 0) { | ||
522 | blk_finish_plug(&plug); | ||
523 | blk_start_plug(&plug); | ||
524 | sync_pending = 0; | ||
525 | } | ||
526 | } | 510 | } |
527 | 511 | ||
528 | cond_resched(); | 512 | cond_resched(); |
@@ -547,7 +531,7 @@ static void pending_bios_fn(struct btrfs_work *work) | |||
547 | } | 531 | } |
548 | 532 | ||
549 | 533 | ||
550 | void btrfs_free_stale_device(struct btrfs_device *cur_dev) | 534 | static void btrfs_free_stale_device(struct btrfs_device *cur_dev) |
551 | { | 535 | { |
552 | struct btrfs_fs_devices *fs_devs; | 536 | struct btrfs_fs_devices *fs_devs; |
553 | struct btrfs_device *dev; | 537 | struct btrfs_device *dev; |
@@ -1068,14 +1052,15 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
1068 | return ret; | 1052 | return ret; |
1069 | } | 1053 | } |
1070 | 1054 | ||
1071 | void btrfs_release_disk_super(struct page *page) | 1055 | static void btrfs_release_disk_super(struct page *page) |
1072 | { | 1056 | { |
1073 | kunmap(page); | 1057 | kunmap(page); |
1074 | put_page(page); | 1058 | put_page(page); |
1075 | } | 1059 | } |
1076 | 1060 | ||
1077 | int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, | 1061 | static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr, |
1078 | struct page **page, struct btrfs_super_block **disk_super) | 1062 | struct page **page, |
1063 | struct btrfs_super_block **disk_super) | ||
1079 | { | 1064 | { |
1080 | void *p; | 1065 | void *p; |
1081 | pgoff_t index; | 1066 | pgoff_t index; |
@@ -1817,8 +1802,8 @@ static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info, | |||
1817 | return 0; | 1802 | return 0; |
1818 | } | 1803 | } |
1819 | 1804 | ||
1820 | struct btrfs_device *btrfs_find_next_active_device(struct btrfs_fs_devices *fs_devs, | 1805 | static struct btrfs_device * btrfs_find_next_active_device( |
1821 | struct btrfs_device *device) | 1806 | struct btrfs_fs_devices *fs_devs, struct btrfs_device *device) |
1822 | { | 1807 | { |
1823 | struct btrfs_device *next_device; | 1808 | struct btrfs_device *next_device; |
1824 | 1809 | ||
@@ -2031,19 +2016,20 @@ void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info, | |||
2031 | } | 2016 | } |
2032 | 2017 | ||
2033 | btrfs_close_bdev(srcdev); | 2018 | btrfs_close_bdev(srcdev); |
2034 | |||
2035 | call_rcu(&srcdev->rcu, free_device); | 2019 | call_rcu(&srcdev->rcu, free_device); |
2036 | 2020 | ||
2037 | /* | ||
2038 | * unless fs_devices is seed fs, num_devices shouldn't go | ||
2039 | * zero | ||
2040 | */ | ||
2041 | BUG_ON(!fs_devices->num_devices && !fs_devices->seeding); | ||
2042 | |||
2043 | /* if there are no devs left we'd rather delete the fs_devices */ | 2021 | /* if there are no devs left we'd rather delete the fs_devices */
2044 | if (!fs_devices->num_devices) { | 2022 | if (!fs_devices->num_devices) { |
2045 | struct btrfs_fs_devices *tmp_fs_devices; | 2023 | struct btrfs_fs_devices *tmp_fs_devices; |
2046 | 2024 | ||
2025 | /* | ||
2026 | * On a mounted FS, num_devices can't be zero unless it's a | ||
2027 | * seed. In case of a seed device being replaced, the replace | ||
2028 | * target is added to the sprout FS, so there will be no more | ||
2029 | * devices left under the seed FS. | ||
2030 | */ | ||
2031 | ASSERT(fs_devices->seeding); | ||
2032 | |||
2047 | tmp_fs_devices = fs_info->fs_devices; | 2033 | tmp_fs_devices = fs_info->fs_devices; |
2048 | while (tmp_fs_devices) { | 2034 | while (tmp_fs_devices) { |
2049 | if (tmp_fs_devices->seed == fs_devices) { | 2035 | if (tmp_fs_devices->seed == fs_devices) { |
@@ -2323,6 +2309,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2323 | u64 tmp; | 2309 | u64 tmp; |
2324 | int seeding_dev = 0; | 2310 | int seeding_dev = 0; |
2325 | int ret = 0; | 2311 | int ret = 0; |
2312 | bool unlocked = false; | ||
2326 | 2313 | ||
2327 | if (sb_rdonly(sb) && !fs_info->fs_devices->seeding) | 2314 | if (sb_rdonly(sb) && !fs_info->fs_devices->seeding) |
2328 | return -EROFS; | 2315 | return -EROFS; |
@@ -2399,7 +2386,10 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2399 | if (seeding_dev) { | 2386 | if (seeding_dev) { |
2400 | sb->s_flags &= ~MS_RDONLY; | 2387 | sb->s_flags &= ~MS_RDONLY; |
2401 | ret = btrfs_prepare_sprout(fs_info); | 2388 | ret = btrfs_prepare_sprout(fs_info); |
2402 | BUG_ON(ret); /* -ENOMEM */ | 2389 | if (ret) { |
2390 | btrfs_abort_transaction(trans, ret); | ||
2391 | goto error_trans; | ||
2392 | } | ||
2403 | } | 2393 | } |
2404 | 2394 | ||
2405 | device->fs_devices = fs_info->fs_devices; | 2395 | device->fs_devices = fs_info->fs_devices; |
@@ -2445,14 +2435,14 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2445 | mutex_unlock(&fs_info->chunk_mutex); | 2435 | mutex_unlock(&fs_info->chunk_mutex); |
2446 | if (ret) { | 2436 | if (ret) { |
2447 | btrfs_abort_transaction(trans, ret); | 2437 | btrfs_abort_transaction(trans, ret); |
2448 | goto error_trans; | 2438 | goto error_sysfs; |
2449 | } | 2439 | } |
2450 | } | 2440 | } |
2451 | 2441 | ||
2452 | ret = btrfs_add_device(trans, fs_info, device); | 2442 | ret = btrfs_add_device(trans, fs_info, device); |
2453 | if (ret) { | 2443 | if (ret) { |
2454 | btrfs_abort_transaction(trans, ret); | 2444 | btrfs_abort_transaction(trans, ret); |
2455 | goto error_trans; | 2445 | goto error_sysfs; |
2456 | } | 2446 | } |
2457 | 2447 | ||
2458 | if (seeding_dev) { | 2448 | if (seeding_dev) { |
@@ -2461,7 +2451,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2461 | ret = btrfs_finish_sprout(trans, fs_info); | 2451 | ret = btrfs_finish_sprout(trans, fs_info); |
2462 | if (ret) { | 2452 | if (ret) { |
2463 | btrfs_abort_transaction(trans, ret); | 2453 | btrfs_abort_transaction(trans, ret); |
2464 | goto error_trans; | 2454 | goto error_sysfs; |
2465 | } | 2455 | } |
2466 | 2456 | ||
2467 | /* Sprouting would change fsid of the mounted root, | 2457 | /* Sprouting would change fsid of the mounted root, |
@@ -2479,6 +2469,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2479 | if (seeding_dev) { | 2469 | if (seeding_dev) { |
2480 | mutex_unlock(&uuid_mutex); | 2470 | mutex_unlock(&uuid_mutex); |
2481 | up_write(&sb->s_umount); | 2471 | up_write(&sb->s_umount); |
2472 | unlocked = true; | ||
2482 | 2473 | ||
2483 | if (ret) /* transaction commit */ | 2474 | if (ret) /* transaction commit */ |
2484 | return ret; | 2475 | return ret; |
@@ -2491,7 +2482,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2491 | if (IS_ERR(trans)) { | 2482 | if (IS_ERR(trans)) { |
2492 | if (PTR_ERR(trans) == -ENOENT) | 2483 | if (PTR_ERR(trans) == -ENOENT) |
2493 | return 0; | 2484 | return 0; |
2494 | return PTR_ERR(trans); | 2485 | ret = PTR_ERR(trans); |
2486 | trans = NULL; | ||
2487 | goto error_sysfs; | ||
2495 | } | 2488 | } |
2496 | ret = btrfs_commit_transaction(trans); | 2489 | ret = btrfs_commit_transaction(trans); |
2497 | } | 2490 | } |
@@ -2500,14 +2493,18 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path | |||
2500 | update_dev_time(device_path); | 2493 | update_dev_time(device_path); |
2501 | return ret; | 2494 | return ret; |
2502 | 2495 | ||
2496 | error_sysfs: | ||
2497 | btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); | ||
2503 | error_trans: | 2498 | error_trans: |
2504 | btrfs_end_transaction(trans); | 2499 | if (seeding_dev) |
2500 | sb->s_flags |= MS_RDONLY; | ||
2501 | if (trans) | ||
2502 | btrfs_end_transaction(trans); | ||
2505 | rcu_string_free(device->name); | 2503 | rcu_string_free(device->name); |
2506 | btrfs_sysfs_rm_device_link(fs_info->fs_devices, device); | ||
2507 | kfree(device); | 2504 | kfree(device); |
2508 | error: | 2505 | error: |
2509 | blkdev_put(bdev, FMODE_EXCL); | 2506 | blkdev_put(bdev, FMODE_EXCL); |
2510 | if (seeding_dev) { | 2507 | if (seeding_dev && !unlocked) { |
2511 | mutex_unlock(&uuid_mutex); | 2508 | mutex_unlock(&uuid_mutex); |
2512 | up_write(&sb->s_umount); | 2509 | up_write(&sb->s_umount); |
2513 | } | 2510 | } |
@@ -4813,16 +4810,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
4813 | em_tree = &info->mapping_tree.map_tree; | 4810 | em_tree = &info->mapping_tree.map_tree; |
4814 | write_lock(&em_tree->lock); | 4811 | write_lock(&em_tree->lock); |
4815 | ret = add_extent_mapping(em_tree, em, 0); | 4812 | ret = add_extent_mapping(em_tree, em, 0); |
4816 | if (!ret) { | ||
4817 | list_add_tail(&em->list, &trans->transaction->pending_chunks); | ||
4818 | refcount_inc(&em->refs); | ||
4819 | } | ||
4820 | write_unlock(&em_tree->lock); | ||
4821 | if (ret) { | 4813 | if (ret) { |
4814 | write_unlock(&em_tree->lock); | ||
4822 | free_extent_map(em); | 4815 | free_extent_map(em); |
4823 | goto error; | 4816 | goto error; |
4824 | } | 4817 | } |
4825 | 4818 | ||
4819 | list_add_tail(&em->list, &trans->transaction->pending_chunks); | ||
4820 | refcount_inc(&em->refs); | ||
4821 | write_unlock(&em_tree->lock); | ||
4822 | |||
4826 | ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes); | 4823 | ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes); |
4827 | if (ret) | 4824 | if (ret) |
4828 | goto error_del_extent; | 4825 | goto error_del_extent; |
@@ -5695,10 +5692,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, | |||
5695 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | 5692 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { |
5696 | stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, | 5693 | stripe_nr = div_u64_rem(stripe_nr, map->num_stripes, |
5697 | &stripe_index); | 5694 | &stripe_index); |
5698 | if (op != BTRFS_MAP_WRITE && op != BTRFS_MAP_GET_READ_MIRRORS) | 5695 | if (!need_full_stripe(op)) |
5699 | mirror_num = 1; | 5696 | mirror_num = 1; |
5700 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | 5697 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { |
5701 | if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) | 5698 | if (need_full_stripe(op)) |
5702 | num_stripes = map->num_stripes; | 5699 | num_stripes = map->num_stripes; |
5703 | else if (mirror_num) | 5700 | else if (mirror_num) |
5704 | stripe_index = mirror_num - 1; | 5701 | stripe_index = mirror_num - 1; |
@@ -5711,7 +5708,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, | |||
5711 | } | 5708 | } |
5712 | 5709 | ||
5713 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | 5710 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { |
5714 | if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) { | 5711 | if (need_full_stripe(op)) { |
5715 | num_stripes = map->num_stripes; | 5712 | num_stripes = map->num_stripes; |
5716 | } else if (mirror_num) { | 5713 | } else if (mirror_num) { |
5717 | stripe_index = mirror_num - 1; | 5714 | stripe_index = mirror_num - 1; |
@@ -5725,7 +5722,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, | |||
5725 | stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); | 5722 | stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index); |
5726 | stripe_index *= map->sub_stripes; | 5723 | stripe_index *= map->sub_stripes; |
5727 | 5724 | ||
5728 | if (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS) | 5725 | if (need_full_stripe(op)) |
5729 | num_stripes = map->sub_stripes; | 5726 | num_stripes = map->sub_stripes; |
5730 | else if (mirror_num) | 5727 | else if (mirror_num) |
5731 | stripe_index += mirror_num - 1; | 5728 | stripe_index += mirror_num - 1; |
@@ -5740,9 +5737,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, | |||
5740 | } | 5737 | } |
5741 | 5738 | ||
5742 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { | 5739 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) { |
5743 | if (need_raid_map && | 5740 | if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { |
5744 | (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS || | ||
5745 | mirror_num > 1)) { | ||
5746 | /* push stripe_nr back to the start of the full stripe */ | 5741 | /* push stripe_nr back to the start of the full stripe */ |
5747 | stripe_nr = div64_u64(raid56_full_stripe_start, | 5742 | stripe_nr = div64_u64(raid56_full_stripe_start, |
5748 | stripe_len * nr_data_stripes(map)); | 5743 | stripe_len * nr_data_stripes(map)); |
@@ -5769,9 +5764,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, | |||
5769 | /* We distribute the parity blocks across stripes */ | 5764 | /* We distribute the parity blocks across stripes */ |
5770 | div_u64_rem(stripe_nr + stripe_index, map->num_stripes, | 5765 | div_u64_rem(stripe_nr + stripe_index, map->num_stripes, |
5771 | &stripe_index); | 5766 | &stripe_index); |
5772 | if ((op != BTRFS_MAP_WRITE && | 5767 | if (!need_full_stripe(op) && mirror_num <= 1) |
5773 | op != BTRFS_MAP_GET_READ_MIRRORS) && | ||
5774 | mirror_num <= 1) | ||
5775 | mirror_num = 1; | 5768 | mirror_num = 1; |
5776 | } | 5769 | } |
5777 | } else { | 5770 | } else { |
@@ -6033,7 +6026,7 @@ static void btrfs_end_bio(struct bio *bio) | |||
6033 | * this bio is actually up to date, we didn't | 6026 | * this bio is actually up to date, we didn't |
6034 | * go over the max number of errors | 6027 | * go over the max number of errors |
6035 | */ | 6028 | */ |
6036 | bio->bi_status = 0; | 6029 | bio->bi_status = BLK_STS_OK; |
6037 | } | 6030 | } |
6038 | 6031 | ||
6039 | btrfs_end_bbio(bbio, bio); | 6032 | btrfs_end_bbio(bbio, bio); |
@@ -6069,13 +6062,6 @@ static noinline void btrfs_schedule_bio(struct btrfs_device *device, | |||
6069 | return; | 6062 | return; |
6070 | } | 6063 | } |
6071 | 6064 | ||
6072 | /* | ||
6073 | * nr_async_bios allows us to reliably return congestion to the | ||
6074 | * higher layers. Otherwise, the async bio makes it appear we have | ||
6075 | * made progress against dirty pages when we've really just put it | ||
6076 | * on a queue for later | ||
6077 | */ | ||
6078 | atomic_inc(&fs_info->nr_async_bios); | ||
6079 | WARN_ON(bio->bi_next); | 6065 | WARN_ON(bio->bi_next); |
6080 | bio->bi_next = NULL; | 6066 | bio->bi_next = NULL; |
6081 | 6067 | ||
@@ -6144,7 +6130,10 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) | |||
6144 | 6130 | ||
6145 | btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; | 6131 | btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; |
6146 | bio->bi_iter.bi_sector = logical >> 9; | 6132 | bio->bi_iter.bi_sector = logical >> 9; |
6147 | bio->bi_status = BLK_STS_IOERR; | 6133 | if (atomic_read(&bbio->error) > bbio->max_errors) |
6134 | bio->bi_status = BLK_STS_IOERR; | ||
6135 | else | ||
6136 | bio->bi_status = BLK_STS_OK; | ||
6148 | btrfs_end_bbio(bbio, bio); | 6137 | btrfs_end_bbio(bbio, bio); |
6149 | } | 6138 | } |
6150 | } | 6139 | } |
@@ -6249,7 +6238,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices, | |||
6249 | 6238 | ||
6250 | device = btrfs_alloc_device(NULL, &devid, dev_uuid); | 6239 | device = btrfs_alloc_device(NULL, &devid, dev_uuid); |
6251 | if (IS_ERR(device)) | 6240 | if (IS_ERR(device)) |
6252 | return NULL; | 6241 | return device; |
6253 | 6242 | ||
6254 | list_add(&device->dev_list, &fs_devices->devices); | 6243 | list_add(&device->dev_list, &fs_devices->devices); |
6255 | device->fs_devices = fs_devices; | 6244 | device->fs_devices = fs_devices; |
@@ -6377,6 +6366,17 @@ static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info, | |||
6377 | return 0; | 6366 | return 0; |
6378 | } | 6367 | } |
6379 | 6368 | ||
6369 | static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, | ||
6370 | u64 devid, u8 *uuid, bool error) | ||
6371 | { | ||
6372 | if (error) | ||
6373 | btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing", | ||
6374 | devid, uuid); | ||
6375 | else | ||
6376 | btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", | ||
6377 | devid, uuid); | ||
6378 | } | ||
6379 | |||
6380 | static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, | 6380 | static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, |
6381 | struct extent_buffer *leaf, | 6381 | struct extent_buffer *leaf, |
6382 | struct btrfs_chunk *chunk) | 6382 | struct btrfs_chunk *chunk) |
@@ -6447,18 +6447,21 @@ static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key, | |||
6447 | if (!map->stripes[i].dev && | 6447 | if (!map->stripes[i].dev && |
6448 | !btrfs_test_opt(fs_info, DEGRADED)) { | 6448 | !btrfs_test_opt(fs_info, DEGRADED)) { |
6449 | free_extent_map(em); | 6449 | free_extent_map(em); |
6450 | btrfs_report_missing_device(fs_info, devid, uuid); | 6450 | btrfs_report_missing_device(fs_info, devid, uuid, true); |
6451 | return -EIO; | 6451 | return -ENOENT; |
6452 | } | 6452 | } |
6453 | if (!map->stripes[i].dev) { | 6453 | if (!map->stripes[i].dev) { |
6454 | map->stripes[i].dev = | 6454 | map->stripes[i].dev = |
6455 | add_missing_dev(fs_info->fs_devices, devid, | 6455 | add_missing_dev(fs_info->fs_devices, devid, |
6456 | uuid); | 6456 | uuid); |
6457 | if (!map->stripes[i].dev) { | 6457 | if (IS_ERR(map->stripes[i].dev)) { |
6458 | free_extent_map(em); | 6458 | free_extent_map(em); |
6459 | return -EIO; | 6459 | btrfs_err(fs_info, |
6460 | "failed to init missing dev %llu: %ld", | ||
6461 | devid, PTR_ERR(map->stripes[i].dev)); | ||
6462 | return PTR_ERR(map->stripes[i].dev); | ||
6460 | } | 6463 | } |
6461 | btrfs_report_missing_device(fs_info, devid, uuid); | 6464 | btrfs_report_missing_device(fs_info, devid, uuid, false); |
6462 | } | 6465 | } |
6463 | map->stripes[i].dev->in_fs_metadata = 1; | 6466 | map->stripes[i].dev->in_fs_metadata = 1; |
6464 | } | 6467 | } |
@@ -6577,19 +6580,28 @@ static int read_one_dev(struct btrfs_fs_info *fs_info, | |||
6577 | device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid); | 6580 | device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid); |
6578 | if (!device) { | 6581 | if (!device) { |
6579 | if (!btrfs_test_opt(fs_info, DEGRADED)) { | 6582 | if (!btrfs_test_opt(fs_info, DEGRADED)) { |
6580 | btrfs_report_missing_device(fs_info, devid, dev_uuid); | 6583 | btrfs_report_missing_device(fs_info, devid, |
6581 | return -EIO; | 6584 | dev_uuid, true); |
6585 | return -ENOENT; | ||
6582 | } | 6586 | } |
6583 | 6587 | ||
6584 | device = add_missing_dev(fs_devices, devid, dev_uuid); | 6588 | device = add_missing_dev(fs_devices, devid, dev_uuid); |
6585 | if (!device) | 6589 | if (IS_ERR(device)) { |
6586 | return -ENOMEM; | 6590 | btrfs_err(fs_info, |
6587 | btrfs_report_missing_device(fs_info, devid, dev_uuid); | 6591 | "failed to add missing dev %llu: %ld", |
6592 | devid, PTR_ERR(device)); | ||
6593 | return PTR_ERR(device); | ||
6594 | } | ||
6595 | btrfs_report_missing_device(fs_info, devid, dev_uuid, false); | ||
6588 | } else { | 6596 | } else { |
6589 | if (!device->bdev) { | 6597 | if (!device->bdev) { |
6590 | btrfs_report_missing_device(fs_info, devid, dev_uuid); | 6598 | if (!btrfs_test_opt(fs_info, DEGRADED)) { |
6591 | if (!btrfs_test_opt(fs_info, DEGRADED)) | 6599 | btrfs_report_missing_device(fs_info, |
6592 | return -EIO; | 6600 | devid, dev_uuid, true); |
6601 | return -ENOENT; | ||
6602 | } | ||
6603 | btrfs_report_missing_device(fs_info, devid, | ||
6604 | dev_uuid, false); | ||
6593 | } | 6605 | } |
6594 | 6606 | ||
6595 | if(!device->bdev && !device->missing) { | 6607 | if(!device->bdev && !device->missing) { |
@@ -6756,12 +6768,6 @@ out_short_read: | |||
6756 | return -EIO; | 6768 | return -EIO; |
6757 | } | 6769 | } |
6758 | 6770 | ||
6759 | void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid, | ||
6760 | u8 *uuid) | ||
6761 | { | ||
6762 | btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing", devid, uuid); | ||
6763 | } | ||
6764 | |||
6765 | /* | 6771 | /* |
6766 | * Check if all chunks in the fs are OK for read-write degraded mount | 6772 | * Check if all chunks in the fs are OK for read-write degraded mount |
6767 | * | 6773 | * |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 6108fdfec67f..ff15208344a7 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -542,7 +542,5 @@ void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info); | |||
542 | void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); | 542 | void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info); |
543 | 543 | ||
544 | bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info); | 544 | bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info); |
545 | void btrfs_report_missing_device(struct btrfs_fs_info *fs_info, u64 devid, | ||
546 | u8 *uuid); | ||
547 | 545 | ||
548 | #endif | 546 | #endif |
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c index c248f9286366..2b52950dc2c6 100644 --- a/fs/btrfs/zlib.c +++ b/fs/btrfs/zlib.c | |||
@@ -37,6 +37,7 @@ struct workspace { | |||
37 | z_stream strm; | 37 | z_stream strm; |
38 | char *buf; | 38 | char *buf; |
39 | struct list_head list; | 39 | struct list_head list; |
40 | int level; | ||
40 | }; | 41 | }; |
41 | 42 | ||
42 | static void zlib_free_workspace(struct list_head *ws) | 43 | static void zlib_free_workspace(struct list_head *ws) |
@@ -96,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws, | |||
96 | *total_out = 0; | 97 | *total_out = 0; |
97 | *total_in = 0; | 98 | *total_in = 0; |
98 | 99 | ||
99 | if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) { | 100 | if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) { |
100 | pr_warn("BTRFS: deflateInit failed\n"); | 101 | pr_warn("BTRFS: deflateInit failed\n"); |
101 | ret = -EIO; | 102 | ret = -EIO; |
102 | goto out; | 103 | goto out; |
@@ -402,10 +403,22 @@ next: | |||
402 | return ret; | 403 | return ret; |
403 | } | 404 | } |
404 | 405 | ||
406 | static void zlib_set_level(struct list_head *ws, unsigned int type) | ||
407 | { | ||
408 | struct workspace *workspace = list_entry(ws, struct workspace, list); | ||
409 | unsigned level = (type & 0xF0) >> 4; | ||
410 | |||
411 | if (level > 9) | ||
412 | level = 9; | ||
413 | |||
414 | workspace->level = level > 0 ? level : 3; | ||
415 | } | ||
416 | |||
405 | const struct btrfs_compress_op btrfs_zlib_compress = { | 417 | const struct btrfs_compress_op btrfs_zlib_compress = { |
406 | .alloc_workspace = zlib_alloc_workspace, | 418 | .alloc_workspace = zlib_alloc_workspace, |
407 | .free_workspace = zlib_free_workspace, | 419 | .free_workspace = zlib_free_workspace, |
408 | .compress_pages = zlib_compress_pages, | 420 | .compress_pages = zlib_compress_pages, |
409 | .decompress_bio = zlib_decompress_bio, | 421 | .decompress_bio = zlib_decompress_bio, |
410 | .decompress = zlib_decompress, | 422 | .decompress = zlib_decompress, |
423 | .set_level = zlib_set_level, | ||
411 | }; | 424 | }; |
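zlib_set_level() above decodes the mount-time compression level from the upper four bits of the type value, clamps it to zlib's maximum of 9, and falls back to the historical default of 3 when no level was requested. A standalone sketch of that nibble encoding; the packing helper is inferred from the decoder shown here, and the BTRFS_COMPRESS_ZLIB value is an assumption:

#include <stdio.h>

#define ZLIB_MAX_LEVEL		9
#define ZLIB_DEFAULT_LEVEL	3

/* Pack a type and level the way zlib_set_level() unpacks them. */
static unsigned int pack_type_level(unsigned int type, unsigned int level)
{
	return (type & 0x0F) | ((level & 0x0F) << 4);
}

static unsigned int unpack_level(unsigned int type)
{
	unsigned int level = (type & 0xF0) >> 4;

	if (level > ZLIB_MAX_LEVEL)
		level = ZLIB_MAX_LEVEL;
	return level > 0 ? level : ZLIB_DEFAULT_LEVEL;
}

int main(void)
{
	unsigned int zlib = 1;	/* assumed BTRFS_COMPRESS_ZLIB value */

	/* -o compress=zlib:9 and plain -o compress=zlib, respectively. */
	printf("zlib:9 -> level %u\n", unpack_level(pack_type_level(zlib, 9)));
	printf("zlib   -> level %u\n", unpack_level(pack_type_level(zlib, 0)));
	return 0;
}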
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 607ce47b483a..17f2dd8fddb8 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c | |||
@@ -423,10 +423,15 @@ finish: | |||
423 | return ret; | 423 | return ret; |
424 | } | 424 | } |
425 | 425 | ||
426 | static void zstd_set_level(struct list_head *ws, unsigned int type) | ||
427 | { | ||
428 | } | ||
429 | |||
426 | const struct btrfs_compress_op btrfs_zstd_compress = { | 430 | const struct btrfs_compress_op btrfs_zstd_compress = { |
427 | .alloc_workspace = zstd_alloc_workspace, | 431 | .alloc_workspace = zstd_alloc_workspace, |
428 | .free_workspace = zstd_free_workspace, | 432 | .free_workspace = zstd_free_workspace, |
429 | .compress_pages = zstd_compress_pages, | 433 | .compress_pages = zstd_compress_pages, |
430 | .decompress_bio = zstd_decompress_bio, | 434 | .decompress_bio = zstd_decompress_bio, |
431 | .decompress = zstd_decompress, | 435 | .decompress = zstd_decompress, |
436 | .set_level = zstd_set_level, | ||
432 | }; | 437 | }; |
diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 32d0c1fe2bfa..4342a329821f 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h | |||
@@ -29,6 +29,13 @@ struct btrfs_qgroup_extent_record; | |||
29 | struct btrfs_qgroup; | 29 | struct btrfs_qgroup; |
30 | struct prelim_ref; | 30 | struct prelim_ref; |
31 | 31 | ||
32 | TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS_NR); | ||
33 | TRACE_DEFINE_ENUM(FLUSH_DELAYED_ITEMS); | ||
34 | TRACE_DEFINE_ENUM(FLUSH_DELALLOC); | ||
35 | TRACE_DEFINE_ENUM(FLUSH_DELALLOC_WAIT); | ||
36 | TRACE_DEFINE_ENUM(ALLOC_CHUNK); | ||
37 | TRACE_DEFINE_ENUM(COMMIT_TRANS); | ||
38 | |||
32 | #define show_ref_type(type) \ | 39 | #define show_ref_type(type) \ |
33 | __print_symbolic(type, \ | 40 | __print_symbolic(type, \ |
34 | { BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \ | 41 | { BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \ |
@@ -792,11 +799,10 @@ DEFINE_EVENT(btrfs_delayed_data_ref, run_delayed_data_ref, | |||
792 | DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, | 799 | DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, |
793 | 800 | ||
794 | TP_PROTO(const struct btrfs_fs_info *fs_info, | 801 | TP_PROTO(const struct btrfs_fs_info *fs_info, |
795 | const struct btrfs_delayed_ref_node *ref, | ||
796 | const struct btrfs_delayed_ref_head *head_ref, | 802 | const struct btrfs_delayed_ref_head *head_ref, |
797 | int action), | 803 | int action), |
798 | 804 | ||
799 | TP_ARGS(fs_info, ref, head_ref, action), | 805 | TP_ARGS(fs_info, head_ref, action), |
800 | 806 | ||
801 | TP_STRUCT__entry_btrfs( | 807 | TP_STRUCT__entry_btrfs( |
802 | __field( u64, bytenr ) | 808 | __field( u64, bytenr ) |
@@ -806,8 +812,8 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, | |||
806 | ), | 812 | ), |
807 | 813 | ||
808 | TP_fast_assign_btrfs(fs_info, | 814 | TP_fast_assign_btrfs(fs_info, |
809 | __entry->bytenr = ref->bytenr; | 815 | __entry->bytenr = head_ref->bytenr; |
810 | __entry->num_bytes = ref->num_bytes; | 816 | __entry->num_bytes = head_ref->num_bytes; |
811 | __entry->action = action; | 817 | __entry->action = action; |
812 | __entry->is_data = head_ref->is_data; | 818 | __entry->is_data = head_ref->is_data; |
813 | ), | 819 | ), |
@@ -822,21 +828,19 @@ DECLARE_EVENT_CLASS(btrfs_delayed_ref_head, | |||
822 | DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, | 828 | DEFINE_EVENT(btrfs_delayed_ref_head, add_delayed_ref_head, |
823 | 829 | ||
824 | TP_PROTO(const struct btrfs_fs_info *fs_info, | 830 | TP_PROTO(const struct btrfs_fs_info *fs_info, |
825 | const struct btrfs_delayed_ref_node *ref, | ||
826 | const struct btrfs_delayed_ref_head *head_ref, | 831 | const struct btrfs_delayed_ref_head *head_ref, |
827 | int action), | 832 | int action), |
828 | 833 | ||
829 | TP_ARGS(fs_info, ref, head_ref, action) | 834 | TP_ARGS(fs_info, head_ref, action) |
830 | ); | 835 | ); |
831 | 836 | ||
832 | DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, | 837 | DEFINE_EVENT(btrfs_delayed_ref_head, run_delayed_ref_head, |
833 | 838 | ||
834 | TP_PROTO(const struct btrfs_fs_info *fs_info, | 839 | TP_PROTO(const struct btrfs_fs_info *fs_info, |
835 | const struct btrfs_delayed_ref_node *ref, | ||
836 | const struct btrfs_delayed_ref_head *head_ref, | 840 | const struct btrfs_delayed_ref_head *head_ref, |
837 | int action), | 841 | int action), |
838 | 842 | ||
839 | TP_ARGS(fs_info, ref, head_ref, action) | 843 | TP_ARGS(fs_info, head_ref, action) |
840 | ); | 844 | ); |
841 | 845 | ||
842 | #define show_chunk_type(type) \ | 846 | #define show_chunk_type(type) \ |
@@ -1692,6 +1696,27 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert, | |||
1692 | TP_ARGS(fs_info, oldref, newref, tree_size) | 1696 | TP_ARGS(fs_info, oldref, newref, tree_size) |
1693 | ); | 1697 | ); |
1694 | 1698 | ||
1699 | TRACE_EVENT(btrfs_inode_mod_outstanding_extents, | ||
1700 | TP_PROTO(struct btrfs_root *root, u64 ino, int mod), | ||
1701 | |||
1702 | TP_ARGS(root, ino, mod), | ||
1703 | |||
1704 | TP_STRUCT__entry_btrfs( | ||
1705 | __field( u64, root_objectid ) | ||
1706 | __field( u64, ino ) | ||
1707 | __field( int, mod ) | ||
1708 | ), | ||
1709 | |||
1710 | TP_fast_assign_btrfs(root->fs_info, | ||
1711 | __entry->root_objectid = root->objectid; | ||
1712 | __entry->ino = ino; | ||
1713 | __entry->mod = mod; | ||
1714 | ), | ||
1715 | |||
1716 | TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d", | ||
1717 | show_root_type(__entry->root_objectid), | ||
1718 | (unsigned long long)__entry->ino, __entry->mod) | ||
1719 | ); | ||
1695 | #endif /* _TRACE_BTRFS_H */ | 1720 | #endif /* _TRACE_BTRFS_H */ |
1696 | 1721 | ||
1697 | /* This part must be outside protection */ | 1722 | /* This part must be outside protection */ |
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 6cdfd12cd14c..ce615b75e855 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h | |||
@@ -609,10 +609,14 @@ struct btrfs_ioctl_ino_path_args { | |||
609 | struct btrfs_ioctl_logical_ino_args { | 609 | struct btrfs_ioctl_logical_ino_args { |
610 | __u64 logical; /* in */ | 610 | __u64 logical; /* in */ |
611 | __u64 size; /* in */ | 611 | __u64 size; /* in */ |
612 | __u64 reserved[4]; | 612 | __u64 reserved[3]; /* must be 0 for now */ |
613 | __u64 flags; /* in, v2 only */ | ||
613 | /* struct btrfs_data_container *inodes; out */ | 614 | /* struct btrfs_data_container *inodes; out */ |
614 | __u64 inodes; | 615 | __u64 inodes; |
615 | }; | 616 | }; |
617 | /* Return every ref to the extent, not just those containing the logical block. | ||
618 | * Requires logical == extent bytenr. */ | ||
619 | #define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET (1ULL << 0) | ||
616 | 620 | ||
617 | enum btrfs_dev_stat_values { | 621 | enum btrfs_dev_stat_values { |
618 | /* disk I/O failure stats */ | 622 | /* disk I/O failure stats */ |
@@ -836,5 +840,7 @@ enum btrfs_err_code { | |||
836 | struct btrfs_ioctl_feature_flags[3]) | 840 | struct btrfs_ioctl_feature_flags[3]) |
837 | #define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \ | 841 | #define BTRFS_IOC_RM_DEV_V2 _IOW(BTRFS_IOCTL_MAGIC, 58, \ |
838 | struct btrfs_ioctl_vol_args_v2) | 842 | struct btrfs_ioctl_vol_args_v2) |
843 | #define BTRFS_IOC_LOGICAL_INO_V2 _IOWR(BTRFS_IOCTL_MAGIC, 59, \ | ||
844 | struct btrfs_ioctl_logical_ino_args) | ||
839 | 845 | ||
840 | #endif /* _UAPI_LINUX_BTRFS_H */ | 846 | #endif /* _UAPI_LINUX_BTRFS_H */ |
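To exercise the new interface, a userspace caller fills struct btrfs_ioctl_logical_ino_args, points inodes at a caller-allocated struct btrfs_data_container, optionally sets the new IGNORE_OFFSET flag, and leaves the reserved words zero. A hedged sketch under the assumption that the installed uapi headers already carry the v2 definitions; the buffer size is arbitrary, the elem_cnt/3 triple counting follows the existing LOGICAL_INO container layout, and error handling is trimmed:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	struct btrfs_ioctl_logical_ino_args args;
	struct btrfs_data_container *inodes;
	size_t bufsize = 64 * 1024;	/* v2 also accepts larger buffers */
	int fd;

	if (argc != 3) {
		fprintf(stderr, "usage: %s <mountpoint> <logical>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	inodes = calloc(1, bufsize);
	memset(&args, 0, sizeof(args));		/* keeps reserved[] zeroed */
	args.logical = strtoull(argv[2], NULL, 0);
	args.size = bufsize;
	args.flags = BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;	/* v2 only */
	args.inodes = (__u64)(uintptr_t)inodes;

	if (ioctl(fd, BTRFS_IOC_LOGICAL_INO_V2, &args) < 0)
		perror("BTRFS_IOC_LOGICAL_INO_V2");
	else	/* results are (inum, offset, root) triples */
		printf("%u references found\n", inodes->elem_cnt / 3);
	free(inodes);
	close(fd);
	return 0;
}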
diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h index 8f659bb7badc..6d6e5da51527 100644 --- a/include/uapi/linux/btrfs_tree.h +++ b/include/uapi/linux/btrfs_tree.h | |||
@@ -733,6 +733,7 @@ struct btrfs_balance_item { | |||
733 | #define BTRFS_FILE_EXTENT_INLINE 0 | 733 | #define BTRFS_FILE_EXTENT_INLINE 0 |
734 | #define BTRFS_FILE_EXTENT_REG 1 | 734 | #define BTRFS_FILE_EXTENT_REG 1 |
735 | #define BTRFS_FILE_EXTENT_PREALLOC 2 | 735 | #define BTRFS_FILE_EXTENT_PREALLOC 2 |
736 | #define BTRFS_FILE_EXTENT_TYPES 2 | ||
736 | 737 | ||
737 | struct btrfs_file_extent_item { | 738 | struct btrfs_file_extent_item { |
738 | /* | 739 | /* |