author		Linus Torvalds <torvalds@linux-foundation.org>	2011-07-27 19:43:52 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-27 19:43:52 -0400
commit		22712200e175e0df5c7f9edfe6c6bf5c94c23b83 (patch)
tree		a3e332aab7f5a953ff4f12e67af2a0e5f32f5be5 /fs/btrfs
parent		597a67e0ba758e3d2239c81fbb648c6e69ec30a2 (diff)
parent		ff95acb6733d41a8d45feb0e18b96df25e610e78 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
Btrfs: make sure reserve_metadata_bytes doesn't leak out strange errors
Btrfs: use the commit_root for reading free_space_inode crcs
Btrfs: reduce extent_state lock contention for metadata
Btrfs: remove lockdep magic from btrfs_next_leaf
Btrfs: make a lockdep class for each root
Btrfs: switch the btrfs tree locks to reader/writer
Btrfs: fix deadlock when throttling transactions
Btrfs: stop using highmem for extent_buffers
Btrfs: fix BUG_ON() caused by ENOSPC when relocating space
Btrfs: tag pages for writeback in sync
Btrfs: fix enospc problems with delalloc
Btrfs: don't flush delalloc arbitrarily
Btrfs: use find_or_create_page instead of grab_cache_page
Btrfs: use a worker thread to do caching
Btrfs: fix how we merge extent states and deal with cached states
Btrfs: use the normal checksumming infrastructure for free space cache
Btrfs: serialize flushers in reserve_metadata_bytes
Btrfs: do transaction space reservation before joining the transaction
Btrfs: try to only do one btrfs_search_slot in do_setxattr
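
The headline change in the series above is the move from a single mutex-style lock per tree block to a reader/writer scheme: a btrfs_path now records, per level, which kind of lock it holds, and the helpers take or return that state explicitly. As a minimal sketch of the convention the diff below relies on (these constants come from the new fs/btrfs/locking.h, which appears in the diffstat but not in this excerpt):

	/* per-level lock state stored in path->locks[level] */
	#define BTRFS_WRITE_LOCK		1
	#define BTRFS_READ_LOCK			2
	#define BTRFS_WRITE_LOCK_BLOCKING	3
	#define BTRFS_READ_LOCK_BLOCKING	4

A value of 0 still means "not locked", which is why call sites such as btrfs_clear_path_blocking(path, NULL, 0) can pass 0 for "no held buffer".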
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/btrfs_inode.h       |  16
-rw-r--r--  fs/btrfs/ctree.c             | 457
-rw-r--r--  fs/btrfs/ctree.h             |  14
-rw-r--r--  fs/btrfs/delayed-inode.c     |   2
-rw-r--r--  fs/btrfs/dir-item.c          |   9
-rw-r--r--  fs/btrfs/disk-io.c           | 116
-rw-r--r--  fs/btrfs/disk-io.h           |  10
-rw-r--r--  fs/btrfs/extent-tree.c       | 285
-rw-r--r--  fs/btrfs/extent_io.c         | 168
-rw-r--r--  fs/btrfs/extent_io.h         |  35
-rw-r--r--  fs/btrfs/file-item.c         |  41
-rw-r--r--  fs/btrfs/file.c              |  11
-rw-r--r--  fs/btrfs/free-space-cache.c  | 173
-rw-r--r--  fs/btrfs/inode.c             |  90
-rw-r--r--  fs/btrfs/ioctl.c             |   8
-rw-r--r--  fs/btrfs/locking.c           | 280
-rw-r--r--  fs/btrfs/locking.h           |  36
-rw-r--r--  fs/btrfs/relocation.c        |   3
-rw-r--r--  fs/btrfs/struct-funcs.c      | 100
-rw-r--r--  fs/btrfs/transaction.c       |  47
-rw-r--r--  fs/btrfs/tree-log.c          |   6
-rw-r--r--  fs/btrfs/volumes.c           |   2
-rw-r--r--  fs/btrfs/xattr.c             |  66
23 files changed, 965 insertions, 1010 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 52d7eca8c7b..502b9e98867 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -34,6 +34,9 @@ struct btrfs_inode {
 	 */
 	struct btrfs_key location;
 
+	/* Lock for counters */
+	spinlock_t lock;
+
 	/* the extent_tree has caches of all the extent mappings to disk */
 	struct extent_map_tree extent_tree;
 
@@ -134,8 +137,8 @@ struct btrfs_inode {
 	 * items we think we'll end up using, and reserved_extents is the number
 	 * of extent items we've reserved metadata for.
 	 */
-	atomic_t outstanding_extents;
-	atomic_t reserved_extents;
+	unsigned outstanding_extents;
+	unsigned reserved_extents;
 
 	/*
 	 * ordered_data_close is set by truncate when a file that used
@@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
 	BTRFS_I(inode)->disk_i_size = size;
 }
 
+static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
+					     struct inode *inode)
+{
+	if (root == root->fs_info->tree_root ||
+	    BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
+		return true;
+	return false;
+}
+
 #endif
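The new BTRFS_I(inode)->lock spinlock takes over protection of outstanding_extents and reserved_extents, which this series converts from atomics to plain unsigned counters so a check-and-update in the enospc paths can happen as one unit. A sketch of the intended usage pattern (the consuming code is in fs/btrfs/inode.c, further down the diffstat):

	/* sketch: counters are now read and updated under the inode lock */
	spin_lock(&BTRFS_I(inode)->lock);
	BTRFS_I(inode)->outstanding_extents++;
	spin_unlock(&BTRFS_I(inode)->lock);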
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2e667868e0d..011cab3aca8 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
 {
 	int i;
 	for (i = 0; i < BTRFS_MAX_LEVEL; i++) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_set_lock_blocking(p->nodes[i]);
+		if (!p->nodes[i] || !p->locks[i])
+			continue;
+		btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]);
+		if (p->locks[i] == BTRFS_READ_LOCK)
+			p->locks[i] = BTRFS_READ_LOCK_BLOCKING;
+		else if (p->locks[i] == BTRFS_WRITE_LOCK)
+			p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING;
 	}
 }
 
@@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p)
  * for held
  */
 noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
-					struct extent_buffer *held)
+					struct extent_buffer *held, int held_rw)
 {
 	int i;
 
@@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p,
 	 * really sure by forcing the path to blocking before we clear
 	 * the path blocking.
 	 */
-	if (held)
-		btrfs_set_lock_blocking(held);
+	if (held) {
+		btrfs_set_lock_blocking_rw(held, held_rw);
+		if (held_rw == BTRFS_WRITE_LOCK)
+			held_rw = BTRFS_WRITE_LOCK_BLOCKING;
+		else if (held_rw == BTRFS_READ_LOCK)
+			held_rw = BTRFS_READ_LOCK_BLOCKING;
+	}
 	btrfs_set_path_blocking(p);
 #endif
 
 	for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) {
-		if (p->nodes[i] && p->locks[i])
-			btrfs_clear_lock_blocking(p->nodes[i]);
+		if (p->nodes[i] && p->locks[i]) {
+			btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]);
+			if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_WRITE_LOCK;
+			else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING)
+				p->locks[i] = BTRFS_READ_LOCK;
+		}
 	}
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	if (held)
-		btrfs_clear_lock_blocking(held);
+		btrfs_clear_lock_blocking_rw(held, held_rw);
 #endif
 }
 
@@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p)
 		if (!p->nodes[i])
 			continue;
 		if (p->locks[i]) {
-			btrfs_tree_unlock(p->nodes[i]);
+			btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]);
 			p->locks[i] = 0;
 		}
 		free_extent_buffer(p->nodes[i]);
@@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
 	return eb;
 }
 
+/* loop around taking references on and locking the root node of the
+ * tree until you end up with a lock on the root.  A locked buffer
+ * is returned, with a reference held.
+ */
+struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+{
+	struct extent_buffer *eb;
+
+	while (1) {
+		eb = btrfs_root_node(root);
+		btrfs_tree_read_lock(eb);
+		if (eb == root->node)
+			break;
+		btrfs_tree_read_unlock(eb);
+		free_extent_buffer(eb);
+	}
+	return eb;
+}
+
 /* cowonly root (everything not a reference counted cow subvolume), just get
  * put onto a simple dirty list.  transaction.c walks this to make sure they
  * get properly updated on disk.
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 	for (i = start_slot; i < end_slot; i++) {
 		int close = 1;
 
-		if (!parent->map_token) {
-			map_extent_buffer(parent,
-					btrfs_node_key_ptr_offset(i),
-					sizeof(struct btrfs_key_ptr),
-					&parent->map_token, &parent->kaddr,
-					&parent->map_start, &parent->map_len,
-					KM_USER1);
-		}
 		btrfs_node_key(parent, &disk_key, i);
 		if (!progress_passed && comp_keys(&disk_key, progress) < 0)
 			continue;
@@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 			last_block = blocknr;
 			continue;
 		}
-		if (parent->map_token) {
-			unmap_extent_buffer(parent, parent->map_token,
-					    KM_USER1);
-			parent->map_token = NULL;
-		}
 
 		cur = btrfs_find_tree_block(root, blocknr, blocksize);
 		if (cur)
@@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		btrfs_tree_unlock(cur);
 		free_extent_buffer(cur);
 	}
-	if (parent->map_token) {
-		unmap_extent_buffer(parent, parent->map_token,
-				    KM_USER1);
-		parent->map_token = NULL;
-	}
 	return err;
 }
 
@@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 	struct btrfs_disk_key *tmp = NULL;
 	struct btrfs_disk_key unaligned;
 	unsigned long offset;
-	char *map_token = NULL;
 	char *kaddr = NULL;
 	unsigned long map_start = 0;
 	unsigned long map_len = 0;
@@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 		mid = (low + high) / 2;
 		offset = p + mid * item_size;
 
-		if (!map_token || offset < map_start ||
+		if (!kaddr || offset < map_start ||
 		    (offset + sizeof(struct btrfs_disk_key)) >
 		    map_start + map_len) {
-			if (map_token) {
-				unmap_extent_buffer(eb, map_token, KM_USER0);
-				map_token = NULL;
-			}
 
 			err = map_private_extent_buffer(eb, offset,
 						sizeof(struct btrfs_disk_key),
-						&map_token, &kaddr,
-						&map_start, &map_len, KM_USER0);
+						&kaddr, &map_start, &map_len);
 
 			if (!err) {
 				tmp = (struct btrfs_disk_key *)(kaddr + offset -
@@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb,
 			high = mid;
 		else {
 			*slot = mid;
-			if (map_token)
-				unmap_extent_buffer(eb, map_token, KM_USER0);
 			return 0;
 		}
 	}
 	*slot = low;
-	if (map_token)
-		unmap_extent_buffer(eb, map_token, KM_USER0);
 	return 1;
 }
 
@@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 
 	mid = path->nodes[level];
 
-	WARN_ON(!path->locks[level]);
+	WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK &&
+		path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING);
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
 
 	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
@@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root,
 	u32 nr;
 	u32 blocksize;
 	u32 nscan = 0;
-	bool map = true;
 
 	if (level != 1)
 		return;
@@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root,
 
 	nritems = btrfs_header_nritems(node);
 	nr = slot;
-	if (node->map_token || path->skip_locking)
-		map = false;
 
 	while (1) {
-		if (map && !node->map_token) {
-			unsigned long offset = btrfs_node_key_ptr_offset(nr);
-			map_private_extent_buffer(node, offset,
-						sizeof(struct btrfs_key_ptr),
-						&node->map_token,
-						&node->kaddr,
-						&node->map_start,
-						&node->map_len, KM_USER1);
-		}
 		if (direction < 0) {
 			if (nr == 0)
 				break;
@@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((search <= target && target - search <= 65536) ||
 		    (search > target && search - target <= 65536)) {
 			gen = btrfs_node_ptr_generation(node, nr);
-			if (map && node->map_token) {
-				unmap_extent_buffer(node, node->map_token,
-						    KM_USER1);
-				node->map_token = NULL;
-			}
 			readahead_tree_block(root, search, blocksize, gen);
 			nread += blocksize;
 		}
@@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root,
 		if ((nread > 65536 || nscan > 32))
 			break;
 	}
-	if (map && node->map_token) {
-		unmap_extent_buffer(node, node->map_token, KM_USER1);
-		node->map_token = NULL;
-	}
 }
 
 /*
@@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
 
 		t = path->nodes[i];
 		if (i >= lowest_unlock && i > skip_level && path->locks[i]) {
-			btrfs_tree_unlock(t);
+			btrfs_tree_unlock_rw(t, path->locks[i]);
 			path->locks[i] = 0;
 		}
 	}
@@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
 			continue;
 		if (!path->locks[i])
 			continue;
-		btrfs_tree_unlock(path->nodes[i]);
+		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
 		path->locks[i] = 0;
 	}
 }
@@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 		 * we can trust our generation number
 		 */
 		free_extent_buffer(tmp);
+		btrfs_set_path_blocking(p);
+
 		tmp = read_tree_block(root, blocknr, blocksize, gen);
 		if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
 			*eb_ret = tmp;
@@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 static int
 setup_nodes_for_search(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct btrfs_path *p,
-		       struct extent_buffer *b, int level, int ins_len)
+		       struct extent_buffer *b, int level, int ins_len,
+		       int *write_lock_level)
 {
 	int ret;
 	if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >=
 	    BTRFS_NODEPTRS_PER_BLOCK(root) - 3) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = split_node(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		BUG_ON(sret > 0);
 		if (sret) {
@@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 2) {
 		int sret;
 
+		if (*write_lock_level < level + 1) {
+			*write_lock_level = level + 1;
+			btrfs_release_path(p);
+			goto again;
+		}
+
 		sret = reada_for_balance(root, p, level);
 		if (sret)
 			goto again;
 
 		btrfs_set_path_blocking(p);
 		sret = balance_level(trans, root, p, level);
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		if (sret) {
 			ret = sret;
@@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
 	int err;
 	int level;
 	int lowest_unlock = 1;
+	int root_lock;
+	/* everything at write_lock_level or lower must be write locked */
+	int write_lock_level = 0;
 	u8 lowest_level = 0;
 
 	lowest_level = p->lowest_level;
 	WARN_ON(lowest_level && ins_len > 0);
 	WARN_ON(p->nodes[0] != NULL);
 
-	if (ins_len < 0)
+	if (ins_len < 0) {
 		lowest_unlock = 2;
 
+		/* when we are removing items, we might have to go up to level
+		 * two as we update tree pointers  Make sure we keep write
+		 * for those levels as well
+		 */
+		write_lock_level = 2;
+	} else if (ins_len > 0) {
+		/*
+		 * for inserting items, make sure we have a write lock on
+		 * level 1 so we can update keys
+		 */
+		write_lock_level = 1;
+	}
+
+	if (!cow)
+		write_lock_level = -1;
+
+	if (cow && (p->keep_locks || p->lowest_level))
+		write_lock_level = BTRFS_MAX_LEVEL;
+
 again:
+	/*
+	 * we try very hard to do read locks on the root
+	 */
+	root_lock = BTRFS_READ_LOCK;
+	level = 0;
 	if (p->search_commit_root) {
+		/*
+		 * the commit roots are read only
+		 * so we always do read locks
+		 */
 		b = root->commit_root;
 		extent_buffer_get(b);
+		level = btrfs_header_level(b);
 		if (!p->skip_locking)
-			btrfs_tree_lock(b);
+			btrfs_tree_read_lock(b);
 	} else {
-		if (p->skip_locking)
+		if (p->skip_locking) {
 			b = btrfs_root_node(root);
-		else
-			b = btrfs_lock_root_node(root);
+			level = btrfs_header_level(b);
+		} else {
+			/* we don't know the level of the root node
+			 * until we actually have it read locked
+			 */
+			b = btrfs_read_lock_root_node(root);
+			level = btrfs_header_level(b);
+			if (level <= write_lock_level) {
+				/* whoops, must trade for write lock */
+				btrfs_tree_read_unlock(b);
+				free_extent_buffer(b);
+				b = btrfs_lock_root_node(root);
+				root_lock = BTRFS_WRITE_LOCK;
+
+				/* the level might have changed, check again */
+				level = btrfs_header_level(b);
+			}
+		}
 	}
+	p->nodes[level] = b;
+	if (!p->skip_locking)
+		p->locks[level] = root_lock;
 
 	while (b) {
 		level = btrfs_header_level(b);
@@ -1644,10 +1696,6 @@ again:
 		 * setup the path here so we can release it under lock
 		 * contention with the cow code
 		 */
-		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
 		if (cow) {
 			/*
 			 * if we don't really need to cow this block
@@ -1659,6 +1707,16 @@ again:
 
 			btrfs_set_path_blocking(p);
 
+			/*
+			 * must have write locks on this node and the
+			 * parent
+			 */
+			if (level + 1 > write_lock_level) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			err = btrfs_cow_block(trans, root, b,
 					      p->nodes[level + 1],
 					      p->slots[level + 1], &b);
@@ -1671,10 +1729,7 @@ cow_done:
 		BUG_ON(!cow && ins_len);
 
 		p->nodes[level] = b;
-		if (!p->skip_locking)
-			p->locks[level] = 1;
-
-		btrfs_clear_path_blocking(p, NULL);
+		btrfs_clear_path_blocking(p, NULL, 0);
 
 		/*
 		 * we have a lock on b and as long as we aren't changing
@@ -1700,7 +1755,7 @@ cow_done:
 			}
 			p->slots[level] = slot;
 			err = setup_nodes_for_search(trans, root, p, b, level,
-						     ins_len);
+						     ins_len, &write_lock_level);
 			if (err == -EAGAIN)
 				goto again;
 			if (err) {
@@ -1710,6 +1765,19 @@ cow_done:
 			b = p->nodes[level];
 			slot = p->slots[level];
 
+			/*
+			 * slot 0 is special, if we change the key
+			 * we have to update the parent pointer
+			 * which means we must have a write lock
+			 * on the parent
+			 */
+			if (slot == 0 && cow &&
+			    write_lock_level < level + 1) {
+				write_lock_level = level + 1;
+				btrfs_release_path(p);
+				goto again;
+			}
+
 			unlock_up(p, level, lowest_unlock);
 
 			if (level == lowest_level) {
@@ -1728,23 +1796,42 @@ cow_done:
 			}
 
 			if (!p->skip_locking) {
-				btrfs_clear_path_blocking(p, NULL);
-				err = btrfs_try_spin_lock(b);
-
-				if (!err) {
-					btrfs_set_path_blocking(p);
-					btrfs_tree_lock(b);
-					btrfs_clear_path_blocking(p, b);
+				level = btrfs_header_level(b);
+				if (level <= write_lock_level) {
+					err = btrfs_try_tree_write_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_WRITE_LOCK);
+					}
+					p->locks[level] = BTRFS_WRITE_LOCK;
+				} else {
+					err = btrfs_try_tree_read_lock(b);
+					if (!err) {
+						btrfs_set_path_blocking(p);
+						btrfs_tree_read_lock(b);
+						btrfs_clear_path_blocking(p, b,
+								  BTRFS_READ_LOCK);
+					}
+					p->locks[level] = BTRFS_READ_LOCK;
 				}
+				p->nodes[level] = b;
 			}
 		} else {
 			p->slots[level] = slot;
 			if (ins_len > 0 &&
 			    btrfs_leaf_free_space(root, b) < ins_len) {
+				if (write_lock_level < 1) {
+					write_lock_level = 1;
+					btrfs_release_path(p);
+					goto again;
+				}
+
 				btrfs_set_path_blocking(p);
 				err = split_leaf(trans, root, key,
 						 p, ins_len, ret == 0);
-				btrfs_clear_path_blocking(p, NULL);
+				btrfs_clear_path_blocking(p, NULL, 0);
 
 				BUG_ON(err > 0);
 				if (err) {
@@ -2025,7 +2112,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
 	add_root_to_dirty_list(root);
 	extent_buffer_get(c);
 	path->nodes[level] = c;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_WRITE_LOCK;
 	path->slots[level] = 0;
 	return 0;
 }
@@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 		if (path->slots[0] == i)
 			push_space += data_size;
 
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
-
 		this_item_size = btrfs_item_size(left, item);
 		if (this_item_size + sizeof(*item) + push_space > free_space)
 			break;
@@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 			break;
 		i--;
 	}
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	if (push_items == 0)
 		goto out_unlock;
@@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
 	push_space = BTRFS_LEAF_DATA_SIZE(root);
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 		push_space -= btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 	left_nritems -= push_items;
 	btrfs_set_header_nritems(left, left_nritems);
 
@@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 
 	for (i = 0; i < nr; i++) {
 		item = btrfs_item_nr(right, i);
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
 
 		if (!empty && push_items > 0) {
 			if (path->slots[0] < i)
@@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		push_space += this_item_size + sizeof(*item);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	if (push_items == 0) {
 		ret = 1;
 		goto out;
@@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 		u32 ioff;
 
 		item = btrfs_item_nr(left, i);
-		if (!left->map_token) {
-			map_extent_buffer(left, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&left->map_token, &left->kaddr,
-					&left->map_start, &left->map_len,
-					KM_USER1);
-		}
 
 		ioff = btrfs_item_offset(left, item);
 		btrfs_set_item_offset(left, item,
 		      ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size));
 	}
 	btrfs_set_header_nritems(left, old_left_nritems + push_items);
-	if (left->map_token) {
-		unmap_extent_buffer(left, left->map_token, KM_USER1);
-		left->map_token = NULL;
-	}
 
 	/* fixup right node */
 	if (push_items > right_nritems) {
@@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	for (i = 0; i < right_nritems; i++) {
 		item = btrfs_item_nr(right, i);
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		push_space = push_space - btrfs_item_size(right, item);
 		btrfs_set_item_offset(right, item, push_space);
 	}
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
 
 	btrfs_mark_buffer_dirty(left);
 	if (right_nritems)
@@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans,
 		struct btrfs_item *item = btrfs_item_nr(right, i);
 		u32 ioff;
 
-		if (!right->map_token) {
-			map_extent_buffer(right, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&right->map_token, &right->kaddr,
-					&right->map_start, &right->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(right, item);
 		btrfs_set_item_offset(right, item, ioff + rt_data_off);
 	}
 
-	if (right->map_token) {
-		unmap_extent_buffer(right, right->map_token, KM_USER1);
-		right->map_token = NULL;
-	}
-
 	btrfs_set_header_nritems(l, mid);
 	ret = 0;
 	btrfs_item_key(right, &disk_key, 0);
@@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff + size_diff);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	if (from_end) {
 		memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
@@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 		u32 ioff;
 		item = btrfs_item_nr(leaf, i);
 
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - data_size);
 	}
 
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the data */
 	memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) +
 		      data_end - data_size, btrfs_leaf_data(leaf) +
@@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans,
 	 *	item0..itemN ... dataN.offset..dataN.size .. data0.size
 	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans,
 	 *	item0..itemN ... dataN.offset..dataN.size .. data0.size
 	 */
 	/* first correct the data pointers */
-	WARN_ON(leaf->map_token);
 	for (i = slot; i < nritems; i++) {
 		u32 ioff;
 
 		item = btrfs_item_nr(leaf, i);
-		if (!leaf->map_token) {
-			map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-		}
-
 		ioff = btrfs_item_offset(leaf, item);
 		btrfs_set_item_offset(leaf, item, ioff - total_data);
 	}
-	if (leaf->map_token) {
-		unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-		leaf->map_token = NULL;
-	}
-
 	/* shift the items */
 	memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr),
 			      btrfs_item_nr_offset(slot),
@@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 			u32 ioff;
 
 			item = btrfs_item_nr(leaf, i);
-			if (!leaf->map_token) {
-				map_extent_buffer(leaf, (unsigned long)item,
-					sizeof(struct btrfs_item),
-					&leaf->map_token, &leaf->kaddr,
-					&leaf->map_start, &leaf->map_len,
-					KM_USER1);
-			}
 			ioff = btrfs_item_offset(leaf, item);
 			btrfs_set_item_offset(leaf, item, ioff + dsize);
 		}
 
-		if (leaf->map_token) {
-			unmap_extent_buffer(leaf, leaf->map_token, KM_USER1);
-			leaf->map_token = NULL;
-		}
-
 		memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot),
 			      btrfs_item_nr_offset(slot + nr),
 			      sizeof(struct btrfs_item) *
@@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 
 	WARN_ON(!path->keep_locks);
 again:
-	cur = btrfs_lock_root_node(root);
+	cur = btrfs_read_lock_root_node(root);
 	level = btrfs_header_level(cur);
 	WARN_ON(path->nodes[level]);
 	path->nodes[level] = cur;
-	path->locks[level] = 1;
+	path->locks[level] = BTRFS_READ_LOCK;
 
 	if (btrfs_header_generation(cur) < min_trans) {
 		ret = 1;
@@ -4098,12 +4049,12 @@ find_next_key:
 		cur = read_node_slot(root, cur, slot);
 		BUG_ON(!cur);
 
-		btrfs_tree_lock(cur);
+		btrfs_tree_read_lock(cur);
 
-		path->locks[level - 1] = 1;
+		path->locks[level - 1] = BTRFS_READ_LOCK;
 		path->nodes[level - 1] = cur;
 		unlock_up(path, level, 1);
-		btrfs_clear_path_blocking(path, NULL);
+		btrfs_clear_path_blocking(path, NULL, 0);
 	}
 out:
 	if (ret == 0)
@@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
 	u32 nritems;
 	int ret;
 	int old_spinning = path->leave_spinning;
-	int force_blocking = 0;
+	int next_rw_lock = 0;
 
 	nritems = btrfs_header_nritems(path->nodes[0]);
 	if (nritems == 0)
 		return 1;
 
-	/*
-	 * we take the blocks in an order that upsets lockdep.  Using
-	 * blocking mode is the only way around it.
-	 */
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-	force_blocking = 1;
-#endif
-
 	btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
 again:
 	level = 1;
 	next = NULL;
+	next_rw_lock = 0;
 	btrfs_release_path(path);
 
 	path->keep_locks = 1;
-
-	if (!force_blocking)
-		path->leave_spinning = 1;
+	path->leave_spinning = 1;
 
 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
 	path->keep_locks = 0;
@@ -4281,11 +4223,12 @@ again:
 		}
 
 		if (next) {
-			btrfs_tree_unlock(next);
+			btrfs_tree_unlock_rw(next, next_rw_lock);
 			free_extent_buffer(next);
 		}
 
 		next = c;
+		next_rw_lock = path->locks[level];
 		ret = read_block_for_search(NULL, root, path, &next, level,
 					    slot, &key);
 		if (ret == -EAGAIN)
@@ -4297,15 +4240,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 		break;
 	}
@@ -4314,14 +4256,13 @@ again:
 		level--;
 		c = path->nodes[level];
 		if (path->locks[level])
-			btrfs_tree_unlock(c);
+			btrfs_tree_unlock_rw(c, path->locks[level]);
 
 		free_extent_buffer(c);
 		path->nodes[level] = next;
 		path->slots[level] = 0;
 		if (!path->skip_locking)
-			path->locks[level] = 1;
-
+			path->locks[level] = next_rw_lock;
 		if (!level)
 			break;
 
@@ -4336,16 +4277,14 @@ again:
 		}
 
 		if (!path->skip_locking) {
-			btrfs_assert_tree_locked(path->nodes[level]);
-			ret = btrfs_try_spin_lock(next);
+			ret = btrfs_try_tree_read_lock(next);
 			if (!ret) {
 				btrfs_set_path_blocking(path);
-				btrfs_tree_lock(next);
-				if (!force_blocking)
-					btrfs_clear_path_blocking(path, next);
+				btrfs_tree_read_lock(next);
+				btrfs_clear_path_blocking(path, next,
+							  BTRFS_READ_LOCK);
 			}
-			if (force_blocking)
-				btrfs_set_lock_blocking(next);
+			next_rw_lock = BTRFS_READ_LOCK;
 		}
 	}
 	ret = 0;
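Every unlock site in ctree.c now hands the recorded lock state back, so a single helper can route to the matching primitive. A minimal sketch of what btrfs_tree_unlock_rw has to do (the real implementation lives in the new fs/btrfs/locking.c, which this excerpt does not reach):

	/* sketch: route an unlock by the state kept in path->locks[level] */
	void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
	{
		if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
			btrfs_tree_unlock(eb);
		else if (rw == BTRFS_READ_LOCK || rw == BTRFS_READ_LOCK_BLOCKING)
			btrfs_tree_read_unlock(eb);
		else
			BUG();
	}

This is also why btrfs_search_slot can start optimistically with read locks and retry from the top (goto again) with a higher write_lock_level whenever it discovers it needs write access closer to the root.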
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index fe9287b0649..365c4e1dde0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -755,6 +755,8 @@ struct btrfs_space_info {
 				   chunks for this space */
 	unsigned int chunk_alloc:1;	/* set if we are allocating a chunk */
 
+	unsigned int flush:1;		/* set if we are trying to make space */
+
 	unsigned int force_alloc;	/* set if we need to force a chunk
 					   alloc for this space */
 
@@ -764,7 +766,7 @@ struct btrfs_space_info {
 	struct list_head block_groups[BTRFS_NR_RAID_TYPES];
 	spinlock_t lock;
 	struct rw_semaphore groups_sem;
-	atomic_t caching_threads;
+	wait_queue_head_t wait;
 };
 
 struct btrfs_block_rsv {
@@ -824,6 +826,7 @@ struct btrfs_caching_control {
 	struct list_head list;
 	struct mutex mutex;
 	wait_queue_head_t wait;
+	struct btrfs_work work;
 	struct btrfs_block_group_cache *block_group;
 	u64 progress;
 	atomic_t count;
@@ -1032,6 +1035,8 @@ struct btrfs_fs_info {
 	struct btrfs_workers endio_write_workers;
 	struct btrfs_workers endio_freespace_worker;
 	struct btrfs_workers submit_workers;
+	struct btrfs_workers caching_workers;
+
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
 	 * the cow mechanism and make them safe to write.  It happens
@@ -2128,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
 
 /* extent-tree.c */
 static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
-						 int num_items)
+						 unsigned num_items)
 {
 	return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
 		3 * num_items;
@@ -2222,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde);
 void btrfs_clear_space_info_full(struct btrfs_fs_info *info);
 int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
-int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				  struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2330,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void);
 void btrfs_free_path(struct btrfs_path *p);
 void btrfs_set_path_blocking(struct btrfs_path *p);
 void btrfs_clear_path_blocking(struct btrfs_path *p,
-			       struct extent_buffer *held);
+			       struct extent_buffer *held, int held_rw);
 void btrfs_unlock_up_safe(struct btrfs_path *p, int level);
 
 int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
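For a concrete feel of btrfs_calc_trans_metadata_size: with 4K leaves and nodes (a common mkfs default of the era, assumed here rather than taken from this diff) and BTRFS_MAX_LEVEL == 8, reserving for a single item works out to

	(4096 + 4096 * (8 - 1)) * 3 * 1 = 98304 bytes

that is, one full root-to-leaf path of tree blocks, tripled to leave headroom for splits while the path is CoWed. Making num_items unsigned matches the callers, which only ever pass counts.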
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 98c68e658a9..b52c672f4c1 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans,
 	}
 
 	/* reset all the locked nodes in the patch to spinning locks. */
-	btrfs_clear_path_blocking(path, NULL);
+	btrfs_clear_path_blocking(path, NULL, 0);
 
 	/* insert the keys of the items */
 	ret = setup_items_for_insert(trans, root, path, keys, data_size,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 685f2593c4f..c360a848d97 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
 	data_size = sizeof(*dir_item) + name_len + data_len;
 	dir_item = insert_with_overflow(trans, root, path, &key, data_size,
 					name, name_len);
-	/*
-	 * FIXME: at some point we should handle xattr's that are larger than
-	 * what we can fit in our leaf.  We set location to NULL b/c we arent
-	 * pointing at anything else, that will change if we store the xattr
-	 * data in a separate inode.
-	 */
-	BUG_ON(IS_ERR(dir_item));
+	if (IS_ERR(dir_item))
+		return PTR_ERR(dir_item);
 	memset(&location, 0, sizeof(location));
 
 	leaf = path->nodes[0];
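With the FIXME and BUG_ON gone, an ERR_PTR from insert_with_overflow is simply propagated, so callers of btrfs_insert_xattr_item now see a negative errno instead of a crash when the xattr cannot be inserted. Illustrative caller-side handling (a sketch, not taken from this diff):

	ret = btrfs_insert_xattr_item(trans, root, path, ...);
	if (ret)
		goto out;	/* e.g. -EOVERFLOW if it cannot fit in a leaf */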
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b231ae13b26..07b3ac662e1 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -100,38 +100,83 @@ struct async_submit_bio { | |||
100 | struct btrfs_work work; | 100 | struct btrfs_work work; |
101 | }; | 101 | }; |
102 | 102 | ||
103 | /* These are used to set the lockdep class on the extent buffer locks. | 103 | /* |
104 | * The class is set by the readpage_end_io_hook after the buffer has | 104 | * Lockdep class keys for extent_buffer->lock's in this root. For a given |
105 | * passed csum validation but before the pages are unlocked. | 105 | * eb, the lockdep key is determined by the btrfs_root it belongs to and |
106 | * the level the eb occupies in the tree. | ||
107 | * | ||
108 | * Different roots are used for different purposes and may nest inside each | ||
109 | * other, so they require separate keysets. As lockdep keys should be ||
110 | * static, assign keysets according to the purpose of the root as indicated | ||
111 | * by btrfs_root->objectid. This ensures that all special purpose roots | ||
112 | * have separate keysets. | ||
106 | * | 113 | * |
107 | * The lockdep class is also set by btrfs_init_new_buffer on freshly | 114 | * Lock-nesting across peer nodes is always done with the immediate parent |
108 | * allocated blocks. | 115 | * node locked, thus preventing deadlock. As lockdep doesn't know this, |
116 | * use subclasses to avoid triggering lockdep warnings in such cases. ||
109 | * | 117 | * |
110 | * The class is based on the level in the tree block, which allows lockdep | 118 | * The key is set by the readpage_end_io_hook after the buffer has passed |
111 | * to know that lower nodes nest inside the locks of higher nodes. | 119 | * csum validation but before the pages are unlocked. It is also set by |
120 | * btrfs_init_new_buffer on freshly allocated blocks. | ||
112 | * | 121 | * |
113 | * We also add a check to make sure the highest level of the tree is | 122 | * We also add a check to make sure the highest level of the tree is the |
114 | * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this | 123 | * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code |
115 | * code needs update as well. | 124 | * needs update as well. |
116 | */ | 125 | */ |
117 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 126 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
118 | # if BTRFS_MAX_LEVEL != 8 | 127 | # if BTRFS_MAX_LEVEL != 8 |
119 | # error | 128 | # error |
120 | # endif | 129 | # endif |
121 | static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; | 130 | |
122 | static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | 131 | static struct btrfs_lockdep_keyset { |
123 | /* leaf */ | 132 | u64 id; /* root objectid */ |
124 | "btrfs-extent-00", | 133 | const char *name_stem; /* lock name stem */ |
125 | "btrfs-extent-01", | 134 | char names[BTRFS_MAX_LEVEL + 1][20]; |
126 | "btrfs-extent-02", | 135 | struct lock_class_key keys[BTRFS_MAX_LEVEL + 1]; |
127 | "btrfs-extent-03", | 136 | } btrfs_lockdep_keysets[] = { |
128 | "btrfs-extent-04", | 137 | { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" }, |
129 | "btrfs-extent-05", | 138 | { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" }, |
130 | "btrfs-extent-06", | 139 | { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" }, |
131 | "btrfs-extent-07", | 140 | { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, |
132 | /* highest possible level */ | 141 | { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, |
133 | "btrfs-extent-08", | 142 | { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, |
143 | { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, | ||
144 | { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, | ||
145 | { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, | ||
146 | { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, | ||
147 | { .id = 0, .name_stem = "tree" }, | ||
134 | }; | 148 | }; |
149 | |||
150 | void __init btrfs_init_lockdep(void) | ||
151 | { | ||
152 | int i, j; | ||
153 | |||
154 | /* initialize lockdep class names */ | ||
155 | for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) { | ||
156 | struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i]; | ||
157 | |||
158 | for (j = 0; j < ARRAY_SIZE(ks->names); j++) | ||
159 | snprintf(ks->names[j], sizeof(ks->names[j]), | ||
160 | "btrfs-%s-%02d", ks->name_stem, j); | ||
161 | } | ||
162 | } | ||
163 | |||
164 | void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, | ||
165 | int level) | ||
166 | { | ||
167 | struct btrfs_lockdep_keyset *ks; | ||
168 | |||
169 | BUG_ON(level >= ARRAY_SIZE(ks->keys)); | ||
170 | |||
171 | /* find the matching keyset, id 0 is the default entry */ | ||
172 | for (ks = btrfs_lockdep_keysets; ks->id; ks++) | ||
173 | if (ks->id == objectid) | ||
174 | break; | ||
175 | |||
176 | lockdep_set_class_and_name(&eb->lock, | ||
177 | &ks->keys[level], ks->names[level]); | ||
178 | } | ||
179 | |||
135 | #endif | 180 | #endif |
136 | 181 | ||
137 | /* | 182 | /* |
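
A runnable userspace sketch of the keyset mechanism: names are generated once at init, and lookups scan the table by objectid, with the id 0 entry doubling as both terminator and default. The two objectids shown are illustrative stand-ins for the real constants:

#include <stdio.h>
#include <stddef.h>

#define BTRFS_MAX_LEVEL 8
#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

/* one name per (root purpose, tree level) pair */
struct keyset {
        unsigned long long id;          /* root objectid, 0 = default */
        const char *stem;
        char names[BTRFS_MAX_LEVEL + 1][20];
} keysets[] = {
        { .id = 1, .stem = "root"   },  /* illustrative objectids */
        { .id = 2, .stem = "extent" },
        { .id = 0, .stem = "tree"   },  /* catch-all, must be last */
};

static void init_names(void)
{
        for (size_t i = 0; i < ARRAY_SIZE(keysets); i++)
                for (size_t j = 0; j < ARRAY_SIZE(keysets[i].names); j++)
                        snprintf(keysets[i].names[j],
                                 sizeof(keysets[i].names[j]),
                                 "btrfs-%s-%02zu", keysets[i].stem, j);
}

static struct keyset *lookup(unsigned long long objectid)
{
        struct keyset *ks;

        /* id 0 terminates the scan and doubles as the default entry */
        for (ks = keysets; ks->id; ks++)
                if (ks->id == objectid)
                        break;
        return ks;
}

int main(void)
{
        init_names();
        printf("%s\n", lookup(2)->names[3]);    /* btrfs-extent-03 */
        printf("%s\n", lookup(99)->names[0]);   /* btrfs-tree-00 */
        return 0;
}
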
@@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
217 | unsigned long len; | 262 | unsigned long len; |
218 | unsigned long cur_len; | 263 | unsigned long cur_len; |
219 | unsigned long offset = BTRFS_CSUM_SIZE; | 264 | unsigned long offset = BTRFS_CSUM_SIZE; |
220 | char *map_token = NULL; | ||
221 | char *kaddr; | 265 | char *kaddr; |
222 | unsigned long map_start; | 266 | unsigned long map_start; |
223 | unsigned long map_len; | 267 | unsigned long map_len; |
@@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
228 | len = buf->len - offset; | 272 | len = buf->len - offset; |
229 | while (len > 0) { | 273 | while (len > 0) { |
230 | err = map_private_extent_buffer(buf, offset, 32, | 274 | err = map_private_extent_buffer(buf, offset, 32, |
231 | &map_token, &kaddr, | 275 | &kaddr, &map_start, &map_len); |
232 | &map_start, &map_len, KM_USER0); | ||
233 | if (err) | 276 | if (err) |
234 | return 1; | 277 | return 1; |
235 | cur_len = min(len, map_len - (offset - map_start)); | 278 | cur_len = min(len, map_len - (offset - map_start)); |
@@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | |||
237 | crc, cur_len); | 280 | crc, cur_len); |
238 | len -= cur_len; | 281 | len -= cur_len; |
239 | offset += cur_len; | 282 | offset += cur_len; |
240 | unmap_extent_buffer(buf, map_token, KM_USER0); | ||
241 | } | 283 | } |
242 | if (csum_size > sizeof(inline_result)) { | 284 | if (csum_size > sizeof(inline_result)) { |
243 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); | 285 | result = kzalloc(csum_size * sizeof(char), GFP_NOFS); |
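
With extent buffer pages out of highmem (see the extent_io.c changes below), "mapping" a window of the buffer is plain pointer arithmetic, which is why the token and KM_USER0 arguments — and the unmap call at the bottom of the loop — can go away. A self-contained sketch of the resulting walk shape, using a flat buffer as a stand-in for the extent buffer:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
        /* stand-in for the extent buffer contents */
        static unsigned char buf[2 * PAGE_SIZE + 100];
        unsigned long offset = 32, sum = 0;
        unsigned long len = sizeof(buf) - offset;

        while (len > 0) {
                /* one page-sized window per iteration */
                unsigned long map_start = offset & ~(PAGE_SIZE - 1);
                unsigned long map_len = PAGE_SIZE;
                unsigned char *kaddr = buf + map_start; /* page_address() */
                unsigned long cur = MIN(len, map_len - (offset - map_start));

                for (unsigned long i = 0; i < cur; i++)
                        sum += kaddr[offset - map_start + i];
                /* no unmap step any more */
                len -= cur;
                offset += cur;
        }
        printf("checksummed up to offset %lu, sum %lu\n", offset, sum);
        return 0;
}
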
@@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
494 | return 0; | 536 | return 0; |
495 | } | 537 | } |
496 | 538 | ||
497 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
498 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | ||
499 | { | ||
500 | lockdep_set_class_and_name(&eb->lock, | ||
501 | &btrfs_eb_class[level], | ||
502 | btrfs_eb_name[level]); | ||
503 | } | ||
504 | #endif | ||
505 | |||
506 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 539 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
507 | struct extent_state *state) | 540 | struct extent_state *state) |
508 | { | 541 | { |
@@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
553 | } | 586 | } |
554 | found_level = btrfs_header_level(eb); | 587 | found_level = btrfs_header_level(eb); |
555 | 588 | ||
556 | btrfs_set_buffer_lockdep_class(eb, found_level); | 589 | btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), |
590 | eb, found_level); | ||
557 | 591 | ||
558 | ret = csum_tree_block(root, eb, 1); | 592 | ret = csum_tree_block(root, eb, 1); |
559 | if (ret) { | 593 | if (ret) { |
@@ -1598,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1598 | goto fail_bdi; | 1632 | goto fail_bdi; |
1599 | } | 1633 | } |
1600 | 1634 | ||
1601 | fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; | 1635 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
1602 | 1636 | ||
1603 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); | 1637 | INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); |
1604 | INIT_LIST_HEAD(&fs_info->trans_list); | 1638 | INIT_LIST_HEAD(&fs_info->trans_list); |
@@ -1802,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1802 | fs_info->thread_pool_size), | 1836 | fs_info->thread_pool_size), |
1803 | &fs_info->generic_worker); | 1837 | &fs_info->generic_worker); |
1804 | 1838 | ||
1839 | btrfs_init_workers(&fs_info->caching_workers, "cache", | ||
1840 | 2, &fs_info->generic_worker); | ||
1841 | |||
1805 | /* a higher idle thresh on the submit workers makes it much more | 1842 | /* a higher idle thresh on the submit workers makes it much more |
1806 | * likely that bios will be send down in a sane order to the | 1843 | * likely that bios will be send down in a sane order to the |
1807 | * devices | 1844 | * devices |
@@ -1855,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1855 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1892 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1856 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); | 1893 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); |
1857 | btrfs_start_workers(&fs_info->delayed_workers, 1); | 1894 | btrfs_start_workers(&fs_info->delayed_workers, 1); |
1895 | btrfs_start_workers(&fs_info->caching_workers, 1); | ||
1858 | 1896 | ||
1859 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1897 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1860 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1898 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -2112,6 +2150,7 @@ fail_sb_buffer: | |||
2112 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2150 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2113 | btrfs_stop_workers(&fs_info->submit_workers); | 2151 | btrfs_stop_workers(&fs_info->submit_workers); |
2114 | btrfs_stop_workers(&fs_info->delayed_workers); | 2152 | btrfs_stop_workers(&fs_info->delayed_workers); |
2153 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2115 | fail_alloc: | 2154 | fail_alloc: |
2116 | kfree(fs_info->delayed_root); | 2155 | kfree(fs_info->delayed_root); |
2117 | fail_iput: | 2156 | fail_iput: |
@@ -2577,6 +2616,7 @@ int close_ctree(struct btrfs_root *root) | |||
2577 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2616 | btrfs_stop_workers(&fs_info->endio_freespace_worker); |
2578 | btrfs_stop_workers(&fs_info->submit_workers); | 2617 | btrfs_stop_workers(&fs_info->submit_workers); |
2579 | btrfs_stop_workers(&fs_info->delayed_workers); | 2618 | btrfs_stop_workers(&fs_info->delayed_workers); |
2619 | btrfs_stop_workers(&fs_info->caching_workers); | ||
2580 | 2620 | ||
2581 | btrfs_close_devices(fs_info->fs_devices); | 2621 | btrfs_close_devices(fs_info->fs_devices); |
2582 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2622 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0b610a67aa..bec3ea4bd67 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page); | |||
87 | 87 | ||
88 | 88 | ||
89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 89 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
90 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); | 90 | void btrfs_init_lockdep(void); |
91 | void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
92 | struct extent_buffer *eb, int level); | ||
91 | #else | 93 | #else |
92 | static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, | 94 | static inline void btrfs_init_lockdep(void) |
93 | int level) | 95 | { } |
96 | static inline void btrfs_set_buffer_lockdep_class(u64 objectid, | ||
97 | struct extent_buffer *eb, int level) | ||
94 | { | 98 | { |
95 | } | 99 | } |
96 | #endif | 100 | #endif |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456fdb6..4d08ed79405 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
320 | return total_added; | 320 | return total_added; |
321 | } | 321 | } |
322 | 322 | ||
323 | static int caching_kthread(void *data) | 323 | static noinline void caching_thread(struct btrfs_work *work) |
324 | { | 324 | { |
325 | struct btrfs_block_group_cache *block_group = data; | 325 | struct btrfs_block_group_cache *block_group; |
326 | struct btrfs_fs_info *fs_info = block_group->fs_info; | 326 | struct btrfs_fs_info *fs_info; |
327 | struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; | 327 | struct btrfs_caching_control *caching_ctl; |
328 | struct btrfs_root *extent_root = fs_info->extent_root; | 328 | struct btrfs_root *extent_root; |
329 | struct btrfs_path *path; | 329 | struct btrfs_path *path; |
330 | struct extent_buffer *leaf; | 330 | struct extent_buffer *leaf; |
331 | struct btrfs_key key; | 331 | struct btrfs_key key; |
@@ -334,9 +334,14 @@ static int caching_kthread(void *data) | |||
334 | u32 nritems; | 334 | u32 nritems; |
335 | int ret = 0; | 335 | int ret = 0; |
336 | 336 | ||
337 | caching_ctl = container_of(work, struct btrfs_caching_control, work); | ||
338 | block_group = caching_ctl->block_group; | ||
339 | fs_info = block_group->fs_info; | ||
340 | extent_root = fs_info->extent_root; | ||
341 | |||
337 | path = btrfs_alloc_path(); | 342 | path = btrfs_alloc_path(); |
338 | if (!path) | 343 | if (!path) |
339 | return -ENOMEM; | 344 | goto out; |
340 | 345 | ||
341 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 346 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
342 | 347 | ||
@@ -433,13 +438,11 @@ err: | |||
433 | free_excluded_extents(extent_root, block_group); | 438 | free_excluded_extents(extent_root, block_group); |
434 | 439 | ||
435 | mutex_unlock(&caching_ctl->mutex); | 440 | mutex_unlock(&caching_ctl->mutex); |
441 | out: | ||
436 | wake_up(&caching_ctl->wait); | 442 | wake_up(&caching_ctl->wait); |
437 | 443 | ||
438 | put_caching_control(caching_ctl); | 444 | put_caching_control(caching_ctl); |
439 | atomic_dec(&block_group->space_info->caching_threads); | ||
440 | btrfs_put_block_group(block_group); | 445 | btrfs_put_block_group(block_group); |
441 | |||
442 | return 0; | ||
443 | } | 446 | } |
444 | 447 | ||
445 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 448 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
@@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
449 | { | 452 | { |
450 | struct btrfs_fs_info *fs_info = cache->fs_info; | 453 | struct btrfs_fs_info *fs_info = cache->fs_info; |
451 | struct btrfs_caching_control *caching_ctl; | 454 | struct btrfs_caching_control *caching_ctl; |
452 | struct task_struct *tsk; | ||
453 | int ret = 0; | 455 | int ret = 0; |
454 | 456 | ||
455 | smp_mb(); | 457 | smp_mb(); |
@@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
501 | caching_ctl->progress = cache->key.objectid; | 503 | caching_ctl->progress = cache->key.objectid; |
502 | /* one for caching kthread, one for caching block group list */ | 504 | /* one for caching kthread, one for caching block group list */ |
503 | atomic_set(&caching_ctl->count, 2); | 505 | atomic_set(&caching_ctl->count, 2); |
506 | caching_ctl->work.func = caching_thread; | ||
504 | 507 | ||
505 | spin_lock(&cache->lock); | 508 | spin_lock(&cache->lock); |
506 | if (cache->cached != BTRFS_CACHE_NO) { | 509 | if (cache->cached != BTRFS_CACHE_NO) { |
@@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
516 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); | 519 | list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); |
517 | up_write(&fs_info->extent_commit_sem); | 520 | up_write(&fs_info->extent_commit_sem); |
518 | 521 | ||
519 | atomic_inc(&cache->space_info->caching_threads); | ||
520 | btrfs_get_block_group(cache); | 522 | btrfs_get_block_group(cache); |
521 | 523 | ||
522 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | 524 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); |
523 | cache->key.objectid); | ||
524 | if (IS_ERR(tsk)) { | ||
525 | ret = PTR_ERR(tsk); | ||
526 | printk(KERN_ERR "error running thread %d\n", ret); | ||
527 | BUG(); | ||
528 | } | ||
529 | 525 | ||
530 | return ret; | 526 | return ret; |
531 | } | 527 | } |
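
Since a work handler only receives the btrfs_work pointer, the work item is embedded in the caching control and the handler recovers its container via container_of(), as caching_thread() does above. A minimal userspace model of the pattern (the struct fields are stand-ins for the real ones):

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct btrfs_work {
        void (*func)(struct btrfs_work *work);
};

struct caching_control {
        const char *block_group;        /* stand-in for the real fields */
        struct btrfs_work work;         /* embedded work item */
};

static void caching_thread(struct btrfs_work *work)
{
        struct caching_control *ctl =
                container_of(work, struct caching_control, work);

        printf("caching %s\n", ctl->block_group);
}

int main(void)
{
        struct caching_control ctl = { .block_group = "bg 12345" };

        ctl.work.func = caching_thread; /* cache_block_group() does this */
        ctl.work.func(&ctl.work);       /* the worker pool does this */
        return 0;
}
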
@@ -2932,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2932 | found->full = 0; | 2928 | found->full = 0; |
2933 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; | 2929 | found->force_alloc = CHUNK_ALLOC_NO_FORCE; |
2934 | found->chunk_alloc = 0; | 2930 | found->chunk_alloc = 0; |
2931 | found->flush = 0; | ||
2932 | init_waitqueue_head(&found->wait); | ||
2935 | *space_info = found; | 2933 | *space_info = found; |
2936 | list_add_rcu(&found->list, &info->space_info); | 2934 | list_add_rcu(&found->list, &info->space_info); |
2937 | atomic_set(&found->caching_threads, 0); | ||
2938 | return 0; | 2935 | return 0; |
2939 | } | 2936 | } |
2940 | 2937 | ||
@@ -3314,6 +3311,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3314 | if (reserved == 0) | 3311 | if (reserved == 0) |
3315 | return 0; | 3312 | return 0; |
3316 | 3313 | ||
3314 | smp_mb(); | ||
3315 | if (root->fs_info->delalloc_bytes == 0) { | ||
3316 | if (trans) | ||
3317 | return 0; | ||
3318 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3319 | return 0; | ||
3320 | } | ||
3321 | |||
3317 | max_reclaim = min(reserved, to_reclaim); | 3322 | max_reclaim = min(reserved, to_reclaim); |
3318 | 3323 | ||
3319 | while (loops < 1024) { | 3324 | while (loops < 1024) { |
@@ -3356,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, | |||
3356 | } | 3361 | } |
3357 | 3362 | ||
3358 | } | 3363 | } |
3364 | if (reclaimed >= to_reclaim && !trans) | ||
3365 | btrfs_wait_ordered_extents(root, 0, 0); | ||
3359 | return reclaimed >= to_reclaim; | 3366 | return reclaimed >= to_reclaim; |
3360 | } | 3367 | } |
3361 | 3368 | ||
@@ -3380,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, | |||
3380 | u64 num_bytes = orig_bytes; | 3387 | u64 num_bytes = orig_bytes; |
3381 | int retries = 0; | 3388 | int retries = 0; |
3382 | int ret = 0; | 3389 | int ret = 0; |
3383 | bool reserved = false; | ||
3384 | bool committed = false; | 3390 | bool committed = false; |
3391 | bool flushing = false; | ||
3385 | 3392 | ||
3386 | again: | 3393 | again: |
3387 | ret = -ENOSPC; | 3394 | ret = 0; |
3388 | if (reserved) | ||
3389 | num_bytes = 0; | ||
3390 | |||
3391 | spin_lock(&space_info->lock); | 3395 | spin_lock(&space_info->lock); |
3396 | /* | ||
3397 | * We only want to wait if somebody other than us is flushing and we are | ||
3398 | * actually alloed to flush. | ||
3399 | */ | ||
3400 | while (flush && !flushing && space_info->flush) { | ||
3401 | spin_unlock(&space_info->lock); | ||
3402 | /* | ||
3403 | * If we have a trans handle we can't wait because the flusher | ||
3404 | * may have to commit the transaction, which would mean we would | ||
3405 | * deadlock since we are waiting for the flusher to finish, but | ||
3406 | * hold the current transaction open. | ||
3407 | */ | ||
3408 | if (trans) | ||
3409 | return -EAGAIN; | ||
3410 | ret = wait_event_interruptible(space_info->wait, | ||
3411 | !space_info->flush); | ||
3412 | /* Must have been interrupted, return */ | ||
3413 | if (ret) | ||
3414 | return -EINTR; | ||
3415 | |||
3416 | spin_lock(&space_info->lock); | ||
3417 | } | ||
3418 | |||
3419 | ret = -ENOSPC; | ||
3392 | unused = space_info->bytes_used + space_info->bytes_reserved + | 3420 | unused = space_info->bytes_used + space_info->bytes_reserved + |
3393 | space_info->bytes_pinned + space_info->bytes_readonly + | 3421 | space_info->bytes_pinned + space_info->bytes_readonly + |
3394 | space_info->bytes_may_use; | 3422 | space_info->bytes_may_use; |
@@ -3403,8 +3431,7 @@ again: | |||
3403 | if (unused <= space_info->total_bytes) { | 3431 | if (unused <= space_info->total_bytes) { |
3404 | unused = space_info->total_bytes - unused; | 3432 | unused = space_info->total_bytes - unused; |
3405 | if (unused >= num_bytes) { | 3433 | if (unused >= num_bytes) { |
3406 | if (!reserved) | 3434 | space_info->bytes_reserved += orig_bytes; |
3407 | space_info->bytes_reserved += orig_bytes; | ||
3408 | ret = 0; | 3435 | ret = 0; |
3409 | } else { | 3436 | } else { |
3410 | /* | 3437 | /* |
@@ -3429,17 +3456,14 @@ again: | |||
3429 | * to reclaim space we can actually use it instead of somebody else | 3456 | * to reclaim space we can actually use it instead of somebody else |
3430 | * stealing it from us. | 3457 | * stealing it from us. |
3431 | */ | 3458 | */ |
3432 | if (ret && !reserved) { | 3459 | if (ret && flush) { |
3433 | space_info->bytes_reserved += orig_bytes; | 3460 | flushing = true; |
3434 | reserved = true; | 3461 | space_info->flush = 1; |
3435 | } | 3462 | } |
3436 | 3463 | ||
3437 | spin_unlock(&space_info->lock); | 3464 | spin_unlock(&space_info->lock); |
3438 | 3465 | ||
3439 | if (!ret) | 3466 | if (!ret || !flush) |
3440 | return 0; | ||
3441 | |||
3442 | if (!flush) | ||
3443 | goto out; | 3467 | goto out; |
3444 | 3468 | ||
3445 | /* | 3469 | /* |
@@ -3447,11 +3471,11 @@ again: | |||
3447 | * metadata until after the IO is completed. | 3471 | * metadata until after the IO is completed. |
3448 | */ | 3472 | */ |
3449 | ret = shrink_delalloc(trans, root, num_bytes, 1); | 3473 | ret = shrink_delalloc(trans, root, num_bytes, 1); |
3450 | if (ret > 0) | 3474 | if (ret < 0) |
3451 | return 0; | ||
3452 | else if (ret < 0) | ||
3453 | goto out; | 3475 | goto out; |
3454 | 3476 | ||
3477 | ret = 0; | ||
3478 | |||
3455 | /* | 3479 | /* |
3456 | * So if we were overcommitted it's possible that somebody else flushed | 3480 | * So if we were overcommitted it's possible that somebody else flushed |
3457 | * out enough space and we simply didn't have enough space to reclaim, | 3481 | * out enough space and we simply didn't have enough space to reclaim, |
@@ -3462,11 +3486,11 @@ again: | |||
3462 | goto again; | 3486 | goto again; |
3463 | } | 3487 | } |
3464 | 3488 | ||
3465 | spin_lock(&space_info->lock); | ||
3466 | /* | 3489 | /* |
3467 | * Not enough space to be reclaimed, don't bother committing the | 3490 | * Not enough space to be reclaimed, don't bother committing the |
3468 | * transaction. | 3491 | * transaction. |
3469 | */ | 3492 | */ |
3493 | spin_lock(&space_info->lock); | ||
3470 | if (space_info->bytes_pinned < orig_bytes) | 3494 | if (space_info->bytes_pinned < orig_bytes) |
3471 | ret = -ENOSPC; | 3495 | ret = -ENOSPC; |
3472 | spin_unlock(&space_info->lock); | 3496 | spin_unlock(&space_info->lock); |
@@ -3474,10 +3498,13 @@ again: | |||
3474 | goto out; | 3498 | goto out; |
3475 | 3499 | ||
3476 | ret = -EAGAIN; | 3500 | ret = -EAGAIN; |
3477 | if (trans || committed) | 3501 | if (trans) |
3478 | goto out; | 3502 | goto out; |
3479 | 3503 | ||
3480 | ret = -ENOSPC; | 3504 | ret = -ENOSPC; |
3505 | if (committed) | ||
3506 | goto out; | ||
3507 | |||
3481 | trans = btrfs_join_transaction(root); | 3508 | trans = btrfs_join_transaction(root); |
3482 | if (IS_ERR(trans)) | 3509 | if (IS_ERR(trans)) |
3483 | goto out; | 3510 | goto out; |
@@ -3489,12 +3516,12 @@ again: | |||
3489 | } | 3516 | } |
3490 | 3517 | ||
3491 | out: | 3518 | out: |
3492 | if (reserved) { | 3519 | if (flushing) { |
3493 | spin_lock(&space_info->lock); | 3520 | spin_lock(&space_info->lock); |
3494 | space_info->bytes_reserved -= orig_bytes; | 3521 | space_info->flush = 0; |
3522 | wake_up_all(&space_info->wait); | ||
3495 | spin_unlock(&space_info->lock); | 3523 | spin_unlock(&space_info->lock); |
3496 | } | 3524 | } |
3497 | |||
3498 | return ret; | 3525 | return ret; |
3499 | } | 3526 | } |
3500 | 3527 | ||
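
The new flush flag serializes flushers per space_info: one task flushes while the rest wait, unless they hold a transaction and must bail out with -EAGAIN. A userspace analogue using a condition variable, which atomically drops the lock while sleeping much as the spin_unlock/wait_event pair does:

#include <pthread.h>
#include <stdbool.h>

/* hypothetical userspace model of the space_info->flush handshake */
struct space_info {
        pthread_mutex_t lock;
        pthread_cond_t wait;
        bool flush;                     /* somebody is already flushing */
};

/* returns 0 on success, -1 for the -EAGAIN case */
static int reserve_metadata(struct space_info *si, bool have_trans)
{
        pthread_mutex_lock(&si->lock);
        while (si->flush) {
                /* waiting with a transaction open could deadlock */
                if (have_trans) {
                        pthread_mutex_unlock(&si->lock);
                        return -1;
                }
                pthread_cond_wait(&si->wait, &si->lock);
        }
        si->flush = true;               /* we are the flusher now */
        pthread_mutex_unlock(&si->lock);

        /* ... shrink delalloc / commit the transaction here ... */

        pthread_mutex_lock(&si->lock);
        si->flush = false;
        pthread_cond_broadcast(&si->wait);      /* wake_up_all() analogue */
        pthread_mutex_unlock(&si->lock);
        return 0;
}

int main(void)
{
        struct space_info si = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .wait = PTHREAD_COND_INITIALIZER,
        };

        return reserve_metadata(&si, false);
}
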
@@ -3704,7 +3731,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | |||
3704 | if (commit_trans) { | 3731 | if (commit_trans) { |
3705 | if (trans) | 3732 | if (trans) |
3706 | return -EAGAIN; | 3733 | return -EAGAIN; |
3707 | |||
3708 | trans = btrfs_join_transaction(root); | 3734 | trans = btrfs_join_transaction(root); |
3709 | BUG_ON(IS_ERR(trans)); | 3735 | BUG_ON(IS_ERR(trans)); |
3710 | ret = btrfs_commit_transaction(trans, root); | 3736 | ret = btrfs_commit_transaction(trans, root); |
@@ -3874,26 +3900,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3874 | return 0; | 3900 | return 0; |
3875 | } | 3901 | } |
3876 | 3902 | ||
3877 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3878 | struct btrfs_root *root, | ||
3879 | int num_items) | ||
3880 | { | ||
3881 | u64 num_bytes; | ||
3882 | int ret; | ||
3883 | |||
3884 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3885 | return 0; | ||
3886 | |||
3887 | num_bytes = btrfs_calc_trans_metadata_size(root, num_items); | ||
3888 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3889 | num_bytes); | ||
3890 | if (!ret) { | ||
3891 | trans->bytes_reserved += num_bytes; | ||
3892 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3893 | } | ||
3894 | return ret; | ||
3895 | } | ||
3896 | |||
3897 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | 3903 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
3898 | struct btrfs_root *root) | 3904 | struct btrfs_root *root) |
3899 | { | 3905 | { |
@@ -3944,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | |||
3944 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | 3950 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); |
3945 | } | 3951 | } |
3946 | 3952 | ||
3953 | static unsigned drop_outstanding_extent(struct inode *inode) | ||
3954 | { | ||
3955 | unsigned dropped_extents = 0; | ||
3956 | |||
3957 | spin_lock(&BTRFS_I(inode)->lock); | ||
3958 | BUG_ON(!BTRFS_I(inode)->outstanding_extents); | ||
3959 | BTRFS_I(inode)->outstanding_extents--; | ||
3960 | |||
3961 | /* | ||
3962 | * If we still have as many or more outstanding extents as we have ||
3963 | * reserved, then we need to leave the reserved extents count alone. ||
3964 | */ | ||
3965 | if (BTRFS_I(inode)->outstanding_extents >= | ||
3966 | BTRFS_I(inode)->reserved_extents) | ||
3967 | goto out; | ||
3968 | |||
3969 | dropped_extents = BTRFS_I(inode)->reserved_extents - | ||
3970 | BTRFS_I(inode)->outstanding_extents; | ||
3971 | BTRFS_I(inode)->reserved_extents -= dropped_extents; | ||
3972 | out: | ||
3973 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3974 | return dropped_extents; | ||
3975 | } | ||
3976 | |||
3947 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | 3977 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) |
3948 | { | 3978 | { |
3949 | return num_bytes >>= 3; | 3979 | return num_bytes >>= 3; |
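
outstanding_extents counts extents the inode expects to write; reserved_extents counts what metadata has actually been reserved for. A runnable model of the drop logic above, showing how the surplus is handed back once outstanding falls below reserved:

#include <assert.h>
#include <stdio.h>

/*
 * Userspace model of the per-inode extent accounting; in the real
 * code both counters sit under BTRFS_I(inode)->lock.
 */
struct inode_counters {
        unsigned outstanding;   /* extents we think we will write */
        unsigned reserved;      /* extents metadata was reserved for */
};

/* returns how many reservations can be given back */
static unsigned drop_outstanding_extent(struct inode_counters *c)
{
        unsigned dropped = 0;

        assert(c->outstanding > 0);
        c->outstanding--;
        if (c->outstanding < c->reserved) {
                dropped = c->reserved - c->outstanding;
                c->reserved -= dropped;
        }
        return dropped;
}

int main(void)
{
        struct inode_counters c = { .outstanding = 3, .reserved = 5 };

        /* 5 reserved but only 2 remain outstanding: release 3 */
        printf("dropped %u\n", drop_outstanding_extent(&c));
        return 0;
}
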
@@ -3953,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3953 | { | 3983 | { |
3954 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3984 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3955 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | 3985 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; |
3956 | u64 to_reserve; | 3986 | u64 to_reserve = 0; |
3957 | int nr_extents; | 3987 | unsigned nr_extents = 0; |
3958 | int reserved_extents; | ||
3959 | int ret; | 3988 | int ret; |
3960 | 3989 | ||
3961 | if (btrfs_transaction_in_commit(root->fs_info)) | 3990 | if (btrfs_transaction_in_commit(root->fs_info)) |
@@ -3963,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | |||
3963 | 3992 | ||
3964 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 3993 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
3965 | 3994 | ||
3966 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | 3995 | spin_lock(&BTRFS_I(inode)->lock); |
3967 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | 3996 | BTRFS_I(inode)->outstanding_extents++; |
3997 | |||
3998 | if (BTRFS_I(inode)->outstanding_extents > | ||
3999 | BTRFS_I(inode)->reserved_extents) { | ||
4000 | nr_extents = BTRFS_I(inode)->outstanding_extents - | ||
4001 | BTRFS_I(inode)->reserved_extents; | ||
4002 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3968 | 4003 | ||
3969 | if (nr_extents > reserved_extents) { | ||
3970 | nr_extents -= reserved_extents; | ||
3971 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); | 4004 | to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); |
3972 | } else { | ||
3973 | nr_extents = 0; | ||
3974 | to_reserve = 0; | ||
3975 | } | 4005 | } |
4006 | spin_unlock(&BTRFS_I(inode)->lock); | ||
3976 | 4007 | ||
3977 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | 4008 | to_reserve += calc_csum_metadata_size(inode, num_bytes); |
3978 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); | 4009 | ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); |
3979 | if (ret) | 4010 | if (ret) { |
4011 | unsigned dropped; | ||
4012 | /* | ||
4013 | * We don't need the return value since our reservation failed; ||
4014 | * we just need to clean up our counter. | ||
4015 | */ | ||
4016 | dropped = drop_outstanding_extent(inode); | ||
4017 | WARN_ON(dropped > 1); | ||
3980 | return ret; | 4018 | return ret; |
3981 | 4019 | } | |
3982 | atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); | ||
3983 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3984 | 4020 | ||
3985 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | 4021 | block_rsv_add_bytes(block_rsv, to_reserve, 1); |
3986 | 4022 | ||
3987 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3988 | shrink_delalloc(NULL, root, to_reserve, 0); | ||
3989 | |||
3990 | return 0; | 4023 | return 0; |
3991 | } | 4024 | } |
3992 | 4025 | ||
3993 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | 4026 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) |
3994 | { | 4027 | { |
3995 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4028 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3996 | u64 to_free; | 4029 | u64 to_free = 0; |
3997 | int nr_extents; | 4030 | unsigned dropped; |
3998 | int reserved_extents; | ||
3999 | 4031 | ||
4000 | num_bytes = ALIGN(num_bytes, root->sectorsize); | 4032 | num_bytes = ALIGN(num_bytes, root->sectorsize); |
4001 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 4033 | dropped = drop_outstanding_extent(inode); |
4002 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); | ||
4003 | |||
4004 | reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); | ||
4005 | do { | ||
4006 | int old, new; | ||
4007 | |||
4008 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
4009 | if (nr_extents >= reserved_extents) { | ||
4010 | nr_extents = 0; | ||
4011 | break; | ||
4012 | } | ||
4013 | old = reserved_extents; | ||
4014 | nr_extents = reserved_extents - nr_extents; | ||
4015 | new = reserved_extents - nr_extents; | ||
4016 | old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, | ||
4017 | reserved_extents, new); | ||
4018 | if (likely(old == reserved_extents)) | ||
4019 | break; | ||
4020 | reserved_extents = old; | ||
4021 | } while (1); | ||
4022 | 4034 | ||
4023 | to_free = calc_csum_metadata_size(inode, num_bytes); | 4035 | to_free = calc_csum_metadata_size(inode, num_bytes); |
4024 | if (nr_extents > 0) | 4036 | if (dropped > 0) |
4025 | to_free += btrfs_calc_trans_metadata_size(root, nr_extents); | 4037 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
4026 | 4038 | ||
4027 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | 4039 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, |
4028 | to_free); | 4040 | to_free); |
@@ -4990,14 +5002,10 @@ have_block_group: | |||
4990 | } | 5002 | } |
4991 | 5003 | ||
4992 | /* | 5004 | /* |
4993 | * We only want to start kthread caching if we are at | 5005 | * The caching workers are limited to 2 threads, so we |
4994 | * the point where we will wait for caching to make | 5006 | * can queue as much work as we care to. |
4995 | * progress, or if our ideal search is over and we've | ||
4996 | * found somebody to start caching. | ||
4997 | */ | 5007 | */ |
4998 | if (loop > LOOP_CACHING_NOWAIT || | 5008 | if (loop > LOOP_FIND_IDEAL) { |
4999 | (loop > LOOP_FIND_IDEAL && | ||
5000 | atomic_read(&space_info->caching_threads) < 2)) { | ||
5001 | ret = cache_block_group(block_group, trans, | 5009 | ret = cache_block_group(block_group, trans, |
5002 | orig_root, 0); | 5010 | orig_root, 0); |
5003 | BUG_ON(ret); | 5011 | BUG_ON(ret); |
@@ -5219,8 +5227,7 @@ loop: | |||
5219 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 5227 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
5220 | found_uncached_bg = false; | 5228 | found_uncached_bg = false; |
5221 | loop++; | 5229 | loop++; |
5222 | if (!ideal_cache_percent && | 5230 | if (!ideal_cache_percent) |
5223 | atomic_read(&space_info->caching_threads)) | ||
5224 | goto search; | 5231 | goto search; |
5225 | 5232 | ||
5226 | /* | 5233 | /* |
@@ -5623,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
5623 | if (!buf) | 5630 | if (!buf) |
5624 | return ERR_PTR(-ENOMEM); | 5631 | return ERR_PTR(-ENOMEM); |
5625 | btrfs_set_header_generation(buf, trans->transid); | 5632 | btrfs_set_header_generation(buf, trans->transid); |
5626 | btrfs_set_buffer_lockdep_class(buf, level); | 5633 | btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); |
5627 | btrfs_tree_lock(buf); | 5634 | btrfs_tree_lock(buf); |
5628 | clean_tree_block(trans, root, buf); | 5635 | clean_tree_block(trans, root, buf); |
5629 | 5636 | ||
@@ -5910,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5910 | return 1; | 5917 | return 1; |
5911 | 5918 | ||
5912 | if (path->locks[level] && !wc->keep_locks) { | 5919 | if (path->locks[level] && !wc->keep_locks) { |
5913 | btrfs_tree_unlock(eb); | 5920 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5914 | path->locks[level] = 0; | 5921 | path->locks[level] = 0; |
5915 | } | 5922 | } |
5916 | return 0; | 5923 | return 0; |
@@ -5934,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, | |||
5934 | * keep the tree lock | 5941 | * keep the tree lock |
5935 | */ | 5942 | */ |
5936 | if (path->locks[level] && level > 0) { | 5943 | if (path->locks[level] && level > 0) { |
5937 | btrfs_tree_unlock(eb); | 5944 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
5938 | path->locks[level] = 0; | 5945 | path->locks[level] = 0; |
5939 | } | 5946 | } |
5940 | return 0; | 5947 | return 0; |
@@ -6047,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, | |||
6047 | BUG_ON(level != btrfs_header_level(next)); | 6054 | BUG_ON(level != btrfs_header_level(next)); |
6048 | path->nodes[level] = next; | 6055 | path->nodes[level] = next; |
6049 | path->slots[level] = 0; | 6056 | path->slots[level] = 0; |
6050 | path->locks[level] = 1; | 6057 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6051 | wc->level = level; | 6058 | wc->level = level; |
6052 | if (wc->level == 1) | 6059 | if (wc->level == 1) |
6053 | wc->reada_slot = 0; | 6060 | wc->reada_slot = 0; |
@@ -6118,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6118 | BUG_ON(level == 0); | 6125 | BUG_ON(level == 0); |
6119 | btrfs_tree_lock(eb); | 6126 | btrfs_tree_lock(eb); |
6120 | btrfs_set_lock_blocking(eb); | 6127 | btrfs_set_lock_blocking(eb); |
6121 | path->locks[level] = 1; | 6128 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6122 | 6129 | ||
6123 | ret = btrfs_lookup_extent_info(trans, root, | 6130 | ret = btrfs_lookup_extent_info(trans, root, |
6124 | eb->start, eb->len, | 6131 | eb->start, eb->len, |
@@ -6127,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6127 | BUG_ON(ret); | 6134 | BUG_ON(ret); |
6128 | BUG_ON(wc->refs[level] == 0); | 6135 | BUG_ON(wc->refs[level] == 0); |
6129 | if (wc->refs[level] == 1) { | 6136 | if (wc->refs[level] == 1) { |
6130 | btrfs_tree_unlock(eb); | 6137 | btrfs_tree_unlock_rw(eb, path->locks[level]); |
6131 | path->locks[level] = 0; | ||
6132 | return 1; | 6138 | return 1; |
6133 | } | 6139 | } |
6134 | } | 6140 | } |
@@ -6150,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
6150 | btrfs_header_generation(eb) == trans->transid) { | 6156 | btrfs_header_generation(eb) == trans->transid) { |
6151 | btrfs_tree_lock(eb); | 6157 | btrfs_tree_lock(eb); |
6152 | btrfs_set_lock_blocking(eb); | 6158 | btrfs_set_lock_blocking(eb); |
6153 | path->locks[level] = 1; | 6159 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6154 | } | 6160 | } |
6155 | clean_tree_block(trans, root, eb); | 6161 | clean_tree_block(trans, root, eb); |
6156 | } | 6162 | } |
@@ -6229,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
6229 | return 0; | 6235 | return 0; |
6230 | 6236 | ||
6231 | if (path->locks[level]) { | 6237 | if (path->locks[level]) { |
6232 | btrfs_tree_unlock(path->nodes[level]); | 6238 | btrfs_tree_unlock_rw(path->nodes[level], |
6239 | path->locks[level]); | ||
6233 | path->locks[level] = 0; | 6240 | path->locks[level] = 0; |
6234 | } | 6241 | } |
6235 | free_extent_buffer(path->nodes[level]); | 6242 | free_extent_buffer(path->nodes[level]); |
@@ -6281,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, | |||
6281 | path->nodes[level] = btrfs_lock_root_node(root); | 6288 | path->nodes[level] = btrfs_lock_root_node(root); |
6282 | btrfs_set_lock_blocking(path->nodes[level]); | 6289 | btrfs_set_lock_blocking(path->nodes[level]); |
6283 | path->slots[level] = 0; | 6290 | path->slots[level] = 0; |
6284 | path->locks[level] = 1; | 6291 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6285 | memset(&wc->update_progress, 0, | 6292 | memset(&wc->update_progress, 0, |
6286 | sizeof(wc->update_progress)); | 6293 | sizeof(wc->update_progress)); |
6287 | } else { | 6294 | } else { |
@@ -6449,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | |||
6449 | level = btrfs_header_level(node); | 6456 | level = btrfs_header_level(node); |
6450 | path->nodes[level] = node; | 6457 | path->nodes[level] = node; |
6451 | path->slots[level] = 0; | 6458 | path->slots[level] = 0; |
6452 | path->locks[level] = 1; | 6459 | path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; |
6453 | 6460 | ||
6454 | wc->refs[parent_level] = 1; | 6461 | wc->refs[parent_level] = 1; |
6455 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; | 6462 | wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; |
@@ -6524,15 +6531,28 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
6524 | return flags; | 6531 | return flags; |
6525 | } | 6532 | } |
6526 | 6533 | ||
6527 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) | 6534 | static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) |
6528 | { | 6535 | { |
6529 | struct btrfs_space_info *sinfo = cache->space_info; | 6536 | struct btrfs_space_info *sinfo = cache->space_info; |
6530 | u64 num_bytes; | 6537 | u64 num_bytes; |
6538 | u64 min_allocable_bytes; | ||
6531 | int ret = -ENOSPC; | 6539 | int ret = -ENOSPC; |
6532 | 6540 | ||
6533 | if (cache->ro) | 6541 | if (cache->ro) |
6534 | return 0; | 6542 | return 0; |
6535 | 6543 | ||
6544 | /* | ||
6545 | * We need some metadata space and system metadata space for | ||
6546 | * allocating chunks in some corner cases, unless we are forced ||
6547 | * to set it read-only. ||
6548 | */ | ||
6549 | if ((sinfo->flags & | ||
6550 | (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && | ||
6551 | !force) | ||
6552 | min_allocable_bytes = 1 * 1024 * 1024; | ||
6553 | else | ||
6554 | min_allocable_bytes = 0; | ||
6555 | |||
6536 | spin_lock(&sinfo->lock); | 6556 | spin_lock(&sinfo->lock); |
6537 | spin_lock(&cache->lock); | 6557 | spin_lock(&cache->lock); |
6538 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | 6558 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - |
@@ -6540,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) | |||
6540 | 6560 | ||
6541 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | 6561 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + |
6542 | sinfo->bytes_may_use + sinfo->bytes_readonly + | 6562 | sinfo->bytes_may_use + sinfo->bytes_readonly + |
6543 | cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { | 6563 | cache->reserved_pinned + num_bytes + min_allocable_bytes <= |
6564 | sinfo->total_bytes) { | ||
6544 | sinfo->bytes_readonly += num_bytes; | 6565 | sinfo->bytes_readonly += num_bytes; |
6545 | sinfo->bytes_reserved += cache->reserved_pinned; | 6566 | sinfo->bytes_reserved += cache->reserved_pinned; |
6546 | cache->reserved_pinned = 0; | 6567 | cache->reserved_pinned = 0; |
@@ -6571,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6571 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, | 6592 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, |
6572 | CHUNK_ALLOC_FORCE); | 6593 | CHUNK_ALLOC_FORCE); |
6573 | 6594 | ||
6574 | ret = set_block_group_ro(cache); | 6595 | ret = set_block_group_ro(cache, 0); |
6575 | if (!ret) | 6596 | if (!ret) |
6576 | goto out; | 6597 | goto out; |
6577 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | 6598 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); |
@@ -6579,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, | |||
6579 | CHUNK_ALLOC_FORCE); | 6600 | CHUNK_ALLOC_FORCE); |
6580 | if (ret < 0) | 6601 | if (ret < 0) |
6581 | goto out; | 6602 | goto out; |
6582 | ret = set_block_group_ro(cache); | 6603 | ret = set_block_group_ro(cache, 0); |
6583 | out: | 6604 | out: |
6584 | btrfs_end_transaction(trans, root); | 6605 | btrfs_end_transaction(trans, root); |
6585 | return ret; | 6606 | return ret; |
@@ -7016,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7016 | 7037 | ||
7017 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7038 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7018 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7039 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7019 | set_block_group_ro(cache); | 7040 | set_block_group_ro(cache, 1); |
7020 | } | 7041 | } |
7021 | 7042 | ||
7022 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | 7043 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { |
@@ -7030,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7030 | * mirrored block groups. | 7051 | * mirrored block groups. |
7031 | */ | 7052 | */ |
7032 | list_for_each_entry(cache, &space_info->block_groups[3], list) | 7053 | list_for_each_entry(cache, &space_info->block_groups[3], list) |
7033 | set_block_group_ro(cache); | 7054 | set_block_group_ro(cache, 1); |
7034 | list_for_each_entry(cache, &space_info->block_groups[4], list) | 7055 | list_for_each_entry(cache, &space_info->block_groups[4], list) |
7035 | set_block_group_ro(cache); | 7056 | set_block_group_ro(cache, 1); |
7036 | } | 7057 | } |
7037 | 7058 | ||
7038 | init_global_block_rsv(info); | 7059 | init_global_block_rsv(info); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 561262d3568..067b1747421 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -281,11 +281,10 @@ static int merge_state(struct extent_io_tree *tree, | |||
281 | if (other->start == state->end + 1 && | 281 | if (other->start == state->end + 1 && |
282 | other->state == state->state) { | 282 | other->state == state->state) { |
283 | merge_cb(tree, state, other); | 283 | merge_cb(tree, state, other); |
284 | other->start = state->start; | 284 | state->end = other->end; |
285 | state->tree = NULL; | 285 | other->tree = NULL; |
286 | rb_erase(&state->rb_node, &tree->state); | 286 | rb_erase(&other->rb_node, &tree->state); |
287 | free_extent_state(state); | 287 | free_extent_state(other); |
288 | state = NULL; | ||
289 | } | 288 | } |
290 | } | 289 | } |
291 | 290 | ||
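
The merge direction flips so that the node a caller may already hold in cached_state is the one that survives. A simplified demo of the pointer-stability argument, with extent_state reduced to a bare range:

#include <stdlib.h>
#include <stdio.h>

/* simplified extent_state: just a range */
struct state { unsigned long start, end; };

/*
 * Merge 'other' into 'state' and free 'other', as the new merge_state()
 * does.  Any cached pointer to 'state' stays valid; merging the other
 * way (freeing 'state') would leave such a pointer dangling.
 */
static void merge_forward(struct state *state, struct state *other)
{
        state->end = other->end;
        free(other);
}

int main(void)
{
        struct state *cached = malloc(sizeof(*cached));
        struct state *next = malloc(sizeof(*next));

        *cached = (struct state){ .start = 0, .end = 4095 };
        *next = (struct state){ .start = 4096, .end = 8191 };

        merge_forward(cached, next);
        printf("cached still valid: %lu-%lu\n", cached->start, cached->end);
        free(cached);
        return 0;
}
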
@@ -351,7 +350,6 @@ static int insert_state(struct extent_io_tree *tree, | |||
351 | "%llu %llu\n", (unsigned long long)found->start, | 350 | "%llu %llu\n", (unsigned long long)found->start, |
352 | (unsigned long long)found->end, | 351 | (unsigned long long)found->end, |
353 | (unsigned long long)start, (unsigned long long)end); | 352 | (unsigned long long)start, (unsigned long long)end); |
354 | free_extent_state(state); | ||
355 | return -EEXIST; | 353 | return -EEXIST; |
356 | } | 354 | } |
357 | state->tree = tree; | 355 | state->tree = tree; |
@@ -500,7 +498,8 @@ again: | |||
500 | cached_state = NULL; | 498 | cached_state = NULL; |
501 | } | 499 | } |
502 | 500 | ||
503 | if (cached && cached->tree && cached->start == start) { | 501 | if (cached && cached->tree && cached->start <= start && |
502 | cached->end > start) { | ||
504 | if (clear) | 503 | if (clear) |
505 | atomic_dec(&cached->refs); | 504 | atomic_dec(&cached->refs); |
506 | state = cached; | 505 | state = cached; |
@@ -742,7 +741,8 @@ again: | |||
742 | spin_lock(&tree->lock); | 741 | spin_lock(&tree->lock); |
743 | if (cached_state && *cached_state) { | 742 | if (cached_state && *cached_state) { |
744 | state = *cached_state; | 743 | state = *cached_state; |
745 | if (state->start == start && state->tree) { | 744 | if (state->start <= start && state->end > start && |
745 | state->tree) { | ||
746 | node = &state->rb_node; | 746 | node = &state->rb_node; |
747 | goto hit_next; | 747 | goto hit_next; |
748 | } | 748 | } |
@@ -783,13 +783,13 @@ hit_next: | |||
783 | if (err) | 783 | if (err) |
784 | goto out; | 784 | goto out; |
785 | 785 | ||
786 | next_node = rb_next(node); | ||
787 | cache_state(state, cached_state); | 786 | cache_state(state, cached_state); |
788 | merge_state(tree, state); | 787 | merge_state(tree, state); |
789 | if (last_end == (u64)-1) | 788 | if (last_end == (u64)-1) |
790 | goto out; | 789 | goto out; |
791 | 790 | ||
792 | start = last_end + 1; | 791 | start = last_end + 1; |
792 | next_node = rb_next(&state->rb_node); | ||
793 | if (next_node && start < end && prealloc && !need_resched()) { | 793 | if (next_node && start < end && prealloc && !need_resched()) { |
794 | state = rb_entry(next_node, struct extent_state, | 794 | state = rb_entry(next_node, struct extent_state, |
795 | rb_node); | 795 | rb_node); |
@@ -862,7 +862,6 @@ hit_next: | |||
862 | * Avoid to free 'prealloc' if it can be merged with | 862 | * Avoid to free 'prealloc' if it can be merged with |
863 | * the later extent. | 863 | * the later extent. |
864 | */ | 864 | */ |
865 | atomic_inc(&prealloc->refs); | ||
866 | err = insert_state(tree, prealloc, start, this_end, | 865 | err = insert_state(tree, prealloc, start, this_end, |
867 | &bits); | 866 | &bits); |
868 | BUG_ON(err == -EEXIST); | 867 | BUG_ON(err == -EEXIST); |
@@ -872,7 +871,6 @@ hit_next: | |||
872 | goto out; | 871 | goto out; |
873 | } | 872 | } |
874 | cache_state(prealloc, cached_state); | 873 | cache_state(prealloc, cached_state); |
875 | free_extent_state(prealloc); | ||
876 | prealloc = NULL; | 874 | prealloc = NULL; |
877 | start = this_end + 1; | 875 | start = this_end + 1; |
878 | goto search_again; | 876 | goto search_again; |
@@ -1564,7 +1562,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1564 | int bitset = 0; | 1562 | int bitset = 0; |
1565 | 1563 | ||
1566 | spin_lock(&tree->lock); | 1564 | spin_lock(&tree->lock); |
1567 | if (cached && cached->tree && cached->start == start) | 1565 | if (cached && cached->tree && cached->start <= start && |
1566 | cached->end > start) | ||
1568 | node = &cached->rb_node; | 1567 | node = &cached->rb_node; |
1569 | else | 1568 | else |
1570 | node = tree_search(tree, start); | 1569 | node = tree_search(tree, start); |
@@ -2432,6 +2431,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2432 | pgoff_t index; | 2431 | pgoff_t index; |
2433 | pgoff_t end; /* Inclusive */ | 2432 | pgoff_t end; /* Inclusive */ |
2434 | int scanned = 0; | 2433 | int scanned = 0; |
2434 | int tag; | ||
2435 | 2435 | ||
2436 | pagevec_init(&pvec, 0); | 2436 | pagevec_init(&pvec, 0); |
2437 | if (wbc->range_cyclic) { | 2437 | if (wbc->range_cyclic) { |
@@ -2442,11 +2442,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, | |||
2442 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2442 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
2443 | scanned = 1; | 2443 | scanned = 1; |
2444 | } | 2444 | } |
2445 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2446 | tag = PAGECACHE_TAG_TOWRITE; | ||
2447 | else | ||
2448 | tag = PAGECACHE_TAG_DIRTY; | ||
2445 | retry: | 2449 | retry: |
2450 | if (wbc->sync_mode == WB_SYNC_ALL) | ||
2451 | tag_pages_for_writeback(mapping, index, end); | ||
2446 | while (!done && !nr_to_write_done && (index <= end) && | 2452 | while (!done && !nr_to_write_done && (index <= end) && |
2447 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 2453 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
2448 | PAGECACHE_TAG_DIRTY, min(end - index, | 2454 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { |
2449 | (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | ||
2450 | unsigned i; | 2455 | unsigned i; |
2451 | 2456 | ||
2452 | scanned = 1; | 2457 | scanned = 1; |
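
Tagging for writeback snapshots the dirty set before a WB_SYNC_ALL pass, so pages dirtied while the sync runs cannot keep the loop alive forever. A toy model of the two-phase scheme; the real tag_pages_for_writeback() operates on radix tree tags, not flat arrays:

#include <stdbool.h>
#include <stdio.h>

#define NR_PAGES 8

/* stand-ins for PAGECACHE_TAG_DIRTY and PAGECACHE_TAG_TOWRITE */
static bool dirty[NR_PAGES];
static bool towrite[NR_PAGES];

/* phase 1: snapshot the currently dirty pages */
static void tag_for_writeback(void)
{
        for (int i = 0; i < NR_PAGES; i++)
                towrite[i] = dirty[i];
}

int main(void)
{
        dirty[1] = dirty[4] = true;
        tag_for_writeback();

        /* phase 2: write only the tagged snapshot */
        for (int i = 0; i < NR_PAGES; i++) {
                if (!towrite[i])
                        continue;
                towrite[i] = false;
                dirty[i] = false;
                printf("writing page %d\n", i);
                dirty[6] = true;        /* dirtied mid-sync: not chased */
        }
        return 0;
}
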
@@ -3020,8 +3025,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, | |||
3020 | return NULL; | 3025 | return NULL; |
3021 | eb->start = start; | 3026 | eb->start = start; |
3022 | eb->len = len; | 3027 | eb->len = len; |
3023 | spin_lock_init(&eb->lock); | 3028 | rwlock_init(&eb->lock); |
3024 | init_waitqueue_head(&eb->lock_wq); | 3029 | atomic_set(&eb->write_locks, 0); |
3030 | atomic_set(&eb->read_locks, 0); | ||
3031 | atomic_set(&eb->blocking_readers, 0); | ||
3032 | atomic_set(&eb->blocking_writers, 0); | ||
3033 | atomic_set(&eb->spinning_readers, 0); | ||
3034 | atomic_set(&eb->spinning_writers, 0); | ||
3035 | init_waitqueue_head(&eb->write_lock_wq); | ||
3036 | init_waitqueue_head(&eb->read_lock_wq); | ||
3025 | 3037 | ||
3026 | #if LEAK_DEBUG | 3038 | #if LEAK_DEBUG |
3027 | spin_lock_irqsave(&leak_lock, flags); | 3039 | spin_lock_irqsave(&leak_lock, flags); |
@@ -3117,7 +3129,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, | |||
3117 | i = 0; | 3129 | i = 0; |
3118 | } | 3130 | } |
3119 | for (; i < num_pages; i++, index++) { | 3131 | for (; i < num_pages; i++, index++) { |
3120 | p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); | 3132 | p = find_or_create_page(mapping, index, GFP_NOFS); |
3121 | if (!p) { | 3133 | if (!p) { |
3122 | WARN_ON(1); | 3134 | WARN_ON(1); |
3123 | goto free_eb; | 3135 | goto free_eb; |
@@ -3264,6 +3276,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3264 | return was_dirty; | 3276 | return was_dirty; |
3265 | } | 3277 | } |
3266 | 3278 | ||
3279 | static int __eb_straddles_pages(u64 start, u64 len) | ||
3280 | { | ||
3281 | if (len < PAGE_CACHE_SIZE) | ||
3282 | return 1; | ||
3283 | if (start & (PAGE_CACHE_SIZE - 1)) | ||
3284 | return 1; | ||
3285 | if ((start + len) & (PAGE_CACHE_SIZE - 1)) | ||
3286 | return 1; | ||
3287 | return 0; | ||
3288 | } | ||
3289 | |||
3290 | static int eb_straddles_pages(struct extent_buffer *eb) | ||
3291 | { | ||
3292 | return __eb_straddles_pages(eb->start, eb->len); | ||
3293 | } | ||
3294 | |||
3267 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3295 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
3268 | struct extent_buffer *eb, | 3296 | struct extent_buffer *eb, |
3269 | struct extent_state **cached_state) | 3297 | struct extent_state **cached_state) |
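
The new helpers let the uptodate tracking skip the io-tree range bits whenever a buffer covers whole pages exactly, since the per-page flags already say everything; sub-page or misaligned buffers still need the range bits. The same logic, with two worked cases:

#include <stdio.h>

#define PAGE_CACHE_SIZE 4096UL

/*
 * Mirror of the new helper: a buffer "straddles" page boundaries
 * unless it starts and ends exactly on them and spans at least one
 * full page.
 */
static int eb_straddles_pages(unsigned long start, unsigned long len)
{
        if (len < PAGE_CACHE_SIZE)
                return 1;
        if (start & (PAGE_CACHE_SIZE - 1))
                return 1;
        if ((start + len) & (PAGE_CACHE_SIZE - 1))
                return 1;
        return 0;
}

int main(void)
{
        /* exactly one page, page aligned: range bits not needed */
        printf("%d\n", eb_straddles_pages(8192, 4096));        /* 0 */
        /* misaligned start, crosses a page boundary */
        printf("%d\n", eb_straddles_pages(6144, 4096));        /* 1 */
        return 0;
}
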
@@ -3275,8 +3303,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3275 | num_pages = num_extent_pages(eb->start, eb->len); | 3303 | num_pages = num_extent_pages(eb->start, eb->len); |
3276 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); | 3304 | clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); |
3277 | 3305 | ||
3278 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3306 | if (eb_straddles_pages(eb)) { |
3279 | cached_state, GFP_NOFS); | 3307 | clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3308 | cached_state, GFP_NOFS); | ||
3309 | } | ||
3280 | for (i = 0; i < num_pages; i++) { | 3310 | for (i = 0; i < num_pages; i++) { |
3281 | page = extent_buffer_page(eb, i); | 3311 | page = extent_buffer_page(eb, i); |
3282 | if (page) | 3312 | if (page) |
@@ -3294,8 +3324,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3294 | 3324 | ||
3295 | num_pages = num_extent_pages(eb->start, eb->len); | 3325 | num_pages = num_extent_pages(eb->start, eb->len); |
3296 | 3326 | ||
3297 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, | 3327 | if (eb_straddles_pages(eb)) { |
3298 | NULL, GFP_NOFS); | 3328 | set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, |
3329 | NULL, GFP_NOFS); | ||
3330 | } | ||
3299 | for (i = 0; i < num_pages; i++) { | 3331 | for (i = 0; i < num_pages; i++) { |
3300 | page = extent_buffer_page(eb, i); | 3332 | page = extent_buffer_page(eb, i); |
3301 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | 3333 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || |
@@ -3318,9 +3350,12 @@ int extent_range_uptodate(struct extent_io_tree *tree, | |||
3318 | int uptodate; | 3350 | int uptodate; |
3319 | unsigned long index; | 3351 | unsigned long index; |
3320 | 3352 | ||
3321 | ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); | 3353 | if (__eb_straddles_pages(start, end - start + 1)) { |
3322 | if (ret) | 3354 | ret = test_range_bit(tree, start, end, |
3323 | return 1; | 3355 | EXTENT_UPTODATE, 1, NULL); |
3356 | if (ret) | ||
3357 | return 1; | ||
3358 | } | ||
3324 | while (start <= end) { | 3359 | while (start <= end) { |
3325 | index = start >> PAGE_CACHE_SHIFT; | 3360 | index = start >> PAGE_CACHE_SHIFT; |
3326 | page = find_get_page(tree->mapping, index); | 3361 | page = find_get_page(tree->mapping, index); |
@@ -3348,10 +3383,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, | |||
3348 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3383 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3349 | return 1; | 3384 | return 1; |
3350 | 3385 | ||
3351 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3386 | if (eb_straddles_pages(eb)) { |
3352 | EXTENT_UPTODATE, 1, cached_state); | 3387 | ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3353 | if (ret) | 3388 | EXTENT_UPTODATE, 1, cached_state); |
3354 | return ret; | 3389 | if (ret) |
3390 | return ret; | ||
3391 | } | ||
3355 | 3392 | ||
3356 | num_pages = num_extent_pages(eb->start, eb->len); | 3393 | num_pages = num_extent_pages(eb->start, eb->len); |
3357 | for (i = 0; i < num_pages; i++) { | 3394 | for (i = 0; i < num_pages; i++) { |
@@ -3384,9 +3421,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, | |||
3384 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) | 3421 | if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) |
3385 | return 0; | 3422 | return 0; |
3386 | 3423 | ||
3387 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, | 3424 | if (eb_straddles_pages(eb)) { |
3388 | EXTENT_UPTODATE, 1, NULL)) { | 3425 | if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, |
3389 | return 0; | 3426 | EXTENT_UPTODATE, 1, NULL)) { |
3427 | return 0; | ||
3428 | } | ||
3390 | } | 3429 | } |
3391 | 3430 | ||
3392 | if (start) { | 3431 | if (start) { |
@@ -3490,9 +3529,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3490 | page = extent_buffer_page(eb, i); | 3529 | page = extent_buffer_page(eb, i); |
3491 | 3530 | ||
3492 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3531 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3493 | kaddr = kmap_atomic(page, KM_USER1); | 3532 | kaddr = page_address(page); |
3494 | memcpy(dst, kaddr + offset, cur); | 3533 | memcpy(dst, kaddr + offset, cur); |
3495 | kunmap_atomic(kaddr, KM_USER1); | ||
3496 | 3534 | ||
3497 | dst += cur; | 3535 | dst += cur; |
3498 | len -= cur; | 3536 | len -= cur; |
@@ -3502,9 +3540,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, | |||
3502 | } | 3540 | } |
3503 | 3541 | ||
3504 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | 3542 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, |
3505 | unsigned long min_len, char **token, char **map, | 3543 | unsigned long min_len, char **map, |
3506 | unsigned long *map_start, | 3544 | unsigned long *map_start, |
3507 | unsigned long *map_len, int km) | 3545 | unsigned long *map_len) |
3508 | { | 3546 | { |
3509 | size_t offset = start & (PAGE_CACHE_SIZE - 1); | 3547 | size_t offset = start & (PAGE_CACHE_SIZE - 1); |
3510 | char *kaddr; | 3548 | char *kaddr; |
@@ -3534,42 +3572,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, | |||
3534 | } | 3572 | } |
3535 | 3573 | ||
3536 | p = extent_buffer_page(eb, i); | 3574 | p = extent_buffer_page(eb, i); |
3537 | kaddr = kmap_atomic(p, km); | 3575 | kaddr = page_address(p); |
3538 | *token = kaddr; | ||
3539 | *map = kaddr + offset; | 3576 | *map = kaddr + offset; |
3540 | *map_len = PAGE_CACHE_SIZE - offset; | 3577 | *map_len = PAGE_CACHE_SIZE - offset; |
3541 | return 0; | 3578 | return 0; |
3542 | } | 3579 | } |
3543 | 3580 | ||
3544 | int map_extent_buffer(struct extent_buffer *eb, unsigned long start, | ||
3545 | unsigned long min_len, | ||
3546 | char **token, char **map, | ||
3547 | unsigned long *map_start, | ||
3548 | unsigned long *map_len, int km) | ||
3549 | { | ||
3550 | int err; | ||
3551 | int save = 0; | ||
3552 | if (eb->map_token) { | ||
3553 | unmap_extent_buffer(eb, eb->map_token, km); | ||
3554 | eb->map_token = NULL; | ||
3555 | save = 1; | ||
3556 | } | ||
3557 | err = map_private_extent_buffer(eb, start, min_len, token, map, | ||
3558 | map_start, map_len, km); | ||
3559 | if (!err && save) { | ||
3560 | eb->map_token = *token; | ||
3561 | eb->kaddr = *map; | ||
3562 | eb->map_start = *map_start; | ||
3563 | eb->map_len = *map_len; | ||
3564 | } | ||
3565 | return err; | ||
3566 | } | ||
3567 | |||
3568 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) | ||
3569 | { | ||
3570 | kunmap_atomic(token, km); | ||
3571 | } | ||
3572 | |||
3573 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | 3581 | int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, |
3574 | unsigned long start, | 3582 | unsigned long start, |
3575 | unsigned long len) | 3583 | unsigned long len) |
@@ -3593,9 +3601,8 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, | |||
3593 | 3601 | ||
3594 | cur = min(len, (PAGE_CACHE_SIZE - offset)); | 3602 | cur = min(len, (PAGE_CACHE_SIZE - offset)); |
3595 | 3603 | ||
3596 | kaddr = kmap_atomic(page, KM_USER0); | 3604 | kaddr = page_address(page); |
3597 | ret = memcmp(ptr, kaddr + offset, cur); | 3605 | ret = memcmp(ptr, kaddr + offset, cur); |
3598 | kunmap_atomic(kaddr, KM_USER0); | ||
3599 | if (ret) | 3606 | if (ret) |
3600 | break; | 3607 | break; |
3601 | 3608 | ||
@@ -3628,9 +3635,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, | |||
3628 | WARN_ON(!PageUptodate(page)); | 3635 | WARN_ON(!PageUptodate(page)); |
3629 | 3636 | ||
3630 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3637 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3631 | kaddr = kmap_atomic(page, KM_USER1); | 3638 | kaddr = page_address(page); |
3632 | memcpy(kaddr + offset, src, cur); | 3639 | memcpy(kaddr + offset, src, cur); |
3633 | kunmap_atomic(kaddr, KM_USER1); | ||
3634 | 3640 | ||
3635 | src += cur; | 3641 | src += cur; |
3636 | len -= cur; | 3642 | len -= cur; |
@@ -3659,9 +3665,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, | |||
3659 | WARN_ON(!PageUptodate(page)); | 3665 | WARN_ON(!PageUptodate(page)); |
3660 | 3666 | ||
3661 | cur = min(len, PAGE_CACHE_SIZE - offset); | 3667 | cur = min(len, PAGE_CACHE_SIZE - offset); |
3662 | kaddr = kmap_atomic(page, KM_USER0); | 3668 | kaddr = page_address(page); |
3663 | memset(kaddr + offset, c, cur); | 3669 | memset(kaddr + offset, c, cur); |
3664 | kunmap_atomic(kaddr, KM_USER0); | ||
3665 | 3670 | ||
3666 | len -= cur; | 3671 | len -= cur; |
3667 | offset = 0; | 3672 | offset = 0; |
@@ -3692,9 +3697,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, | |||
3692 | 3697 | ||
3693 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); | 3698 | cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); |
3694 | 3699 | ||
3695 | kaddr = kmap_atomic(page, KM_USER0); | 3700 | kaddr = page_address(page); |
3696 | read_extent_buffer(src, kaddr + offset, src_offset, cur); | 3701 | read_extent_buffer(src, kaddr + offset, src_offset, cur); |
3697 | kunmap_atomic(kaddr, KM_USER0); | ||
3698 | 3702 | ||
3699 | src_offset += cur; | 3703 | src_offset += cur; |
3700 | len -= cur; | 3704 | len -= cur; |
@@ -3707,20 +3711,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, | |||
3707 | unsigned long dst_off, unsigned long src_off, | 3711 | unsigned long dst_off, unsigned long src_off, |
3708 | unsigned long len) | 3712 | unsigned long len) |
3709 | { | 3713 | { |
3710 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3714 | char *dst_kaddr = page_address(dst_page); |
3711 | if (dst_page == src_page) { | 3715 | if (dst_page == src_page) { |
3712 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); | 3716 | memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); |
3713 | } else { | 3717 | } else { |
3714 | char *src_kaddr = kmap_atomic(src_page, KM_USER1); | 3718 | char *src_kaddr = page_address(src_page); |
3715 | char *p = dst_kaddr + dst_off + len; | 3719 | char *p = dst_kaddr + dst_off + len; |
3716 | char *s = src_kaddr + src_off + len; | 3720 | char *s = src_kaddr + src_off + len; |
3717 | 3721 | ||
3718 | while (len--) | 3722 | while (len--) |
3719 | *--p = *--s; | 3723 | *--p = *--s; |
3720 | |||
3721 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3722 | } | 3724 | } |
3723 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3724 | } | 3725 | } |
3725 | 3726 | ||
3726 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) | 3727 | static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) |
@@ -3733,20 +3734,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, | |||
3733 | unsigned long dst_off, unsigned long src_off, | 3734 | unsigned long dst_off, unsigned long src_off, |
3734 | unsigned long len) | 3735 | unsigned long len) |
3735 | { | 3736 | { |
3736 | char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); | 3737 | char *dst_kaddr = page_address(dst_page); |
3737 | char *src_kaddr; | 3738 | char *src_kaddr; |
3738 | 3739 | ||
3739 | if (dst_page != src_page) { | 3740 | if (dst_page != src_page) { |
3740 | src_kaddr = kmap_atomic(src_page, KM_USER1); | 3741 | src_kaddr = page_address(src_page); |
3741 | } else { | 3742 | } else { |
3742 | src_kaddr = dst_kaddr; | 3743 | src_kaddr = dst_kaddr; |
3743 | BUG_ON(areas_overlap(src_off, dst_off, len)); | 3744 | BUG_ON(areas_overlap(src_off, dst_off, len)); |
3744 | } | 3745 | } |
3745 | 3746 | ||
3746 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); | 3747 | memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); |
3747 | kunmap_atomic(dst_kaddr, KM_USER0); | ||
3748 | if (dst_page != src_page) | ||
3749 | kunmap_atomic(src_kaddr, KM_USER1); | ||
3750 | } | 3748 | } |
3751 | 3749 | ||
3752 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, | 3750 | void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, |
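Every kmap_atomic(..., KM_USERn)/kunmap_atomic pair in the hunks above collapses to a bare page_address() call because extent buffer pages are no longer allocated from highmem (the allocation-side change is elsewhere in the patch). A lowmem page has a permanent kernel mapping, so there is no temporary fixmap slot to manage, no KM_USER0/KM_USER1 nesting in move_pages()/copy_pages(), and no preemption-disabled window around the copies. Purely as an illustration of the resulting pattern:

    /* illustrative only, not code from the patch */
    static void eb_copy_from_page(struct page *page, void *dst,
                                  size_t offset, size_t cur)
    {
            /* valid without kmap because the page is lowmem; the
             * caller is free to schedule while holding kaddr */
            char *kaddr = page_address(page);

            memcpy(dst, kaddr + offset, cur);
    }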
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a11a92ee2d3..21a7ca9e728 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -120,8 +120,6 @@ struct extent_state { | |||
120 | struct extent_buffer { | 120 | struct extent_buffer { |
121 | u64 start; | 121 | u64 start; |
122 | unsigned long len; | 122 | unsigned long len; |
123 | char *map_token; | ||
124 | char *kaddr; | ||
125 | unsigned long map_start; | 123 | unsigned long map_start; |
126 | unsigned long map_len; | 124 | unsigned long map_len; |
127 | struct page *first_page; | 125 | struct page *first_page; |
@@ -130,14 +128,26 @@ struct extent_buffer { | |||
130 | struct rcu_head rcu_head; | 128 | struct rcu_head rcu_head; |
131 | atomic_t refs; | 129 | atomic_t refs; |
132 | 130 | ||
133 | /* the spinlock is used to protect most operations */ | 131 | /* counts of lock holders and waiters on the extent buffer */
134 | spinlock_t lock; | 132 | atomic_t write_locks; |
133 | atomic_t read_locks; | ||
134 | atomic_t blocking_writers; | ||
135 | atomic_t blocking_readers; | ||
136 | atomic_t spinning_readers; | ||
137 | atomic_t spinning_writers; | ||
138 | |||
139 | /* protects write locks */ | ||
140 | rwlock_t lock; | ||
135 | 141 | ||
136 | /* | 142 | /* readers use write_lock_wq while they wait for the write
137 | * when we keep the lock held while blocking, waiters go onto | 143 | * lock holders to unlock |
138 | * the wq | ||
139 | */ | 144 | */ |
140 | wait_queue_head_t lock_wq; | 145 | wait_queue_head_t write_lock_wq; |
146 | |||
147 | /* writers use read_lock_wq while they wait for readers | ||
148 | * to unlock | ||
149 | */ | ||
150 | wait_queue_head_t read_lock_wq; | ||
141 | }; | 151 | }; |
142 | 152 | ||
143 | static inline void extent_set_compress_type(unsigned long *bio_flags, | 153 | static inline void extent_set_compress_type(unsigned long *bio_flags, |
@@ -279,15 +289,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | |||
279 | int extent_buffer_uptodate(struct extent_io_tree *tree, | 289 | int extent_buffer_uptodate(struct extent_io_tree *tree, |
280 | struct extent_buffer *eb, | 290 | struct extent_buffer *eb, |
281 | struct extent_state *cached_state); | 291 | struct extent_state *cached_state); |
282 | int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, | ||
283 | unsigned long min_len, char **token, char **map, | ||
284 | unsigned long *map_start, | ||
285 | unsigned long *map_len, int km); | ||
286 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, | 292 | int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, |
287 | unsigned long min_len, char **token, char **map, | 293 | unsigned long min_len, char **map, |
288 | unsigned long *map_start, | 294 | unsigned long *map_start, |
289 | unsigned long *map_len, int km); | 295 | unsigned long *map_len); |
290 | void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); | ||
291 | int extent_range_uptodate(struct extent_io_tree *tree, | 296 | int extent_range_uptodate(struct extent_io_tree *tree, |
292 | u64 start, u64 end); | 297 | u64 start, u64 end); |
293 | int extent_clear_unlock_delalloc(struct inode *inode, | 298 | int extent_clear_unlock_delalloc(struct inode *inode, |
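The one spinlock is replaced by an rwlock plus six counters that record how many holders sit in each lock state. The following summary is inferred from the locking.c rewrite later in this diff, not stated anywhere in the patch itself:

    /*
     * reading aid, inferred invariants:
     *
     *   read_locks  == spinning_readers + blocking_readers
     *   write_locks == spinning_writers + blocking_writers   (0 or 1)
     *
     * "spinning" holders currently hold eb->lock and must not sleep;
     * "blocking" holders have dropped eb->lock but keep logical
     * ownership through the blocking_* counts, and new lockers wait
     * on the two wait queues until those counts drain to zero.
     */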
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90d4ee52cd4..08bcfa92a22 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, | |||
177 | 177 | ||
178 | WARN_ON(bio->bi_vcnt <= 0); | 178 | WARN_ON(bio->bi_vcnt <= 0); |
179 | 179 | ||
180 | /* | ||
181 | * the free space stuff is only read when it hasn't been | ||
182 | * updated in the current transaction. So, we can safely | ||
183 | * read from the commit root and sidestep a nasty deadlock | ||
184 | * between reading the free space cache and updating the csum tree. | ||
185 | */ | ||
186 | if (btrfs_is_free_space_inode(root, inode)) | ||
187 | path->search_commit_root = 1; | ||
188 | |||
180 | disk_bytenr = (u64)bio->bi_sector << 9; | 189 | disk_bytenr = (u64)bio->bi_sector << 9; |
181 | if (dio) | 190 | if (dio) |
182 | offset = logical_offset; | 191 | offset = logical_offset; |
@@ -664,10 +673,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | |||
664 | struct btrfs_sector_sum *sector_sum; | 673 | struct btrfs_sector_sum *sector_sum; |
665 | u32 nritems; | 674 | u32 nritems; |
666 | u32 ins_size; | 675 | u32 ins_size; |
667 | char *eb_map; | ||
668 | char *eb_token; | ||
669 | unsigned long map_len; | ||
670 | unsigned long map_start; | ||
671 | u16 csum_size = | 676 | u16 csum_size = |
672 | btrfs_super_csum_size(&root->fs_info->super_copy); | 677 | btrfs_super_csum_size(&root->fs_info->super_copy); |
673 | 678 | ||
@@ -814,30 +819,9 @@ found: | |||
814 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); | 819 | item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); |
815 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 820 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
816 | btrfs_item_size_nr(leaf, path->slots[0])); | 821 | btrfs_item_size_nr(leaf, path->slots[0])); |
817 | eb_token = NULL; | ||
818 | next_sector: | 822 | next_sector: |
819 | 823 | ||
820 | if (!eb_token || | 824 | write_extent_buffer(leaf, &sector_sum->sum, (unsigned long)item, csum_size);
821 | (unsigned long)item + csum_size >= map_start + map_len) { | ||
822 | int err; | ||
823 | |||
824 | if (eb_token) | ||
825 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
826 | eb_token = NULL; | ||
827 | err = map_private_extent_buffer(leaf, (unsigned long)item, | ||
828 | csum_size, | ||
829 | &eb_token, &eb_map, | ||
830 | &map_start, &map_len, KM_USER1); | ||
831 | if (err) | ||
832 | eb_token = NULL; | ||
833 | } | ||
834 | if (eb_token) { | ||
835 | memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), | ||
836 | &sector_sum->sum, csum_size); | ||
837 | } else { | ||
838 | write_extent_buffer(leaf, &sector_sum->sum, | ||
839 | (unsigned long)item, csum_size); | ||
840 | } | ||
841 | 825 | ||
842 | total_bytes += root->sectorsize; | 826 | total_bytes += root->sectorsize; |
843 | sector_sum++; | 827 | sector_sum++; |
@@ -850,10 +834,7 @@ next_sector: | |||
850 | goto next_sector; | 834 | goto next_sector; |
851 | } | 835 | } |
852 | } | 836 | } |
853 | if (eb_token) { | 837 | |
854 | unmap_extent_buffer(leaf, eb_token, KM_USER1); | ||
855 | eb_token = NULL; | ||
856 | } | ||
857 | btrfs_mark_buffer_dirty(path->nodes[0]); | 838 | btrfs_mark_buffer_dirty(path->nodes[0]); |
858 | if (total_bytes < sums->len) { | 839 | if (total_bytes < sums->len) { |
859 | btrfs_release_path(path); | 840 | btrfs_release_path(path); |
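The commit-root shortcut works because the free space cache is only ever read back when it has not been modified in the running transaction, so the commit root is guaranteed to be current for it. btrfs_is_free_space_inode() is the shared replacement for the static is_free_space_inode() that inode.c deletes further down in this diff; presumably it now lives in a common header, with a body matching the removed one:

    /* sketch: same body as the static helper removed from inode.c */
    static inline bool btrfs_is_free_space_inode(struct btrfs_root *root,
                                                 struct inode *inode)
    {
            if (root == root->fs_info->tree_root ||
                BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID)
                    return true;
            return false;
    }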
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 59cbdb120ad..a35e51c9f23 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, | |||
1081 | 1081 | ||
1082 | again: | 1082 | again: |
1083 | for (i = 0; i < num_pages; i++) { | 1083 | for (i = 0; i < num_pages; i++) { |
1084 | pages[i] = grab_cache_page(inode->i_mapping, index + i); | 1084 | pages[i] = find_or_create_page(inode->i_mapping, index + i, |
1085 | GFP_NOFS); | ||
1085 | if (!pages[i]) { | 1086 | if (!pages[i]) { |
1086 | faili = i - 1; | 1087 | faili = i - 1; |
1087 | err = -ENOMEM; | 1088 | err = -ENOMEM; |
@@ -1238,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1238 | * managed to copy. | 1239 | * managed to copy. |
1239 | */ | 1240 | */ |
1240 | if (num_pages > dirty_pages) { | 1241 | if (num_pages > dirty_pages) { |
1241 | if (copied > 0) | 1242 | if (copied > 0) { |
1242 | atomic_inc( | 1243 | spin_lock(&BTRFS_I(inode)->lock); |
1243 | &BTRFS_I(inode)->outstanding_extents); | 1244 | BTRFS_I(inode)->outstanding_extents++; |
1245 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1246 | } | ||
1244 | btrfs_delalloc_release_space(inode, | 1247 | btrfs_delalloc_release_space(inode, |
1245 | (num_pages - dirty_pages) << | 1248 | (num_pages - dirty_pages) << |
1246 | PAGE_CACHE_SHIFT); | 1249 | PAGE_CACHE_SHIFT); |
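The grab_cache_page() → find_or_create_page() conversions here and in the other files are about the gfp mask rather than behaviour: grab_cache_page() is a thin wrapper that uses the mapping's default mask, and spelling out find_or_create_page() lets btrfs pass GFP_NOFS explicitly, so a page allocation on the write path can never recurse back into the filesystem through reclaim. For reference, the wrapper as defined in pagemap.h of this era is just:

    static inline struct page *grab_cache_page(struct address_space *mapping,
                                               pgoff_t index)
    {
            return find_or_create_page(mapping, index,
                                       mapping_gfp_mask(mapping));
    }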
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bf0d61567f3..6377713f639 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 98 | return inode; |
99 | 99 | ||
100 | spin_lock(&block_group->lock); | 100 | spin_lock(&block_group->lock); |
101 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { | ||
102 | printk(KERN_INFO "Old style space inode found, converting.\n"); | ||
103 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; | ||
104 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
105 | } | ||
106 | |||
101 | if (!btrfs_fs_closing(root->fs_info)) { | 107 | if (!btrfs_fs_closing(root->fs_info)) { |
102 | block_group->inode = igrab(inode); | 108 | block_group->inode = igrab(inode); |
103 | block_group->iref = 1; | 109 | block_group->iref = 1; |
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
135 | btrfs_set_inode_gid(leaf, inode_item, 0); | 141 | btrfs_set_inode_gid(leaf, inode_item, 0); |
136 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | 142 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); |
137 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | 143 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | |
138 | BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); | 144 | BTRFS_INODE_PREALLOC); |
139 | btrfs_set_inode_nlink(leaf, inode_item, 1); | 145 | btrfs_set_inode_nlink(leaf, inode_item, 1); |
140 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | 146 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); |
141 | btrfs_set_inode_block_group(leaf, inode_item, offset); | 147 | btrfs_set_inode_block_group(leaf, inode_item, offset); |
@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
239 | struct btrfs_free_space_header *header; | 245 | struct btrfs_free_space_header *header; |
240 | struct extent_buffer *leaf; | 246 | struct extent_buffer *leaf; |
241 | struct page *page; | 247 | struct page *page; |
242 | u32 *checksums = NULL, *crc; | ||
243 | char *disk_crcs = NULL; | ||
244 | struct btrfs_key key; | 248 | struct btrfs_key key; |
245 | struct list_head bitmaps; | 249 | struct list_head bitmaps; |
246 | u64 num_entries; | 250 | u64 num_entries; |
247 | u64 num_bitmaps; | 251 | u64 num_bitmaps; |
248 | u64 generation; | 252 | u64 generation; |
249 | u32 cur_crc = ~(u32)0; | ||
250 | pgoff_t index = 0; | 253 | pgoff_t index = 0; |
251 | unsigned long first_page_offset; | ||
252 | int num_checksums; | ||
253 | int ret = 0; | 254 | int ret = 0; |
254 | 255 | ||
255 | INIT_LIST_HEAD(&bitmaps); | 256 | INIT_LIST_HEAD(&bitmaps); |
@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
292 | if (!num_entries) | 293 | if (!num_entries) |
293 | goto out; | 294 | goto out; |
294 | 295 | ||
295 | /* Setup everything for doing checksumming */ | ||
296 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
297 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
298 | if (!checksums) | ||
299 | goto out; | ||
300 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
301 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
302 | if (!disk_crcs) | ||
303 | goto out; | ||
304 | |||
305 | ret = readahead_cache(inode); | 296 | ret = readahead_cache(inode); |
306 | if (ret) | 297 | if (ret) |
307 | goto out; | 298 | goto out; |
@@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
311 | struct btrfs_free_space *e; | 302 | struct btrfs_free_space *e; |
312 | void *addr; | 303 | void *addr; |
313 | unsigned long offset = 0; | 304 | unsigned long offset = 0; |
314 | unsigned long start_offset = 0; | ||
315 | int need_loop = 0; | 305 | int need_loop = 0; |
316 | 306 | ||
317 | if (!num_entries && !num_bitmaps) | 307 | if (!num_entries && !num_bitmaps) |
318 | break; | 308 | break; |
319 | 309 | ||
320 | if (index == 0) { | 310 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
321 | start_offset = first_page_offset; | ||
322 | offset = start_offset; | ||
323 | } | ||
324 | |||
325 | page = grab_cache_page(inode->i_mapping, index); | ||
326 | if (!page) | 311 | if (!page) |
327 | goto free_cache; | 312 | goto free_cache; |
328 | 313 | ||
@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
342 | if (index == 0) { | 327 | if (index == 0) { |
343 | u64 *gen; | 328 | u64 *gen; |
344 | 329 | ||
345 | memcpy(disk_crcs, addr, first_page_offset); | 330 | /* |
346 | gen = addr + (sizeof(u32) * num_checksums); | 331 | * We put a bogus crc in the front of the first page in |
332 | * case old kernels try to mount a fs with the new | ||
333 | * format to make sure they discard the cache. | ||
334 | */ | ||
335 | addr += sizeof(u64); | ||
336 | offset += sizeof(u64); | ||
337 | |||
338 | gen = addr; | ||
347 | if (*gen != BTRFS_I(inode)->generation) { | 339 | if (*gen != BTRFS_I(inode)->generation) { |
348 | printk(KERN_ERR "btrfs: space cache generation" | 340 | printk(KERN_ERR "btrfs: space cache generation" |
349 | " (%llu) does not match inode (%llu)\n", | 341 | " (%llu) does not match inode (%llu)\n", |
@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
355 | page_cache_release(page); | 347 | page_cache_release(page); |
356 | goto free_cache; | 348 | goto free_cache; |
357 | } | 349 | } |
358 | crc = (u32 *)disk_crcs; | 350 | addr += sizeof(u64); |
359 | } | 351 | offset += sizeof(u64); |
360 | entry = addr + start_offset; | ||
361 | |||
362 | /* First lets check our crc before we do anything fun */ | ||
363 | cur_crc = ~(u32)0; | ||
364 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
365 | PAGE_CACHE_SIZE - start_offset); | ||
366 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
367 | if (cur_crc != *crc) { | ||
368 | printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", | ||
369 | index); | ||
370 | kunmap(page); | ||
371 | unlock_page(page); | ||
372 | page_cache_release(page); | ||
373 | goto free_cache; | ||
374 | } | 352 | } |
375 | crc++; | 353 | entry = addr; |
376 | 354 | ||
377 | while (1) { | 355 | while (1) { |
378 | if (!num_entries) | 356 | if (!num_entries) |
@@ -470,8 +448,6 @@ next: | |||
470 | 448 | ||
471 | ret = 1; | 449 | ret = 1; |
472 | out: | 450 | out: |
473 | kfree(checksums); | ||
474 | kfree(disk_crcs); | ||
475 | return ret; | 451 | return ret; |
476 | free_cache: | 452 | free_cache: |
477 | __btrfs_remove_free_space_cache(ctl); | 453 | __btrfs_remove_free_space_cache(ctl); |
@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
569 | struct btrfs_key key; | 545 | struct btrfs_key key; |
570 | u64 start, end, len; | 546 | u64 start, end, len; |
571 | u64 bytes = 0; | 547 | u64 bytes = 0; |
572 | u32 *crc, *checksums; | 548 | u32 crc = ~(u32)0; |
573 | unsigned long first_page_offset; | ||
574 | int index = 0, num_pages = 0; | 549 | int index = 0, num_pages = 0; |
575 | int entries = 0; | 550 | int entries = 0; |
576 | int bitmaps = 0; | 551 | int bitmaps = 0; |
@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 565 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
591 | PAGE_CACHE_SHIFT; | 566 | PAGE_CACHE_SHIFT; |
592 | 567 | ||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | filemap_write_and_wait(inode->i_mapping); | 568 | filemap_write_and_wait(inode->i_mapping); |
600 | btrfs_wait_ordered_range(inode, inode->i_size & | 569 | btrfs_wait_ordered_range(inode, inode->i_size & |
601 | ~(root->sectorsize - 1), (u64)-1); | 570 | ~(root->sectorsize - 1), (u64)-1); |
602 | 571 | ||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
611 | /* We need a checksum per page. */ | ||
612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | ||
613 | if (!crc) | ||
614 | return -1; | ||
615 | |||
616 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | 572 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); |
617 | if (!pages) { | 573 | if (!pages) |
618 | kfree(crc); | ||
619 | return -1; | 574 | return -1; |
620 | } | ||
621 | 575 | ||
622 | /* Get the cluster for this block_group if it exists */ | 576 | /* Get the cluster for this block_group if it exists */ |
623 | if (block_group && !list_empty(&block_group->cluster_list)) | 577 | if (block_group && !list_empty(&block_group->cluster_list)) |
@@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
640 | * know and don't freak out. | 594 | * know and don't freak out. |
641 | */ | 595 | */ |
642 | while (index < num_pages) { | 596 | while (index < num_pages) { |
643 | page = grab_cache_page(inode->i_mapping, index); | 597 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); |
644 | if (!page) { | 598 | if (!page) { |
645 | int i; | 599 | int i; |
646 | 600 | ||
@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
648 | unlock_page(pages[i]); | 602 | unlock_page(pages[i]); |
649 | page_cache_release(pages[i]); | 603 | page_cache_release(pages[i]); |
650 | } | 604 | } |
651 | goto out_free; | 605 | goto out; |
652 | } | 606 | } |
653 | pages[index] = page; | 607 | pages[index] = page; |
654 | index++; | 608 | index++; |
@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
668 | /* Write out the extent entries */ | 622 | /* Write out the extent entries */ |
669 | do { | 623 | do { |
670 | struct btrfs_free_space_entry *entry; | 624 | struct btrfs_free_space_entry *entry; |
671 | void *addr; | 625 | void *addr, *orig; |
672 | unsigned long offset = 0; | 626 | unsigned long offset = 0; |
673 | unsigned long start_offset = 0; | ||
674 | 627 | ||
675 | next_page = false; | 628 | next_page = false; |
676 | 629 | ||
677 | if (index == 0) { | ||
678 | start_offset = first_page_offset; | ||
679 | offset = start_offset; | ||
680 | } | ||
681 | |||
682 | if (index >= num_pages) { | 630 | if (index >= num_pages) { |
683 | out_of_space = true; | 631 | out_of_space = true; |
684 | break; | 632 | break; |
@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
686 | 634 | ||
687 | page = pages[index]; | 635 | page = pages[index]; |
688 | 636 | ||
689 | addr = kmap(page); | 637 | orig = addr = kmap(page); |
690 | entry = addr + start_offset; | 638 | if (index == 0) { |
639 | u64 *gen; | ||
691 | 640 | ||
692 | memset(addr, 0, PAGE_CACHE_SIZE); | 641 | /* |
642 | * We're going to put in a bogus crc for this page to | ||
643 | * make sure that old kernels who aren't aware of this | ||
644 | * format will be sure to discard the cache. | ||
645 | */ | ||
646 | addr += sizeof(u64); | ||
647 | offset += sizeof(u64); | ||
648 | |||
649 | gen = addr; | ||
650 | *gen = trans->transid; | ||
651 | addr += sizeof(u64); | ||
652 | offset += sizeof(u64); | ||
653 | } | ||
654 | entry = addr; | ||
655 | |||
656 | memset(addr, 0, PAGE_CACHE_SIZE - offset); | ||
693 | while (node && !next_page) { | 657 | while (node && !next_page) { |
694 | struct btrfs_free_space *e; | 658 | struct btrfs_free_space *e; |
695 | 659 | ||
@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
752 | next_page = true; | 716 | next_page = true; |
753 | entry++; | 717 | entry++; |
754 | } | 718 | } |
755 | *crc = ~(u32)0; | ||
756 | *crc = btrfs_csum_data(root, addr + start_offset, *crc, | ||
757 | PAGE_CACHE_SIZE - start_offset); | ||
758 | kunmap(page); | ||
759 | 719 | ||
760 | btrfs_csum_final(*crc, (char *)crc); | 720 | /* Generate bogus crc value */ |
761 | crc++; | 721 | if (index == 0) { |
722 | u32 *tmp; | ||
723 | crc = btrfs_csum_data(root, orig + sizeof(u64), crc, | ||
724 | PAGE_CACHE_SIZE - sizeof(u64)); | ||
725 | btrfs_csum_final(crc, (char *)&crc); | ||
726 | crc++; | ||
727 | tmp = orig; | ||
728 | *tmp = crc; | ||
729 | } | ||
730 | |||
731 | kunmap(page); | ||
762 | 732 | ||
763 | bytes += PAGE_CACHE_SIZE; | 733 | bytes += PAGE_CACHE_SIZE; |
764 | 734 | ||
@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
779 | 749 | ||
780 | addr = kmap(page); | 750 | addr = kmap(page); |
781 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 751 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
782 | *crc = ~(u32)0; | ||
783 | *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); | ||
784 | kunmap(page); | 752 | kunmap(page); |
785 | btrfs_csum_final(*crc, (char *)crc); | ||
786 | crc++; | ||
787 | bytes += PAGE_CACHE_SIZE; | 753 | bytes += PAGE_CACHE_SIZE; |
788 | 754 | ||
789 | list_del_init(&entry->list); | 755 | list_del_init(&entry->list); |
@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
796 | i_size_read(inode) - 1, &cached_state, | 762 | i_size_read(inode) - 1, &cached_state, |
797 | GFP_NOFS); | 763 | GFP_NOFS); |
798 | ret = 0; | 764 | ret = 0; |
799 | goto out_free; | 765 | goto out; |
800 | } | 766 | } |
801 | 767 | ||
802 | /* Zero out the rest of the pages just to make sure */ | 768 | /* Zero out the rest of the pages just to make sure */ |
@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
811 | index++; | 777 | index++; |
812 | } | 778 | } |
813 | 779 | ||
814 | /* Write the checksums and trans id to the first page */ | ||
815 | { | ||
816 | void *addr; | ||
817 | u64 *gen; | ||
818 | |||
819 | page = pages[0]; | ||
820 | |||
821 | addr = kmap(page); | ||
822 | memcpy(addr, checksums, sizeof(u32) * num_pages); | ||
823 | gen = addr + (sizeof(u32) * num_pages); | ||
824 | *gen = trans->transid; | ||
825 | kunmap(page); | ||
826 | } | ||
827 | |||
828 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | 780 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, |
829 | bytes, &cached_state); | 781 | bytes, &cached_state); |
830 | btrfs_drop_pages(pages, num_pages); | 782 | btrfs_drop_pages(pages, num_pages); |
@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
833 | 785 | ||
834 | if (ret) { | 786 | if (ret) { |
835 | ret = 0; | 787 | ret = 0; |
836 | goto out_free; | 788 | goto out; |
837 | } | 789 | } |
838 | 790 | ||
839 | BTRFS_I(inode)->generation = trans->transid; | 791 | BTRFS_I(inode)->generation = trans->transid; |
@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
850 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 802 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, |
851 | EXTENT_DIRTY | EXTENT_DELALLOC | | 803 | EXTENT_DIRTY | EXTENT_DELALLOC | |
852 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | 804 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); |
853 | goto out_free; | 805 | goto out; |
854 | } | 806 | } |
855 | leaf = path->nodes[0]; | 807 | leaf = path->nodes[0]; |
856 | if (ret > 0) { | 808 | if (ret > 0) { |
@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
866 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | 818 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, |
867 | GFP_NOFS); | 819 | GFP_NOFS); |
868 | btrfs_release_path(path); | 820 | btrfs_release_path(path); |
869 | goto out_free; | 821 | goto out; |
870 | } | 822 | } |
871 | } | 823 | } |
872 | header = btrfs_item_ptr(leaf, path->slots[0], | 824 | header = btrfs_item_ptr(leaf, path->slots[0], |
@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
879 | 831 | ||
880 | ret = 1; | 832 | ret = 1; |
881 | 833 | ||
882 | out_free: | 834 | out: |
883 | kfree(checksums); | ||
884 | kfree(pages); | 835 | kfree(pages); |
885 | |||
886 | out_update: | ||
887 | if (ret != 1) { | 836 | if (ret != 1) { |
888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 837 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); |
889 | BTRFS_I(inode)->generation = 0; | 838 | BTRFS_I(inode)->generation = 0; |
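Taken together, these free-space-cache hunks change the cache file's layout. The old format opened page 0 with one u32 crc per page followed by the generation; the new format checksums the inode's data through the regular csum machinery instead (which is why BTRFS_INODE_NODATASUM is dropped above) and keeps only two 64-bit slots at the front of page 0. A sketch of the layout these hunks imply:

    /*
     * inferred layout, not a definition from the patch:
     *
     * page 0:  [ u64 slot, first 4 bytes hold a deliberately wrong
     *            u32 crc ][ u64 generation ][ entries ... ]
     * page 1+: [ entries ... ] or one full-page bitmap each
     *
     * the bogus value is the crc of the rest of page 0 plus one, so
     * an old kernel's per-page crc check fails and it discards the
     * cache instead of misparsing the new format.
     */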
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index caa26ab5ed6..13e6255182e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | |||
750 | return alloc_hint; | 750 | return alloc_hint; |
751 | } | 751 | } |
752 | 752 | ||
753 | static inline bool is_free_space_inode(struct btrfs_root *root, | ||
754 | struct inode *inode) | ||
755 | { | ||
756 | if (root == root->fs_info->tree_root || | ||
757 | BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) | ||
758 | return true; | ||
759 | return false; | ||
760 | } | ||
761 | |||
762 | /* | 753 | /* |
763 | * when extent_io.c finds a delayed allocation range in the file, | 754 | * when extent_io.c finds a delayed allocation range in the file, |
764 | * the call backs end up in this code. The basic idea is to | 755 | * the call backs end up in this code. The basic idea is to |
@@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
791 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 782 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
792 | int ret = 0; | 783 | int ret = 0; |
793 | 784 | ||
794 | BUG_ON(is_free_space_inode(root, inode)); | 785 | BUG_ON(btrfs_is_free_space_inode(root, inode)); |
795 | trans = btrfs_join_transaction(root); | 786 | trans = btrfs_join_transaction(root); |
796 | BUG_ON(IS_ERR(trans)); | 787 | BUG_ON(IS_ERR(trans)); |
797 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | 788 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; |
@@ -1072,7 +1063,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, | |||
1072 | path = btrfs_alloc_path(); | 1063 | path = btrfs_alloc_path(); |
1073 | BUG_ON(!path); | 1064 | BUG_ON(!path); |
1074 | 1065 | ||
1075 | nolock = is_free_space_inode(root, inode); | 1066 | nolock = btrfs_is_free_space_inode(root, inode); |
1076 | 1067 | ||
1077 | if (nolock) | 1068 | if (nolock) |
1078 | trans = btrfs_join_transaction_nolock(root); | 1069 | trans = btrfs_join_transaction_nolock(root); |
@@ -1298,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode, | |||
1298 | if (!(orig->state & EXTENT_DELALLOC)) | 1289 | if (!(orig->state & EXTENT_DELALLOC)) |
1299 | return 0; | 1290 | return 0; |
1300 | 1291 | ||
1301 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1292 | spin_lock(&BTRFS_I(inode)->lock); |
1293 | BTRFS_I(inode)->outstanding_extents++; | ||
1294 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1302 | return 0; | 1295 | return 0; |
1303 | } | 1296 | } |
1304 | 1297 | ||
@@ -1316,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1316 | if (!(other->state & EXTENT_DELALLOC)) | 1309 | if (!(other->state & EXTENT_DELALLOC)) |
1317 | return 0; | 1310 | return 0; |
1318 | 1311 | ||
1319 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1312 | spin_lock(&BTRFS_I(inode)->lock); |
1313 | BTRFS_I(inode)->outstanding_extents--; | ||
1314 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1320 | return 0; | 1315 | return 0; |
1321 | } | 1316 | } |
1322 | 1317 | ||
@@ -1337,12 +1332,15 @@ static int btrfs_set_bit_hook(struct inode *inode, | |||
1337 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1332 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1338 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1333 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1339 | u64 len = state->end + 1 - state->start; | 1334 | u64 len = state->end + 1 - state->start; |
1340 | bool do_list = !is_free_space_inode(root, inode); | 1335 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1341 | 1336 | ||
1342 | if (*bits & EXTENT_FIRST_DELALLOC) | 1337 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1343 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1338 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1344 | else | 1339 | } else { |
1345 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 1340 | spin_lock(&BTRFS_I(inode)->lock); |
1341 | BTRFS_I(inode)->outstanding_extents++; | ||
1342 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1343 | } | ||
1346 | 1344 | ||
1347 | spin_lock(&root->fs_info->delalloc_lock); | 1345 | spin_lock(&root->fs_info->delalloc_lock); |
1348 | BTRFS_I(inode)->delalloc_bytes += len; | 1346 | BTRFS_I(inode)->delalloc_bytes += len; |
@@ -1370,12 +1368,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, | |||
1370 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { | 1368 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1371 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1369 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1372 | u64 len = state->end + 1 - state->start; | 1370 | u64 len = state->end + 1 - state->start; |
1373 | bool do_list = !is_free_space_inode(root, inode); | 1371 | bool do_list = !btrfs_is_free_space_inode(root, inode); |
1374 | 1372 | ||
1375 | if (*bits & EXTENT_FIRST_DELALLOC) | 1373 | if (*bits & EXTENT_FIRST_DELALLOC) { |
1376 | *bits &= ~EXTENT_FIRST_DELALLOC; | 1374 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1377 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) | 1375 | } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { |
1378 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | 1376 | spin_lock(&BTRFS_I(inode)->lock); |
1377 | BTRFS_I(inode)->outstanding_extents--; | ||
1378 | spin_unlock(&BTRFS_I(inode)->lock); | ||
1379 | } | ||
1379 | 1380 | ||
1380 | if (*bits & EXTENT_DO_ACCOUNTING) | 1381 | if (*bits & EXTENT_DO_ACCOUNTING) |
1381 | btrfs_delalloc_release_metadata(inode, len); | 1382 | btrfs_delalloc_release_metadata(inode, len); |
@@ -1477,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1477 | 1478 | ||
1478 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 1479 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
1479 | 1480 | ||
1480 | if (is_free_space_inode(root, inode)) | 1481 | if (btrfs_is_free_space_inode(root, inode)) |
1481 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); | 1482 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); |
1482 | else | 1483 | else |
1483 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | 1484 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); |
@@ -1726,7 +1727,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1726 | return 0; | 1727 | return 0; |
1727 | BUG_ON(!ordered_extent); | 1728 | BUG_ON(!ordered_extent); |
1728 | 1729 | ||
1729 | nolock = is_free_space_inode(root, inode); | 1730 | nolock = btrfs_is_free_space_inode(root, inode); |
1730 | 1731 | ||
1731 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { | 1732 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { |
1732 | BUG_ON(!list_empty(&ordered_extent->list)); | 1733 | BUG_ON(!list_empty(&ordered_extent->list)); |
@@ -2531,13 +2532,6 @@ static void btrfs_read_locked_inode(struct inode *inode) | |||
2531 | 2532 | ||
2532 | inode_item = btrfs_item_ptr(leaf, path->slots[0], | 2533 | inode_item = btrfs_item_ptr(leaf, path->slots[0], |
2533 | struct btrfs_inode_item); | 2534 | struct btrfs_inode_item); |
2534 | if (!leaf->map_token) | ||
2535 | map_private_extent_buffer(leaf, (unsigned long)inode_item, | ||
2536 | sizeof(struct btrfs_inode_item), | ||
2537 | &leaf->map_token, &leaf->kaddr, | ||
2538 | &leaf->map_start, &leaf->map_len, | ||
2539 | KM_USER1); | ||
2540 | |||
2541 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); | 2535 | inode->i_mode = btrfs_inode_mode(leaf, inode_item); |
2542 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); | 2536 | inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); |
2543 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); | 2537 | inode->i_uid = btrfs_inode_uid(leaf, inode_item); |
@@ -2575,11 +2569,6 @@ cache_acl: | |||
2575 | if (!maybe_acls) | 2569 | if (!maybe_acls) |
2576 | cache_no_acl(inode); | 2570 | cache_no_acl(inode); |
2577 | 2571 | ||
2578 | if (leaf->map_token) { | ||
2579 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2580 | leaf->map_token = NULL; | ||
2581 | } | ||
2582 | |||
2583 | btrfs_free_path(path); | 2572 | btrfs_free_path(path); |
2584 | 2573 | ||
2585 | switch (inode->i_mode & S_IFMT) { | 2574 | switch (inode->i_mode & S_IFMT) { |
@@ -2624,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2624 | struct btrfs_inode_item *item, | 2613 | struct btrfs_inode_item *item, |
2625 | struct inode *inode) | 2614 | struct inode *inode) |
2626 | { | 2615 | { |
2627 | if (!leaf->map_token) | ||
2628 | map_private_extent_buffer(leaf, (unsigned long)item, | ||
2629 | sizeof(struct btrfs_inode_item), | ||
2630 | &leaf->map_token, &leaf->kaddr, | ||
2631 | &leaf->map_start, &leaf->map_len, | ||
2632 | KM_USER1); | ||
2633 | |||
2634 | btrfs_set_inode_uid(leaf, item, inode->i_uid); | 2616 | btrfs_set_inode_uid(leaf, item, inode->i_uid); |
2635 | btrfs_set_inode_gid(leaf, item, inode->i_gid); | 2617 | btrfs_set_inode_gid(leaf, item, inode->i_gid); |
2636 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2618 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); |
@@ -2659,11 +2641,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2659 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2641 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); |
2660 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2642 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); |
2661 | btrfs_set_inode_block_group(leaf, item, 0); | 2643 | btrfs_set_inode_block_group(leaf, item, 0); |
2662 | |||
2663 | if (leaf->map_token) { | ||
2664 | unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); | ||
2665 | leaf->map_token = NULL; | ||
2666 | } | ||
2667 | } | 2644 | } |
2668 | 2645 | ||
2669 | /* | 2646 | /* |
@@ -2684,7 +2661,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2684 | * The data relocation inode should also be directly updated | 2661 | * The data relocation inode should also be directly updated |
2685 | * without delay | 2662 | * without delay |
2686 | */ | 2663 | */ |
2687 | if (!is_free_space_inode(root, inode) | 2664 | if (!btrfs_is_free_space_inode(root, inode) |
2688 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { | 2665 | && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { |
2689 | ret = btrfs_delayed_update_inode(trans, root, inode); | 2666 | ret = btrfs_delayed_update_inode(trans, root, inode); |
2690 | if (!ret) | 2667 | if (!ret) |
@@ -3398,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3398 | 3375 | ||
3399 | ret = -ENOMEM; | 3376 | ret = -ENOMEM; |
3400 | again: | 3377 | again: |
3401 | page = grab_cache_page(mapping, index); | 3378 | page = find_or_create_page(mapping, index, GFP_NOFS); |
3402 | if (!page) { | 3379 | if (!page) { |
3403 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | 3380 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3404 | goto out; | 3381 | goto out; |
@@ -3634,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3634 | 3611 | ||
3635 | truncate_inode_pages(&inode->i_data, 0); | 3612 | truncate_inode_pages(&inode->i_data, 0); |
3636 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || | 3613 | if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || |
3637 | is_free_space_inode(root, inode))) | 3614 | btrfs_is_free_space_inode(root, inode))) |
3638 | goto no_delete; | 3615 | goto no_delete; |
3639 | 3616 | ||
3640 | if (is_bad_inode(inode)) { | 3617 | if (is_bad_inode(inode)) { |
@@ -4271,7 +4248,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4271 | if (BTRFS_I(inode)->dummy_inode) | 4248 | if (BTRFS_I(inode)->dummy_inode) |
4272 | return 0; | 4249 | return 0; |
4273 | 4250 | ||
4274 | if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) | 4251 | if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) |
4275 | nolock = true; | 4252 | nolock = true; |
4276 | 4253 | ||
4277 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4254 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -6728,8 +6705,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
6728 | ei->index_cnt = (u64)-1; | 6705 | ei->index_cnt = (u64)-1; |
6729 | ei->last_unlink_trans = 0; | 6706 | ei->last_unlink_trans = 0; |
6730 | 6707 | ||
6731 | atomic_set(&ei->outstanding_extents, 0); | 6708 | spin_lock_init(&ei->lock); |
6732 | atomic_set(&ei->reserved_extents, 0); | 6709 | ei->outstanding_extents = 0; |
6710 | ei->reserved_extents = 0; | ||
6733 | 6711 | ||
6734 | ei->ordered_data_close = 0; | 6712 | ei->ordered_data_close = 0; |
6735 | ei->orphan_meta_reserved = 0; | 6713 | ei->orphan_meta_reserved = 0; |
@@ -6767,8 +6745,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
6767 | 6745 | ||
6768 | WARN_ON(!list_empty(&inode->i_dentry)); | 6746 | WARN_ON(!list_empty(&inode->i_dentry)); |
6769 | WARN_ON(inode->i_data.nrpages); | 6747 | WARN_ON(inode->i_data.nrpages); |
6770 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | 6748 | WARN_ON(BTRFS_I(inode)->outstanding_extents); |
6771 | WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); | 6749 | WARN_ON(BTRFS_I(inode)->reserved_extents); |
6772 | 6750 | ||
6773 | /* | 6751 | /* |
6774 | * This can happen where we create an inode, but somebody else also | 6752 | * This can happen where we create an inode, but somebody else also |
@@ -6823,7 +6801,7 @@ int btrfs_drop_inode(struct inode *inode) | |||
6823 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6801 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6824 | 6802 | ||
6825 | if (btrfs_root_refs(&root->root_item) == 0 && | 6803 | if (btrfs_root_refs(&root->root_item) == 0 && |
6826 | !is_free_space_inode(root, inode)) | 6804 | !btrfs_is_free_space_inode(root, inode)) |
6827 | return 1; | 6805 | return 1; |
6828 | else | 6806 | else |
6829 | return generic_drop_inode(inode); | 6807 | return generic_drop_inode(inode); |
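outstanding_extents and reserved_extents stop being atomics and become plain counters under the new per-inode spinlock (initialised in btrfs_alloc_inode above). Individual increments were never the problem; the reservation code needs to read and adjust both counters as one consistent snapshot, which two independent atomic_t values cannot provide. A hypothetical consumer of the new scheme:

    /* hypothetical helper, only to show the locking pattern */
    static unsigned count_unreserved_extents(struct inode *inode)
    {
            unsigned nr;

            spin_lock(&BTRFS_I(inode)->lock);
            nr = BTRFS_I(inode)->outstanding_extents -
                 BTRFS_I(inode)->reserved_extents;
            spin_unlock(&BTRFS_I(inode)->lock);
            return nr;
    }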
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 622543309eb..0b980afc5ed 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -859,8 +859,8 @@ again: | |||
859 | /* step one, lock all the pages */ | 859 | /* step one, lock all the pages */ |
860 | for (i = 0; i < num_pages; i++) { | 860 | for (i = 0; i < num_pages; i++) { |
861 | struct page *page; | 861 | struct page *page; |
862 | page = grab_cache_page(inode->i_mapping, | 862 | page = find_or_create_page(inode->i_mapping, |
863 | start_index + i); | 863 | start_index + i, GFP_NOFS); |
864 | if (!page) | 864 | if (!page) |
865 | break; | 865 | break; |
866 | 866 | ||
@@ -930,7 +930,9 @@ again: | |||
930 | GFP_NOFS); | 930 | GFP_NOFS); |
931 | 931 | ||
932 | if (i_done != num_pages) { | 932 | if (i_done != num_pages) { |
933 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | 933 | spin_lock(&BTRFS_I(inode)->lock); |
934 | BTRFS_I(inode)->outstanding_extents++; | ||
935 | spin_unlock(&BTRFS_I(inode)->lock); | ||
934 | btrfs_delalloc_release_space(inode, | 936 | btrfs_delalloc_release_space(inode, |
935 | (num_pages - i_done) << PAGE_CACHE_SHIFT); | 937 | (num_pages - i_done) << PAGE_CACHE_SHIFT); |
936 | } | 938 | } |
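The locking.c rewrite below replaces the spin-then-sleep scheme with a reader/writer state machine. The intended lifecycle for a writer, using the entry points introduced below (the BTRFS_WRITE_LOCK* constants presumably come from locking.h, outside this excerpt), is roughly:

    btrfs_tree_lock(eb);           /* spinning write lock: holds eb->lock */
    btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
    /* eb->lock is dropped, but blocking_writers pins ownership,
     * so it is now safe to schedule */
    btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
    btrfs_tree_unlock(eb);         /* handles either state */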
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 66fa43dc3f0..d77b67c4b27 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -24,185 +24,197 @@ | |||
24 | #include "extent_io.h" | 24 | #include "extent_io.h" |
25 | #include "locking.h" | 25 | #include "locking.h" |
26 | 26 | ||
27 | static inline void spin_nested(struct extent_buffer *eb) | 27 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb); |
28 | { | ||
29 | spin_lock(&eb->lock); | ||
30 | } | ||
31 | 28 | ||
32 | /* | 29 | /* |
33 | * Setting a lock to blocking will drop the spinlock and set the | 30 | * if we currently have a spinning reader or writer lock |
34 | * flag that forces other procs who want the lock to wait. After | 31 | * (indicated by the rw flag) this will bump the count |
35 | * this you can safely schedule with the lock held. | 32 | * of blocking holders and drop the spinlock. |
36 | */ | 33 | */ |
37 | void btrfs_set_lock_blocking(struct extent_buffer *eb) | 34 | void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) |
38 | { | 35 | { |
39 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 36 | if (rw == BTRFS_WRITE_LOCK) { |
40 | set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 37 | if (atomic_read(&eb->blocking_writers) == 0) { |
41 | spin_unlock(&eb->lock); | 38 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); |
39 | atomic_dec(&eb->spinning_writers); | ||
40 | btrfs_assert_tree_locked(eb); | ||
41 | atomic_inc(&eb->blocking_writers); | ||
42 | write_unlock(&eb->lock); | ||
43 | } | ||
44 | } else if (rw == BTRFS_READ_LOCK) { | ||
45 | btrfs_assert_tree_read_locked(eb); | ||
46 | atomic_inc(&eb->blocking_readers); | ||
47 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); | ||
48 | atomic_dec(&eb->spinning_readers); | ||
49 | read_unlock(&eb->lock); | ||
42 | } | 50 | } |
43 | /* exit with the spin lock released and the bit set */ | 51 | return; |
44 | } | 52 | } |
45 | 53 | ||
46 | /* | 54 | /* |
47 | * clearing the blocking flag will take the spinlock again. | 55 | * if we currently have a blocking lock, take the spinlock |
48 | * After this you can't safely schedule | 56 | * and drop our blocking count |
49 | */ | 57 | */ |
50 | void btrfs_clear_lock_blocking(struct extent_buffer *eb) | 58 | void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) |
51 | { | 59 | { |
52 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { | 60 | if (rw == BTRFS_WRITE_LOCK_BLOCKING) { |
53 | spin_nested(eb); | 61 | BUG_ON(atomic_read(&eb->blocking_writers) != 1); |
54 | clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); | 62 | write_lock(&eb->lock); |
55 | smp_mb__after_clear_bit(); | 63 | WARN_ON(atomic_read(&eb->spinning_writers)); |
64 | atomic_inc(&eb->spinning_writers); | ||
65 | if (atomic_dec_and_test(&eb->blocking_writers)) | ||
66 | wake_up(&eb->write_lock_wq); | ||
67 | } else if (rw == BTRFS_READ_LOCK_BLOCKING) { | ||
68 | BUG_ON(atomic_read(&eb->blocking_readers) == 0); | ||
69 | read_lock(&eb->lock); | ||
70 | atomic_inc(&eb->spinning_readers); | ||
71 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
72 | wake_up(&eb->read_lock_wq); | ||
56 | } | 73 | } |
57 | /* exit with the spin lock held */ | 74 | return; |
58 | } | 75 | } |
59 | 76 | ||
60 | /* | 77 | /* |
61 | * unfortunately, many of the places that currently set a lock to blocking | 78 | * take a spinning read lock. This will wait for any blocking |
62 | * don't end up blocking for very long, and often they don't block | 79 | * writers |
63 | * at all. For a dbench 50 run, if we don't spin on the blocking bit | ||
64 | * at all, the context switch rate can jump up to 400,000/sec or more. | ||
65 | * | ||
66 | * So, we're still stuck with this crummy spin on the blocking bit, | ||
67 | * at least until the most common causes of the short blocks | ||
68 | * can be dealt with. | ||
69 | */ | 80 | */ |
70 | static int btrfs_spin_on_block(struct extent_buffer *eb) | 81 | void btrfs_tree_read_lock(struct extent_buffer *eb) |
71 | { | 82 | { |
72 | int i; | 83 | again: |
73 | 84 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
74 | for (i = 0; i < 512; i++) { | 85 | read_lock(&eb->lock); |
75 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 86 | if (atomic_read(&eb->blocking_writers)) { |
76 | return 1; | 87 | read_unlock(&eb->lock); |
77 | if (need_resched()) | 88 | wait_event(eb->write_lock_wq, |
78 | break; | 89 | atomic_read(&eb->blocking_writers) == 0); |
79 | cpu_relax(); | 90 | goto again; |
80 | } | 91 | } |
81 | return 0; | 92 | atomic_inc(&eb->read_locks); |
93 | atomic_inc(&eb->spinning_readers); | ||
82 | } | 94 | } |
83 | 95 | ||
84 | /* | 96 | /* |
85 | * This is somewhat different from trylock. It will take the | 97 | * returns 1 if we get the read lock and 0 if we don't |
86 | * spinlock but if it finds the lock is set to blocking, it will | 98 | * this won't wait for blocking writers |
87 | * return without the lock held. | ||
88 | * | ||
89 | * returns 1 if it was able to take the lock and zero otherwise | ||
90 | * | ||
91 | * After this call, scheduling is not safe without first calling | ||
92 | * btrfs_set_lock_blocking() | ||
93 | */ | 99 | */ |
94 | int btrfs_try_spin_lock(struct extent_buffer *eb) | 100 | int btrfs_try_tree_read_lock(struct extent_buffer *eb) |
95 | { | 101 | { |
96 | int i; | 102 | if (atomic_read(&eb->blocking_writers)) |
103 | return 0; | ||
97 | 104 | ||
98 | if (btrfs_spin_on_block(eb)) { | 105 | read_lock(&eb->lock); |
99 | spin_nested(eb); | 106 | if (atomic_read(&eb->blocking_writers)) { |
100 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 107 | read_unlock(&eb->lock); |
101 | return 1; | 108 | return 0; |
102 | spin_unlock(&eb->lock); | ||
103 | } | 109 | } |
104 | /* spin for a bit on the BLOCKING flag */ | 110 | atomic_inc(&eb->read_locks); |
105 | for (i = 0; i < 2; i++) { | 111 | atomic_inc(&eb->spinning_readers); |
106 | cpu_relax(); | 112 | return 1; |
107 | if (!btrfs_spin_on_block(eb)) | ||
108 | break; | ||
109 | |||
110 | spin_nested(eb); | ||
111 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
112 | return 1; | ||
113 | spin_unlock(&eb->lock); | ||
114 | } | ||
115 | return 0; | ||
116 | } | 113 | } |
117 | 114 | ||
118 | /* | 115 | /* |
119 | * the autoremove wake function will return 0 if it tried to wake up | 116 | * returns 1 if we get the write lock and 0 if we don't
120 | * a process that was already awake, which means that process won't | 117 | * this won't wait for blocking writers or readers |
121 | * count as an exclusive wakeup. The waitq code will continue waking | ||
122 | * procs until it finds one that was actually sleeping. | ||
123 | * | ||
124 | * For btrfs, this isn't quite what we want. We want a single proc | ||
125 | * to be notified that the lock is ready for taking. If that proc | ||
126 | * already happen to be awake, great, it will loop around and try for | ||
127 | * the lock. | ||
128 | * | ||
129 | * So, btrfs_wake_function always returns 1, even when the proc that we | ||
130 | * tried to wake up was already awake. | ||
131 | */ | 118 | */ |
132 | static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, | 119 | int btrfs_try_tree_write_lock(struct extent_buffer *eb) |
133 | int sync, void *key) | ||
134 | { | 120 | { |
135 | autoremove_wake_function(wait, mode, sync, key); | 121 | if (atomic_read(&eb->blocking_writers) || |
122 | atomic_read(&eb->blocking_readers)) | ||
123 | return 0; | ||
124 | write_lock(&eb->lock); | ||
125 | if (atomic_read(&eb->blocking_writers) || | ||
126 | atomic_read(&eb->blocking_readers)) { | ||
127 | write_unlock(&eb->lock); | ||
128 | return 0; | ||
129 | } | ||
130 | atomic_inc(&eb->write_locks); | ||
131 | atomic_inc(&eb->spinning_writers); | ||
136 | return 1; | 132 | return 1; |
137 | } | 133 | } |
138 | 134 | ||
139 | /* | 135 | /* |
140 | * returns with the extent buffer spinlocked. | 136 | * drop a spinning read lock |
141 | * | 137 | */ |
142 | * This will spin and/or wait as required to take the lock, and then | 138 | void btrfs_tree_read_unlock(struct extent_buffer *eb) |
143 | * return with the spinlock held. | 139 | { |
144 | * | 140 | btrfs_assert_tree_read_locked(eb); |
145 | * After this call, scheduling is not safe without first calling | 141 | WARN_ON(atomic_read(&eb->spinning_readers) == 0); |
146 | * btrfs_set_lock_blocking() | 142 | atomic_dec(&eb->spinning_readers); |
143 | atomic_dec(&eb->read_locks); | ||
144 | read_unlock(&eb->lock); | ||
145 | } | ||
146 | |||
147 | /* | ||
148 | * drop a blocking read lock | ||
149 | */ | ||
150 | void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) | ||
151 | { | ||
152 | btrfs_assert_tree_read_locked(eb); | ||
153 | WARN_ON(atomic_read(&eb->blocking_readers) == 0); | ||
154 | if (atomic_dec_and_test(&eb->blocking_readers)) | ||
155 | wake_up(&eb->read_lock_wq); | ||
156 | atomic_dec(&eb->read_locks); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * take a spinning write lock. This will wait for both | ||
161 | * blocking readers or writers | ||
147 | */ | 162 | */ |
148 | int btrfs_tree_lock(struct extent_buffer *eb) | 163 | int btrfs_tree_lock(struct extent_buffer *eb) |
149 | { | 164 | { |
150 | DEFINE_WAIT(wait); | 165 | again: |
151 | wait.func = btrfs_wake_function; | 166 | wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); |
152 | 167 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | |
153 | if (!btrfs_spin_on_block(eb)) | 168 | write_lock(&eb->lock); |
154 | goto sleep; | 169 | if (atomic_read(&eb->blocking_readers)) { |
155 | 170 | write_unlock(&eb->lock); | |
156 | while(1) { | 171 | wait_event(eb->read_lock_wq, |
157 | spin_nested(eb); | 172 | atomic_read(&eb->blocking_readers) == 0); |
158 | 173 | goto again; | |
159 | /* nobody is blocking, exit with the spinlock held */ | ||
160 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
161 | return 0; | ||
162 | |||
163 | /* | ||
164 | * we have the spinlock, but the real owner is blocking. | ||
165 | * wait for them | ||
166 | */ | ||
167 | spin_unlock(&eb->lock); | ||
168 | |||
169 | /* | ||
170 | * spin for a bit, and if the blocking flag goes away, | ||
171 | * loop around | ||
172 | */ | ||
173 | cpu_relax(); | ||
174 | if (btrfs_spin_on_block(eb)) | ||
175 | continue; | ||
176 | sleep: | ||
177 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | ||
178 | TASK_UNINTERRUPTIBLE); | ||
179 | |||
180 | if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | ||
181 | schedule(); | ||
182 | |||
183 | finish_wait(&eb->lock_wq, &wait); | ||
184 | } | 174 | } |
175 | if (atomic_read(&eb->blocking_writers)) { | ||
176 | write_unlock(&eb->lock); | ||
177 | wait_event(eb->write_lock_wq, | ||
178 | atomic_read(&eb->blocking_writers) == 0); | ||
179 | goto again; | ||
180 | } | ||
181 | WARN_ON(atomic_read(&eb->spinning_writers)); | ||
182 | atomic_inc(&eb->spinning_writers); | ||
183 | atomic_inc(&eb->write_locks); | ||
185 | return 0; | 184 | return 0; |
186 | } | 185 | } |
187 | 186 | ||
187 | /* | ||
188 | * drop a spinning or a blocking write lock. | ||
189 | */ | ||
188 | int btrfs_tree_unlock(struct extent_buffer *eb) | 190 | int btrfs_tree_unlock(struct extent_buffer *eb) |
189 | { | 191 | { |
190 | /* | 192 | int blockers = atomic_read(&eb->blocking_writers); |
191 | * if we were a blocking owner, we don't have the spinlock held | 193 | |
192 | * just clear the bit and look for waiters | 194 | BUG_ON(blockers > 1); |
193 | */ | 195 | |
194 | if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 196 | btrfs_assert_tree_locked(eb); |
195 | smp_mb__after_clear_bit(); | 197 | atomic_dec(&eb->write_locks); |
196 | else | 198 | |
197 | spin_unlock(&eb->lock); | 199 | if (blockers) { |
198 | 200 | WARN_ON(atomic_read(&eb->spinning_writers)); | |
199 | if (waitqueue_active(&eb->lock_wq)) | 201 | atomic_dec(&eb->blocking_writers); |
200 | wake_up(&eb->lock_wq); | 202 | smp_wmb(); |
203 | wake_up(&eb->write_lock_wq); | ||
204 | } else { | ||
205 | WARN_ON(atomic_read(&eb->spinning_writers) != 1); | ||
206 | atomic_dec(&eb->spinning_writers); | ||
207 | write_unlock(&eb->lock); | ||
208 | } | ||
201 | return 0; | 209 | return 0; |
202 | } | 210 | } |
203 | 211 | ||
204 | void btrfs_assert_tree_locked(struct extent_buffer *eb) | 212 | void btrfs_assert_tree_locked(struct extent_buffer *eb) |
205 | { | 213 | { |
206 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 214 | BUG_ON(!atomic_read(&eb->write_locks)); |
207 | assert_spin_locked(&eb->lock); | 215 | } |
216 | |||
217 | void btrfs_assert_tree_read_locked(struct extent_buffer *eb) | ||
218 | { | ||
219 | BUG_ON(!atomic_read(&eb->read_locks)); | ||
208 | } | 220 | } |
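The rewritten locking.c above replaces the old spinlock-plus-EXTENT_BUFFER_BLOCKING scheme with a rwlock_t tracked by atomic counters. A minimal caller sketch built from the functions defined or declared in this series (read_extent_data() is a hypothetical stand-in for work that may sleep, and btrfs_set_lock_blocking_rw() itself is defined in a hunk outside this view):

	btrfs_tree_read_lock(eb);		/* spinning read lock */
	btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
	read_extent_data(eb);			/* hypothetical; may schedule() */
	btrfs_tree_read_unlock_blocking(eb);	/* wakes writers queued in btrfs_tree_lock() */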
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h
index 5c33a560a2f..17247ddb81a 100644
--- a/fs/btrfs/locking.h
+++ b/fs/btrfs/locking.h
@@ -19,11 +19,43 @@
 #ifndef __BTRFS_LOCKING_
 #define __BTRFS_LOCKING_
 
+#define BTRFS_WRITE_LOCK 1
+#define BTRFS_READ_LOCK 2
+#define BTRFS_WRITE_LOCK_BLOCKING 3
+#define BTRFS_READ_LOCK_BLOCKING 4
+
 int btrfs_tree_lock(struct extent_buffer *eb);
 int btrfs_tree_unlock(struct extent_buffer *eb);
 int btrfs_try_spin_lock(struct extent_buffer *eb);
 
-void btrfs_set_lock_blocking(struct extent_buffer *eb);
-void btrfs_clear_lock_blocking(struct extent_buffer *eb);
+void btrfs_tree_read_lock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock(struct extent_buffer *eb);
+void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb);
+void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw);
+void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw);
 void btrfs_assert_tree_locked(struct extent_buffer *eb);
+int btrfs_try_tree_read_lock(struct extent_buffer *eb);
+int btrfs_try_tree_write_lock(struct extent_buffer *eb);
+
+static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw)
+{
+	if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING)
+		btrfs_tree_unlock(eb);
+	else if (rw == BTRFS_READ_LOCK_BLOCKING)
+		btrfs_tree_read_unlock_blocking(eb);
+	else if (rw == BTRFS_READ_LOCK)
+		btrfs_tree_read_unlock(eb);
+	else
+		BUG();
+}
+
+static inline void btrfs_set_lock_blocking(struct extent_buffer *eb)
+{
+	btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK);
+}
+
+static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb)
+{
+	btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING);
+}
 #endif
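The BTRFS_*_LOCK constants let a caller record which flavor of lock it took and release it uniformly through btrfs_tree_unlock_rw(). An illustrative (not in-tree) try-lock pattern using only the declarations above:

	static int peek_block(struct extent_buffer *eb)
	{
		int lock_type;

		if (btrfs_try_tree_write_lock(eb))
			lock_type = BTRFS_WRITE_LOCK;
		else if (btrfs_try_tree_read_lock(eb))
			lock_type = BTRFS_READ_LOCK;
		else
			return -EAGAIN;	/* contended; caller retries or backs off */

		/* ... inspect the buffer while it cannot change ... */

		btrfs_tree_unlock_rw(eb, lock_type);
		return 0;
	}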
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 5e0a3dc79a4..59bb1764273 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode,
 			page_cache_sync_readahead(inode->i_mapping,
 						  ra, NULL, index,
 						  last_index + 1 - index);
-			page = grab_cache_page(inode->i_mapping, index);
+			page = find_or_create_page(inode->i_mapping, index,
+						   GFP_NOFS);
 			if (!page) {
 				btrfs_delalloc_release_metadata(inode,
 							PAGE_CACHE_SIZE);
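For context: in kernels of this vintage grab_cache_page() is just the wrapper below (paraphrased from include/linux/pagemap.h), so the change swaps the mapping's default gfp mask for an explicit GFP_NOFS, keeping the page allocation from re-entering the filesystem while delalloc metadata is reserved:

	static inline struct page *grab_cache_page(struct address_space *mapping,
						   pgoff_t index)
	{
		return find_or_create_page(mapping, index,
					   mapping_gfp_mask(mapping));
	}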
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index c0f7ecaf1e7..bc1f6ad1844 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb,				\
 	unsigned long part_offset = (unsigned long)s;			\
 	unsigned long offset = part_offset + offsetof(type, member);	\
 	type *p;							\
-	/* ugly, but we want the fast path here */			\
-	if (eb->map_token && offset >= eb->map_start &&			\
-	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
-	    eb->map_len) {						\
-		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
-		return le##bits##_to_cpu(p->member);			\
-	}								\
-	{								\
-		int err;						\
-		char *map_token;					\
-		char *kaddr;						\
-		int unmap_on_exit = (eb->map_token == NULL);		\
-		unsigned long map_start;				\
-		unsigned long map_len;					\
-		u##bits res;						\
-		err = map_extent_buffer(eb, offset,			\
-				sizeof(((type *)0)->member),		\
-				&map_token, &kaddr,			\
-				&map_start, &map_len, KM_USER1);	\
-		if (err) {						\
-			__le##bits leres;				\
-			read_eb_member(eb, s, type, member, &leres);	\
-			return le##bits##_to_cpu(leres);		\
-		}							\
-		p = (type *)(kaddr + part_offset - map_start);		\
-		res = le##bits##_to_cpu(p->member);			\
-		if (unmap_on_exit)					\
-			unmap_extent_buffer(eb, map_token, KM_USER1);	\
-		return res;						\
-	}								\
+	int err;							\
+	char *kaddr;							\
+	unsigned long map_start;					\
+	unsigned long map_len;						\
+	u##bits res;							\
+	err = map_private_extent_buffer(eb, offset,			\
+				sizeof(((type *)0)->member),		\
+				&kaddr, &map_start, &map_len);		\
+	if (err) {							\
+		__le##bits leres;					\
+		read_eb_member(eb, s, type, member, &leres);		\
+		return le##bits##_to_cpu(leres);			\
+	}								\
+	p = (type *)(kaddr + part_offset - map_start);			\
+	res = le##bits##_to_cpu(p->member);				\
+	return res;							\
 }									\
 void btrfs_set_##name(struct extent_buffer *eb,				\
 		      type *s, u##bits val)				\
@@ -87,36 +73,21 @@ void btrfs_set_##name(struct extent_buffer *eb,				\
 	unsigned long part_offset = (unsigned long)s;			\
 	unsigned long offset = part_offset + offsetof(type, member);	\
 	type *p;							\
-	/* ugly, but we want the fast path here */			\
-	if (eb->map_token && offset >= eb->map_start &&			\
-	    offset + sizeof(((type *)0)->member) <= eb->map_start +	\
-	    eb->map_len) {						\
-		p = (type *)(eb->kaddr + part_offset - eb->map_start);	\
-		p->member = cpu_to_le##bits(val);			\
-		return;							\
-	}								\
-	{								\
-		int err;						\
-		char *map_token;					\
-		char *kaddr;						\
-		int unmap_on_exit = (eb->map_token == NULL);		\
-		unsigned long map_start;				\
-		unsigned long map_len;					\
-		err = map_extent_buffer(eb, offset,			\
-				sizeof(((type *)0)->member),		\
-				&map_token, &kaddr,			\
-				&map_start, &map_len, KM_USER1);	\
-		if (err) {						\
-			__le##bits val2;				\
-			val2 = cpu_to_le##bits(val);			\
-			write_eb_member(eb, s, type, member, &val2);	\
-			return;						\
-		}							\
-		p = (type *)(kaddr + part_offset - map_start);		\
-		p->member = cpu_to_le##bits(val);			\
-		if (unmap_on_exit)					\
-			unmap_extent_buffer(eb, map_token, KM_USER1);	\
-	}								\
+	int err;							\
+	char *kaddr;							\
+	unsigned long map_start;					\
+	unsigned long map_len;						\
+	err = map_private_extent_buffer(eb, offset,			\
+				sizeof(((type *)0)->member),		\
+				&kaddr, &map_start, &map_len);		\
+	if (err) {							\
+		__le##bits val2;					\
+		val2 = cpu_to_le##bits(val);				\
+		write_eb_member(eb, s, type, member, &val2);		\
+		return;							\
+	}								\
+	p = (type *)(kaddr + part_offset - map_start);			\
+	p->member = cpu_to_le##bits(val);				\
 }
 
 #include "ctree.h"
@@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb,
 		    struct btrfs_disk_key *disk_key, int nr)
 {
 	unsigned long ptr = btrfs_node_key_ptr_offset(nr);
-	if (eb->map_token && ptr >= eb->map_start &&
-	    ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) {
-		memcpy(disk_key, eb->kaddr + ptr - eb->map_start,
-			sizeof(*disk_key));
-		return;
-	} else if (eb->map_token) {
-		unmap_extent_buffer(eb, eb->map_token, KM_USER1);
-		eb->map_token = NULL;
-	}
 	read_eb_member(eb, (struct btrfs_key_ptr *)ptr,
 		       struct btrfs_key_ptr, key, disk_key);
 }
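The macro pair above is stamped out from ctree.h; one representative instantiation (illustrative) and the accessor pair it generates:

	BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);

	/* expands, roughly, to: */
	u32 btrfs_item_offset(struct extent_buffer *eb, struct btrfs_item *s);
	void btrfs_set_item_offset(struct extent_buffer *eb,
				   struct btrfs_item *s, u32 val);

Both accessors now call map_private_extent_buffer() on every access instead of consulting the removed eb->map_token cache, which also drops the KM_USER1 kmap-slot bookkeeping the old code needed.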
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 51dcec86757..eb55863bb4a 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
-	int retries = 0;
+	u64 num_bytes = 0;
 	int ret;
 
 	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
@@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 		h->block_rsv = NULL;
 		goto got_it;
 	}
+
+	/*
+	 * Do the reservation before we join the transaction so we can do all
+	 * the appropriate flushing if need be.
+	 */
+	if (num_items > 0 && root != root->fs_info->chunk_root) {
+		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
+		ret = btrfs_block_rsv_add(NULL, root,
+					  &root->fs_info->trans_block_rsv,
+					  num_bytes);
+		if (ret)
+			return ERR_PTR(ret);
+	}
 again:
 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 	if (!h)
@@ -310,24 +323,9 @@ again:
 		goto again;
 	}
 
-	if (num_items > 0) {
-		ret = btrfs_trans_reserve_metadata(h, root, num_items);
-		if (ret == -EAGAIN && !retries) {
-			retries++;
-			btrfs_commit_transaction(h, root);
-			goto again;
-		} else if (ret == -EAGAIN) {
-			/*
-			 * We have already retried and got EAGAIN, so really we
-			 * don't have space, so set ret to -ENOSPC.
-			 */
-			ret = -ENOSPC;
-		}
-
-		if (ret < 0) {
-			btrfs_end_transaction(h, root);
-			return ERR_PTR(ret);
-		}
+	if (num_bytes) {
+		h->block_rsv = &root->fs_info->trans_block_rsv;
+		h->bytes_reserved = num_bytes;
 	}
 
 got_it:
@@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 	}
 
 	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
-		if (throttle)
+		if (throttle) {
+			/*
+			 * We may race with somebody else here so end up having
+			 * to call end_transaction on ourselves again, so inc
+			 * our use_count.
+			 */
+			trans->use_count++;
 			return btrfs_commit_transaction(trans, root);
-		else
+		} else {
 			wake_up_process(info->transaction_kthread);
+		}
 	}
 
 	WARN_ON(cur_trans != info->running_transaction);
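Condensed, the start_transaction() change amounts to the following ordering (names taken from the hunks above; error paths trimmed, so treat it as a sketch rather than the verbatim function):

	/*
	 * Reserve first: btrfs_block_rsv_add() may need to flush, which
	 * is only safe while this task is not yet joined to the
	 * transaction.
	 */
	num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
	ret = btrfs_block_rsv_add(NULL, root,
				  &root->fs_info->trans_block_rsv, num_bytes);
	if (ret)
		return ERR_PTR(ret);

	/* ... join the transaction ... */

	h->block_rsv = &root->fs_info->trans_block_rsv;
	h->bytes_reserved = num_bytes;

This also retires the old commit-and-retry loop on -EAGAIN, since the reservation can now block and flush as needed up front.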
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4ce8a9f41d1..ac278dd8317 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 			btrfs_read_buffer(next, ptr_gen);
 
 			btrfs_tree_lock(next);
-			clean_tree_block(trans, root, next);
 			btrfs_set_lock_blocking(next);
+			clean_tree_block(trans, root, next);
 			btrfs_wait_tree_block_writeback(next);
 			btrfs_tree_unlock(next);
 
@@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 		next = path->nodes[*level];
 
 		btrfs_tree_lock(next);
-		clean_tree_block(trans, root, next);
 		btrfs_set_lock_blocking(next);
+		clean_tree_block(trans, root, next);
 		btrfs_wait_tree_block_writeback(next);
 		btrfs_tree_unlock(next);
 
@@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
 		next = path->nodes[orig_level];
 
 		btrfs_tree_lock(next);
-		clean_tree_block(trans, log, next);
 		btrfs_set_lock_blocking(next);
+		clean_tree_block(trans, log, next);
 		btrfs_wait_tree_block_writeback(next);
 		btrfs_tree_unlock(next);
 
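All three hunks apply the same ordering rule the new tree locks require: mark the lock blocking before calling anything that can sleep, because btrfs_tree_lock() now returns holding a spinning lock:

	btrfs_tree_lock(next);			/* spinning write lock */
	btrfs_set_lock_blocking(next);		/* safe to schedule() from here on */
	clean_tree_block(trans, root, next);	/* may block */
	btrfs_wait_tree_block_writeback(next);
	btrfs_tree_unlock(next);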
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 19450bc5363..b89e372c754 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3595,7 +3595,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
 	if (!sb)
 		return -ENOMEM;
 	btrfs_set_buffer_uptodate(sb);
-	btrfs_set_buffer_lockdep_class(sb, 0);
+	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
 
 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
 	array_size = btrfs_super_sys_array_size(super_copy);
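The new first argument is the root's objectid, which selects a per-root lockdep class for the buffer. Inferred from this call site (the declaration itself lives in disk-io.h, outside this view), the signature is presumably:

	void btrfs_set_buffer_lockdep_class(u64 objectid,
					    struct extent_buffer *eb, int level);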
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 5366fe452ab..d733b9cfea3 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
-	/* first lets see if we already have this xattr */
-	di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-				strlen(name), -1);
-	if (IS_ERR(di)) {
-		ret = PTR_ERR(di);
-		goto out;
-	}
-
-	/* ok we already have this xattr, lets remove it */
-	if (di) {
-		/* if we want create only exit */
-		if (flags & XATTR_CREATE) {
-			ret = -EEXIST;
+	if (flags & XATTR_REPLACE) {
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
+					name_len, -1);
+		if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		} else if (!di) {
+			ret = -ENODATA;
 			goto out;
 		}
-
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		BUG_ON(ret);
+		ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		if (ret)
+			goto out;
 		btrfs_release_path(path);
+	}
 
-		/* if we don't have a value then we are removing the xattr */
-		if (!value)
+again:
+	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+				      name, name_len, value, size);
+	if (ret == -EEXIST) {
+		if (flags & XATTR_CREATE)
 			goto out;
-	} else {
+		/*
+		 * We can't use the path we already have since we won't have the
+		 * proper locking for a delete, so release the path and
+		 * re-lookup to delete the thing.
+		 */
 		btrfs_release_path(path);
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (IS_ERR(di)) {
+			ret = PTR_ERR(di);
+			goto out;
+		} else if (!di) {
+			/* Shouldn't happen but just in case... */
+			btrfs_release_path(path);
+			goto again;
+		}
 
-		if (flags & XATTR_REPLACE) {
-			/* we couldn't find the attr to replace */
-			ret = -ENODATA;
+		ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		if (ret)
 			goto out;
+
+		/*
+		 * We have a value to set, so go back and try to insert it now.
+		 */
+		if (value) {
+			btrfs_release_path(path);
+			goto again;
 		}
 	}
-
-	/* ok we have to create a completely new xattr */
-	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-				      name, name_len, value, size);
-	BUG_ON(ret);
 out:
 	btrfs_free_path(path);
 	return ret;
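The rewritten flow attempts the insert first and only falls back to lookup-and-delete on -EEXIST, so the common path does one tree search instead of a lookup followed by an insert. The userspace semantics are unchanged; as a reminder, a sketch using the standard setxattr(2) flags (the path and attribute name are made up):

	#include <sys/xattr.h>

	int demo(void)
	{
		/* fails with EEXIST if user.demo is already set */
		if (setxattr("/mnt/file", "user.demo", "v1", 2, XATTR_CREATE))
			return -1;
		/* fails with ENODATA if user.demo was never set */
		return setxattr("/mnt/file", "user.demo", "v2", 2, XATTR_REPLACE);
	}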