diff options
-rw-r--r-- | fs/btrfs/ctree.h | 22 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 3 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 471 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 42 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 23 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 2 |
6 files changed, 439 insertions, 124 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 0cbf3491bb7c..42b03c4ee494 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -691,6 +691,7 @@ struct btrfs_space_info { | |||
691 | struct list_head block_groups; | 691 | struct list_head block_groups; |
692 | spinlock_t lock; | 692 | spinlock_t lock; |
693 | struct rw_semaphore groups_sem; | 693 | struct rw_semaphore groups_sem; |
694 | atomic_t caching_threads; | ||
694 | }; | 695 | }; |
695 | 696 | ||
696 | /* | 697 | /* |
@@ -721,11 +722,17 @@ struct btrfs_free_cluster { | |||
721 | struct list_head block_group_list; | 722 | struct list_head block_group_list; |
722 | }; | 723 | }; |
723 | 724 | ||
725 | enum btrfs_caching_type { | ||
726 | BTRFS_CACHE_NO = 0, | ||
727 | BTRFS_CACHE_STARTED = 1, | ||
728 | BTRFS_CACHE_FINISHED = 2, | ||
729 | }; | ||
730 | |||
724 | struct btrfs_block_group_cache { | 731 | struct btrfs_block_group_cache { |
725 | struct btrfs_key key; | 732 | struct btrfs_key key; |
726 | struct btrfs_block_group_item item; | 733 | struct btrfs_block_group_item item; |
734 | struct btrfs_fs_info *fs_info; | ||
727 | spinlock_t lock; | 735 | spinlock_t lock; |
728 | struct mutex cache_mutex; | ||
729 | u64 pinned; | 736 | u64 pinned; |
730 | u64 reserved; | 737 | u64 reserved; |
731 | u64 flags; | 738 | u64 flags; |
@@ -733,15 +740,19 @@ struct btrfs_block_group_cache { | |||
733 | int extents_thresh; | 740 | int extents_thresh; |
734 | int free_extents; | 741 | int free_extents; |
735 | int total_bitmaps; | 742 | int total_bitmaps; |
736 | int cached; | ||
737 | int ro; | 743 | int ro; |
738 | int dirty; | 744 | int dirty; |
739 | 745 | ||
746 | /* cache tracking stuff */ | ||
747 | wait_queue_head_t caching_q; | ||
748 | int cached; | ||
749 | |||
740 | struct btrfs_space_info *space_info; | 750 | struct btrfs_space_info *space_info; |
741 | 751 | ||
742 | /* free space cache stuff */ | 752 | /* free space cache stuff */ |
743 | spinlock_t tree_lock; | 753 | spinlock_t tree_lock; |
744 | struct rb_root free_space_offset; | 754 | struct rb_root free_space_offset; |
755 | u64 free_space; | ||
745 | 756 | ||
746 | /* block group cache stuff */ | 757 | /* block group cache stuff */ |
747 | struct rb_node cache_node; | 758 | struct rb_node cache_node; |
@@ -834,6 +845,7 @@ struct btrfs_fs_info { | |||
834 | atomic_t async_submit_draining; | 845 | atomic_t async_submit_draining; |
835 | atomic_t nr_async_bios; | 846 | atomic_t nr_async_bios; |
836 | atomic_t async_delalloc_pages; | 847 | atomic_t async_delalloc_pages; |
848 | atomic_t async_caching_threads; | ||
837 | 849 | ||
838 | /* | 850 | /* |
839 | * this is used by the balancing code to wait for all the pending | 851 | * this is used by the balancing code to wait for all the pending |
@@ -950,6 +962,9 @@ struct btrfs_root { | |||
950 | /* the node lock is held while changing the node pointer */ | 962 | /* the node lock is held while changing the node pointer */ |
951 | spinlock_t node_lock; | 963 | spinlock_t node_lock; |
952 | 964 | ||
965 | /* taken when updating the commit root */ | ||
966 | struct rw_semaphore commit_root_sem; | ||
967 | |||
953 | struct extent_buffer *commit_root; | 968 | struct extent_buffer *commit_root; |
954 | struct btrfs_root *log_root; | 969 | struct btrfs_root *log_root; |
955 | struct btrfs_root *reloc_root; | 970 | struct btrfs_root *reloc_root; |
@@ -1911,7 +1926,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | |||
1911 | struct btrfs_root *root, unsigned long count); | 1926 | struct btrfs_root *root, unsigned long count); |
1912 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1927 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1913 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1928 | int btrfs_update_pinned_extents(struct btrfs_root *root, |
1914 | u64 bytenr, u64 num, int pin); | 1929 | u64 bytenr, u64 num, int pin, int mark_free); |
1915 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1930 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1916 | struct btrfs_root *root, struct extent_buffer *leaf); | 1931 | struct btrfs_root *root, struct extent_buffer *leaf); |
1917 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1932 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
@@ -1996,6 +2011,7 @@ void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | |||
1996 | u64 bytes); | 2011 | u64 bytes); |
1997 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2012 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, |
1998 | u64 bytes); | 2013 | u64 bytes); |
2014 | void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info); | ||
1999 | /* ctree.c */ | 2015 | /* ctree.c */ |
2000 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2016 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2001 | int level, int *slot); | 2017 | int level, int *slot); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 55d9d188e693..ec2c915f7f4a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -907,6 +907,7 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
907 | spin_lock_init(&root->inode_lock); | 907 | spin_lock_init(&root->inode_lock); |
908 | mutex_init(&root->objectid_mutex); | 908 | mutex_init(&root->objectid_mutex); |
909 | mutex_init(&root->log_mutex); | 909 | mutex_init(&root->log_mutex); |
910 | init_rwsem(&root->commit_root_sem); | ||
910 | init_waitqueue_head(&root->log_writer_wait); | 911 | init_waitqueue_head(&root->log_writer_wait); |
911 | init_waitqueue_head(&root->log_commit_wait[0]); | 912 | init_waitqueue_head(&root->log_commit_wait[0]); |
912 | init_waitqueue_head(&root->log_commit_wait[1]); | 913 | init_waitqueue_head(&root->log_commit_wait[1]); |
@@ -1566,6 +1567,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1566 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1567 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1567 | atomic_set(&fs_info->async_submit_draining, 0); | 1568 | atomic_set(&fs_info->async_submit_draining, 0); |
1568 | atomic_set(&fs_info->nr_async_bios, 0); | 1569 | atomic_set(&fs_info->nr_async_bios, 0); |
1570 | atomic_set(&fs_info->async_caching_threads, 0); | ||
1569 | fs_info->sb = sb; | 1571 | fs_info->sb = sb; |
1570 | fs_info->max_extent = (u64)-1; | 1572 | fs_info->max_extent = (u64)-1; |
1571 | fs_info->max_inline = 8192 * 1024; | 1573 | fs_info->max_inline = 8192 * 1024; |
@@ -2337,6 +2339,7 @@ int close_ctree(struct btrfs_root *root) | |||
2337 | free_extent_buffer(root->fs_info->csum_root->commit_root); | 2339 | free_extent_buffer(root->fs_info->csum_root->commit_root); |
2338 | 2340 | ||
2339 | btrfs_free_block_groups(root->fs_info); | 2341 | btrfs_free_block_groups(root->fs_info); |
2342 | btrfs_free_super_mirror_extents(root->fs_info); | ||
2340 | 2343 | ||
2341 | del_fs_roots(fs_info); | 2344 | del_fs_roots(fs_info); |
2342 | 2345 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 98697be6bdde..9a489cc89fd3 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/sort.h> | 22 | #include <linux/sort.h> |
23 | #include <linux/rcupdate.h> | 23 | #include <linux/rcupdate.h> |
24 | #include <linux/kthread.h> | ||
24 | #include "compat.h" | 25 | #include "compat.h" |
25 | #include "hash.h" | 26 | #include "hash.h" |
26 | #include "ctree.h" | 27 | #include "ctree.h" |
@@ -61,6 +62,13 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
61 | struct btrfs_root *extent_root, u64 alloc_bytes, | 62 | struct btrfs_root *extent_root, u64 alloc_bytes, |
62 | u64 flags, int force); | 63 | u64 flags, int force); |
63 | 64 | ||
65 | static noinline int | ||
66 | block_group_cache_done(struct btrfs_block_group_cache *cache) | ||
67 | { | ||
68 | smp_mb(); | ||
69 | return cache->cached == BTRFS_CACHE_FINISHED; | ||
70 | } | ||
71 | |||
64 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) | 72 | static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) |
65 | { | 73 | { |
66 | return (cache->flags & bits) == bits; | 74 | return (cache->flags & bits) == bits; |
@@ -145,21 +153,64 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
145 | return ret; | 153 | return ret; |
146 | } | 154 | } |
147 | 155 | ||
156 | void btrfs_free_super_mirror_extents(struct btrfs_fs_info *info) | ||
157 | { | ||
158 | u64 start, end, last = 0; | ||
159 | int ret; | ||
160 | |||
161 | while (1) { | ||
162 | ret = find_first_extent_bit(&info->pinned_extents, last, | ||
163 | &start, &end, EXTENT_LOCKED); | ||
164 | if (ret) | ||
165 | break; | ||
166 | |||
167 | unlock_extent(&info->pinned_extents, start, end, GFP_NOFS); | ||
168 | last = end+1; | ||
169 | } | ||
170 | } | ||
171 | |||
172 | static int remove_sb_from_cache(struct btrfs_root *root, | ||
173 | struct btrfs_block_group_cache *cache) | ||
174 | { | ||
175 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
176 | u64 bytenr; | ||
177 | u64 *logical; | ||
178 | int stripe_len; | ||
179 | int i, nr, ret; | ||
180 | |||
181 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
182 | bytenr = btrfs_sb_offset(i); | ||
183 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, | ||
184 | cache->key.objectid, bytenr, | ||
185 | 0, &logical, &nr, &stripe_len); | ||
186 | BUG_ON(ret); | ||
187 | while (nr--) { | ||
188 | try_lock_extent(&fs_info->pinned_extents, | ||
189 | logical[nr], | ||
190 | logical[nr] + stripe_len - 1, GFP_NOFS); | ||
191 | } | ||
192 | kfree(logical); | ||
193 | } | ||
194 | |||
195 | return 0; | ||
196 | } | ||
197 | |||
148 | /* | 198 | /* |
149 | * this is only called by cache_block_group, since we could have freed extents | 199 | * this is only called by cache_block_group, since we could have freed extents |
150 | * we need to check the pinned_extents for any extents that can't be used yet | 200 | * we need to check the pinned_extents for any extents that can't be used yet |
151 | * since their free space will be released as soon as the transaction commits. | 201 | * since their free space will be released as soon as the transaction commits. |
152 | */ | 202 | */ |
153 | static int add_new_free_space(struct btrfs_block_group_cache *block_group, | 203 | static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, |
154 | struct btrfs_fs_info *info, u64 start, u64 end) | 204 | struct btrfs_fs_info *info, u64 start, u64 end) |
155 | { | 205 | { |
156 | u64 extent_start, extent_end, size; | 206 | u64 extent_start, extent_end, size, total_added = 0; |
157 | int ret; | 207 | int ret; |
158 | 208 | ||
159 | while (start < end) { | 209 | while (start < end) { |
160 | ret = find_first_extent_bit(&info->pinned_extents, start, | 210 | ret = find_first_extent_bit(&info->pinned_extents, start, |
161 | &extent_start, &extent_end, | 211 | &extent_start, &extent_end, |
162 | EXTENT_DIRTY); | 212 | EXTENT_DIRTY|EXTENT_LOCKED| |
213 | EXTENT_DELALLOC); | ||
163 | if (ret) | 214 | if (ret) |
164 | break; | 215 | break; |
165 | 216 | ||
@@ -167,6 +218,7 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
167 | start = extent_end + 1; | 218 | start = extent_end + 1; |
168 | } else if (extent_start > start && extent_start < end) { | 219 | } else if (extent_start > start && extent_start < end) { |
169 | size = extent_start - start; | 220 | size = extent_start - start; |
221 | total_added += size; | ||
170 | ret = btrfs_add_free_space(block_group, start, | 222 | ret = btrfs_add_free_space(block_group, start, |
171 | size); | 223 | size); |
172 | BUG_ON(ret); | 224 | BUG_ON(ret); |
@@ -178,84 +230,139 @@ static int add_new_free_space(struct btrfs_block_group_cache *block_group, | |||
178 | 230 | ||
179 | if (start < end) { | 231 | if (start < end) { |
180 | size = end - start; | 232 | size = end - start; |
233 | total_added += size; | ||
181 | ret = btrfs_add_free_space(block_group, start, size); | 234 | ret = btrfs_add_free_space(block_group, start, size); |
182 | BUG_ON(ret); | 235 | BUG_ON(ret); |
183 | } | 236 | } |
184 | 237 | ||
185 | return 0; | 238 | return total_added; |
186 | } | 239 | } |
187 | 240 | ||
188 | static int remove_sb_from_cache(struct btrfs_root *root, | 241 | DEFINE_MUTEX(discard_mutex); |
189 | struct btrfs_block_group_cache *cache) | 242 | |
243 | /* | ||
244 | * if async kthreads are running when we cross transactions, we mark any pinned | ||
245 | * extents with EXTENT_DELALLOC and then let the caching kthreads clean up those | ||
246 | * extents when they are done. Also we run this from btrfs_finish_extent_commit | ||
247 | * in case there were some pinned extents that were missed because we had | ||
248 | * already cached that block group. | ||
249 | */ | ||
250 | static void btrfs_discard_pinned_extents(struct btrfs_fs_info *fs_info, | ||
251 | struct btrfs_block_group_cache *cache) | ||
190 | { | 252 | { |
191 | u64 bytenr; | 253 | u64 start, end, last; |
192 | u64 *logical; | 254 | int ret; |
193 | int stripe_len; | ||
194 | int i, nr, ret; | ||
195 | 255 | ||
196 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | 256 | if (!cache) |
197 | bytenr = btrfs_sb_offset(i); | 257 | last = 0; |
198 | ret = btrfs_rmap_block(&root->fs_info->mapping_tree, | 258 | else |
199 | cache->key.objectid, bytenr, 0, | 259 | last = cache->key.objectid; |
200 | &logical, &nr, &stripe_len); | 260 | |
201 | BUG_ON(ret); | 261 | mutex_lock(&discard_mutex); |
202 | while (nr--) { | 262 | while (1) { |
203 | btrfs_remove_free_space(cache, logical[nr], | 263 | ret = find_first_extent_bit(&fs_info->pinned_extents, last, |
204 | stripe_len); | 264 | &start, &end, EXTENT_DELALLOC); |
265 | if (ret) | ||
266 | break; | ||
267 | |||
268 | if (cache && start >= cache->key.objectid + cache->key.offset) | ||
269 | break; | ||
270 | |||
271 | |||
272 | if (!cache) { | ||
273 | cache = btrfs_lookup_block_group(fs_info, start); | ||
274 | BUG_ON(!cache); | ||
275 | |||
276 | start = max(start, cache->key.objectid); | ||
277 | end = min(end, cache->key.objectid + cache->key.offset - 1); | ||
278 | |||
279 | if (block_group_cache_done(cache)) | ||
280 | btrfs_add_free_space(cache, start, | ||
281 | end - start + 1); | ||
282 | cache = NULL; | ||
283 | } else { | ||
284 | start = max(start, cache->key.objectid); | ||
285 | end = min(end, cache->key.objectid + cache->key.offset - 1); | ||
286 | btrfs_add_free_space(cache, start, end - start + 1); | ||
287 | } | ||
288 | |||
289 | clear_extent_bits(&fs_info->pinned_extents, start, end, | ||
290 | EXTENT_DELALLOC, GFP_NOFS); | ||
291 | last = end + 1; | ||
292 | |||
293 | if (need_resched()) { | ||
294 | mutex_unlock(&discard_mutex); | ||
295 | cond_resched(); | ||
296 | mutex_lock(&discard_mutex); | ||
205 | } | 297 | } |
206 | kfree(logical); | ||
207 | } | 298 | } |
208 | return 0; | 299 | mutex_unlock(&discard_mutex); |
209 | } | 300 | } |
210 | 301 | ||
211 | static int cache_block_group(struct btrfs_root *root, | 302 | static int caching_kthread(void *data) |
212 | struct btrfs_block_group_cache *block_group) | ||
213 | { | 303 | { |
304 | struct btrfs_block_group_cache *block_group = data; | ||
305 | struct btrfs_fs_info *fs_info = block_group->fs_info; | ||
306 | u64 last = 0; | ||
214 | struct btrfs_path *path; | 307 | struct btrfs_path *path; |
215 | int ret = 0; | 308 | int ret = 0; |
216 | struct btrfs_key key; | 309 | struct btrfs_key key; |
217 | struct extent_buffer *leaf; | 310 | struct extent_buffer *leaf; |
218 | int slot; | 311 | int slot; |
219 | u64 last; | 312 | u64 total_found = 0; |
220 | |||
221 | if (!block_group) | ||
222 | return 0; | ||
223 | 313 | ||
224 | root = root->fs_info->extent_root; | 314 | BUG_ON(!fs_info); |
225 | |||
226 | if (block_group->cached) | ||
227 | return 0; | ||
228 | 315 | ||
229 | path = btrfs_alloc_path(); | 316 | path = btrfs_alloc_path(); |
230 | if (!path) | 317 | if (!path) |
231 | return -ENOMEM; | 318 | return -ENOMEM; |
232 | 319 | ||
233 | path->reada = 2; | 320 | atomic_inc(&fs_info->async_caching_threads); |
321 | atomic_inc(&block_group->space_info->caching_threads); | ||
322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | ||
323 | again: | ||
324 | /* need to make sure the commit_root doesn't disappear */ | ||
325 | down_read(&fs_info->extent_root->commit_root_sem); | ||
326 | |||
234 | /* | 327 | /* |
235 | * we get into deadlocks with paths held by callers of this function. | 328 | * We don't want to deadlock with somebody trying to allocate a new |
236 | * since the alloc_mutex is protecting things right now, just | 329 | * extent for the extent root while also trying to search the extent |
237 | * skip the locking here | 330 | * root to add free space. So we skip locking and search the commit |
331 | * root, since it's read-only | ||
238 | */ | 332 | */ |
239 | path->skip_locking = 1; | 333 | path->skip_locking = 1; |
240 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 334 | path->search_commit_root = 1; |
335 | path->reada = 2; | ||
336 | |||
241 | key.objectid = last; | 337 | key.objectid = last; |
242 | key.offset = 0; | 338 | key.offset = 0; |
243 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 339 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); |
244 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 340 | ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0); |
245 | if (ret < 0) | 341 | if (ret < 0) |
246 | goto err; | 342 | goto err; |
247 | 343 | ||
248 | while (1) { | 344 | while (1) { |
345 | smp_mb(); | ||
346 | if (block_group->fs_info->closing) | ||
347 | break; | ||
348 | |||
249 | leaf = path->nodes[0]; | 349 | leaf = path->nodes[0]; |
250 | slot = path->slots[0]; | 350 | slot = path->slots[0]; |
251 | if (slot >= btrfs_header_nritems(leaf)) { | 351 | if (slot >= btrfs_header_nritems(leaf)) { |
252 | ret = btrfs_next_leaf(root, path); | 352 | ret = btrfs_next_leaf(fs_info->extent_root, path); |
253 | if (ret < 0) | 353 | if (ret < 0) |
254 | goto err; | 354 | goto err; |
255 | if (ret == 0) | 355 | else if (ret) |
256 | continue; | ||
257 | else | ||
258 | break; | 356 | break; |
357 | |||
358 | if (need_resched()) { | ||
359 | btrfs_release_path(fs_info->extent_root, path); | ||
360 | up_read(&fs_info->extent_root->commit_root_sem); | ||
361 | cond_resched(); | ||
362 | goto again; | ||
363 | } | ||
364 | |||
365 | continue; | ||
259 | } | 366 | } |
260 | btrfs_item_key_to_cpu(leaf, &key, slot); | 367 | btrfs_item_key_to_cpu(leaf, &key, slot); |
261 | if (key.objectid < block_group->key.objectid) | 368 | if (key.objectid < block_group->key.objectid) |
@@ -266,24 +373,63 @@ static int cache_block_group(struct btrfs_root *root, | |||
266 | break; | 373 | break; |
267 | 374 | ||
268 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { | 375 | if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { |
269 | add_new_free_space(block_group, root->fs_info, last, | 376 | total_found += add_new_free_space(block_group, |
270 | key.objectid); | 377 | fs_info, last, |
271 | 378 | key.objectid); | |
272 | last = key.objectid + key.offset; | 379 | last = key.objectid + key.offset; |
273 | } | 380 | } |
381 | |||
382 | if (total_found > (1024 * 1024 * 2)) { | ||
383 | total_found = 0; | ||
384 | wake_up(&block_group->caching_q); | ||
385 | } | ||
274 | next: | 386 | next: |
275 | path->slots[0]++; | 387 | path->slots[0]++; |
276 | } | 388 | } |
389 | ret = 0; | ||
277 | 390 | ||
278 | add_new_free_space(block_group, root->fs_info, last, | 391 | total_found += add_new_free_space(block_group, fs_info, last, |
279 | block_group->key.objectid + | 392 | block_group->key.objectid + |
280 | block_group->key.offset); | 393 | block_group->key.offset); |
394 | |||
395 | spin_lock(&block_group->lock); | ||
396 | block_group->cached = BTRFS_CACHE_FINISHED; | ||
397 | spin_unlock(&block_group->lock); | ||
281 | 398 | ||
282 | block_group->cached = 1; | ||
283 | remove_sb_from_cache(root, block_group); | ||
284 | ret = 0; | ||
285 | err: | 399 | err: |
286 | btrfs_free_path(path); | 400 | btrfs_free_path(path); |
401 | up_read(&fs_info->extent_root->commit_root_sem); | ||
402 | atomic_dec(&fs_info->async_caching_threads); | ||
403 | atomic_dec(&block_group->space_info->caching_threads); | ||
404 | wake_up(&block_group->caching_q); | ||
405 | |||
406 | if (!ret) | ||
407 | btrfs_discard_pinned_extents(fs_info, block_group); | ||
408 | |||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | static int cache_block_group(struct btrfs_block_group_cache *cache) | ||
413 | { | ||
414 | struct task_struct *tsk; | ||
415 | int ret = 0; | ||
416 | |||
417 | spin_lock(&cache->lock); | ||
418 | if (cache->cached != BTRFS_CACHE_NO) { | ||
419 | spin_unlock(&cache->lock); | ||
420 | return ret; | ||
421 | } | ||
422 | cache->cached = BTRFS_CACHE_STARTED; | ||
423 | spin_unlock(&cache->lock); | ||
424 | |||
425 | tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", | ||
426 | cache->key.objectid); | ||
427 | if (IS_ERR(tsk)) { | ||
428 | ret = PTR_ERR(tsk); | ||
429 | printk(KERN_ERR "error running thread %d\n", ret); | ||
430 | BUG(); | ||
431 | } | ||
432 | |||
287 | return ret; | 433 | return ret; |
288 | } | 434 | } |
289 | 435 | ||
@@ -1721,7 +1867,7 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1721 | BUG_ON(ret); | 1867 | BUG_ON(ret); |
1722 | } | 1868 | } |
1723 | btrfs_update_pinned_extents(root, node->bytenr, | 1869 | btrfs_update_pinned_extents(root, node->bytenr, |
1724 | node->num_bytes, 1); | 1870 | node->num_bytes, 1, 0); |
1725 | update_reserved_extents(root, node->bytenr, | 1871 | update_reserved_extents(root, node->bytenr, |
1726 | node->num_bytes, 0); | 1872 | node->num_bytes, 0); |
1727 | } | 1873 | } |
@@ -2496,6 +2642,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2496 | found->force_alloc = 0; | 2642 | found->force_alloc = 0; |
2497 | *space_info = found; | 2643 | *space_info = found; |
2498 | list_add_rcu(&found->list, &info->space_info); | 2644 | list_add_rcu(&found->list, &info->space_info); |
2645 | atomic_set(&found->caching_threads, 0); | ||
2499 | return 0; | 2646 | return 0; |
2500 | } | 2647 | } |
2501 | 2648 | ||
@@ -2953,7 +3100,7 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
2953 | } | 3100 | } |
2954 | 3101 | ||
2955 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 3102 | int btrfs_update_pinned_extents(struct btrfs_root *root, |
2956 | u64 bytenr, u64 num, int pin) | 3103 | u64 bytenr, u64 num, int pin, int mark_free) |
2957 | { | 3104 | { |
2958 | u64 len; | 3105 | u64 len; |
2959 | struct btrfs_block_group_cache *cache; | 3106 | struct btrfs_block_group_cache *cache; |
@@ -2988,7 +3135,7 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2988 | spin_unlock(&cache->lock); | 3135 | spin_unlock(&cache->lock); |
2989 | spin_unlock(&cache->space_info->lock); | 3136 | spin_unlock(&cache->space_info->lock); |
2990 | fs_info->total_pinned -= len; | 3137 | fs_info->total_pinned -= len; |
2991 | if (cache->cached) | 3138 | if (block_group_cache_done(cache) && mark_free) |
2992 | btrfs_add_free_space(cache, bytenr, len); | 3139 | btrfs_add_free_space(cache, bytenr, len); |
2993 | } | 3140 | } |
2994 | btrfs_put_block_group(cache); | 3141 | btrfs_put_block_group(cache); |
@@ -3034,14 +3181,27 @@ int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) | |||
3034 | u64 last = 0; | 3181 | u64 last = 0; |
3035 | u64 start; | 3182 | u64 start; |
3036 | u64 end; | 3183 | u64 end; |
3184 | bool caching_kthreads = false; | ||
3037 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; | 3185 | struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; |
3038 | int ret; | 3186 | int ret; |
3039 | 3187 | ||
3188 | if (atomic_read(&root->fs_info->async_caching_threads)) | ||
3189 | caching_kthreads = true; | ||
3190 | |||
3040 | while (1) { | 3191 | while (1) { |
3041 | ret = find_first_extent_bit(pinned_extents, last, | 3192 | ret = find_first_extent_bit(pinned_extents, last, |
3042 | &start, &end, EXTENT_DIRTY); | 3193 | &start, &end, EXTENT_DIRTY); |
3043 | if (ret) | 3194 | if (ret) |
3044 | break; | 3195 | break; |
3196 | |||
3197 | /* | ||
3198 | * we need to make sure that the pinned extents don't go away | ||
3199 | * while we are caching block groups | ||
3200 | */ | ||
3201 | if (unlikely(caching_kthreads)) | ||
3202 | set_extent_delalloc(pinned_extents, start, end, | ||
3203 | GFP_NOFS); | ||
3204 | |||
3045 | set_extent_dirty(copy, start, end, GFP_NOFS); | 3205 | set_extent_dirty(copy, start, end, GFP_NOFS); |
3046 | last = end + 1; | 3206 | last = end + 1; |
3047 | } | 3207 | } |
@@ -3055,6 +3215,12 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3055 | u64 start; | 3215 | u64 start; |
3056 | u64 end; | 3216 | u64 end; |
3057 | int ret; | 3217 | int ret; |
3218 | int mark_free = 1; | ||
3219 | |||
3220 | ret = find_first_extent_bit(&root->fs_info->pinned_extents, 0, | ||
3221 | &start, &end, EXTENT_DELALLOC); | ||
3222 | if (!ret) | ||
3223 | mark_free = 0; | ||
3058 | 3224 | ||
3059 | while (1) { | 3225 | while (1) { |
3060 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 3226 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
@@ -3065,11 +3231,16 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3065 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 3231 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
3066 | 3232 | ||
3067 | /* unlocks the pinned mutex */ | 3233 | /* unlocks the pinned mutex */ |
3068 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | 3234 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0, |
3235 | mark_free); | ||
3069 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 3236 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
3070 | 3237 | ||
3071 | cond_resched(); | 3238 | cond_resched(); |
3072 | } | 3239 | } |
3240 | |||
3241 | if (unlikely(!mark_free)) | ||
3242 | btrfs_discard_pinned_extents(root->fs_info, NULL); | ||
3243 | |||
3073 | return ret; | 3244 | return ret; |
3074 | } | 3245 | } |
3075 | 3246 | ||
@@ -3110,7 +3281,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
3110 | pinit: | 3281 | pinit: |
3111 | btrfs_set_path_blocking(path); | 3282 | btrfs_set_path_blocking(path); |
3112 | /* unlocks the pinned mutex */ | 3283 | /* unlocks the pinned mutex */ |
3113 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3284 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0); |
3114 | 3285 | ||
3115 | BUG_ON(err < 0); | 3286 | BUG_ON(err < 0); |
3116 | return 0; | 3287 | return 0; |
@@ -3421,7 +3592,7 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3421 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { | 3592 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { |
3422 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); | 3593 | WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); |
3423 | /* unlocks the pinned mutex */ | 3594 | /* unlocks the pinned mutex */ |
3424 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 3595 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1, 0); |
3425 | update_reserved_extents(root, bytenr, num_bytes, 0); | 3596 | update_reserved_extents(root, bytenr, num_bytes, 0); |
3426 | ret = 0; | 3597 | ret = 0; |
3427 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { | 3598 | } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { |
@@ -3448,6 +3619,45 @@ static u64 stripe_align(struct btrfs_root *root, u64 val) | |||
3448 | } | 3619 | } |
3449 | 3620 | ||
3450 | /* | 3621 | /* |
3622 | * when we wait for progress in the block group caching, it's because | ||
3623 | * our allocation attempt failed at least once. So, we must sleep | ||
3624 | * and let some progress happen before we try again. | ||
3625 | * | ||
3626 | * This function will sleep at least once waiting for new free space to | ||
3627 | * show up, and then it will check the block group free space numbers | ||
3628 | * for our min num_bytes. Another option is to have it go ahead | ||
3629 | * and look in the rbtree for a free extent of a given size, but this | ||
3630 | * is a good start. | ||
3631 | */ | ||
3632 | static noinline int | ||
3633 | wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | ||
3634 | u64 num_bytes) | ||
3635 | { | ||
3636 | DEFINE_WAIT(wait); | ||
3637 | |||
3638 | prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); | ||
3639 | |||
3640 | if (block_group_cache_done(cache)) { | ||
3641 | finish_wait(&cache->caching_q, &wait); | ||
3642 | return 0; | ||
3643 | } | ||
3644 | schedule(); | ||
3645 | finish_wait(&cache->caching_q, &wait); | ||
3646 | |||
3647 | wait_event(cache->caching_q, block_group_cache_done(cache) || | ||
3648 | (cache->free_space >= num_bytes)); | ||
3649 | return 0; | ||
3650 | } | ||
3651 | |||
3652 | enum btrfs_loop_type { | ||
3653 | LOOP_CACHED_ONLY = 0, | ||
3654 | LOOP_CACHING_NOWAIT = 1, | ||
3655 | LOOP_CACHING_WAIT = 2, | ||
3656 | LOOP_ALLOC_CHUNK = 3, | ||
3657 | LOOP_NO_EMPTY_SIZE = 4, | ||
3658 | }; | ||
3659 | |||
3660 | /* | ||
3451 | * walks the btree of allocated extents and find a hole of a given size. | 3661 | * walks the btree of allocated extents and find a hole of a given size. |
3452 | * The key ins is changed to record the hole: | 3662 | * The key ins is changed to record the hole: |
3453 | * ins->objectid == block start | 3663 | * ins->objectid == block start |
@@ -3472,6 +3682,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3472 | struct btrfs_space_info *space_info; | 3682 | struct btrfs_space_info *space_info; |
3473 | int last_ptr_loop = 0; | 3683 | int last_ptr_loop = 0; |
3474 | int loop = 0; | 3684 | int loop = 0; |
3685 | bool found_uncached_bg = false; | ||
3475 | 3686 | ||
3476 | WARN_ON(num_bytes < root->sectorsize); | 3687 | WARN_ON(num_bytes < root->sectorsize); |
3477 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); | 3688 | btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); |
@@ -3503,15 +3714,18 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
3503 | search_start = max(search_start, first_logical_byte(root, 0)); | 3714 | search_start = max(search_start, first_logical_byte(root, 0)); |
3504 | search_start = max(search_start, hint_byte); | 3715 | search_start = max(search_start, hint_byte); |
3505 | 3716 | ||
3506 | if (!last_ptr) { | 3717 | if (!last_ptr) |
3507 | empty_cluster = 0; | 3718 | empty_cluster = 0; |
3508 | loop = 1; | ||
3509 | } | ||
3510 | 3719 | ||
3511 | if (search_start == hint_byte) { | 3720 | if (search_start == hint_byte) { |
3512 | block_group = btrfs_lookup_block_group(root->fs_info, | 3721 | block_group = btrfs_lookup_block_group(root->fs_info, |
3513 | search_start); | 3722 | search_start); |
3514 | if (block_group && block_group_bits(block_group, data)) { | 3723 | /* |
3724 | * we don't want to use the block group if it doesn't match our | ||
3725 | * allocation bits, or if it's not cached. | ||
3726 | */ | ||
3727 | if (block_group && block_group_bits(block_group, data) && | ||
3728 | block_group_cache_done(block_group)) { | ||
3515 | down_read(&space_info->groups_sem); | 3729 | down_read(&space_info->groups_sem); |
3516 | if (list_empty(&block_group->list) || | 3730 | if (list_empty(&block_group->list) || |
3517 | block_group->ro) { | 3731 | block_group->ro) { |
@@ -3534,21 +3748,35 @@ search: | |||
3534 | down_read(&space_info->groups_sem); | 3748 | down_read(&space_info->groups_sem); |
3535 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 3749 | list_for_each_entry(block_group, &space_info->block_groups, list) { |
3536 | u64 offset; | 3750 | u64 offset; |
3751 | int cached; | ||
3537 | 3752 | ||
3538 | atomic_inc(&block_group->count); | 3753 | atomic_inc(&block_group->count); |
3539 | search_start = block_group->key.objectid; | 3754 | search_start = block_group->key.objectid; |
3540 | 3755 | ||
3541 | have_block_group: | 3756 | have_block_group: |
3542 | if (unlikely(!block_group->cached)) { | 3757 | if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { |
3543 | mutex_lock(&block_group->cache_mutex); | 3758 | /* |
3544 | ret = cache_block_group(root, block_group); | 3759 | * we want to start caching kthreads, but not too many |
3545 | mutex_unlock(&block_group->cache_mutex); | 3760 | * right off the bat so we don't overwhelm the system, |
3546 | if (ret) { | 3761 | * so only start them if there are less than 2 and we're |
3547 | btrfs_put_block_group(block_group); | 3762 | * in the initial allocation phase. |
3548 | break; | 3763 | */ |
3764 | if (loop > LOOP_CACHING_NOWAIT || | ||
3765 | atomic_read(&space_info->caching_threads) < 2) { | ||
3766 | ret = cache_block_group(block_group); | ||
3767 | BUG_ON(ret); | ||
3549 | } | 3768 | } |
3550 | } | 3769 | } |
3551 | 3770 | ||
3771 | cached = block_group_cache_done(block_group); | ||
3772 | if (unlikely(!cached)) { | ||
3773 | found_uncached_bg = true; | ||
3774 | |||
3775 | /* if we only want cached bgs, loop */ | ||
3776 | if (loop == LOOP_CACHED_ONLY) | ||
3777 | goto loop; | ||
3778 | } | ||
3779 | |||
3552 | if (unlikely(block_group->ro)) | 3780 | if (unlikely(block_group->ro)) |
3553 | goto loop; | 3781 | goto loop; |
3554 | 3782 | ||
@@ -3627,14 +3855,21 @@ refill_cluster: | |||
3627 | spin_unlock(&last_ptr->refill_lock); | 3855 | spin_unlock(&last_ptr->refill_lock); |
3628 | goto checks; | 3856 | goto checks; |
3629 | } | 3857 | } |
3858 | } else if (!cached && loop > LOOP_CACHING_NOWAIT) { | ||
3859 | spin_unlock(&last_ptr->refill_lock); | ||
3860 | |||
3861 | wait_block_group_cache_progress(block_group, | ||
3862 | num_bytes + empty_cluster + empty_size); | ||
3863 | goto have_block_group; | ||
3630 | } | 3864 | } |
3865 | |||
3631 | /* | 3866 | /* |
3632 | * at this point we either didn't find a cluster | 3867 | * at this point we either didn't find a cluster |
3633 | * or we weren't able to allocate a block from our | 3868 | * or we weren't able to allocate a block from our |
3634 | * cluster. Free the cluster we've been trying | 3869 | * cluster. Free the cluster we've been trying |
3635 | * to use, and go to the next block group | 3870 | * to use, and go to the next block group |
3636 | */ | 3871 | */ |
3637 | if (loop < 2) { | 3872 | if (loop < LOOP_NO_EMPTY_SIZE) { |
3638 | btrfs_return_cluster_to_free_space(NULL, | 3873 | btrfs_return_cluster_to_free_space(NULL, |
3639 | last_ptr); | 3874 | last_ptr); |
3640 | spin_unlock(&last_ptr->refill_lock); | 3875 | spin_unlock(&last_ptr->refill_lock); |
@@ -3645,8 +3880,15 @@ refill_cluster: | |||
3645 | 3880 | ||
3646 | offset = btrfs_find_space_for_alloc(block_group, search_start, | 3881 | offset = btrfs_find_space_for_alloc(block_group, search_start, |
3647 | num_bytes, empty_size); | 3882 | num_bytes, empty_size); |
3648 | if (!offset) | 3883 | if (!offset && (cached || (!cached && |
3884 | loop == LOOP_CACHING_NOWAIT))) { | ||
3649 | goto loop; | 3885 | goto loop; |
3886 | } else if (!offset && (!cached && | ||
3887 | loop > LOOP_CACHING_NOWAIT)) { | ||
3888 | wait_block_group_cache_progress(block_group, | ||
3889 | num_bytes + empty_size); | ||
3890 | goto have_block_group; | ||
3891 | } | ||
3650 | checks: | 3892 | checks: |
3651 | search_start = stripe_align(root, offset); | 3893 | search_start = stripe_align(root, offset); |
3652 | /* move on to the next group */ | 3894 | /* move on to the next group */ |
@@ -3694,13 +3936,26 @@ loop: | |||
3694 | } | 3936 | } |
3695 | up_read(&space_info->groups_sem); | 3937 | up_read(&space_info->groups_sem); |
3696 | 3938 | ||
3697 | /* loop == 0, try to find a clustered alloc in every block group | 3939 | /* LOOP_CACHED_ONLY, only search fully cached block groups |
3698 | * loop == 1, try again after forcing a chunk allocation | 3940 | * LOOP_CACHING_NOWAIT, search partially cached block groups, but |
3699 | * loop == 2, set empty_size and empty_cluster to 0 and try again | 3941 | * dont wait for them to finish caching |
3942 | * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching | ||
3943 | * LOOP_ALLOC_CHUNK, force a chunk allocation and try again | ||
3944 | * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try | ||
3945 | * again | ||
3700 | */ | 3946 | */ |
3701 | if (!ins->objectid && loop < 3 && | 3947 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
3702 | (empty_size || empty_cluster || allowed_chunk_alloc)) { | 3948 | (found_uncached_bg || empty_size || empty_cluster || |
3703 | if (loop >= 2) { | 3949 | allowed_chunk_alloc)) { |
3950 | if (found_uncached_bg) { | ||
3951 | found_uncached_bg = false; | ||
3952 | if (loop < LOOP_CACHING_WAIT) { | ||
3953 | loop++; | ||
3954 | goto search; | ||
3955 | } | ||
3956 | } | ||
3957 | |||
3958 | if (loop == LOOP_ALLOC_CHUNK) { | ||
3704 | empty_size = 0; | 3959 | empty_size = 0; |
3705 | empty_cluster = 0; | 3960 | empty_cluster = 0; |
3706 | } | 3961 | } |
@@ -3713,7 +3968,7 @@ loop: | |||
3713 | space_info->force_alloc = 1; | 3968 | space_info->force_alloc = 1; |
3714 | } | 3969 | } |
3715 | 3970 | ||
3716 | if (loop < 3) { | 3971 | if (loop < LOOP_NO_EMPTY_SIZE) { |
3717 | loop++; | 3972 | loop++; |
3718 | goto search; | 3973 | goto search; |
3719 | } | 3974 | } |
@@ -3809,7 +4064,7 @@ again: | |||
3809 | num_bytes, data, 1); | 4064 | num_bytes, data, 1); |
3810 | goto again; | 4065 | goto again; |
3811 | } | 4066 | } |
3812 | if (ret) { | 4067 | if (ret == -ENOSPC) { |
3813 | struct btrfs_space_info *sinfo; | 4068 | struct btrfs_space_info *sinfo; |
3814 | 4069 | ||
3815 | sinfo = __find_space_info(root->fs_info, data); | 4070 | sinfo = __find_space_info(root->fs_info, data); |
@@ -3817,7 +4072,6 @@ again: | |||
3817 | "wanted %llu\n", (unsigned long long)data, | 4072 | "wanted %llu\n", (unsigned long long)data, |
3818 | (unsigned long long)num_bytes); | 4073 | (unsigned long long)num_bytes); |
3819 | dump_space_info(sinfo, num_bytes); | 4074 | dump_space_info(sinfo, num_bytes); |
3820 | BUG(); | ||
3821 | } | 4075 | } |
3822 | 4076 | ||
3823 | return ret; | 4077 | return ret; |
@@ -3855,7 +4109,9 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3855 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | 4109 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, |
3856 | empty_size, hint_byte, search_end, ins, | 4110 | empty_size, hint_byte, search_end, ins, |
3857 | data); | 4111 | data); |
3858 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | 4112 | if (!ret) |
4113 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
4114 | |||
3859 | return ret; | 4115 | return ret; |
3860 | } | 4116 | } |
3861 | 4117 | ||
@@ -4017,9 +4273,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4017 | struct btrfs_block_group_cache *block_group; | 4273 | struct btrfs_block_group_cache *block_group; |
4018 | 4274 | ||
4019 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 4275 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
4020 | mutex_lock(&block_group->cache_mutex); | 4276 | cache_block_group(block_group); |
4021 | cache_block_group(root, block_group); | 4277 | wait_event(block_group->caching_q, |
4022 | mutex_unlock(&block_group->cache_mutex); | 4278 | block_group_cache_done(block_group)); |
4023 | 4279 | ||
4024 | ret = btrfs_remove_free_space(block_group, ins->objectid, | 4280 | ret = btrfs_remove_free_space(block_group, ins->objectid, |
4025 | ins->offset); | 4281 | ins->offset); |
@@ -4050,7 +4306,8 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans, | |||
4050 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | 4306 | ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, |
4051 | empty_size, hint_byte, search_end, | 4307 | empty_size, hint_byte, search_end, |
4052 | ins, 0); | 4308 | ins, 0); |
4053 | BUG_ON(ret); | 4309 | if (ret) |
4310 | return ret; | ||
4054 | 4311 | ||
4055 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | 4312 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { |
4056 | if (parent == 0) | 4313 | if (parent == 0) |
@@ -6966,11 +7223,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
6966 | &info->block_group_cache_tree); | 7223 | &info->block_group_cache_tree); |
6967 | spin_unlock(&info->block_group_cache_lock); | 7224 | spin_unlock(&info->block_group_cache_lock); |
6968 | 7225 | ||
6969 | btrfs_remove_free_space_cache(block_group); | ||
6970 | down_write(&block_group->space_info->groups_sem); | 7226 | down_write(&block_group->space_info->groups_sem); |
6971 | list_del(&block_group->list); | 7227 | list_del(&block_group->list); |
6972 | up_write(&block_group->space_info->groups_sem); | 7228 | up_write(&block_group->space_info->groups_sem); |
6973 | 7229 | ||
7230 | if (block_group->cached == BTRFS_CACHE_STARTED) | ||
7231 | wait_event(block_group->caching_q, | ||
7232 | block_group_cache_done(block_group)); | ||
7233 | |||
7234 | btrfs_remove_free_space_cache(block_group); | ||
7235 | |||
6974 | WARN_ON(atomic_read(&block_group->count) != 1); | 7236 | WARN_ON(atomic_read(&block_group->count) != 1); |
6975 | kfree(block_group); | 7237 | kfree(block_group); |
6976 | 7238 | ||
@@ -7036,10 +7298,10 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7036 | atomic_set(&cache->count, 1); | 7298 | atomic_set(&cache->count, 1); |
7037 | spin_lock_init(&cache->lock); | 7299 | spin_lock_init(&cache->lock); |
7038 | spin_lock_init(&cache->tree_lock); | 7300 | spin_lock_init(&cache->tree_lock); |
7039 | mutex_init(&cache->cache_mutex); | 7301 | cache->fs_info = info; |
7302 | init_waitqueue_head(&cache->caching_q); | ||
7040 | INIT_LIST_HEAD(&cache->list); | 7303 | INIT_LIST_HEAD(&cache->list); |
7041 | INIT_LIST_HEAD(&cache->cluster_list); | 7304 | INIT_LIST_HEAD(&cache->cluster_list); |
7042 | cache->sectorsize = root->sectorsize; | ||
7043 | 7305 | ||
7044 | /* | 7306 | /* |
7045 | * we only want to have 32k of ram per block group for keeping | 7307 | * we only want to have 32k of ram per block group for keeping |
@@ -7057,6 +7319,26 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7057 | key.objectid = found_key.objectid + found_key.offset; | 7319 | key.objectid = found_key.objectid + found_key.offset; |
7058 | btrfs_release_path(root, path); | 7320 | btrfs_release_path(root, path); |
7059 | cache->flags = btrfs_block_group_flags(&cache->item); | 7321 | cache->flags = btrfs_block_group_flags(&cache->item); |
7322 | cache->sectorsize = root->sectorsize; | ||
7323 | |||
7324 | remove_sb_from_cache(root, cache); | ||
7325 | |||
7326 | /* | ||
7327 | * check for two cases, either we are full, and therefore | ||
7328 | * don't need to bother with the caching work since we won't | ||
7329 | * find any space, or we are empty, and we can just add all | ||
7330 | * the space in and be done with it. This saves us _alot_ of | ||
7331 | * time, particularly in the full case. | ||
7332 | */ | ||
7333 | if (found_key.offset == btrfs_block_group_used(&cache->item)) { | ||
7334 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7335 | } else if (btrfs_block_group_used(&cache->item) == 0) { | ||
7336 | cache->cached = BTRFS_CACHE_FINISHED; | ||
7337 | add_new_free_space(cache, root->fs_info, | ||
7338 | found_key.objectid, | ||
7339 | found_key.objectid + | ||
7340 | found_key.offset); | ||
7341 | } | ||
7060 | 7342 | ||
7061 | ret = update_space_info(info, cache->flags, found_key.offset, | 7343 | ret = update_space_info(info, cache->flags, found_key.offset, |
7062 | btrfs_block_group_used(&cache->item), | 7344 | btrfs_block_group_used(&cache->item), |
@@ -7112,7 +7394,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7112 | atomic_set(&cache->count, 1); | 7394 | atomic_set(&cache->count, 1); |
7113 | spin_lock_init(&cache->lock); | 7395 | spin_lock_init(&cache->lock); |
7114 | spin_lock_init(&cache->tree_lock); | 7396 | spin_lock_init(&cache->tree_lock); |
7115 | mutex_init(&cache->cache_mutex); | 7397 | init_waitqueue_head(&cache->caching_q); |
7116 | INIT_LIST_HEAD(&cache->list); | 7398 | INIT_LIST_HEAD(&cache->list); |
7117 | INIT_LIST_HEAD(&cache->cluster_list); | 7399 | INIT_LIST_HEAD(&cache->cluster_list); |
7118 | 7400 | ||
@@ -7121,11 +7403,12 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7121 | cache->flags = type; | 7403 | cache->flags = type; |
7122 | btrfs_set_block_group_flags(&cache->item, type); | 7404 | btrfs_set_block_group_flags(&cache->item, type); |
7123 | 7405 | ||
7124 | cache->cached = 1; | 7406 | cache->cached = BTRFS_CACHE_FINISHED; |
7125 | ret = btrfs_add_free_space(cache, chunk_offset, size); | ||
7126 | BUG_ON(ret); | ||
7127 | remove_sb_from_cache(root, cache); | 7407 | remove_sb_from_cache(root, cache); |
7128 | 7408 | ||
7409 | add_new_free_space(cache, root->fs_info, chunk_offset, | ||
7410 | chunk_offset + size); | ||
7411 | |||
7129 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, | 7412 | ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, |
7130 | &cache->space_info); | 7413 | &cache->space_info); |
7131 | BUG_ON(ret); | 7414 | BUG_ON(ret); |
@@ -7184,7 +7467,7 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7184 | rb_erase(&block_group->cache_node, | 7467 | rb_erase(&block_group->cache_node, |
7185 | &root->fs_info->block_group_cache_tree); | 7468 | &root->fs_info->block_group_cache_tree); |
7186 | spin_unlock(&root->fs_info->block_group_cache_lock); | 7469 | spin_unlock(&root->fs_info->block_group_cache_lock); |
7187 | btrfs_remove_free_space_cache(block_group); | 7470 | |
7188 | down_write(&block_group->space_info->groups_sem); | 7471 | down_write(&block_group->space_info->groups_sem); |
7189 | /* | 7472 | /* |
7190 | * we must use list_del_init so people can check to see if they | 7473 | * we must use list_del_init so people can check to see if they |
@@ -7193,6 +7476,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
7193 | list_del_init(&block_group->list); | 7476 | list_del_init(&block_group->list); |
7194 | up_write(&block_group->space_info->groups_sem); | 7477 | up_write(&block_group->space_info->groups_sem); |
7195 | 7478 | ||
7479 | if (block_group->cached == BTRFS_CACHE_STARTED) | ||
7480 | wait_event(block_group->caching_q, | ||
7481 | block_group_cache_done(block_group)); | ||
7482 | |||
7483 | btrfs_remove_free_space_cache(block_group); | ||
7484 | |||
7196 | spin_lock(&block_group->space_info->lock); | 7485 | spin_lock(&block_group->space_info->lock); |
7197 | block_group->space_info->total_bytes -= block_group->key.offset; | 7486 | block_group->space_info->total_bytes -= block_group->key.offset; |
7198 | block_group->space_info->bytes_readonly -= block_group->key.offset; | 7487 | block_group->space_info->bytes_readonly -= block_group->key.offset; |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index ab8cad8b46c9..af99b78b288e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -238,6 +238,7 @@ static void unlink_free_space(struct btrfs_block_group_cache *block_group, | |||
238 | { | 238 | { |
239 | rb_erase(&info->offset_index, &block_group->free_space_offset); | 239 | rb_erase(&info->offset_index, &block_group->free_space_offset); |
240 | block_group->free_extents--; | 240 | block_group->free_extents--; |
241 | block_group->free_space -= info->bytes; | ||
241 | } | 242 | } |
242 | 243 | ||
243 | static int link_free_space(struct btrfs_block_group_cache *block_group, | 244 | static int link_free_space(struct btrfs_block_group_cache *block_group, |
@@ -251,6 +252,7 @@ static int link_free_space(struct btrfs_block_group_cache *block_group, | |||
251 | if (ret) | 252 | if (ret) |
252 | return ret; | 253 | return ret; |
253 | 254 | ||
255 | block_group->free_space += info->bytes; | ||
254 | block_group->free_extents++; | 256 | block_group->free_extents++; |
255 | return ret; | 257 | return ret; |
256 | } | 258 | } |
@@ -285,36 +287,40 @@ static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) | |||
285 | } | 287 | } |
286 | } | 288 | } |
287 | 289 | ||
288 | static void bitmap_clear_bits(struct btrfs_free_space *info, u64 offset, u64 bytes, | 290 | static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, |
289 | u64 sectorsize) | 291 | struct btrfs_free_space *info, u64 offset, |
292 | u64 bytes) | ||
290 | { | 293 | { |
291 | unsigned long start, end; | 294 | unsigned long start, end; |
292 | unsigned long i; | 295 | unsigned long i; |
293 | 296 | ||
294 | start = offset_to_bit(info->offset, sectorsize, offset); | 297 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); |
295 | end = start + bytes_to_bits(bytes, sectorsize); | 298 | end = start + bytes_to_bits(bytes, block_group->sectorsize); |
296 | BUG_ON(end > BITS_PER_BITMAP); | 299 | BUG_ON(end > BITS_PER_BITMAP); |
297 | 300 | ||
298 | for (i = start; i < end; i++) | 301 | for (i = start; i < end; i++) |
299 | clear_bit(i, info->bitmap); | 302 | clear_bit(i, info->bitmap); |
300 | 303 | ||
301 | info->bytes -= bytes; | 304 | info->bytes -= bytes; |
305 | block_group->free_space -= bytes; | ||
302 | } | 306 | } |
303 | 307 | ||
304 | static void bitmap_set_bits(struct btrfs_free_space *info, u64 offset, u64 bytes, | 308 | static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, |
305 | u64 sectorsize) | 309 | struct btrfs_free_space *info, u64 offset, |
310 | u64 bytes) | ||
306 | { | 311 | { |
307 | unsigned long start, end; | 312 | unsigned long start, end; |
308 | unsigned long i; | 313 | unsigned long i; |
309 | 314 | ||
310 | start = offset_to_bit(info->offset, sectorsize, offset); | 315 | start = offset_to_bit(info->offset, block_group->sectorsize, offset); |
311 | end = start + bytes_to_bits(bytes, sectorsize); | 316 | end = start + bytes_to_bits(bytes, block_group->sectorsize); |
312 | BUG_ON(end > BITS_PER_BITMAP); | 317 | BUG_ON(end > BITS_PER_BITMAP); |
313 | 318 | ||
314 | for (i = start; i < end; i++) | 319 | for (i = start; i < end; i++) |
315 | set_bit(i, info->bitmap); | 320 | set_bit(i, info->bitmap); |
316 | 321 | ||
317 | info->bytes += bytes; | 322 | info->bytes += bytes; |
323 | block_group->free_space += bytes; | ||
318 | } | 324 | } |
319 | 325 | ||
320 | static int search_bitmap(struct btrfs_block_group_cache *block_group, | 326 | static int search_bitmap(struct btrfs_block_group_cache *block_group, |
@@ -414,13 +420,12 @@ again: | |||
414 | (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; | 420 | (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; |
415 | 421 | ||
416 | if (*offset > bitmap_info->offset && *offset + *bytes > end) { | 422 | if (*offset > bitmap_info->offset && *offset + *bytes > end) { |
417 | bitmap_clear_bits(bitmap_info, *offset, | 423 | bitmap_clear_bits(block_group, bitmap_info, *offset, |
418 | end - *offset + 1, block_group->sectorsize); | 424 | end - *offset + 1); |
419 | *bytes -= end - *offset + 1; | 425 | *bytes -= end - *offset + 1; |
420 | *offset = end + 1; | 426 | *offset = end + 1; |
421 | } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { | 427 | } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { |
422 | bitmap_clear_bits(bitmap_info, *offset, | 428 | bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); |
423 | *bytes, block_group->sectorsize); | ||
424 | *bytes = 0; | 429 | *bytes = 0; |
425 | } | 430 | } |
426 | 431 | ||
@@ -495,14 +500,13 @@ again: | |||
495 | (u64)(BITS_PER_BITMAP * block_group->sectorsize); | 500 | (u64)(BITS_PER_BITMAP * block_group->sectorsize); |
496 | 501 | ||
497 | if (offset >= bitmap_info->offset && offset + bytes > end) { | 502 | if (offset >= bitmap_info->offset && offset + bytes > end) { |
498 | bitmap_set_bits(bitmap_info, offset, end - offset, | 503 | bitmap_set_bits(block_group, bitmap_info, offset, |
499 | block_group->sectorsize); | 504 | end - offset); |
500 | bytes -= end - offset; | 505 | bytes -= end - offset; |
501 | offset = end; | 506 | offset = end; |
502 | added = 0; | 507 | added = 0; |
503 | } else if (offset >= bitmap_info->offset && offset + bytes <= end) { | 508 | } else if (offset >= bitmap_info->offset && offset + bytes <= end) { |
504 | bitmap_set_bits(bitmap_info, offset, bytes, | 509 | bitmap_set_bits(block_group, bitmap_info, offset, bytes); |
505 | block_group->sectorsize); | ||
506 | bytes = 0; | 510 | bytes = 0; |
507 | } else { | 511 | } else { |
508 | BUG(); | 512 | BUG(); |
@@ -870,8 +874,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | |||
870 | 874 | ||
871 | ret = offset; | 875 | ret = offset; |
872 | if (entry->bitmap) { | 876 | if (entry->bitmap) { |
873 | bitmap_clear_bits(entry, offset, bytes, | 877 | bitmap_clear_bits(block_group, entry, offset, bytes); |
874 | block_group->sectorsize); | ||
875 | if (!entry->bytes) { | 878 | if (!entry->bytes) { |
876 | unlink_free_space(block_group, entry); | 879 | unlink_free_space(block_group, entry); |
877 | kfree(entry->bitmap); | 880 | kfree(entry->bitmap); |
@@ -891,6 +894,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, | |||
891 | 894 | ||
892 | out: | 895 | out: |
893 | spin_unlock(&block_group->tree_lock); | 896 | spin_unlock(&block_group->tree_lock); |
897 | |||
894 | return ret; | 898 | return ret; |
895 | } | 899 | } |
896 | 900 | ||
@@ -967,7 +971,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, | |||
967 | goto out; | 971 | goto out; |
968 | 972 | ||
969 | ret = search_start; | 973 | ret = search_start; |
970 | bitmap_clear_bits(entry, ret, bytes, block_group->sectorsize); | 974 | bitmap_clear_bits(block_group, entry, ret, bytes); |
971 | out: | 975 | out: |
972 | spin_unlock(&cluster->lock); | 976 | spin_unlock(&cluster->lock); |
973 | spin_unlock(&block_group->tree_lock); | 977 | spin_unlock(&block_group->tree_lock); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 81f7124c3051..32454d1c566f 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -40,6 +40,14 @@ static noinline void put_transaction(struct btrfs_transaction *transaction) | |||
40 | } | 40 | } |
41 | } | 41 | } |
42 | 42 | ||
43 | static noinline void switch_commit_root(struct btrfs_root *root) | ||
44 | { | ||
45 | down_write(&root->commit_root_sem); | ||
46 | free_extent_buffer(root->commit_root); | ||
47 | root->commit_root = btrfs_root_node(root); | ||
48 | up_write(&root->commit_root_sem); | ||
49 | } | ||
50 | |||
43 | /* | 51 | /* |
44 | * either allocate a new transaction or hop into the existing one | 52 | * either allocate a new transaction or hop into the existing one |
45 | */ | 53 | */ |
@@ -458,8 +466,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
458 | ret = btrfs_write_dirty_block_groups(trans, root); | 466 | ret = btrfs_write_dirty_block_groups(trans, root); |
459 | BUG_ON(ret); | 467 | BUG_ON(ret); |
460 | } | 468 | } |
461 | free_extent_buffer(root->commit_root); | 469 | switch_commit_root(root); |
462 | root->commit_root = btrfs_root_node(root); | ||
463 | return 0; | 470 | return 0; |
464 | } | 471 | } |
465 | 472 | ||
@@ -537,8 +544,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
537 | btrfs_update_reloc_root(trans, root); | 544 | btrfs_update_reloc_root(trans, root); |
538 | 545 | ||
539 | if (root->commit_root != root->node) { | 546 | if (root->commit_root != root->node) { |
540 | free_extent_buffer(root->commit_root); | 547 | switch_commit_root(root); |
541 | root->commit_root = btrfs_root_node(root); | ||
542 | btrfs_set_root_node(&root->root_item, | 548 | btrfs_set_root_node(&root->root_item, |
543 | root->node); | 549 | root->node); |
544 | } | 550 | } |
@@ -1002,15 +1008,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1002 | 1008 | ||
1003 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, | 1009 | btrfs_set_root_node(&root->fs_info->tree_root->root_item, |
1004 | root->fs_info->tree_root->node); | 1010 | root->fs_info->tree_root->node); |
1005 | free_extent_buffer(root->fs_info->tree_root->commit_root); | 1011 | switch_commit_root(root->fs_info->tree_root); |
1006 | root->fs_info->tree_root->commit_root = | ||
1007 | btrfs_root_node(root->fs_info->tree_root); | ||
1008 | 1012 | ||
1009 | btrfs_set_root_node(&root->fs_info->chunk_root->root_item, | 1013 | btrfs_set_root_node(&root->fs_info->chunk_root->root_item, |
1010 | root->fs_info->chunk_root->node); | 1014 | root->fs_info->chunk_root->node); |
1011 | free_extent_buffer(root->fs_info->chunk_root->commit_root); | 1015 | switch_commit_root(root->fs_info->chunk_root); |
1012 | root->fs_info->chunk_root->commit_root = | ||
1013 | btrfs_root_node(root->fs_info->chunk_root); | ||
1014 | 1016 | ||
1015 | update_super_roots(root); | 1017 | update_super_roots(root); |
1016 | 1018 | ||
@@ -1050,6 +1052,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1050 | cur_trans->commit_done = 1; | 1052 | cur_trans->commit_done = 1; |
1051 | 1053 | ||
1052 | root->fs_info->last_trans_committed = cur_trans->transid; | 1054 | root->fs_info->last_trans_committed = cur_trans->transid; |
1055 | |||
1053 | wake_up(&cur_trans->commit_wait); | 1056 | wake_up(&cur_trans->commit_wait); |
1054 | 1057 | ||
1055 | put_transaction(cur_trans); | 1058 | put_transaction(cur_trans); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c13922206d1b..195606862618 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -264,7 +264,7 @@ static int process_one_buffer(struct btrfs_root *log, | |||
264 | { | 264 | { |
265 | if (wc->pin) | 265 | if (wc->pin) |
266 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 266 | btrfs_update_pinned_extents(log->fs_info->extent_root, |
267 | eb->start, eb->len, 1); | 267 | eb->start, eb->len, 1, 0); |
268 | 268 | ||
269 | if (btrfs_buffer_uptodate(eb, gen)) { | 269 | if (btrfs_buffer_uptodate(eb, gen)) { |
270 | if (wc->write) | 270 | if (wc->write) |