aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2010-07-02 12:14:14 -0400
committerChris Mason <chris.mason@oracle.com>2010-10-29 09:26:29 -0400
commit0cb59c9953171e9adf6da8142a5c85ceb77bb60d (patch)
treef72af47fa18815491814290a1b4907082bd9316d /fs/btrfs
parent0af3d00bad38d3bb9912a60928ad0669f17bdb76 (diff)
Btrfs: write out free space cache
This is a simple bit, just dump the free space cache out to our preallocated inode when we're writing out dirty block groups. There are a bunch of changes in inode.c in order to account for special cases. Mostly when we're doing the writeout we're holding trans_mutex, so we need to use the nolock transaction functions. Also we can't do asynchronous completions since the async thread could be blocked on already completed IO waiting for the transaction lock. This has been tested with xfstests and btrfs filesystem balance, as well as my ENOSPC tests. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.h1
-rw-r--r--fs/btrfs/disk-io.c17
-rw-r--r--fs/btrfs/extent-tree.c48
-rw-r--r--fs/btrfs/free-space-cache.c302
-rw-r--r--fs/btrfs/free-space-cache.h5
-rw-r--r--fs/btrfs/inode.c60
6 files changed, 420 insertions, 13 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 46f52e1beade..2c06b37cda75 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -982,6 +982,7 @@ struct btrfs_fs_info {
982 struct btrfs_workers endio_meta_workers; 982 struct btrfs_workers endio_meta_workers;
983 struct btrfs_workers endio_meta_write_workers; 983 struct btrfs_workers endio_meta_write_workers;
984 struct btrfs_workers endio_write_workers; 984 struct btrfs_workers endio_write_workers;
985 struct btrfs_workers endio_freespace_worker;
985 struct btrfs_workers submit_workers; 986 struct btrfs_workers submit_workers;
986 /* 987 /*
987 * fixup workers take dirty pages that didn't properly go through 988 * fixup workers take dirty pages that didn't properly go through
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 45cf64fc1e3e..77e5dabfd45a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -481,9 +481,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
481 end_io_wq->work.flags = 0; 481 end_io_wq->work.flags = 0;
482 482
483 if (bio->bi_rw & REQ_WRITE) { 483 if (bio->bi_rw & REQ_WRITE) {
484 if (end_io_wq->metadata) 484 if (end_io_wq->metadata == 1)
485 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 485 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
486 &end_io_wq->work); 486 &end_io_wq->work);
487 else if (end_io_wq->metadata == 2)
488 btrfs_queue_worker(&fs_info->endio_freespace_worker,
489 &end_io_wq->work);
487 else 490 else
488 btrfs_queue_worker(&fs_info->endio_write_workers, 491 btrfs_queue_worker(&fs_info->endio_write_workers,
489 &end_io_wq->work); 492 &end_io_wq->work);
@@ -497,6 +500,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
497 } 500 }
498} 501}
499 502
503/*
504 * For the metadata arg you want
505 *
506 * 0 - if data
507 * 1 - if normal metadata
508 * 2 - if writing to the free space cache area
509 */
500int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 510int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
501 int metadata) 511 int metadata)
502{ 512{
@@ -1774,6 +1784,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1774 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1784 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
1775 fs_info->thread_pool_size, 1785 fs_info->thread_pool_size,
1776 &fs_info->generic_worker); 1786 &fs_info->generic_worker);
1787 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1788 1, &fs_info->generic_worker);
1777 1789
1778 /* 1790 /*
1779 * endios are largely parallel and should have a very 1791 * endios are largely parallel and should have a very
@@ -1794,6 +1806,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1794 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1806 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1795 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1807 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1796 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1808 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1809 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1797 1810
1798 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1811 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1799 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1812 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2035,6 +2048,7 @@ fail_sb_buffer:
2035 btrfs_stop_workers(&fs_info->endio_meta_workers); 2048 btrfs_stop_workers(&fs_info->endio_meta_workers);
2036 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2049 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2037 btrfs_stop_workers(&fs_info->endio_write_workers); 2050 btrfs_stop_workers(&fs_info->endio_write_workers);
2051 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2038 btrfs_stop_workers(&fs_info->submit_workers); 2052 btrfs_stop_workers(&fs_info->submit_workers);
2039fail_iput: 2053fail_iput:
2040 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2054 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2468,6 +2482,7 @@ int close_ctree(struct btrfs_root *root)
2468 btrfs_stop_workers(&fs_info->endio_meta_workers); 2482 btrfs_stop_workers(&fs_info->endio_meta_workers);
2469 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2483 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2470 btrfs_stop_workers(&fs_info->endio_write_workers); 2484 btrfs_stop_workers(&fs_info->endio_write_workers);
2485 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2471 btrfs_stop_workers(&fs_info->submit_workers); 2486 btrfs_stop_workers(&fs_info->submit_workers);
2472 2487
2473 btrfs_close_devices(fs_info->fs_devices); 2488 btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aab40fb3faed..d5455a2bf60b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2847,6 +2847,8 @@ again:
2847 continue; 2847 continue;
2848 } 2848 }
2849 2849
2850 if (cache->disk_cache_state == BTRFS_DC_SETUP)
2851 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
2850 cache->dirty = 0; 2852 cache->dirty = 0;
2851 last = cache->key.objectid + cache->key.offset; 2853 last = cache->key.objectid + cache->key.offset;
2852 2854
@@ -2855,6 +2857,52 @@ again:
2855 btrfs_put_block_group(cache); 2857 btrfs_put_block_group(cache);
2856 } 2858 }
2857 2859
2860 while (1) {
2861 /*
2862 * I don't think this is needed since we're just marking our
2863 * preallocated extent as written, but just in case it can't
2864 * hurt.
2865 */
2866 if (last == 0) {
2867 err = btrfs_run_delayed_refs(trans, root,
2868 (unsigned long)-1);
2869 BUG_ON(err);
2870 }
2871
2872 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2873 while (cache) {
2874 /*
2875 * Really this shouldn't happen, but it could if we
2876 * couldn't write the entire preallocated extent and
2877 * splitting the extent resulted in a new block.
2878 */
2879 if (cache->dirty) {
2880 btrfs_put_block_group(cache);
2881 goto again;
2882 }
2883 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2884 break;
2885 cache = next_block_group(root, cache);
2886 }
2887 if (!cache) {
2888 if (last == 0)
2889 break;
2890 last = 0;
2891 continue;
2892 }
2893
2894 btrfs_write_out_cache(root, trans, cache, path);
2895
2896 /*
2897 * If we didn't have an error then the cache state is still
2898 * NEED_WRITE, so we can set it to WRITTEN.
2899 */
2900 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2901 cache->disk_cache_state = BTRFS_DC_WRITTEN;
2902 last = cache->key.objectid + cache->key.offset;
2903 btrfs_put_block_group(cache);
2904 }
2905
2858 btrfs_free_path(path); 2906 btrfs_free_path(path);
2859 return 0; 2907 return 0;
2860} 2908}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 05efcc7061a7..7f972e59cc04 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -28,6 +28,11 @@
28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
30 30
31static void recalculate_thresholds(struct btrfs_block_group_cache
32 *block_group);
33static int link_free_space(struct btrfs_block_group_cache *block_group,
34 struct btrfs_free_space *info);
35
31struct inode *lookup_free_space_inode(struct btrfs_root *root, 36struct inode *lookup_free_space_inode(struct btrfs_root *root,
32 struct btrfs_block_group_cache 37 struct btrfs_block_group_cache
33 *block_group, struct btrfs_path *path) 38 *block_group, struct btrfs_path *path)
@@ -182,6 +187,303 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
182 return btrfs_update_inode(trans, root, inode); 187 return btrfs_update_inode(trans, root, inode);
183} 188}
184 189
190int btrfs_write_out_cache(struct btrfs_root *root,
191 struct btrfs_trans_handle *trans,
192 struct btrfs_block_group_cache *block_group,
193 struct btrfs_path *path)
194{
195 struct btrfs_free_space_header *header;
196 struct extent_buffer *leaf;
197 struct inode *inode;
198 struct rb_node *node;
199 struct list_head *pos, *n;
200 struct page *page;
201 struct extent_state *cached_state = NULL;
202 struct list_head bitmap_list;
203 struct btrfs_key key;
204 u64 bytes = 0;
205 u32 *crc, *checksums;
206 pgoff_t index = 0, last_index = 0;
207 unsigned long first_page_offset;
208 int num_checksums;
209 int entries = 0;
210 int bitmaps = 0;
211 int ret = 0;
212
213 root = root->fs_info->tree_root;
214
215 INIT_LIST_HEAD(&bitmap_list);
216
217 spin_lock(&block_group->lock);
218 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
219 spin_unlock(&block_group->lock);
220 return 0;
221 }
222 spin_unlock(&block_group->lock);
223
224 inode = lookup_free_space_inode(root, block_group, path);
225 if (IS_ERR(inode))
226 return 0;
227
228 if (!i_size_read(inode)) {
229 iput(inode);
230 return 0;
231 }
232
233 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
234 filemap_write_and_wait(inode->i_mapping);
235 btrfs_wait_ordered_range(inode, inode->i_size &
236 ~(root->sectorsize - 1), (u64)-1);
237
238 /* We need a checksum per page. */
239 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
240 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
241 if (!crc) {
242 iput(inode);
243 return 0;
244 }
245
246 /* Since the first page has all of our checksums and our generation we
247 * need to calculate the offset into the page that we can start writing
248 * our entries.
249 */
250 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
251
252 node = rb_first(&block_group->free_space_offset);
253 if (!node)
254 goto out_free;
255
256 /*
257 * Lock all pages first so we can lock the extent safely.
258 *
259 * NOTE: Because we hold the ref the entire time we're going to write to
260 * the page find_get_page should never fail, so we don't do a check
261 * after find_get_page at this point. Just putting this here so people
262 * know and don't freak out.
263 */
264 while (index <= last_index) {
265 page = grab_cache_page(inode->i_mapping, index);
266 if (!page) {
267 pgoff_t i = 0;
268
269 while (i < index) {
270 page = find_get_page(inode->i_mapping, i);
271 unlock_page(page);
272 page_cache_release(page);
273 page_cache_release(page);
274 i++;
275 }
276 goto out_free;
277 }
278 index++;
279 }
280
281 index = 0;
282 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
283 0, &cached_state, GFP_NOFS);
284
285 /* Write out the extent entries */
286 do {
287 struct btrfs_free_space_entry *entry;
288 void *addr;
289 unsigned long offset = 0;
290 unsigned long start_offset = 0;
291
292 if (index == 0) {
293 start_offset = first_page_offset;
294 offset = start_offset;
295 }
296
297 page = find_get_page(inode->i_mapping, index);
298
299 addr = kmap(page);
300 entry = addr + start_offset;
301
302 memset(addr, 0, PAGE_CACHE_SIZE);
303 while (1) {
304 struct btrfs_free_space *e;
305
306 e = rb_entry(node, struct btrfs_free_space, offset_index);
307 entries++;
308
309 entry->offset = cpu_to_le64(e->offset);
310 entry->bytes = cpu_to_le64(e->bytes);
311 if (e->bitmap) {
312 entry->type = BTRFS_FREE_SPACE_BITMAP;
313 list_add_tail(&e->list, &bitmap_list);
314 bitmaps++;
315 } else {
316 entry->type = BTRFS_FREE_SPACE_EXTENT;
317 }
318 node = rb_next(node);
319 if (!node)
320 break;
321 offset += sizeof(struct btrfs_free_space_entry);
322 if (offset + sizeof(struct btrfs_free_space_entry) >=
323 PAGE_CACHE_SIZE)
324 break;
325 entry++;
326 }
327 *crc = ~(u32)0;
328 *crc = btrfs_csum_data(root, addr + start_offset, *crc,
329 PAGE_CACHE_SIZE - start_offset);
330 kunmap(page);
331
332 btrfs_csum_final(*crc, (char *)crc);
333 crc++;
334
335 bytes += PAGE_CACHE_SIZE;
336
337 ClearPageChecked(page);
338 set_page_extent_mapped(page);
339 SetPageUptodate(page);
340 set_page_dirty(page);
341
342 /*
343 * We need to release our reference we got for grab_cache_page,
344 * except for the first page which will hold our checksums, we
345 * do that below.
346 */
347 if (index != 0) {
348 unlock_page(page);
349 page_cache_release(page);
350 }
351
352 page_cache_release(page);
353
354 index++;
355 } while (node);
356
357 /* Write out the bitmaps */
358 list_for_each_safe(pos, n, &bitmap_list) {
359 void *addr;
360 struct btrfs_free_space *entry =
361 list_entry(pos, struct btrfs_free_space, list);
362
363 page = find_get_page(inode->i_mapping, index);
364
365 addr = kmap(page);
366 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
367 *crc = ~(u32)0;
368 *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
369 kunmap(page);
370 btrfs_csum_final(*crc, (char *)crc);
371 crc++;
372 bytes += PAGE_CACHE_SIZE;
373
374 ClearPageChecked(page);
375 set_page_extent_mapped(page);
376 SetPageUptodate(page);
377 set_page_dirty(page);
378 unlock_page(page);
379 page_cache_release(page);
380 page_cache_release(page);
381 list_del_init(&entry->list);
382 index++;
383 }
384
385 /* Zero out the rest of the pages just to make sure */
386 while (index <= last_index) {
387 void *addr;
388
389 page = find_get_page(inode->i_mapping, index);
390
391 addr = kmap(page);
392 memset(addr, 0, PAGE_CACHE_SIZE);
393 kunmap(page);
394 ClearPageChecked(page);
395 set_page_extent_mapped(page);
396 SetPageUptodate(page);
397 set_page_dirty(page);
398 unlock_page(page);
399 page_cache_release(page);
400 page_cache_release(page);
401 bytes += PAGE_CACHE_SIZE;
402 index++;
403 }
404
405 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
406
407 /* Write the checksums and trans id to the first page */
408 {
409 void *addr;
410 u64 *gen;
411
412 page = find_get_page(inode->i_mapping, 0);
413
414 addr = kmap(page);
415 memcpy(addr, checksums, sizeof(u32) * num_checksums);
416 gen = addr + (sizeof(u32) * num_checksums);
417 *gen = trans->transid;
418 kunmap(page);
419 ClearPageChecked(page);
420 set_page_extent_mapped(page);
421 SetPageUptodate(page);
422 set_page_dirty(page);
423 unlock_page(page);
424 page_cache_release(page);
425 page_cache_release(page);
426 }
427 BTRFS_I(inode)->generation = trans->transid;
428
429 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
430 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
431
432 filemap_write_and_wait(inode->i_mapping);
433
434 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
435 key.offset = block_group->key.objectid;
436 key.type = 0;
437
438 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
439 if (ret < 0) {
440 ret = 0;
441 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
442 EXTENT_DIRTY | EXTENT_DELALLOC |
443 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
444 goto out_free;
445 }
446 leaf = path->nodes[0];
447 if (ret > 0) {
448 struct btrfs_key found_key;
449 BUG_ON(!path->slots[0]);
450 path->slots[0]--;
451 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
452 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
453 found_key.offset != block_group->key.objectid) {
454 ret = 0;
455 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
456 EXTENT_DIRTY | EXTENT_DELALLOC |
457 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
458 GFP_NOFS);
459 btrfs_release_path(root, path);
460 goto out_free;
461 }
462 }
463 header = btrfs_item_ptr(leaf, path->slots[0],
464 struct btrfs_free_space_header);
465 btrfs_set_free_space_entries(leaf, header, entries);
466 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
467 btrfs_set_free_space_generation(leaf, header, trans->transid);
468 btrfs_mark_buffer_dirty(leaf);
469 btrfs_release_path(root, path);
470
471 ret = 1;
472
473out_free:
474 if (ret == 0) {
475 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
476 spin_lock(&block_group->lock);
477 block_group->disk_cache_state = BTRFS_DC_ERROR;
478 spin_unlock(&block_group->lock);
479 BTRFS_I(inode)->generation = 0;
480 }
481 kfree(checksums);
482 btrfs_update_inode(trans, root, inode);
483 iput(inode);
484 return ret;
485}
486
185static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 487static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
186 u64 offset) 488 u64 offset)
187{ 489{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 45be29e5f01e..189f740bd3c0 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -34,10 +34,15 @@ int create_free_space_inode(struct btrfs_root *root,
34 struct btrfs_trans_handle *trans, 34 struct btrfs_trans_handle *trans,
35 struct btrfs_block_group_cache *block_group, 35 struct btrfs_block_group_cache *block_group,
36 struct btrfs_path *path); 36 struct btrfs_path *path);
37
37int btrfs_truncate_free_space_cache(struct btrfs_root *root, 38int btrfs_truncate_free_space_cache(struct btrfs_root *root,
38 struct btrfs_trans_handle *trans, 39 struct btrfs_trans_handle *trans,
39 struct btrfs_path *path, 40 struct btrfs_path *path,
40 struct inode *inode); 41 struct inode *inode);
42int btrfs_write_out_cache(struct btrfs_root *root,
43 struct btrfs_trans_handle *trans,
44 struct btrfs_block_group_cache *block_group,
45 struct btrfs_path *path);
41int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 46int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
42 u64 bytenr, u64 size); 47 u64 bytenr, u64 size);
43int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 48int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1af1ea88e8a8..f2fb974ed8f0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -764,6 +764,7 @@ static noinline int cow_file_range(struct inode *inode,
764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
765 int ret = 0; 765 int ret = 0;
766 766
767 BUG_ON(root == root->fs_info->tree_root);
767 trans = btrfs_join_transaction(root, 1); 768 trans = btrfs_join_transaction(root, 1);
768 BUG_ON(!trans); 769 BUG_ON(!trans);
769 btrfs_set_trans_block_group(trans, inode); 770 btrfs_set_trans_block_group(trans, inode);
@@ -1035,10 +1036,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1035 int type; 1036 int type;
1036 int nocow; 1037 int nocow;
1037 int check_prev = 1; 1038 int check_prev = 1;
1039 bool nolock = false;
1038 1040
1039 path = btrfs_alloc_path(); 1041 path = btrfs_alloc_path();
1040 BUG_ON(!path); 1042 BUG_ON(!path);
1041 trans = btrfs_join_transaction(root, 1); 1043 if (root == root->fs_info->tree_root) {
1044 nolock = true;
1045 trans = btrfs_join_transaction_nolock(root, 1);
1046 } else {
1047 trans = btrfs_join_transaction(root, 1);
1048 }
1042 BUG_ON(!trans); 1049 BUG_ON(!trans);
1043 1050
1044 cow_start = (u64)-1; 1051 cow_start = (u64)-1;
@@ -1211,8 +1218,13 @@ out_check:
1211 BUG_ON(ret); 1218 BUG_ON(ret);
1212 } 1219 }
1213 1220
1214 ret = btrfs_end_transaction(trans, root); 1221 if (nolock) {
1215 BUG_ON(ret); 1222 ret = btrfs_end_transaction_nolock(trans, root);
1223 BUG_ON(ret);
1224 } else {
1225 ret = btrfs_end_transaction(trans, root);
1226 BUG_ON(ret);
1227 }
1216 btrfs_free_path(path); 1228 btrfs_free_path(path);
1217 return 0; 1229 return 0;
1218} 1230}
@@ -1289,6 +1301,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
1289 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1301 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1290 struct btrfs_root *root = BTRFS_I(inode)->root; 1302 struct btrfs_root *root = BTRFS_I(inode)->root;
1291 u64 len = state->end + 1 - state->start; 1303 u64 len = state->end + 1 - state->start;
1304 int do_list = (root->root_key.objectid !=
1305 BTRFS_ROOT_TREE_OBJECTID);
1292 1306
1293 if (*bits & EXTENT_FIRST_DELALLOC) 1307 if (*bits & EXTENT_FIRST_DELALLOC)
1294 *bits &= ~EXTENT_FIRST_DELALLOC; 1308 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1312,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
1298 spin_lock(&root->fs_info->delalloc_lock); 1312 spin_lock(&root->fs_info->delalloc_lock);
1299 BTRFS_I(inode)->delalloc_bytes += len; 1313 BTRFS_I(inode)->delalloc_bytes += len;
1300 root->fs_info->delalloc_bytes += len; 1314 root->fs_info->delalloc_bytes += len;
1301 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1315 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1302 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1316 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1303 &root->fs_info->delalloc_inodes); 1317 &root->fs_info->delalloc_inodes);
1304 } 1318 }
@@ -1321,6 +1335,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1321 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1335 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1322 struct btrfs_root *root = BTRFS_I(inode)->root; 1336 struct btrfs_root *root = BTRFS_I(inode)->root;
1323 u64 len = state->end + 1 - state->start; 1337 u64 len = state->end + 1 - state->start;
1338 int do_list = (root->root_key.objectid !=
1339 BTRFS_ROOT_TREE_OBJECTID);
1324 1340
1325 if (*bits & EXTENT_FIRST_DELALLOC) 1341 if (*bits & EXTENT_FIRST_DELALLOC)
1326 *bits &= ~EXTENT_FIRST_DELALLOC; 1342 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1346,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1330 if (*bits & EXTENT_DO_ACCOUNTING) 1346 if (*bits & EXTENT_DO_ACCOUNTING)
1331 btrfs_delalloc_release_metadata(inode, len); 1347 btrfs_delalloc_release_metadata(inode, len);
1332 1348
1333 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) 1349 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1350 && do_list)
1334 btrfs_free_reserved_data_space(inode, len); 1351 btrfs_free_reserved_data_space(inode, len);
1335 1352
1336 spin_lock(&root->fs_info->delalloc_lock); 1353 spin_lock(&root->fs_info->delalloc_lock);
1337 root->fs_info->delalloc_bytes -= len; 1354 root->fs_info->delalloc_bytes -= len;
1338 BTRFS_I(inode)->delalloc_bytes -= len; 1355 BTRFS_I(inode)->delalloc_bytes -= len;
1339 1356
1340 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1357 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1341 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1358 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1342 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1359 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1343 } 1360 }
@@ -1426,7 +1443,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1426 1443
1427 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1444 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1428 1445
1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1446 if (root == root->fs_info->tree_root)
1447 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1448 else
1449 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1430 BUG_ON(ret); 1450 BUG_ON(ret);
1431 1451
1432 if (!(rw & REQ_WRITE)) { 1452 if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1682,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1662 struct extent_state *cached_state = NULL; 1682 struct extent_state *cached_state = NULL;
1663 int compressed = 0; 1683 int compressed = 0;
1664 int ret; 1684 int ret;
1685 bool nolock = false;
1665 1686
1666 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1687 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1667 end - start + 1); 1688 end - start + 1);
@@ -1669,11 +1690,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1669 return 0; 1690 return 0;
1670 BUG_ON(!ordered_extent); 1691 BUG_ON(!ordered_extent);
1671 1692
1693 nolock = (root == root->fs_info->tree_root);
1694
1672 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1695 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1673 BUG_ON(!list_empty(&ordered_extent->list)); 1696 BUG_ON(!list_empty(&ordered_extent->list));
1674 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1697 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1675 if (!ret) { 1698 if (!ret) {
1676 trans = btrfs_join_transaction(root, 1); 1699 if (nolock)
1700 trans = btrfs_join_transaction_nolock(root, 1);
1701 else
1702 trans = btrfs_join_transaction(root, 1);
1703 BUG_ON(!trans);
1677 btrfs_set_trans_block_group(trans, inode); 1704 btrfs_set_trans_block_group(trans, inode);
1678 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1705 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1679 ret = btrfs_update_inode(trans, root, inode); 1706 ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1713,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1686 ordered_extent->file_offset + ordered_extent->len - 1, 1713 ordered_extent->file_offset + ordered_extent->len - 1,
1687 0, &cached_state, GFP_NOFS); 1714 0, &cached_state, GFP_NOFS);
1688 1715
1689 trans = btrfs_join_transaction(root, 1); 1716 if (nolock)
1717 trans = btrfs_join_transaction_nolock(root, 1);
1718 else
1719 trans = btrfs_join_transaction(root, 1);
1690 btrfs_set_trans_block_group(trans, inode); 1720 btrfs_set_trans_block_group(trans, inode);
1691 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1721 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1692 1722
@@ -1725,9 +1755,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1725 ret = btrfs_update_inode(trans, root, inode); 1755 ret = btrfs_update_inode(trans, root, inode);
1726 BUG_ON(ret); 1756 BUG_ON(ret);
1727out: 1757out:
1728 btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1758 if (nolock) {
1729 if (trans) 1759 if (trans)
1730 btrfs_end_transaction(trans, root); 1760 btrfs_end_transaction_nolock(trans, root);
1761 } else {
1762 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1763 if (trans)
1764 btrfs_end_transaction(trans, root);
1765 }
1766
1731 /* once for us */ 1767 /* once for us */
1732 btrfs_put_ordered_extent(ordered_extent); 1768 btrfs_put_ordered_extent(ordered_extent);
1733 /* once for the tree */ 1769 /* once for the tree */