about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/btrfs/ctree.h 1
-rw-r--r-- fs/btrfs/disk-io.c 17
-rw-r--r-- fs/btrfs/extent-tree.c 48
-rw-r--r-- fs/btrfs/free-space-cache.c 302
-rw-r--r-- fs/btrfs/free-space-cache.h 5
-rw-r--r-- fs/btrfs/inode.c 60
6 files changed, 420 insertions, 13 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 46f52e1beade..2c06b37cda75 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -982,6 +982,7 @@ struct btrfs_fs_info {
982 struct btrfs_workers endio_meta_workers; 982 struct btrfs_workers endio_meta_workers;
983 struct btrfs_workers endio_meta_write_workers; 983 struct btrfs_workers endio_meta_write_workers;
984 struct btrfs_workers endio_write_workers; 984 struct btrfs_workers endio_write_workers;
985 struct btrfs_workers endio_freespace_worker;
985 struct btrfs_workers submit_workers; 986 struct btrfs_workers submit_workers;
986 /* 987 /*
987 * fixup workers take dirty pages that didn't properly go through 988 * fixup workers take dirty pages that didn't properly go through
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 45cf64fc1e3e..77e5dabfd45a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -481,9 +481,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
481 end_io_wq->work.flags = 0; 481 end_io_wq->work.flags = 0;
482 482
483 if (bio->bi_rw & REQ_WRITE) { 483 if (bio->bi_rw & REQ_WRITE) {
484 if (end_io_wq->metadata) 484 if (end_io_wq->metadata == 1)
485 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 485 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
486 &end_io_wq->work); 486 &end_io_wq->work);
487 else if (end_io_wq->metadata == 2)
488 btrfs_queue_worker(&fs_info->endio_freespace_worker,
489 &end_io_wq->work);
487 else 490 else
488 btrfs_queue_worker(&fs_info->endio_write_workers, 491 btrfs_queue_worker(&fs_info->endio_write_workers,
489 &end_io_wq->work); 492 &end_io_wq->work);
@@ -497,6 +500,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
497 } 500 }
498} 501}
499 502
503/*
504 * For the metadata arg you want
505 *
506 * 0 - if data
507 * 1 - if normal metadata
508 * 2 - if writing to the free space cache area
509 */
500int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 510int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
501 int metadata) 511 int metadata)
502{ 512{
@@ -1774,6 +1784,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1774 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1784 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
1775 fs_info->thread_pool_size, 1785 fs_info->thread_pool_size,
1776 &fs_info->generic_worker); 1786 &fs_info->generic_worker);
1787 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1788 1, &fs_info->generic_worker);
1777 1789
1778 /* 1790 /*
1779 * endios are largely parallel and should have a very 1791 * endios are largely parallel and should have a very
@@ -1794,6 +1806,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1794 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1806 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1795 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1807 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1796 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1808 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1809 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1797 1810
1798 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1811 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1799 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1812 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -2035,6 +2048,7 @@ fail_sb_buffer:
2035 btrfs_stop_workers(&fs_info->endio_meta_workers); 2048 btrfs_stop_workers(&fs_info->endio_meta_workers);
2036 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2049 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2037 btrfs_stop_workers(&fs_info->endio_write_workers); 2050 btrfs_stop_workers(&fs_info->endio_write_workers);
2051 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2038 btrfs_stop_workers(&fs_info->submit_workers); 2052 btrfs_stop_workers(&fs_info->submit_workers);
2039fail_iput: 2053fail_iput:
2040 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2054 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2468,6 +2482,7 @@ int close_ctree(struct btrfs_root *root)
2468 btrfs_stop_workers(&fs_info->endio_meta_workers); 2482 btrfs_stop_workers(&fs_info->endio_meta_workers);
2469 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2483 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2470 btrfs_stop_workers(&fs_info->endio_write_workers); 2484 btrfs_stop_workers(&fs_info->endio_write_workers);
2485 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2471 btrfs_stop_workers(&fs_info->submit_workers); 2486 btrfs_stop_workers(&fs_info->submit_workers);
2472 2487
2473 btrfs_close_devices(fs_info->fs_devices); 2488 btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index aab40fb3faed..d5455a2bf60b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2847,6 +2847,8 @@ again:
2847 continue; 2847 continue;
2848 } 2848 }
2849 2849
2850 if (cache->disk_cache_state == BTRFS_DC_SETUP)
2851 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
2850 cache->dirty = 0; 2852 cache->dirty = 0;
2851 last = cache->key.objectid + cache->key.offset; 2853 last = cache->key.objectid + cache->key.offset;
2852 2854
@@ -2855,6 +2857,52 @@ again:
2855 btrfs_put_block_group(cache); 2857 btrfs_put_block_group(cache);
2856 } 2858 }
2857 2859
2860 while (1) {
2861 /*
2862 * I don't think this is needed since we're just marking our
2863 * preallocated extent as written, but just in case it can't
2864 * hurt.
2865 */
2866 if (last == 0) {
2867 err = btrfs_run_delayed_refs(trans, root,
2868 (unsigned long)-1);
2869 BUG_ON(err);
2870 }
2871
2872 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2873 while (cache) {
2874 /*
2875 * Really this shouldn't happen, but it could if we
2876 * couldn't write the entire preallocated extent and
2877 * splitting the extent resulted in a new block.
2878 */
2879 if (cache->dirty) {
2880 btrfs_put_block_group(cache);
2881 goto again;
2882 }
2883 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2884 break;
2885 cache = next_block_group(root, cache);
2886 }
2887 if (!cache) {
2888 if (last == 0)
2889 break;
2890 last = 0;
2891 continue;
2892 }
2893
2894 btrfs_write_out_cache(root, trans, cache, path);
2895
2896 /*
2897 * If we didn't have an error then the cache state is still
2898 * NEED_WRITE, so we can set it to WRITTEN.
2899 */
2900 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2901 cache->disk_cache_state = BTRFS_DC_WRITTEN;
2902 last = cache->key.objectid + cache->key.offset;
2903 btrfs_put_block_group(cache);
2904 }
2905
2858 btrfs_free_path(path); 2906 btrfs_free_path(path);
2859 return 0; 2907 return 0;
2860} 2908}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 05efcc7061a7..7f972e59cc04 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -28,6 +28,11 @@
28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
30 30
31static void recalculate_thresholds(struct btrfs_block_group_cache
32 *block_group);
33static int link_free_space(struct btrfs_block_group_cache *block_group,
34 struct btrfs_free_space *info);
35
31struct inode *lookup_free_space_inode(struct btrfs_root *root, 36struct inode *lookup_free_space_inode(struct btrfs_root *root,
32 struct btrfs_block_group_cache 37 struct btrfs_block_group_cache
33 *block_group, struct btrfs_path *path) 38 *block_group, struct btrfs_path *path)
@@ -182,6 +187,303 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
182 return btrfs_update_inode(trans, root, inode); 187 return btrfs_update_inode(trans, root, inode);
183} 188}
184 189
190int btrfs_write_out_cache(struct btrfs_root *root,
191 struct btrfs_trans_handle *trans,
192 struct btrfs_block_group_cache *block_group,
193 struct btrfs_path *path)
194{
195 struct btrfs_free_space_header *header;
196 struct extent_buffer *leaf;
197 struct inode *inode;
198 struct rb_node *node;
199 struct list_head *pos, *n;
200 struct page *page;
201 struct extent_state *cached_state = NULL;
202 struct list_head bitmap_list;
203 struct btrfs_key key;
204 u64 bytes = 0;
205 u32 *crc, *checksums;
206 pgoff_t index = 0, last_index = 0;
207 unsigned long first_page_offset;
208 int num_checksums;
209 int entries = 0;
210 int bitmaps = 0;
211 int ret = 0;
212
213 root = root->fs_info->tree_root;
214
215 INIT_LIST_HEAD(&bitmap_list);
216
217 spin_lock(&block_group->lock);
218 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
219 spin_unlock(&block_group->lock);
220 return 0;
221 }
222 spin_unlock(&block_group->lock);
223
224 inode = lookup_free_space_inode(root, block_group, path);
225 if (IS_ERR(inode))
226 return 0;
227
228 if (!i_size_read(inode)) {
229 iput(inode);
230 return 0;
231 }
232
233 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
234 filemap_write_and_wait(inode->i_mapping);
235 btrfs_wait_ordered_range(inode, inode->i_size &
236 ~(root->sectorsize - 1), (u64)-1);
237
238 /* We need a checksum per page. */
239 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
240 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
241 if (!crc) {
242 iput(inode);
243 return 0;
244 }
245
246 /* Since the first page has all of our checksums and our generation we
247 * need to calculate the offset into the page that we can start writing
248 * our entries.
249 */
250 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
251
252 node = rb_first(&block_group->free_space_offset);
253 if (!node)
254 goto out_free;
255
256 /*
257 * Lock all pages first so we can lock the extent safely.
258 *
259 * NOTE: Because we hold the ref the entire time we're going to write to
260 * the page, find_get_page should never fail, so we don't do a check
261 * after find_get_page at this point. Just putting this here so people
262 * know and don't freak out.
263 */
264 while (index <= last_index) {
265 page = grab_cache_page(inode->i_mapping, index);
266 if (!page) {
267 pgoff_t i = 0;
268
269 while (i < index) {
270 page = find_get_page(inode->i_mapping, i);
271 unlock_page(page);
272 page_cache_release(page);
273 page_cache_release(page);
274 i++;
275 }
276 goto out_free;
277 }
278 index++;
279 }
280
281 index = 0;
282 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
283 0, &cached_state, GFP_NOFS);
284
285 /* Write out the extent entries */
286 do {
287 struct btrfs_free_space_entry *entry;
288 void *addr;
289 unsigned long offset = 0;
290 unsigned long start_offset = 0;
291
292 if (index == 0) {
293 start_offset = first_page_offset;
294 offset = start_offset;
295 }
296
297 page = find_get_page(inode->i_mapping, index);
298
299 addr = kmap(page);
300 entry = addr + start_offset;
301
302 memset(addr, 0, PAGE_CACHE_SIZE);
303 while (1) {
304 struct btrfs_free_space *e;
305
306 e = rb_entry(node, struct btrfs_free_space, offset_index);
307 entries++;
308
309 entry->offset = cpu_to_le64(e->offset);
310 entry->bytes = cpu_to_le64(e->bytes);
311 if (e->bitmap) {
312 entry->type = BTRFS_FREE_SPACE_BITMAP;
313 list_add_tail(&e->list, &bitmap_list);
314 bitmaps++;
315 } else {
316 entry->type = BTRFS_FREE_SPACE_EXTENT;
317 }
318 node = rb_next(node);
319 if (!node)
320 break;
321 offset += sizeof(struct btrfs_free_space_entry);
322 if (offset + sizeof(struct btrfs_free_space_entry) >=
323 PAGE_CACHE_SIZE)
324 break;
325 entry++;
326 }
327 *crc = ~(u32)0;
328 *crc = btrfs_csum_data(root, addr + start_offset, *crc,
329 PAGE_CACHE_SIZE - start_offset);
330 kunmap(page);
331
332 btrfs_csum_final(*crc, (char *)crc);
333 crc++;
334
335 bytes += PAGE_CACHE_SIZE;
336
337 ClearPageChecked(page);
338 set_page_extent_mapped(page);
339 SetPageUptodate(page);
340 set_page_dirty(page);
341
342 /*
343 * We need to release the reference we got from grab_cache_page,
344 * except for the first page, which will hold our checksums; we
345 * do that below.
346 */
347 if (index != 0) {
348 unlock_page(page);
349 page_cache_release(page);
350 }
351
352 page_cache_release(page);
353
354 index++;
355 } while (node);
356
357 /* Write out the bitmaps */
358 list_for_each_safe(pos, n, &bitmap_list) {
359 void *addr;
360 struct btrfs_free_space *entry =
361 list_entry(pos, struct btrfs_free_space, list);
362
363 page = find_get_page(inode->i_mapping, index);
364
365 addr = kmap(page);
366 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
367 *crc = ~(u32)0;
368 *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
369 kunmap(page);
370 btrfs_csum_final(*crc, (char *)crc);
371 crc++;
372 bytes += PAGE_CACHE_SIZE;
373
374 ClearPageChecked(page);
375 set_page_extent_mapped(page);
376 SetPageUptodate(page);
377 set_page_dirty(page);
378 unlock_page(page);
379 page_cache_release(page);
380 page_cache_release(page);
381 list_del_init(&entry->list);
382 index++;
383 }
384
385 /* Zero out the rest of the pages just to make sure */
386 while (index <= last_index) {
387 void *addr;
388
389 page = find_get_page(inode->i_mapping, index);
390
391 addr = kmap(page);
392 memset(addr, 0, PAGE_CACHE_SIZE);
393 kunmap(page);
394 ClearPageChecked(page);
395 set_page_extent_mapped(page);
396 SetPageUptodate(page);
397 set_page_dirty(page);
398 unlock_page(page);
399 page_cache_release(page);
400 page_cache_release(page);
401 bytes += PAGE_CACHE_SIZE;
402 index++;
403 }
404
405 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
406
407 /* Write the checksums and trans id to the first page */
408 {
409 void *addr;
410 u64 *gen;
411
412 page = find_get_page(inode->i_mapping, 0);
413
414 addr = kmap(page);
415 memcpy(addr, checksums, sizeof(u32) * num_checksums);
416 gen = addr + (sizeof(u32) * num_checksums);
417 *gen = trans->transid;
418 kunmap(page);
419 ClearPageChecked(page);
420 set_page_extent_mapped(page);
421 SetPageUptodate(page);
422 set_page_dirty(page);
423 unlock_page(page);
424 page_cache_release(page);
425 page_cache_release(page);
426 }
427 BTRFS_I(inode)->generation = trans->transid;
428
429 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
430 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
431
432 filemap_write_and_wait(inode->i_mapping);
433
434 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
435 key.offset = block_group->key.objectid;
436 key.type = 0;
437
438 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
439 if (ret < 0) {
440 ret = 0;
441 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
442 EXTENT_DIRTY | EXTENT_DELALLOC |
443 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
444 goto out_free;
445 }
446 leaf = path->nodes[0];
447 if (ret > 0) {
448 struct btrfs_key found_key;
449 BUG_ON(!path->slots[0]);
450 path->slots[0]--;
451 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
452 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
453 found_key.offset != block_group->key.objectid) {
454 ret = 0;
455 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
456 EXTENT_DIRTY | EXTENT_DELALLOC |
457 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
458 GFP_NOFS);
459 btrfs_release_path(root, path);
460 goto out_free;
461 }
462 }
463 header = btrfs_item_ptr(leaf, path->slots[0],
464 struct btrfs_free_space_header);
465 btrfs_set_free_space_entries(leaf, header, entries);
466 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
467 btrfs_set_free_space_generation(leaf, header, trans->transid);
468 btrfs_mark_buffer_dirty(leaf);
469 btrfs_release_path(root, path);
470
471 ret = 1;
472
473out_free:
474 if (ret == 0) {
475 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
476 spin_lock(&block_group->lock);
477 block_group->disk_cache_state = BTRFS_DC_ERROR;
478 spin_unlock(&block_group->lock);
479 BTRFS_I(inode)->generation = 0;
480 }
481 kfree(checksums);
482 btrfs_update_inode(trans, root, inode);
483 iput(inode);
484 return ret;
485}
486
185static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 487static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
186 u64 offset) 488 u64 offset)
187{ 489{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 45be29e5f01e..189f740bd3c0 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -34,10 +34,15 @@ int create_free_space_inode(struct btrfs_root *root,
34 struct btrfs_trans_handle *trans, 34 struct btrfs_trans_handle *trans,
35 struct btrfs_block_group_cache *block_group, 35 struct btrfs_block_group_cache *block_group,
36 struct btrfs_path *path); 36 struct btrfs_path *path);
37
37int btrfs_truncate_free_space_cache(struct btrfs_root *root, 38int btrfs_truncate_free_space_cache(struct btrfs_root *root,
38 struct btrfs_trans_handle *trans, 39 struct btrfs_trans_handle *trans,
39 struct btrfs_path *path, 40 struct btrfs_path *path,
40 struct inode *inode); 41 struct inode *inode);
42int btrfs_write_out_cache(struct btrfs_root *root,
43 struct btrfs_trans_handle *trans,
44 struct btrfs_block_group_cache *block_group,
45 struct btrfs_path *path);
41int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 46int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
42 u64 bytenr, u64 size); 47 u64 bytenr, u64 size);
43int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 48int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1af1ea88e8a8..f2fb974ed8f0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -764,6 +764,7 @@ static noinline int cow_file_range(struct inode *inode,
764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
765 int ret = 0; 765 int ret = 0;
766 766
767 BUG_ON(root == root->fs_info->tree_root);
767 trans = btrfs_join_transaction(root, 1); 768 trans = btrfs_join_transaction(root, 1);
768 BUG_ON(!trans); 769 BUG_ON(!trans);
769 btrfs_set_trans_block_group(trans, inode); 770 btrfs_set_trans_block_group(trans, inode);
@@ -1035,10 +1036,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1035 int type; 1036 int type;
1036 int nocow; 1037 int nocow;
1037 int check_prev = 1; 1038 int check_prev = 1;
1039 bool nolock = false;
1038 1040
1039 path = btrfs_alloc_path(); 1041 path = btrfs_alloc_path();
1040 BUG_ON(!path); 1042 BUG_ON(!path);
1041 trans = btrfs_join_transaction(root, 1); 1043 if (root == root->fs_info->tree_root) {
1044 nolock = true;
1045 trans = btrfs_join_transaction_nolock(root, 1);
1046 } else {
1047 trans = btrfs_join_transaction(root, 1);
1048 }
1042 BUG_ON(!trans); 1049 BUG_ON(!trans);
1043 1050
1044 cow_start = (u64)-1; 1051 cow_start = (u64)-1;
@@ -1211,8 +1218,13 @@ out_check:
1211 BUG_ON(ret); 1218 BUG_ON(ret);
1212 } 1219 }
1213 1220
1214 ret = btrfs_end_transaction(trans, root); 1221 if (nolock) {
1215 BUG_ON(ret); 1222 ret = btrfs_end_transaction_nolock(trans, root);
1223 BUG_ON(ret);
1224 } else {
1225 ret = btrfs_end_transaction(trans, root);
1226 BUG_ON(ret);
1227 }
1216 btrfs_free_path(path); 1228 btrfs_free_path(path);
1217 return 0; 1229 return 0;
1218} 1230}
@@ -1289,6 +1301,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
1289 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1301 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1290 struct btrfs_root *root = BTRFS_I(inode)->root; 1302 struct btrfs_root *root = BTRFS_I(inode)->root;
1291 u64 len = state->end + 1 - state->start; 1303 u64 len = state->end + 1 - state->start;
1304 int do_list = (root->root_key.objectid !=
1305 BTRFS_ROOT_TREE_OBJECTID);
1292 1306
1293 if (*bits & EXTENT_FIRST_DELALLOC) 1307 if (*bits & EXTENT_FIRST_DELALLOC)
1294 *bits &= ~EXTENT_FIRST_DELALLOC; 1308 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1312,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
1298 spin_lock(&root->fs_info->delalloc_lock); 1312 spin_lock(&root->fs_info->delalloc_lock);
1299 BTRFS_I(inode)->delalloc_bytes += len; 1313 BTRFS_I(inode)->delalloc_bytes += len;
1300 root->fs_info->delalloc_bytes += len; 1314 root->fs_info->delalloc_bytes += len;
1301 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1315 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1302 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1316 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1303 &root->fs_info->delalloc_inodes); 1317 &root->fs_info->delalloc_inodes);
1304 } 1318 }
@@ -1321,6 +1335,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1321 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1335 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1322 struct btrfs_root *root = BTRFS_I(inode)->root; 1336 struct btrfs_root *root = BTRFS_I(inode)->root;
1323 u64 len = state->end + 1 - state->start; 1337 u64 len = state->end + 1 - state->start;
1338 int do_list = (root->root_key.objectid !=
1339 BTRFS_ROOT_TREE_OBJECTID);
1324 1340
1325 if (*bits & EXTENT_FIRST_DELALLOC) 1341 if (*bits & EXTENT_FIRST_DELALLOC)
1326 *bits &= ~EXTENT_FIRST_DELALLOC; 1342 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1346,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1330 if (*bits & EXTENT_DO_ACCOUNTING) 1346 if (*bits & EXTENT_DO_ACCOUNTING)
1331 btrfs_delalloc_release_metadata(inode, len); 1347 btrfs_delalloc_release_metadata(inode, len);
1332 1348
1333 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) 1349 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1350 && do_list)
1334 btrfs_free_reserved_data_space(inode, len); 1351 btrfs_free_reserved_data_space(inode, len);
1335 1352
1336 spin_lock(&root->fs_info->delalloc_lock); 1353 spin_lock(&root->fs_info->delalloc_lock);
1337 root->fs_info->delalloc_bytes -= len; 1354 root->fs_info->delalloc_bytes -= len;
1338 BTRFS_I(inode)->delalloc_bytes -= len; 1355 BTRFS_I(inode)->delalloc_bytes -= len;
1339 1356
1340 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1357 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1341 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1358 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1342 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1359 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1343 } 1360 }
@@ -1426,7 +1443,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1426 1443
1427 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1444 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1428 1445
1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1446 if (root == root->fs_info->tree_root)
1447 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1448 else
1449 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1430 BUG_ON(ret); 1450 BUG_ON(ret);
1431 1451
1432 if (!(rw & REQ_WRITE)) { 1452 if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1682,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1662 struct extent_state *cached_state = NULL; 1682 struct extent_state *cached_state = NULL;
1663 int compressed = 0; 1683 int compressed = 0;
1664 int ret; 1684 int ret;
1685 bool nolock = false;
1665 1686
1666 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1687 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1667 end - start + 1); 1688 end - start + 1);
@@ -1669,11 +1690,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1669 return 0; 1690 return 0;
1670 BUG_ON(!ordered_extent); 1691 BUG_ON(!ordered_extent);
1671 1692
1693 nolock = (root == root->fs_info->tree_root);
1694
1672 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1695 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1673 BUG_ON(!list_empty(&ordered_extent->list)); 1696 BUG_ON(!list_empty(&ordered_extent->list));
1674 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1697 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1675 if (!ret) { 1698 if (!ret) {
1676 trans = btrfs_join_transaction(root, 1); 1699 if (nolock)
1700 trans = btrfs_join_transaction_nolock(root, 1);
1701 else
1702 trans = btrfs_join_transaction(root, 1);
1703 BUG_ON(!trans);
1677 btrfs_set_trans_block_group(trans, inode); 1704 btrfs_set_trans_block_group(trans, inode);
1678 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1705 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1679 ret = btrfs_update_inode(trans, root, inode); 1706 ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1713,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1686 ordered_extent->file_offset + ordered_extent->len - 1, 1713 ordered_extent->file_offset + ordered_extent->len - 1,
1687 0, &cached_state, GFP_NOFS); 1714 0, &cached_state, GFP_NOFS);
1688 1715
1689 trans = btrfs_join_transaction(root, 1); 1716 if (nolock)
1717 trans = btrfs_join_transaction_nolock(root, 1);
1718 else
1719 trans = btrfs_join_transaction(root, 1);
1690 btrfs_set_trans_block_group(trans, inode); 1720 btrfs_set_trans_block_group(trans, inode);
1691 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1721 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1692 1722
@@ -1725,9 +1755,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1725 ret = btrfs_update_inode(trans, root, inode); 1755 ret = btrfs_update_inode(trans, root, inode);
1726 BUG_ON(ret); 1756 BUG_ON(ret);
1727out: 1757out:
1728 btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1758 if (nolock) {
1729 if (trans) 1759 if (trans)
1730 btrfs_end_transaction(trans, root); 1760 btrfs_end_transaction_nolock(trans, root);
1761 } else {
1762 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1763 if (trans)
1764 btrfs_end_transaction(trans, root);
1765 }
1766
1731 /* once for us */ 1767 /* once for us */
1732 btrfs_put_ordered_extent(ordered_extent); 1768 btrfs_put_ordered_extent(ordered_extent);
1733 /* once for the tree */ 1769 /* once for the tree */