diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 141 |
1 files changed, 110 insertions, 31 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5aebddd71193..92caa8035f36 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include "locking.h" | 38 | #include "locking.h" |
39 | #include "ref-cache.h" | 39 | #include "ref-cache.h" |
40 | #include "tree-log.h" | 40 | #include "tree-log.h" |
41 | #include "free-space-cache.h" | ||
41 | 42 | ||
42 | static struct extent_io_ops btree_extent_io_ops; | 43 | static struct extent_io_ops btree_extent_io_ops; |
43 | static void end_workqueue_fn(struct btrfs_work *work); | 44 | static void end_workqueue_fn(struct btrfs_work *work); |
@@ -75,6 +76,40 @@ struct async_submit_bio { | |||
75 | struct btrfs_work work; | 76 | struct btrfs_work work; |
76 | }; | 77 | }; |
77 | 78 | ||
79 | /* These are used to set the lockdep class on the extent buffer locks. | ||
80 | * The class is set by the readpage_end_io_hook once the header level has | ||
81 | * been read, just ahead of csum validation and before the pages are unlocked. | ||
82 | * | ||
83 | * The lockdep class is also set by btrfs_init_new_buffer on freshly | ||
84 | * allocated blocks. | ||
85 | * | ||
86 | * The class is based on the level in the tree block, which allows lockdep | ||
87 | * to know that lower nodes nest inside the locks of higher nodes. | ||
88 | * | ||
89 | * We also add a check to make sure the highest level of the tree is | ||
90 | * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this | ||
91 | * code needs to be updated as well. | ||
92 | */ | ||
93 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
94 | # if BTRFS_MAX_LEVEL != 8 | ||
95 | # error | ||
96 | # endif | ||
97 | static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; | ||
98 | static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { | ||
99 | /* leaf */ | ||
100 | "btrfs-extent-00", | ||
101 | "btrfs-extent-01", | ||
102 | "btrfs-extent-02", | ||
103 | "btrfs-extent-03", | ||
104 | "btrfs-extent-04", | ||
105 | "btrfs-extent-05", | ||
106 | "btrfs-extent-06", | ||
107 | "btrfs-extent-07", | ||
108 | /* highest possible level */ | ||
109 | "btrfs-extent-08", | ||
110 | }; | ||
111 | #endif | ||
112 | |||
78 | /* | 113 | /* |
79 | * extents on the btree inode are pretty simple, there's one extent | 114 | * extents on the btree inode are pretty simple, there's one extent |
80 | * that covers the entire device | 115 | * that covers the entire device |
@@ -347,6 +382,15 @@ static int check_tree_block_fsid(struct btrfs_root *root, | |||
347 | return ret; | 382 | return ret; |
348 | } | 383 | } |
349 | 384 | ||
385 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
386 | void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) | ||
387 | { | ||
388 | lockdep_set_class_and_name(&eb->lock, | ||
389 | &btrfs_eb_class[level], | ||
390 | btrfs_eb_name[level]); | ||
391 | } | ||
392 | #endif | ||
393 | |||
350 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 394 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
351 | struct extent_state *state) | 395 | struct extent_state *state) |
352 | { | 396 | { |
@@ -392,6 +436,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
392 | } | 436 | } |
393 | found_level = btrfs_header_level(eb); | 437 | found_level = btrfs_header_level(eb); |
394 | 438 | ||
439 | btrfs_set_buffer_lockdep_class(eb, found_level); | ||
440 | |||
395 | ret = csum_tree_block(root, eb, 1); | 441 | ret = csum_tree_block(root, eb, 1); |
396 | if (ret) | 442 | if (ret) |
397 | ret = -EIO; | 443 | ret = -EIO; |
@@ -623,14 +669,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
623 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 669 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
624 | { | 670 | { |
625 | struct extent_io_tree *tree; | 671 | struct extent_io_tree *tree; |
672 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
673 | struct extent_buffer *eb; | ||
674 | int was_dirty; | ||
675 | |||
626 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 676 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
677 | if (!(current->flags & PF_MEMALLOC)) { | ||
678 | return extent_write_full_page(tree, page, | ||
679 | btree_get_extent, wbc); | ||
680 | } | ||
627 | 681 | ||
628 | if (current->flags & PF_MEMALLOC) { | 682 | redirty_page_for_writepage(wbc, page); |
629 | redirty_page_for_writepage(wbc, page); | 683 | eb = btrfs_find_tree_block(root, page_offset(page), |
630 | unlock_page(page); | 684 | PAGE_CACHE_SIZE); |
631 | return 0; | 685 | WARN_ON(!eb); |
686 | |||
687 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
688 | if (!was_dirty) { | ||
689 | spin_lock(&root->fs_info->delalloc_lock); | ||
690 | root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; | ||
691 | spin_unlock(&root->fs_info->delalloc_lock); | ||
632 | } | 692 | } |
633 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | 693 | free_extent_buffer(eb); |
694 | |||
695 | unlock_page(page); | ||
696 | return 0; | ||
634 | } | 697 | } |
635 | 698 | ||
636 | static int btree_writepages(struct address_space *mapping, | 699 | static int btree_writepages(struct address_space *mapping, |
@@ -639,15 +702,15 @@ static int btree_writepages(struct address_space *mapping, | |||
639 | struct extent_io_tree *tree; | 702 | struct extent_io_tree *tree; |
640 | tree = &BTRFS_I(mapping->host)->io_tree; | 703 | tree = &BTRFS_I(mapping->host)->io_tree; |
641 | if (wbc->sync_mode == WB_SYNC_NONE) { | 704 | if (wbc->sync_mode == WB_SYNC_NONE) { |
705 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
642 | u64 num_dirty; | 706 | u64 num_dirty; |
643 | u64 start = 0; | ||
644 | unsigned long thresh = 32 * 1024 * 1024; | 707 | unsigned long thresh = 32 * 1024 * 1024; |
645 | 708 | ||
646 | if (wbc->for_kupdate) | 709 | if (wbc->for_kupdate) |
647 | return 0; | 710 | return 0; |
648 | 711 | ||
649 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 712 | /* this is a bit racy, but that's ok */ |
650 | thresh, EXTENT_DIRTY); | 713 | num_dirty = root->fs_info->dirty_metadata_bytes; |
651 | if (num_dirty < thresh) | 714 | if (num_dirty < thresh) |
652 | return 0; | 715 | return 0; |
653 | } | 716 | } |
@@ -812,11 +875,19 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
812 | struct inode *btree_inode = root->fs_info->btree_inode; | 875 | struct inode *btree_inode = root->fs_info->btree_inode; |
813 | if (btrfs_header_generation(buf) == | 876 | if (btrfs_header_generation(buf) == |
814 | root->fs_info->running_transaction->transid) { | 877 | root->fs_info->running_transaction->transid) { |
815 | WARN_ON(!btrfs_tree_locked(buf)); | 878 | btrfs_assert_tree_locked(buf); |
879 | |||
880 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { | ||
881 | spin_lock(&root->fs_info->delalloc_lock); | ||
882 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | ||
883 | root->fs_info->dirty_metadata_bytes -= buf->len; | ||
884 | else | ||
885 | WARN_ON(1); | ||
886 | spin_unlock(&root->fs_info->delalloc_lock); | ||
887 | } | ||
816 | 888 | ||
817 | /* ugh, clear_extent_buffer_dirty can be expensive */ | 889 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
818 | btrfs_set_lock_blocking(buf); | 890 | btrfs_set_lock_blocking(buf); |
819 | |||
820 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 891 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
821 | buf); | 892 | buf); |
822 | } | 893 | } |
@@ -1342,8 +1413,6 @@ static int bio_ready_for_csum(struct bio *bio) | |||
1342 | 1413 | ||
1343 | ret = extent_range_uptodate(io_tree, start + length, | 1414 | ret = extent_range_uptodate(io_tree, start + length, |
1344 | start + buf_len - 1); | 1415 | start + buf_len - 1); |
1345 | if (ret == 1) | ||
1346 | return ret; | ||
1347 | return ret; | 1416 | return ret; |
1348 | } | 1417 | } |
1349 | 1418 | ||
@@ -1426,12 +1495,6 @@ static int transaction_kthread(void *arg) | |||
1426 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1495 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1427 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1496 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1428 | 1497 | ||
1429 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
1430 | printk(KERN_INFO "btrfs: total reference cache " | ||
1431 | "size %llu\n", | ||
1432 | root->fs_info->total_ref_cache_size); | ||
1433 | } | ||
1434 | |||
1435 | mutex_lock(&root->fs_info->trans_mutex); | 1498 | mutex_lock(&root->fs_info->trans_mutex); |
1436 | cur = root->fs_info->running_transaction; | 1499 | cur = root->fs_info->running_transaction; |
1437 | if (!cur) { | 1500 | if (!cur) { |
@@ -1448,6 +1511,7 @@ static int transaction_kthread(void *arg) | |||
1448 | mutex_unlock(&root->fs_info->trans_mutex); | 1511 | mutex_unlock(&root->fs_info->trans_mutex); |
1449 | trans = btrfs_start_transaction(root, 1); | 1512 | trans = btrfs_start_transaction(root, 1); |
1450 | ret = btrfs_commit_transaction(trans, root); | 1513 | ret = btrfs_commit_transaction(trans, root); |
1514 | |||
1451 | sleep: | 1515 | sleep: |
1452 | wake_up_process(root->fs_info->cleaner_kthread); | 1516 | wake_up_process(root->fs_info->cleaner_kthread); |
1453 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1517 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1507,6 +1571,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1507 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1571 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1508 | INIT_LIST_HEAD(&fs_info->hashers); | 1572 | INIT_LIST_HEAD(&fs_info->hashers); |
1509 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1573 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1574 | INIT_LIST_HEAD(&fs_info->ordered_operations); | ||
1510 | spin_lock_init(&fs_info->delalloc_lock); | 1575 | spin_lock_init(&fs_info->delalloc_lock); |
1511 | spin_lock_init(&fs_info->new_trans_lock); | 1576 | spin_lock_init(&fs_info->new_trans_lock); |
1512 | spin_lock_init(&fs_info->ref_cache_lock); | 1577 | spin_lock_init(&fs_info->ref_cache_lock); |
@@ -1566,10 +1631,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1566 | 1631 | ||
1567 | extent_io_tree_init(&fs_info->pinned_extents, | 1632 | extent_io_tree_init(&fs_info->pinned_extents, |
1568 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1633 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1569 | extent_io_tree_init(&fs_info->pending_del, | ||
1570 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1571 | extent_io_tree_init(&fs_info->extent_ins, | ||
1572 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1573 | fs_info->do_barriers = 1; | 1634 | fs_info->do_barriers = 1; |
1574 | 1635 | ||
1575 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | 1636 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); |
@@ -1582,15 +1643,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1582 | insert_inode_hash(fs_info->btree_inode); | 1643 | insert_inode_hash(fs_info->btree_inode); |
1583 | 1644 | ||
1584 | mutex_init(&fs_info->trans_mutex); | 1645 | mutex_init(&fs_info->trans_mutex); |
1646 | mutex_init(&fs_info->ordered_operations_mutex); | ||
1585 | mutex_init(&fs_info->tree_log_mutex); | 1647 | mutex_init(&fs_info->tree_log_mutex); |
1586 | mutex_init(&fs_info->drop_mutex); | 1648 | mutex_init(&fs_info->drop_mutex); |
1587 | mutex_init(&fs_info->extent_ins_mutex); | ||
1588 | mutex_init(&fs_info->pinned_mutex); | ||
1589 | mutex_init(&fs_info->chunk_mutex); | 1649 | mutex_init(&fs_info->chunk_mutex); |
1590 | mutex_init(&fs_info->transaction_kthread_mutex); | 1650 | mutex_init(&fs_info->transaction_kthread_mutex); |
1591 | mutex_init(&fs_info->cleaner_mutex); | 1651 | mutex_init(&fs_info->cleaner_mutex); |
1592 | mutex_init(&fs_info->volume_mutex); | 1652 | mutex_init(&fs_info->volume_mutex); |
1593 | mutex_init(&fs_info->tree_reloc_mutex); | 1653 | mutex_init(&fs_info->tree_reloc_mutex); |
1654 | |||
1655 | btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); | ||
1656 | btrfs_init_free_cluster(&fs_info->data_alloc_cluster); | ||
1657 | |||
1594 | init_waitqueue_head(&fs_info->transaction_throttle); | 1658 | init_waitqueue_head(&fs_info->transaction_throttle); |
1595 | init_waitqueue_head(&fs_info->transaction_wait); | 1659 | init_waitqueue_head(&fs_info->transaction_wait); |
1596 | init_waitqueue_head(&fs_info->async_submit_wait); | 1660 | init_waitqueue_head(&fs_info->async_submit_wait); |
@@ -1777,7 +1841,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1777 | ret = find_and_setup_root(tree_root, fs_info, | 1841 | ret = find_and_setup_root(tree_root, fs_info, |
1778 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 1842 | BTRFS_DEV_TREE_OBJECTID, dev_root); |
1779 | dev_root->track_dirty = 1; | 1843 | dev_root->track_dirty = 1; |
1780 | |||
1781 | if (ret) | 1844 | if (ret) |
1782 | goto fail_extent_root; | 1845 | goto fail_extent_root; |
1783 | 1846 | ||
@@ -2314,10 +2377,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2314 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 2377 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; |
2315 | u64 transid = btrfs_header_generation(buf); | 2378 | u64 transid = btrfs_header_generation(buf); |
2316 | struct inode *btree_inode = root->fs_info->btree_inode; | 2379 | struct inode *btree_inode = root->fs_info->btree_inode; |
2380 | int was_dirty; | ||
2317 | 2381 | ||
2318 | btrfs_set_lock_blocking(buf); | 2382 | btrfs_assert_tree_locked(buf); |
2319 | |||
2320 | WARN_ON(!btrfs_tree_locked(buf)); | ||
2321 | if (transid != root->fs_info->generation) { | 2383 | if (transid != root->fs_info->generation) { |
2322 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " | 2384 | printk(KERN_CRIT "btrfs transid mismatch buffer %llu, " |
2323 | "found %llu running %llu\n", | 2385 | "found %llu running %llu\n", |
@@ -2326,7 +2388,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2326 | (unsigned long long)root->fs_info->generation); | 2388 | (unsigned long long)root->fs_info->generation); |
2327 | WARN_ON(1); | 2389 | WARN_ON(1); |
2328 | } | 2390 | } |
2329 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | 2391 | was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
2392 | buf); | ||
2393 | if (!was_dirty) { | ||
2394 | spin_lock(&root->fs_info->delalloc_lock); | ||
2395 | root->fs_info->dirty_metadata_bytes += buf->len; | ||
2396 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2397 | } | ||
2330 | } | 2398 | } |
2331 | 2399 | ||
2332 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 2400 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) |
@@ -2341,7 +2409,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
2341 | unsigned long thresh = 32 * 1024 * 1024; | 2409 | unsigned long thresh = 32 * 1024 * 1024; |
2342 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 2410 | tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
2343 | 2411 | ||
2344 | if (current_is_pdflush() || current->flags & PF_MEMALLOC) | 2412 | if (current->flags & PF_MEMALLOC) |
2345 | return; | 2413 | return; |
2346 | 2414 | ||
2347 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 2415 | num_dirty = count_range_bits(tree, &start, (u64)-1, |
@@ -2366,6 +2434,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2366 | int btree_lock_page_hook(struct page *page) | 2434 | int btree_lock_page_hook(struct page *page) |
2367 | { | 2435 | { |
2368 | struct inode *inode = page->mapping->host; | 2436 | struct inode *inode = page->mapping->host; |
2437 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2369 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2438 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2370 | struct extent_buffer *eb; | 2439 | struct extent_buffer *eb; |
2371 | unsigned long len; | 2440 | unsigned long len; |
@@ -2381,6 +2450,16 @@ int btree_lock_page_hook(struct page *page) | |||
2381 | 2450 | ||
2382 | btrfs_tree_lock(eb); | 2451 | btrfs_tree_lock(eb); |
2383 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 2452 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2453 | |||
2454 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
2455 | spin_lock(&root->fs_info->delalloc_lock); | ||
2456 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
2457 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
2458 | else | ||
2459 | WARN_ON(1); | ||
2460 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2461 | } | ||
2462 | |||
2384 | btrfs_tree_unlock(eb); | 2463 | btrfs_tree_unlock(eb); |
2385 | free_extent_buffer(eb); | 2464 | free_extent_buffer(eb); |
2386 | out: | 2465 | out: |