Diffstat (limited to 'fs/btrfs/disk-io.c')
 fs/btrfs/disk-io.c | 141
 1 file changed, 110 insertions(+), 31 deletions(-)
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5aebddd71193..92caa8035f36 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -38,6 +38,7 @@
 #include "locking.h"
 #include "ref-cache.h"
 #include "tree-log.h"
+#include "free-space-cache.h"
 
 static struct extent_io_ops btree_extent_io_ops;
 static void end_workqueue_fn(struct btrfs_work *work);
@@ -75,6 +76,40 @@ struct async_submit_bio {
 	struct btrfs_work work;
 };
 
+/* These are used to set the lockdep class on the extent buffer locks.
+ * The class is set by the readpage_end_io_hook after the buffer has
+ * passed csum validation but before the pages are unlocked.
+ *
+ * The lockdep class is also set by btrfs_init_new_buffer on freshly
+ * allocated blocks.
+ *
+ * The class is based on the level in the tree block, which allows lockdep
+ * to know that lower nodes nest inside the locks of higher nodes.
+ *
+ * We also add a check to make sure the highest level of the tree is
+ * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this
+ * code needs update as well.
+ */
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# if BTRFS_MAX_LEVEL != 8
+# error
+# endif
+static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1];
+static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = {
+	/* leaf */
+	"btrfs-extent-00",
+	"btrfs-extent-01",
+	"btrfs-extent-02",
+	"btrfs-extent-03",
+	"btrfs-extent-04",
+	"btrfs-extent-05",
+	"btrfs-extent-06",
+	"btrfs-extent-07",
+	/* highest possible level */
+	"btrfs-extent-08",
+};
+#endif
+
 /*
  * extents on the btree inode are pretty simple, there's one extent
  * that covers the entire device
@@ -347,6 +382,15 @@ static int check_tree_block_fsid(struct btrfs_root *root,
 	return ret;
 }
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level)
+{
+	lockdep_set_class_and_name(&eb->lock,
+				   &btrfs_eb_class[level],
+				   btrfs_eb_name[level]);
+}
+#endif
+
 static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 			       struct extent_state *state)
 {
@@ -392,6 +436,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
 	}
 	found_level = btrfs_header_level(eb);
 
+	btrfs_set_buffer_lockdep_class(eb, found_level);
+
 	ret = csum_tree_block(root, eb, 1);
 	if (ret)
 		ret = -EIO;
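
The per-level lockdep classes added above exist so that taking a child node's lock while holding the parent's does not look like recursive locking on a single class. A minimal userspace sketch of that nesting rule, assuming nothing beyond POSIX threads (the eb_lock type and the held_level bookkeeping are illustrative, not btrfs code):

    /* Toy model: each lock carries its tree level, and a lock may only be
     * taken at a level strictly below the deepest level already held --
     * the ordering that the per-level classes describe to lockdep. */
    #include <assert.h>
    #include <pthread.h>
    #include <stdio.h>

    #define MAX_LEVEL 8

    struct eb_lock {
            pthread_mutex_t mutex;
            int level;                      /* 0 == leaf */
    };

    static __thread int held_level = MAX_LEVEL + 1; /* nothing held yet */

    static void eb_lock_init(struct eb_lock *l, int level)
    {
            pthread_mutex_init(&l->mutex, NULL);
            l->level = level;
    }

    static void eb_lock(struct eb_lock *l)
    {
            assert(l->level < held_level);  /* parent before child, never upward */
            pthread_mutex_lock(&l->mutex);
            held_level = l->level;
    }

    static void eb_unlock(struct eb_lock *l)
    {
            pthread_mutex_unlock(&l->mutex);
            held_level = l->level + 1;      /* crude, but fine for strict nesting */
    }

    int main(void)
    {
            struct eb_lock node, leaf;

            eb_lock_init(&node, 1);
            eb_lock_init(&leaf, 0);
            eb_lock(&node);                 /* higher level first ... */
            eb_lock(&leaf);                 /* ... then the leaf: allowed */
            eb_unlock(&leaf);
            eb_unlock(&node);
            printf("parent -> child nesting ok\n");
            return 0;
    }

In the kernel the same ordering information is conveyed by giving each level its own lock_class_key through btrfs_set_buffer_lockdep_class(), as the hunks above do.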
@@ -623,14 +669,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
+	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+	struct extent_buffer *eb;
+	int was_dirty;
+
 	tree = &BTRFS_I(page->mapping->host)->io_tree;
+	if (!(current->flags & PF_MEMALLOC)) {
+		return extent_write_full_page(tree, page,
+					      btree_get_extent, wbc);
+	}
 
-	if (current->flags & PF_MEMALLOC) {
-		redirty_page_for_writepage(wbc, page);
-		unlock_page(page);
-		return 0;
+	redirty_page_for_writepage(wbc, page);
+	eb = btrfs_find_tree_block(root, page_offset(page),
+				   PAGE_CACHE_SIZE);
+	WARN_ON(!eb);
+
+	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
+	if (!was_dirty) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE;
+		spin_unlock(&root->fs_info->delalloc_lock);
 	}
-	return extent_write_full_page(tree, page, btree_get_extent, wbc);
+	free_extent_buffer(eb);
+
+	unlock_page(page);
+	return 0;
 }
 
 static int btree_writepages(struct address_space *mapping,
@@ -639,15 +702,15 @@ static int btree_writepages(struct address_space *mapping,
 	struct extent_io_tree *tree;
 	tree = &BTRFS_I(mapping->host)->io_tree;
 	if (wbc->sync_mode == WB_SYNC_NONE) {
+		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
 		u64 num_dirty;
-		u64 start = 0;
 		unsigned long thresh = 32 * 1024 * 1024;
 
 		if (wbc->for_kupdate)
 			return 0;
 
-		num_dirty = count_range_bits(tree, &start, (u64)-1,
-					     thresh, EXTENT_DIRTY);
+		/* this is a bit racy, but that's ok */
+		num_dirty = root->fs_info->dirty_metadata_bytes;
 		if (num_dirty < thresh)
 			return 0;
 	}
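
Both hunks above rely on the new fs_info->dirty_metadata_bytes counter. btree_writepage (which, when called from memory reclaim with PF_MEMALLOC set, simply redirties the page and punts) only adds to the counter when the EXTENT_BUFFER_DIRTY bit actually flips from clear to set, so a buffer is counted at most once however often it is redirtied, and btree_writepages can then read the counter without any locking for its cheap, admittedly racy, 32 MiB threshold check instead of walking the extent_io tree with count_range_bits. A self-contained userspace sketch of the pattern, with made-up names (buffer, mark_dirty, clear_dirty and want_writeback are not btrfs functions):

    /* Toy model of the dirty-byte accounting: the counter moves only on a
     * 0->1 or 1->0 transition of the per-buffer dirty flag, and the clear
     * side guards against underflow the same way clean_tree_block() does. */
    #include <stdatomic.h>
    #include <stdio.h>

    #define BUF_LEN 4096UL                          /* stand-in for PAGE_CACHE_SIZE */
    #define WRITEBACK_THRESH (32UL * 1024 * 1024)   /* same 32MB threshold */

    struct buffer {
            atomic_bool dirty;
            unsigned long len;
    };

    /* protected by a spinlock in the real code; single-threaded here */
    static unsigned long dirty_metadata_bytes;

    static void mark_dirty(struct buffer *b)
    {
            if (!atomic_exchange(&b->dirty, 1))     /* like test_and_set_bit() */
                    dirty_metadata_bytes += b->len;
    }

    static void clear_dirty(struct buffer *b)
    {
            if (atomic_exchange(&b->dirty, 0)) {    /* like test_and_clear_bit() */
                    if (dirty_metadata_bytes >= b->len)
                            dirty_metadata_bytes -= b->len;
                    /* else: accounting bug; the kernel code WARNs here */
            }
    }

    static int want_writeback(void)
    {
            /* unlocked read, as in btree_writepages(): fine for a heuristic */
            return dirty_metadata_bytes >= WRITEBACK_THRESH;
    }

    int main(void)
    {
            struct buffer b = { .len = BUF_LEN };

            mark_dirty(&b);
            mark_dirty(&b);         /* second call must not double count */
            printf("dirty=%lu writeback=%d\n", dirty_metadata_bytes, want_writeback());
            clear_dirty(&b);
            printf("dirty=%lu\n", dirty_metadata_bytes);
            return 0;
    }

The same transition rule is what lets clean_tree_block(), btrfs_mark_buffer_dirty() and btree_lock_page_hook() in the hunks below keep the counter balanced.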
@@ -812,11 +875,19 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	struct inode *btree_inode = root->fs_info->btree_inode;
 	if (btrfs_header_generation(buf) ==
 	    root->fs_info->running_transaction->transid) {
-		WARN_ON(!btrfs_tree_locked(buf));
+		btrfs_assert_tree_locked(buf);
+
+		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
+			spin_lock(&root->fs_info->delalloc_lock);
+			if (root->fs_info->dirty_metadata_bytes >= buf->len)
+				root->fs_info->dirty_metadata_bytes -= buf->len;
+			else
+				WARN_ON(1);
+			spin_unlock(&root->fs_info->delalloc_lock);
+		}
 
-		/* ugh, clear_extent_buffer_dirty can be expensive */
+		/* ugh, clear_extent_buffer_dirty needs to lock the page */
 		btrfs_set_lock_blocking(buf);
-
 		clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
 					  buf);
 	}
@@ -1342,8 +1413,6 @@ static int bio_ready_for_csum(struct bio *bio)
 
 	ret = extent_range_uptodate(io_tree, start + length,
 				  start + buf_len - 1);
-	if (ret == 1)
-		return ret;
 	return ret;
 }
 
@@ -1426,12 +1495,6 @@ static int transaction_kthread(void *arg)
 		vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
 		mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
-		if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) {
-			printk(KERN_INFO "btrfs: total reference cache "
-			       "size %llu\n",
-			       root->fs_info->total_ref_cache_size);
-		}
-
 		mutex_lock(&root->fs_info->trans_mutex);
 		cur = root->fs_info->running_transaction;
 		if (!cur) {
@@ -1448,6 +1511,7 @@ static int transaction_kthread(void *arg)
 		mutex_unlock(&root->fs_info->trans_mutex);
 		trans = btrfs_start_transaction(root, 1);
 		ret = btrfs_commit_transaction(trans, root);
+
 sleep:
 		wake_up_process(root->fs_info->cleaner_kthread);
 		mutex_unlock(&root->fs_info->transaction_kthread_mutex);
@@ -1507,6 +1571,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->hashers);
 	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
+	INIT_LIST_HEAD(&fs_info->ordered_operations);
 	spin_lock_init(&fs_info->delalloc_lock);
 	spin_lock_init(&fs_info->new_trans_lock);
 	spin_lock_init(&fs_info->ref_cache_lock);
@@ -1566,10 +1631,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	extent_io_tree_init(&fs_info->pinned_extents,
 			     fs_info->btree_inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&fs_info->pending_del,
-			     fs_info->btree_inode->i_mapping, GFP_NOFS);
-	extent_io_tree_init(&fs_info->extent_ins,
-			     fs_info->btree_inode->i_mapping, GFP_NOFS);
 	fs_info->do_barriers = 1;
 
 	INIT_LIST_HEAD(&fs_info->dead_reloc_roots);
@@ -1582,15 +1643,18 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	insert_inode_hash(fs_info->btree_inode);
 
 	mutex_init(&fs_info->trans_mutex);
+	mutex_init(&fs_info->ordered_operations_mutex);
 	mutex_init(&fs_info->tree_log_mutex);
 	mutex_init(&fs_info->drop_mutex);
-	mutex_init(&fs_info->extent_ins_mutex);
-	mutex_init(&fs_info->pinned_mutex);
 	mutex_init(&fs_info->chunk_mutex);
 	mutex_init(&fs_info->transaction_kthread_mutex);
 	mutex_init(&fs_info->cleaner_mutex);
 	mutex_init(&fs_info->volume_mutex);
 	mutex_init(&fs_info->tree_reloc_mutex);
+
+	btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
+	btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
+
 	init_waitqueue_head(&fs_info->transaction_throttle);
 	init_waitqueue_head(&fs_info->transaction_wait);
 	init_waitqueue_head(&fs_info->async_submit_wait);
@@ -1777,7 +1841,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	ret = find_and_setup_root(tree_root, fs_info,
 				  BTRFS_DEV_TREE_OBJECTID, dev_root);
 	dev_root->track_dirty = 1;
-
 	if (ret)
 		goto fail_extent_root;
 
@@ -2314,10 +2377,9 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 	struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root;
 	u64 transid = btrfs_header_generation(buf);
 	struct inode *btree_inode = root->fs_info->btree_inode;
+	int was_dirty;
 
-	btrfs_set_lock_blocking(buf);
-
-	WARN_ON(!btrfs_tree_locked(buf));
+	btrfs_assert_tree_locked(buf);
 	if (transid != root->fs_info->generation) {
 		printk(KERN_CRIT "btrfs transid mismatch buffer %llu, "
 		       "found %llu running %llu\n",
@@ -2326,7 +2388,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
 		       (unsigned long long)root->fs_info->generation);
 		WARN_ON(1);
 	}
-	set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf);
+	was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree,
+					    buf);
+	if (!was_dirty) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		root->fs_info->dirty_metadata_bytes += buf->len;
+		spin_unlock(&root->fs_info->delalloc_lock);
+	}
 }
 
 void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
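
btrfs_mark_buffer_dirty now uses btrfs_assert_tree_locked() rather than WARN_ON(!btrfs_tree_locked(buf)) and no longer switches the buffer to the blocking lock just to mark it dirty. A tiny userspace sketch of an "assert the caller holds this lock" helper in the same spirit (owned_lock and the ol_* functions are invented names, not btrfs or pthread API):

    /* Toy model of a "caller must hold the lock" assertion: remember the
     * owner when locking, and let debug code check it later. */
    #include <assert.h>
    #include <pthread.h>
    #include <stdio.h>

    struct owned_lock {
            pthread_mutex_t mutex;
            pthread_t owner;
            int locked;
    };

    static void ol_lock(struct owned_lock *l)
    {
            pthread_mutex_lock(&l->mutex);
            l->owner = pthread_self();
            l->locked = 1;
    }

    static void ol_unlock(struct owned_lock *l)
    {
            l->locked = 0;
            pthread_mutex_unlock(&l->mutex);
    }

    static void ol_assert_held(struct owned_lock *l)
    {
            /* cheap debug check, analogous to asserting the tree lock is held */
            assert(l->locked && pthread_equal(l->owner, pthread_self()));
    }

    int main(void)
    {
            struct owned_lock l = { .locked = 0 };

            pthread_mutex_init(&l.mutex, NULL);
            ol_lock(&l);
            ol_assert_held(&l);     /* fine: we are the owner */
            ol_unlock(&l);
            printf("assert-held check passed while locked\n");
            return 0;
    }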
@@ -2341,7 +2409,7 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr)
 	unsigned long thresh = 32 * 1024 * 1024;
 	tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
 
-	if (current_is_pdflush() || current->flags & PF_MEMALLOC)
+	if (current->flags & PF_MEMALLOC)
 		return;
 
 	num_dirty = count_range_bits(tree, &start, (u64)-1,
@@ -2366,6 +2434,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 int btree_lock_page_hook(struct page *page)
 {
 	struct inode *inode = page->mapping->host;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_buffer *eb;
 	unsigned long len;
@@ -2381,6 +2450,16 @@ int btree_lock_page_hook(struct page *page)
 
 	btrfs_tree_lock(eb);
 	btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
+
+	if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+		spin_lock(&root->fs_info->delalloc_lock);
+		if (root->fs_info->dirty_metadata_bytes >= eb->len)
+			root->fs_info->dirty_metadata_bytes -= eb->len;
+		else
+			WARN_ON(1);
+		spin_unlock(&root->fs_info->delalloc_lock);
+	}
+
 	btrfs_tree_unlock(eb);
 	free_extent_buffer(eb);
 out: