aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c632
1 files changed, 533 insertions, 99 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 07b3ac662e19..632f8f3cc9db 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -256,8 +256,7 @@ void btrfs_csum_final(u32 crc, char *result)
256static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, 256static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
257 int verify) 257 int verify)
258{ 258{
259 u16 csum_size = 259 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
260 btrfs_super_csum_size(&root->fs_info->super_copy);
261 char *result = NULL; 260 char *result = NULL;
262 unsigned long len; 261 unsigned long len;
263 unsigned long cur_len; 262 unsigned long cur_len;
@@ -367,7 +366,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
367 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 366 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
368 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 367 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
369 while (1) { 368 while (1) {
370 ret = read_extent_buffer_pages(io_tree, eb, start, 1, 369 ret = read_extent_buffer_pages(io_tree, eb, start,
370 WAIT_COMPLETE,
371 btree_get_extent, mirror_num); 371 btree_get_extent, mirror_num);
372 if (!ret && 372 if (!ret &&
373 !verify_parent_transid(io_tree, eb, parent_transid)) 373 !verify_parent_transid(io_tree, eb, parent_transid))
@@ -608,11 +608,48 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
608 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 608 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
609 end = eb->start + end - 1; 609 end = eb->start + end - 1;
610err: 610err:
611 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
612 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
613 btree_readahead_hook(root, eb, eb->start, ret);
614 }
615
611 free_extent_buffer(eb); 616 free_extent_buffer(eb);
612out: 617out:
613 return ret; 618 return ret;
614} 619}
615 620
621static int btree_io_failed_hook(struct bio *failed_bio,
622 struct page *page, u64 start, u64 end,
623 int mirror_num, struct extent_state *state)
624{
625 struct extent_io_tree *tree;
626 unsigned long len;
627 struct extent_buffer *eb;
628 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
629
630 tree = &BTRFS_I(page->mapping->host)->io_tree;
631 if (page->private == EXTENT_PAGE_PRIVATE)
632 goto out;
633 if (!page->private)
634 goto out;
635
636 len = page->private >> 2;
637 WARN_ON(len == 0);
638
639 eb = alloc_extent_buffer(tree, start, len, page);
640 if (eb == NULL)
641 goto out;
642
643 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
644 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
645 btree_readahead_hook(root, eb, eb->start, -EIO);
646 }
647 free_extent_buffer(eb);
648
649out:
650 return -EIO; /* we fixed nothing */
651}
652
616static void end_workqueue_bio(struct bio *bio, int err) 653static void end_workqueue_bio(struct bio *bio, int err)
617{ 654{
618 struct end_io_wq *end_io_wq = bio->bi_private; 655 struct end_io_wq *end_io_wq = bio->bi_private;
@@ -908,7 +945,7 @@ static int btree_readpage(struct file *file, struct page *page)
908{ 945{
909 struct extent_io_tree *tree; 946 struct extent_io_tree *tree;
910 tree = &BTRFS_I(page->mapping->host)->io_tree; 947 tree = &BTRFS_I(page->mapping->host)->io_tree;
911 return extent_read_full_page(tree, page, btree_get_extent); 948 return extent_read_full_page(tree, page, btree_get_extent, 0);
912} 949}
913 950
914static int btree_releasepage(struct page *page, gfp_t gfp_flags) 951static int btree_releasepage(struct page *page, gfp_t gfp_flags)
@@ -974,11 +1011,43 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
974 if (!buf) 1011 if (!buf)
975 return 0; 1012 return 0;
976 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, 1013 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
977 buf, 0, 0, btree_get_extent, 0); 1014 buf, 0, WAIT_NONE, btree_get_extent, 0);
978 free_extent_buffer(buf); 1015 free_extent_buffer(buf);
979 return ret; 1016 return ret;
980} 1017}
981 1018
1019int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
1020 int mirror_num, struct extent_buffer **eb)
1021{
1022 struct extent_buffer *buf = NULL;
1023 struct inode *btree_inode = root->fs_info->btree_inode;
1024 struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
1025 int ret;
1026
1027 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
1028 if (!buf)
1029 return 0;
1030
1031 set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
1032
1033 ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK,
1034 btree_get_extent, mirror_num);
1035 if (ret) {
1036 free_extent_buffer(buf);
1037 return ret;
1038 }
1039
1040 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
1041 free_extent_buffer(buf);
1042 return -EIO;
1043 } else if (extent_buffer_uptodate(io_tree, buf, NULL)) {
1044 *eb = buf;
1045 } else {
1046 free_extent_buffer(buf);
1047 }
1048 return 0;
1049}
1050
982struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 1051struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
983 u64 bytenr, u32 blocksize) 1052 u64 bytenr, u32 blocksize)
984{ 1053{
@@ -1135,10 +1204,12 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
1135 1204
1136 generation = btrfs_root_generation(&root->root_item); 1205 generation = btrfs_root_generation(&root->root_item);
1137 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1206 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1207 root->commit_root = NULL;
1138 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1208 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1139 blocksize, generation); 1209 blocksize, generation);
1140 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { 1210 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
1141 free_extent_buffer(root->node); 1211 free_extent_buffer(root->node);
1212 root->node = NULL;
1142 return -EIO; 1213 return -EIO;
1143 } 1214 }
1144 root->commit_root = btrfs_root_node(root); 1215 root->commit_root = btrfs_root_node(root);
@@ -1577,6 +1648,235 @@ sleep:
1577 return 0; 1648 return 0;
1578} 1649}
1579 1650
1651/*
1652 * this will find the highest generation in the array of
1653 * root backups. The index of the highest array is returned,
1654 * or -1 if we can't find anything.
1655 *
1656 * We check to make sure the array is valid by comparing the
1657 * generation of the latest root in the array with the generation
1658 * in the super block. If they don't match we pitch it.
1659 */
1660static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen)
1661{
1662 u64 cur;
1663 int newest_index = -1;
1664 struct btrfs_root_backup *root_backup;
1665 int i;
1666
1667 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
1668 root_backup = info->super_copy->super_roots + i;
1669 cur = btrfs_backup_tree_root_gen(root_backup);
1670 if (cur == newest_gen)
1671 newest_index = i;
1672 }
1673
1674 /* check to see if we actually wrapped around */
1675 if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) {
1676 root_backup = info->super_copy->super_roots;
1677 cur = btrfs_backup_tree_root_gen(root_backup);
1678 if (cur == newest_gen)
1679 newest_index = 0;
1680 }
1681 return newest_index;
1682}
1683
1684
1685/*
1686 * find the oldest backup so we know where to store new entries
1687 * in the backup array. This will set the backup_root_index
1688 * field in the fs_info struct
1689 */
1690static void find_oldest_super_backup(struct btrfs_fs_info *info,
1691 u64 newest_gen)
1692{
1693 int newest_index = -1;
1694
1695 newest_index = find_newest_super_backup(info, newest_gen);
1696 /* if there was garbage in there, just move along */
1697 if (newest_index == -1) {
1698 info->backup_root_index = 0;
1699 } else {
1700 info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS;
1701 }
1702}
1703
1704/*
1705 * copy all the root pointers into the super backup array.
1706 * this will bump the backup pointer by one when it is
1707 * done
1708 */
1709static void backup_super_roots(struct btrfs_fs_info *info)
1710{
1711 int next_backup;
1712 struct btrfs_root_backup *root_backup;
1713 int last_backup;
1714
1715 next_backup = info->backup_root_index;
1716 last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) %
1717 BTRFS_NUM_BACKUP_ROOTS;
1718
1719 /*
1720 * just overwrite the last backup if we're at the same generation
1721 * this happens only at umount
1722 */
1723 root_backup = info->super_for_commit->super_roots + last_backup;
1724 if (btrfs_backup_tree_root_gen(root_backup) ==
1725 btrfs_header_generation(info->tree_root->node))
1726 next_backup = last_backup;
1727
1728 root_backup = info->super_for_commit->super_roots + next_backup;
1729
1730 /*
1731 * make sure all of our padding and empty slots get zero filled
1732 * regardless of which ones we use today
1733 */
1734 memset(root_backup, 0, sizeof(*root_backup));
1735
1736 info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS;
1737
1738 btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start);
1739 btrfs_set_backup_tree_root_gen(root_backup,
1740 btrfs_header_generation(info->tree_root->node));
1741
1742 btrfs_set_backup_tree_root_level(root_backup,
1743 btrfs_header_level(info->tree_root->node));
1744
1745 btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start);
1746 btrfs_set_backup_chunk_root_gen(root_backup,
1747 btrfs_header_generation(info->chunk_root->node));
1748 btrfs_set_backup_chunk_root_level(root_backup,
1749 btrfs_header_level(info->chunk_root->node));
1750
1751 btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start);
1752 btrfs_set_backup_extent_root_gen(root_backup,
1753 btrfs_header_generation(info->extent_root->node));
1754 btrfs_set_backup_extent_root_level(root_backup,
1755 btrfs_header_level(info->extent_root->node));
1756
1757 /*
1758 * we might commit during log recovery, which happens before we set
1759 * the fs_root. Make sure it is valid before we fill it in.
1760 */
1761 if (info->fs_root && info->fs_root->node) {
1762 btrfs_set_backup_fs_root(root_backup,
1763 info->fs_root->node->start);
1764 btrfs_set_backup_fs_root_gen(root_backup,
1765 btrfs_header_generation(info->fs_root->node));
1766 btrfs_set_backup_fs_root_level(root_backup,
1767 btrfs_header_level(info->fs_root->node));
1768 }
1769
1770 btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start);
1771 btrfs_set_backup_dev_root_gen(root_backup,
1772 btrfs_header_generation(info->dev_root->node));
1773 btrfs_set_backup_dev_root_level(root_backup,
1774 btrfs_header_level(info->dev_root->node));
1775
1776 btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start);
1777 btrfs_set_backup_csum_root_gen(root_backup,
1778 btrfs_header_generation(info->csum_root->node));
1779 btrfs_set_backup_csum_root_level(root_backup,
1780 btrfs_header_level(info->csum_root->node));
1781
1782 btrfs_set_backup_total_bytes(root_backup,
1783 btrfs_super_total_bytes(info->super_copy));
1784 btrfs_set_backup_bytes_used(root_backup,
1785 btrfs_super_bytes_used(info->super_copy));
1786 btrfs_set_backup_num_devices(root_backup,
1787 btrfs_super_num_devices(info->super_copy));
1788
1789 /*
1790 * if we don't copy this out to the super_copy, it won't get remembered
1791 * for the next commit
1792 */
1793 memcpy(&info->super_copy->super_roots,
1794 &info->super_for_commit->super_roots,
1795 sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS);
1796}
1797
1798/*
1799 * this copies info out of the root backup array and back into
1800 * the in-memory super block. It is meant to help iterate through
1801 * the array, so you send it the number of backups you've already
1802 * tried and the last backup index you used.
1803 *
1804 * this returns -1 when it has tried all the backups
1805 */
1806static noinline int next_root_backup(struct btrfs_fs_info *info,
1807 struct btrfs_super_block *super,
1808 int *num_backups_tried, int *backup_index)
1809{
1810 struct btrfs_root_backup *root_backup;
1811 int newest = *backup_index;
1812
1813 if (*num_backups_tried == 0) {
1814 u64 gen = btrfs_super_generation(super);
1815
1816 newest = find_newest_super_backup(info, gen);
1817 if (newest == -1)
1818 return -1;
1819
1820 *backup_index = newest;
1821 *num_backups_tried = 1;
1822 } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) {
1823 /* we've tried all the backups, all done */
1824 return -1;
1825 } else {
1826 /* jump to the next oldest backup */
1827 newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) %
1828 BTRFS_NUM_BACKUP_ROOTS;
1829 *backup_index = newest;
1830 *num_backups_tried += 1;
1831 }
1832 root_backup = super->super_roots + newest;
1833
1834 btrfs_set_super_generation(super,
1835 btrfs_backup_tree_root_gen(root_backup));
1836 btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup));
1837 btrfs_set_super_root_level(super,
1838 btrfs_backup_tree_root_level(root_backup));
1839 btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup));
1840
1841 /*
1842 * fixme: the total bytes and num_devices need to match or we should
1843 * need a fsck
1844 */
1845 btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup));
1846 btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup));
1847 return 0;
1848}
1849
1850/* helper to cleanup tree roots */
1851static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1852{
1853 free_extent_buffer(info->tree_root->node);
1854 free_extent_buffer(info->tree_root->commit_root);
1855 free_extent_buffer(info->dev_root->node);
1856 free_extent_buffer(info->dev_root->commit_root);
1857 free_extent_buffer(info->extent_root->node);
1858 free_extent_buffer(info->extent_root->commit_root);
1859 free_extent_buffer(info->csum_root->node);
1860 free_extent_buffer(info->csum_root->commit_root);
1861
1862 info->tree_root->node = NULL;
1863 info->tree_root->commit_root = NULL;
1864 info->dev_root->node = NULL;
1865 info->dev_root->commit_root = NULL;
1866 info->extent_root->node = NULL;
1867 info->extent_root->commit_root = NULL;
1868 info->csum_root->node = NULL;
1869 info->csum_root->commit_root = NULL;
1870
1871 if (chunk_root) {
1872 free_extent_buffer(info->chunk_root->node);
1873 free_extent_buffer(info->chunk_root->commit_root);
1874 info->chunk_root->node = NULL;
1875 info->chunk_root->commit_root = NULL;
1876 }
1877}
1878
1879
1580struct btrfs_root *open_ctree(struct super_block *sb, 1880struct btrfs_root *open_ctree(struct super_block *sb,
1581 struct btrfs_fs_devices *fs_devices, 1881 struct btrfs_fs_devices *fs_devices,
1582 char *options) 1882 char *options)
@@ -1590,29 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1590 u64 features; 1890 u64 features;
1591 struct btrfs_key location; 1891 struct btrfs_key location;
1592 struct buffer_head *bh; 1892 struct buffer_head *bh;
1593 struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), 1893 struct btrfs_super_block *disk_super;
1594 GFP_NOFS);
1595 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1596 GFP_NOFS);
1597 struct btrfs_root *tree_root = btrfs_sb(sb); 1894 struct btrfs_root *tree_root = btrfs_sb(sb);
1598 struct btrfs_fs_info *fs_info = NULL; 1895 struct btrfs_fs_info *fs_info = tree_root->fs_info;
1599 struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), 1896 struct btrfs_root *extent_root;
1600 GFP_NOFS); 1897 struct btrfs_root *csum_root;
1601 struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), 1898 struct btrfs_root *chunk_root;
1602 GFP_NOFS); 1899 struct btrfs_root *dev_root;
1603 struct btrfs_root *log_tree_root; 1900 struct btrfs_root *log_tree_root;
1604
1605 int ret; 1901 int ret;
1606 int err = -EINVAL; 1902 int err = -EINVAL;
1607 1903 int num_backups_tried = 0;
1608 struct btrfs_super_block *disk_super; 1904 int backup_index = 0;
1609 1905
1610 if (!extent_root || !tree_root || !tree_root->fs_info || 1906 extent_root = fs_info->extent_root =
1611 !chunk_root || !dev_root || !csum_root) { 1907 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1908 csum_root = fs_info->csum_root =
1909 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1910 chunk_root = fs_info->chunk_root =
1911 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1912 dev_root = fs_info->dev_root =
1913 kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
1914
1915 if (!extent_root || !csum_root || !chunk_root || !dev_root) {
1612 err = -ENOMEM; 1916 err = -ENOMEM;
1613 goto fail; 1917 goto fail;
1614 } 1918 }
1615 fs_info = tree_root->fs_info;
1616 1919
1617 ret = init_srcu_struct(&fs_info->subvol_srcu); 1920 ret = init_srcu_struct(&fs_info->subvol_srcu);
1618 if (ret) { 1921 if (ret) {
@@ -1648,15 +1951,10 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1648 spin_lock_init(&fs_info->fs_roots_radix_lock); 1951 spin_lock_init(&fs_info->fs_roots_radix_lock);
1649 spin_lock_init(&fs_info->delayed_iput_lock); 1952 spin_lock_init(&fs_info->delayed_iput_lock);
1650 spin_lock_init(&fs_info->defrag_inodes_lock); 1953 spin_lock_init(&fs_info->defrag_inodes_lock);
1954 spin_lock_init(&fs_info->free_chunk_lock);
1651 mutex_init(&fs_info->reloc_mutex); 1955 mutex_init(&fs_info->reloc_mutex);
1652 1956
1653 init_completion(&fs_info->kobj_unregister); 1957 init_completion(&fs_info->kobj_unregister);
1654 fs_info->tree_root = tree_root;
1655 fs_info->extent_root = extent_root;
1656 fs_info->csum_root = csum_root;
1657 fs_info->chunk_root = chunk_root;
1658 fs_info->dev_root = dev_root;
1659 fs_info->fs_devices = fs_devices;
1660 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); 1958 INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
1661 INIT_LIST_HEAD(&fs_info->space_info); 1959 INIT_LIST_HEAD(&fs_info->space_info);
1662 btrfs_mapping_init(&fs_info->mapping_tree); 1960 btrfs_mapping_init(&fs_info->mapping_tree);
@@ -1665,8 +1963,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1665 btrfs_init_block_rsv(&fs_info->trans_block_rsv); 1963 btrfs_init_block_rsv(&fs_info->trans_block_rsv);
1666 btrfs_init_block_rsv(&fs_info->chunk_block_rsv); 1964 btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
1667 btrfs_init_block_rsv(&fs_info->empty_block_rsv); 1965 btrfs_init_block_rsv(&fs_info->empty_block_rsv);
1668 INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); 1966 btrfs_init_block_rsv(&fs_info->delayed_block_rsv);
1669 mutex_init(&fs_info->durable_block_rsv_mutex);
1670 atomic_set(&fs_info->nr_async_submits, 0); 1967 atomic_set(&fs_info->nr_async_submits, 0);
1671 atomic_set(&fs_info->async_delalloc_pages, 0); 1968 atomic_set(&fs_info->async_delalloc_pages, 0);
1672 atomic_set(&fs_info->async_submit_draining, 0); 1969 atomic_set(&fs_info->async_submit_draining, 0);
@@ -1677,6 +1974,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1677 fs_info->metadata_ratio = 0; 1974 fs_info->metadata_ratio = 0;
1678 fs_info->defrag_inodes = RB_ROOT; 1975 fs_info->defrag_inodes = RB_ROOT;
1679 fs_info->trans_no_join = 0; 1976 fs_info->trans_no_join = 0;
1977 fs_info->free_chunk_space = 0;
1978
1979 /* readahead state */
1980 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
1981 spin_lock_init(&fs_info->reada_lock);
1680 1982
1681 fs_info->thread_pool_size = min_t(unsigned long, 1983 fs_info->thread_pool_size = min_t(unsigned long,
1682 num_online_cpus() + 2, 8); 1984 num_online_cpus() + 2, 8);
@@ -1705,7 +2007,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1705 sb->s_bdi = &fs_info->bdi; 2007 sb->s_bdi = &fs_info->bdi;
1706 2008
1707 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; 2009 fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
1708 fs_info->btree_inode->i_nlink = 1; 2010 set_nlink(fs_info->btree_inode, 1);
1709 /* 2011 /*
1710 * we set the i_size on the btree inode to the max possible int. 2012 * we set the i_size on the btree inode to the max possible int.
1711 * the real end of the address space is determined by all of 2013 * the real end of the address space is determined by all of
@@ -1766,14 +2068,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1766 goto fail_alloc; 2068 goto fail_alloc;
1767 } 2069 }
1768 2070
1769 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 2071 memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
1770 memcpy(&fs_info->super_for_commit, &fs_info->super_copy, 2072 memcpy(fs_info->super_for_commit, fs_info->super_copy,
1771 sizeof(fs_info->super_for_commit)); 2073 sizeof(*fs_info->super_for_commit));
1772 brelse(bh); 2074 brelse(bh);
1773 2075
1774 memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); 2076 memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
1775 2077
1776 disk_super = &fs_info->super_copy; 2078 disk_super = fs_info->super_copy;
1777 if (!btrfs_super_root(disk_super)) 2079 if (!btrfs_super_root(disk_super))
1778 goto fail_alloc; 2080 goto fail_alloc;
1779 2081
@@ -1783,6 +2085,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1783 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 2085 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1784 2086
1785 /* 2087 /*
2088 * run through our array of backup supers and setup
2089 * our ring pointer to the oldest one
2090 */
2091 generation = btrfs_super_generation(disk_super);
2092 find_oldest_super_backup(fs_info, generation);
2093
2094 /*
1786 * In the long term, we'll store the compression type in the super 2095 * In the long term, we'll store the compression type in the super
1787 * block, and it'll be used for per file compression control. 2096 * block, and it'll be used for per file compression control.
1788 */ 2097 */
@@ -1870,6 +2179,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1870 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", 2179 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
1871 fs_info->thread_pool_size, 2180 fs_info->thread_pool_size,
1872 &fs_info->generic_worker); 2181 &fs_info->generic_worker);
2182 btrfs_init_workers(&fs_info->readahead_workers, "readahead",
2183 fs_info->thread_pool_size,
2184 &fs_info->generic_worker);
1873 2185
1874 /* 2186 /*
1875 * endios are largely parallel and should have a very 2187 * endios are largely parallel and should have a very
@@ -1880,6 +2192,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1880 2192
1881 fs_info->endio_write_workers.idle_thresh = 2; 2193 fs_info->endio_write_workers.idle_thresh = 2;
1882 fs_info->endio_meta_write_workers.idle_thresh = 2; 2194 fs_info->endio_meta_write_workers.idle_thresh = 2;
2195 fs_info->readahead_workers.idle_thresh = 2;
1883 2196
1884 btrfs_start_workers(&fs_info->workers, 1); 2197 btrfs_start_workers(&fs_info->workers, 1);
1885 btrfs_start_workers(&fs_info->generic_worker, 1); 2198 btrfs_start_workers(&fs_info->generic_worker, 1);
@@ -1893,6 +2206,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1893 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 2206 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1894 btrfs_start_workers(&fs_info->delayed_workers, 1); 2207 btrfs_start_workers(&fs_info->delayed_workers, 1);
1895 btrfs_start_workers(&fs_info->caching_workers, 1); 2208 btrfs_start_workers(&fs_info->caching_workers, 1);
2209 btrfs_start_workers(&fs_info->readahead_workers, 1);
1896 2210
1897 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 2211 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1898 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2212 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1939,7 +2253,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1939 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2253 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
1940 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2254 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
1941 sb->s_id); 2255 sb->s_id);
1942 goto fail_chunk_root; 2256 goto fail_tree_roots;
1943 } 2257 }
1944 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); 2258 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
1945 chunk_root->commit_root = btrfs_root_node(chunk_root); 2259 chunk_root->commit_root = btrfs_root_node(chunk_root);
@@ -1954,11 +2268,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1954 if (ret) { 2268 if (ret) {
1955 printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", 2269 printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
1956 sb->s_id); 2270 sb->s_id);
1957 goto fail_chunk_root; 2271 goto fail_tree_roots;
1958 } 2272 }
1959 2273
1960 btrfs_close_extra_devices(fs_devices); 2274 btrfs_close_extra_devices(fs_devices);
1961 2275
2276retry_root_backup:
1962 blocksize = btrfs_level_size(tree_root, 2277 blocksize = btrfs_level_size(tree_root,
1963 btrfs_super_root_level(disk_super)); 2278 btrfs_super_root_level(disk_super));
1964 generation = btrfs_super_generation(disk_super); 2279 generation = btrfs_super_generation(disk_super);
@@ -1966,32 +2281,33 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1966 tree_root->node = read_tree_block(tree_root, 2281 tree_root->node = read_tree_block(tree_root,
1967 btrfs_super_root(disk_super), 2282 btrfs_super_root(disk_super),
1968 blocksize, generation); 2283 blocksize, generation);
1969 if (!tree_root->node) 2284 if (!tree_root->node ||
1970 goto fail_chunk_root; 2285 !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
1971 if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
1972 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", 2286 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
1973 sb->s_id); 2287 sb->s_id);
1974 goto fail_tree_root; 2288
2289 goto recovery_tree_root;
1975 } 2290 }
2291
1976 btrfs_set_root_node(&tree_root->root_item, tree_root->node); 2292 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
1977 tree_root->commit_root = btrfs_root_node(tree_root); 2293 tree_root->commit_root = btrfs_root_node(tree_root);
1978 2294
1979 ret = find_and_setup_root(tree_root, fs_info, 2295 ret = find_and_setup_root(tree_root, fs_info,
1980 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 2296 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
1981 if (ret) 2297 if (ret)
1982 goto fail_tree_root; 2298 goto recovery_tree_root;
1983 extent_root->track_dirty = 1; 2299 extent_root->track_dirty = 1;
1984 2300
1985 ret = find_and_setup_root(tree_root, fs_info, 2301 ret = find_and_setup_root(tree_root, fs_info,
1986 BTRFS_DEV_TREE_OBJECTID, dev_root); 2302 BTRFS_DEV_TREE_OBJECTID, dev_root);
1987 if (ret) 2303 if (ret)
1988 goto fail_extent_root; 2304 goto recovery_tree_root;
1989 dev_root->track_dirty = 1; 2305 dev_root->track_dirty = 1;
1990 2306
1991 ret = find_and_setup_root(tree_root, fs_info, 2307 ret = find_and_setup_root(tree_root, fs_info,
1992 BTRFS_CSUM_TREE_OBJECTID, csum_root); 2308 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1993 if (ret) 2309 if (ret)
1994 goto fail_dev_root; 2310 goto recovery_tree_root;
1995 2311
1996 csum_root->track_dirty = 1; 2312 csum_root->track_dirty = 1;
1997 2313
@@ -2124,22 +2440,13 @@ fail_cleaner:
2124 2440
2125fail_block_groups: 2441fail_block_groups:
2126 btrfs_free_block_groups(fs_info); 2442 btrfs_free_block_groups(fs_info);
2127 free_extent_buffer(csum_root->node); 2443
2128 free_extent_buffer(csum_root->commit_root); 2444fail_tree_roots:
2129fail_dev_root: 2445 free_root_pointers(fs_info, 1);
2130 free_extent_buffer(dev_root->node); 2446
2131 free_extent_buffer(dev_root->commit_root);
2132fail_extent_root:
2133 free_extent_buffer(extent_root->node);
2134 free_extent_buffer(extent_root->commit_root);
2135fail_tree_root:
2136 free_extent_buffer(tree_root->node);
2137 free_extent_buffer(tree_root->commit_root);
2138fail_chunk_root:
2139 free_extent_buffer(chunk_root->node);
2140 free_extent_buffer(chunk_root->commit_root);
2141fail_sb_buffer: 2447fail_sb_buffer:
2142 btrfs_stop_workers(&fs_info->generic_worker); 2448 btrfs_stop_workers(&fs_info->generic_worker);
2449 btrfs_stop_workers(&fs_info->readahead_workers);
2143 btrfs_stop_workers(&fs_info->fixup_workers); 2450 btrfs_stop_workers(&fs_info->fixup_workers);
2144 btrfs_stop_workers(&fs_info->delalloc_workers); 2451 btrfs_stop_workers(&fs_info->delalloc_workers);
2145 btrfs_stop_workers(&fs_info->workers); 2452 btrfs_stop_workers(&fs_info->workers);
@@ -2152,25 +2459,37 @@ fail_sb_buffer:
2152 btrfs_stop_workers(&fs_info->delayed_workers); 2459 btrfs_stop_workers(&fs_info->delayed_workers);
2153 btrfs_stop_workers(&fs_info->caching_workers); 2460 btrfs_stop_workers(&fs_info->caching_workers);
2154fail_alloc: 2461fail_alloc:
2155 kfree(fs_info->delayed_root);
2156fail_iput: 2462fail_iput:
2463 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2464
2157 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2465 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2158 iput(fs_info->btree_inode); 2466 iput(fs_info->btree_inode);
2159
2160 btrfs_close_devices(fs_info->fs_devices);
2161 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2162fail_bdi: 2467fail_bdi:
2163 bdi_destroy(&fs_info->bdi); 2468 bdi_destroy(&fs_info->bdi);
2164fail_srcu: 2469fail_srcu:
2165 cleanup_srcu_struct(&fs_info->subvol_srcu); 2470 cleanup_srcu_struct(&fs_info->subvol_srcu);
2166fail: 2471fail:
2167 kfree(extent_root); 2472 btrfs_close_devices(fs_info->fs_devices);
2168 kfree(tree_root); 2473 free_fs_info(fs_info);
2169 kfree(fs_info);
2170 kfree(chunk_root);
2171 kfree(dev_root);
2172 kfree(csum_root);
2173 return ERR_PTR(err); 2474 return ERR_PTR(err);
2475
2476recovery_tree_root:
2477 if (!btrfs_test_opt(tree_root, RECOVERY))
2478 goto fail_tree_roots;
2479
2480 free_root_pointers(fs_info, 0);
2481
2482 /* don't use the log in recovery mode, it won't be valid */
2483 btrfs_set_super_log_root(disk_super, 0);
2484
2485 /* we can't trust the free space cache either */
2486 btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
2487
2488 ret = next_root_backup(fs_info, fs_info->super_copy,
2489 &num_backups_tried, &backup_index);
2490 if (ret == -1)
2491 goto fail_block_groups;
2492 goto retry_root_backup;
2174} 2493}
2175 2494
2176static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) 2495static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
@@ -2254,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device,
2254 int errors = 0; 2573 int errors = 0;
2255 u32 crc; 2574 u32 crc;
2256 u64 bytenr; 2575 u64 bytenr;
2257 int last_barrier = 0;
2258 2576
2259 if (max_mirrors == 0) 2577 if (max_mirrors == 0)
2260 max_mirrors = BTRFS_SUPER_MIRROR_MAX; 2578 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
2261 2579
2262 /* make sure only the last submit_bh does a barrier */
2263 if (do_barriers) {
2264 for (i = 0; i < max_mirrors; i++) {
2265 bytenr = btrfs_sb_offset(i);
2266 if (bytenr + BTRFS_SUPER_INFO_SIZE >=
2267 device->total_bytes)
2268 break;
2269 last_barrier = i;
2270 }
2271 }
2272
2273 for (i = 0; i < max_mirrors; i++) { 2580 for (i = 0; i < max_mirrors; i++) {
2274 bytenr = btrfs_sb_offset(i); 2581 bytenr = btrfs_sb_offset(i);
2275 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) 2582 if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes)
@@ -2315,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device,
2315 bh->b_end_io = btrfs_end_buffer_write_sync; 2622 bh->b_end_io = btrfs_end_buffer_write_sync;
2316 } 2623 }
2317 2624
2318 if (i == last_barrier && do_barriers) 2625 /*
2319 ret = submit_bh(WRITE_FLUSH_FUA, bh); 2626 * we fua the first super. The others we allow
2320 else 2627 * to go down lazy.
2321 ret = submit_bh(WRITE_SYNC, bh); 2628 */
2322 2629 ret = submit_bh(WRITE_FUA, bh);
2323 if (ret) 2630 if (ret)
2324 errors++; 2631 errors++;
2325 } 2632 }
2326 return errors < i ? 0 : -1; 2633 return errors < i ? 0 : -1;
2327} 2634}
2328 2635
2636/*
2637 * endio for the write_dev_flush, this will wake anyone waiting
2638 * for the barrier when it is done
2639 */
2640static void btrfs_end_empty_barrier(struct bio *bio, int err)
2641{
2642 if (err) {
2643 if (err == -EOPNOTSUPP)
2644 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
2645 clear_bit(BIO_UPTODATE, &bio->bi_flags);
2646 }
2647 if (bio->bi_private)
2648 complete(bio->bi_private);
2649 bio_put(bio);
2650}
2651
2652/*
2653 * trigger flushes for one the devices. If you pass wait == 0, the flushes are
2654 * sent down. With wait == 1, it waits for the previous flush.
2655 *
2656 * any device where the flush fails with eopnotsupp are flagged as not-barrier
2657 * capable
2658 */
2659static int write_dev_flush(struct btrfs_device *device, int wait)
2660{
2661 struct bio *bio;
2662 int ret = 0;
2663
2664 if (device->nobarriers)
2665 return 0;
2666
2667 if (wait) {
2668 bio = device->flush_bio;
2669 if (!bio)
2670 return 0;
2671
2672 wait_for_completion(&device->flush_wait);
2673
2674 if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
2675 printk("btrfs: disabling barriers on dev %s\n",
2676 device->name);
2677 device->nobarriers = 1;
2678 }
2679 if (!bio_flagged(bio, BIO_UPTODATE)) {
2680 ret = -EIO;
2681 }
2682
2683 /* drop the reference from the wait == 0 run */
2684 bio_put(bio);
2685 device->flush_bio = NULL;
2686
2687 return ret;
2688 }
2689
2690 /*
2691 * one reference for us, and we leave it for the
2692 * caller
2693 */
2694 device->flush_bio = NULL;;
2695 bio = bio_alloc(GFP_NOFS, 0);
2696 if (!bio)
2697 return -ENOMEM;
2698
2699 bio->bi_end_io = btrfs_end_empty_barrier;
2700 bio->bi_bdev = device->bdev;
2701 init_completion(&device->flush_wait);
2702 bio->bi_private = &device->flush_wait;
2703 device->flush_bio = bio;
2704
2705 bio_get(bio);
2706 submit_bio(WRITE_FLUSH, bio);
2707
2708 return 0;
2709}
2710
2711/*
2712 * send an empty flush down to each device in parallel,
2713 * then wait for them
2714 */
2715static int barrier_all_devices(struct btrfs_fs_info *info)
2716{
2717 struct list_head *head;
2718 struct btrfs_device *dev;
2719 int errors = 0;
2720 int ret;
2721
2722 /* send down all the barriers */
2723 head = &info->fs_devices->devices;
2724 list_for_each_entry_rcu(dev, head, dev_list) {
2725 if (!dev->bdev) {
2726 errors++;
2727 continue;
2728 }
2729 if (!dev->in_fs_metadata || !dev->writeable)
2730 continue;
2731
2732 ret = write_dev_flush(dev, 0);
2733 if (ret)
2734 errors++;
2735 }
2736
2737 /* wait for all the barriers */
2738 list_for_each_entry_rcu(dev, head, dev_list) {
2739 if (!dev->bdev) {
2740 errors++;
2741 continue;
2742 }
2743 if (!dev->in_fs_metadata || !dev->writeable)
2744 continue;
2745
2746 ret = write_dev_flush(dev, 1);
2747 if (ret)
2748 errors++;
2749 }
2750 if (errors)
2751 return -EIO;
2752 return 0;
2753}
2754
2329int write_all_supers(struct btrfs_root *root, int max_mirrors) 2755int write_all_supers(struct btrfs_root *root, int max_mirrors)
2330{ 2756{
2331 struct list_head *head; 2757 struct list_head *head;
@@ -2338,14 +2764,19 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2338 int total_errors = 0; 2764 int total_errors = 0;
2339 u64 flags; 2765 u64 flags;
2340 2766
2341 max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; 2767 max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
2342 do_barriers = !btrfs_test_opt(root, NOBARRIER); 2768 do_barriers = !btrfs_test_opt(root, NOBARRIER);
2769 backup_super_roots(root->fs_info);
2343 2770
2344 sb = &root->fs_info->super_for_commit; 2771 sb = root->fs_info->super_for_commit;
2345 dev_item = &sb->dev_item; 2772 dev_item = &sb->dev_item;
2346 2773
2347 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2774 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
2348 head = &root->fs_info->fs_devices->devices; 2775 head = &root->fs_info->fs_devices->devices;
2776
2777 if (do_barriers)
2778 barrier_all_devices(root->fs_info);
2779
2349 list_for_each_entry_rcu(dev, head, dev_list) { 2780 list_for_each_entry_rcu(dev, head, dev_list) {
2350 if (!dev->bdev) { 2781 if (!dev->bdev) {
2351 total_errors++; 2782 total_errors++;
@@ -2545,8 +2976,6 @@ int close_ctree(struct btrfs_root *root)
2545 /* clear out the rbtree of defraggable inodes */ 2976 /* clear out the rbtree of defraggable inodes */
2546 btrfs_run_defrag_inodes(root->fs_info); 2977 btrfs_run_defrag_inodes(root->fs_info);
2547 2978
2548 btrfs_put_block_group_cache(fs_info);
2549
2550 /* 2979 /*
2551 * Here come 2 situations when btrfs is broken to flip readonly: 2980 * Here come 2 situations when btrfs is broken to flip readonly:
2552 * 2981 *
@@ -2572,6 +3001,8 @@ int close_ctree(struct btrfs_root *root)
2572 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 3001 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2573 } 3002 }
2574 3003
3004 btrfs_put_block_group_cache(fs_info);
3005
2575 kthread_stop(root->fs_info->transaction_kthread); 3006 kthread_stop(root->fs_info->transaction_kthread);
2576 kthread_stop(root->fs_info->cleaner_kthread); 3007 kthread_stop(root->fs_info->cleaner_kthread);
2577 3008
@@ -2603,7 +3034,6 @@ int close_ctree(struct btrfs_root *root)
2603 del_fs_roots(fs_info); 3034 del_fs_roots(fs_info);
2604 3035
2605 iput(fs_info->btree_inode); 3036 iput(fs_info->btree_inode);
2606 kfree(fs_info->delayed_root);
2607 3037
2608 btrfs_stop_workers(&fs_info->generic_worker); 3038 btrfs_stop_workers(&fs_info->generic_worker);
2609 btrfs_stop_workers(&fs_info->fixup_workers); 3039 btrfs_stop_workers(&fs_info->fixup_workers);
@@ -2617,6 +3047,7 @@ int close_ctree(struct btrfs_root *root)
2617 btrfs_stop_workers(&fs_info->submit_workers); 3047 btrfs_stop_workers(&fs_info->submit_workers);
2618 btrfs_stop_workers(&fs_info->delayed_workers); 3048 btrfs_stop_workers(&fs_info->delayed_workers);
2619 btrfs_stop_workers(&fs_info->caching_workers); 3049 btrfs_stop_workers(&fs_info->caching_workers);
3050 btrfs_stop_workers(&fs_info->readahead_workers);
2620 3051
2621 btrfs_close_devices(fs_info->fs_devices); 3052 btrfs_close_devices(fs_info->fs_devices);
2622 btrfs_mapping_tree_free(&fs_info->mapping_tree); 3053 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2624,12 +3055,7 @@ int close_ctree(struct btrfs_root *root)
2624 bdi_destroy(&fs_info->bdi); 3055 bdi_destroy(&fs_info->bdi);
2625 cleanup_srcu_struct(&fs_info->subvol_srcu); 3056 cleanup_srcu_struct(&fs_info->subvol_srcu);
2626 3057
2627 kfree(fs_info->extent_root); 3058 free_fs_info(fs_info);
2628 kfree(fs_info->tree_root);
2629 kfree(fs_info->chunk_root);
2630 kfree(fs_info->dev_root);
2631 kfree(fs_info->csum_root);
2632 kfree(fs_info);
2633 3059
2634 return 0; 3060 return 0;
2635} 3061}
@@ -2735,7 +3161,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
2735 return ret; 3161 return ret;
2736} 3162}
2737 3163
2738int btree_lock_page_hook(struct page *page) 3164static int btree_lock_page_hook(struct page *page, void *data,
3165 void (*flush_fn)(void *))
2739{ 3166{
2740 struct inode *inode = page->mapping->host; 3167 struct inode *inode = page->mapping->host;
2741 struct btrfs_root *root = BTRFS_I(inode)->root; 3168 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2752,7 +3179,10 @@ int btree_lock_page_hook(struct page *page)
2752 if (!eb) 3179 if (!eb)
2753 goto out; 3180 goto out;
2754 3181
2755 btrfs_tree_lock(eb); 3182 if (!btrfs_try_tree_write_lock(eb)) {
3183 flush_fn(data);
3184 btrfs_tree_lock(eb);
3185 }
2756 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 3186 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
2757 3187
2758 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3188 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
@@ -2767,7 +3197,10 @@ int btree_lock_page_hook(struct page *page)
2767 btrfs_tree_unlock(eb); 3197 btrfs_tree_unlock(eb);
2768 free_extent_buffer(eb); 3198 free_extent_buffer(eb);
2769out: 3199out:
2770 lock_page(page); 3200 if (!trylock_page(page)) {
3201 flush_fn(data);
3202 lock_page(page);
3203 }
2771 return 0; 3204 return 0;
2772} 3205}
2773 3206
@@ -3123,6 +3556,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3123static struct extent_io_ops btree_extent_io_ops = { 3556static struct extent_io_ops btree_extent_io_ops = {
3124 .write_cache_pages_lock_hook = btree_lock_page_hook, 3557 .write_cache_pages_lock_hook = btree_lock_page_hook,
3125 .readpage_end_io_hook = btree_readpage_end_io_hook, 3558 .readpage_end_io_hook = btree_readpage_end_io_hook,
3559 .readpage_io_failed_hook = btree_io_failed_hook,
3126 .submit_bio_hook = btree_submit_bio_hook, 3560 .submit_bio_hook = btree_submit_bio_hook,
3127 /* note we're sharing with inode.c for the merge bio hook */ 3561 /* note we're sharing with inode.c for the merge bio hook */
3128 .merge_bio_hook = btrfs_merge_bio_hook, 3562 .merge_bio_hook = btrfs_merge_bio_hook,