about summary refs log tree commit diff stats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-11-06 23:03:41 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-11-06 23:03:41 -0500
commit 6a6662ced4153f6dbcfc40d7225c3cc45416039c (patch)
tree 77ad5d577333f02cd854e44827a407dd0388d4eb /fs/btrfs/disk-io.c
parent 32aaeffbd4a7457bf2f7448b33b5946ff2a960eb (diff)
parent 7c7e82a77fe3d89ae50824aa7c897454675eb4c4 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (114 commits)
  Btrfs: check for a null fs root when writing to the backup root log
  Btrfs: fix race during transaction joins
  Btrfs: fix a potential btrfs_bio leak on scrub fixups
  Btrfs: rename btrfs_bio multi -> bbio for consistency
  Btrfs: stop leaking btrfs_bios on readahead
  Btrfs: stop the readahead threads on failed mount
  Btrfs: fix extent_buffer leak in the metadata IO error handling
  Btrfs: fix the new inspection ioctls for 32 bit compat
  Btrfs: fix delayed insertion reservation
  Btrfs: ClearPageError during writepage and clean_tree_block
  Btrfs: be smarter about committing the transaction in reserve_metadata_bytes
  Btrfs: make a delayed_block_rsv for the delayed item insertion
  Btrfs: add a log of past tree roots
  btrfs: separate superblock items out of fs_info
  Btrfs: use the global reserve when truncating the free space cache inode
  Btrfs: release metadata from global reserve if we have to fallback for unlink
  Btrfs: make sure to flush queued bios if write_cache_pages waits
  Btrfs: fix extent pinning bugs in the tree log
  Btrfs: make sure btrfs_remove_free_space doesn't leak EAGAIN
  Btrfs: don't wait as long for more batches during SSD log commit
  ...
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- fs/btrfs/disk-io.c 441
1 files changed, 385 insertions, 56 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 07ea91879a91..102c176fc29c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -256,8 +256,7 @@ void btrfs_csum_final(u32 crc, char *result)
256static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, 256static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
257 int verify) 257 int verify)
258{ 258{
259 u16 csum_size = 259 u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
260 btrfs_super_csum_size(&root->fs_info->super_copy);
261 char *result = NULL; 260 char *result = NULL;
262 unsigned long len; 261 unsigned long len;
263 unsigned long cur_len; 262 unsigned long cur_len;
@@ -367,7 +366,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
367 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); 366 clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
368 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; 367 io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
369 while (1) { 368 while (1) {
370 ret = read_extent_buffer_pages(io_tree, eb, start, 1, 369 ret = read_extent_buffer_pages(io_tree, eb, start,
370 WAIT_COMPLETE,
371 btree_get_extent, mirror_num); 371 btree_get_extent, mirror_num);
372 if (!ret && 372 if (!ret &&
373 !verify_parent_transid(io_tree, eb, parent_transid)) 373 !verify_parent_transid(io_tree, eb, parent_transid))
@@ -608,11 +608,48 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
608 end = min_t(u64, eb->len, PAGE_CACHE_SIZE); 608 end = min_t(u64, eb->len, PAGE_CACHE_SIZE);
609 end = eb->start + end - 1; 609 end = eb->start + end - 1;
610err: 610err:
611 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
612 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
613 btree_readahead_hook(root, eb, eb->start, ret);
614 }
615
611 free_extent_buffer(eb); 616 free_extent_buffer(eb);
612out: 617out:
613 return ret; 618 return ret;
614} 619}
615 620
/*
 * Read-failure hook for btree pages.  If the page maps to an extent
 * buffer that is flagged EXTENT_BUFFER_READAHEAD, the flag is cleared
 * and btree_readahead_hook() is told about the failure with -EIO.
 *
 * failed_bio, end, mirror_num and state are not used here.
 *
 * Always returns -EIO: this hook never repairs the failed read.
 */
621static int btree_io_failed_hook(struct bio *failed_bio,
622 struct page *page, u64 start, u64 end,
623 u64 mirror_num, struct extent_state *state)
624{
625 struct extent_io_tree *tree;
626 unsigned long len;
627 struct extent_buffer *eb;
628 struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
629
630 tree = &BTRFS_I(page->mapping->host)->io_tree;
/* nothing to do when private is EXTENT_PAGE_PRIVATE or unset */
631 if (page->private == EXTENT_PAGE_PRIVATE)
632 goto out;
633 if (!page->private)
634 goto out;
635
/* NOTE(review): presumably private holds the buffer length << 2 - confirm */
636 len = page->private >> 2;
637 WARN_ON(len == 0);
638
639 eb = alloc_extent_buffer(tree, start, len, page);
640 if (eb == NULL)
641 goto out;
642
/* only buffers flagged for readahead get the failure notification */
643 if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
644 clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
645 btree_readahead_hook(root, eb, eb->start, -EIO);
646 }
647 free_extent_buffer(eb);
648
649out:
650 return -EIO; /* we fixed nothing */
651}
652
616static void end_workqueue_bio(struct bio *bio, int err) 653static void end_workqueue_bio(struct bio *bio, int err)
617{ 654{
618 struct end_io_wq *end_io_wq = bio->bi_private; 655 struct end_io_wq *end_io_wq = bio->bi_private;
@@ -908,7 +945,7 @@ static int btree_readpage(struct file *file, struct page *page)
908{ 945{
909 struct extent_io_tree *tree; 946 struct extent_io_tree *tree;
910 tree = &BTRFS_I(page->mapping->host)->io_tree; 947 tree = &BTRFS_I(page->mapping->host)->io_tree;
911 return extent_read_full_page(tree, page, btree_get_extent); 948 return extent_read_full_page(tree, page, btree_get_extent, 0);
912} 949}
913 950
914static int btree_releasepage(struct page *page, gfp_t gfp_flags) 951static int btree_releasepage(struct page *page, gfp_t gfp_flags)
@@ -974,11 +1011,43 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
974 if (!buf) 1011 if (!buf)
975 return 0; 1012 return 0;
976 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, 1013 read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
977 buf, 0, 0, btree_get_extent, 0); 1014 buf, 0, WAIT_NONE, btree_get_extent, 0);
978 free_extent_buffer(buf); 1015 free_extent_buffer(buf);
979 return ret; 1016 return ret;
980} 1017}
981 1018
/*
 * Start a readahead-flagged read of the tree block at bytenr.
 *
 * The buffer is marked EXTENT_BUFFER_READAHEAD and read with
 * WAIT_PAGE_LOCK from mirror_num.  If the buffer is up to date when the
 * read call returns, it is stored in *eb without dropping the
 * reference; on every other path *eb is left untouched, so the caller
 * must initialise it.
 *
 * Returns 0 (also when the buffer could not be found/created or is not
 * up to date), the non-zero result of read_extent_buffer_pages(), or
 * -EIO if the buffer is flagged EXTENT_BUFFER_CORRUPT.
 */
1019int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize,
1020 int mirror_num, struct extent_buffer **eb)
1021{
1022 struct extent_buffer *buf = NULL;
1023 struct inode *btree_inode = root->fs_info->btree_inode;
1024 struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree;
1025 int ret;
1026
1027 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
1028 if (!buf)
1029 return 0;
1030
1031 set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
1032
1033 ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK,
1034 btree_get_extent, mirror_num);
1035 if (ret) {
1036 free_extent_buffer(buf);
1037 return ret;
1038 }
1039
1040 if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) {
1041 free_extent_buffer(buf);
1042 return -EIO;
1043 } else if (extent_buffer_uptodate(io_tree, buf, NULL)) {
/* reference is kept and handed out through *eb */
1044 *eb = buf;
1045 } else {
1046 free_extent_buffer(buf);
1047 }
1048 return 0;
1049}
1050
982struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, 1051struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root,
983 u64 bytenr, u32 blocksize) 1052 u64 bytenr, u32 blocksize)
984{ 1053{
@@ -1135,10 +1204,12 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
1135 1204
1136 generation = btrfs_root_generation(&root->root_item); 1205 generation = btrfs_root_generation(&root->root_item);
1137 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1206 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1207 root->commit_root = NULL;
1138 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1208 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1139 blocksize, generation); 1209 blocksize, generation);
1140 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { 1210 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
1141 free_extent_buffer(root->node); 1211 free_extent_buffer(root->node);
1212 root->node = NULL;
1142 return -EIO; 1213 return -EIO;
1143 } 1214 }
1144 root->commit_root = btrfs_root_node(root); 1215 root->commit_root = btrfs_root_node(root);
@@ -1577,6 +1648,235 @@ sleep:
1577 return 0; 1648 return 0;
1578} 1649}
1579 1650
1651/*
1652 * Find the slot in the super block's root backup array whose tree
1653 * root generation equals newest_gen (the callers in this file pass
1654 * the super block generation).  Returns that slot's index, or -1 if
1655 * no slot matches.
1656 *
1657 * When several slots match, the highest matching index wins, except
1658 * that slot 0 is returned when both the last slot and slot 0 match.
1659 */
1660static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen)
1661{
1662 u64 cur;
1663 int newest_index = -1;
1664 struct btrfs_root_backup *root_backup;
1665 int i;
1666
1667 for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
1668 root_backup = info->super_copy->super_roots + i;
1669 cur = btrfs_backup_tree_root_gen(root_backup);
1670 if (cur == newest_gen)
1671 newest_index = i; /* no break: a later match overrides an earlier one */
1672 }
1673
1674 /* check to see if we actually wrapped around */
1675 if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) {
1676 root_backup = info->super_copy->super_roots;
1677 cur = btrfs_backup_tree_root_gen(root_backup);
1678 if (cur == newest_gen)
1679 newest_index = 0;
1680 }
1681 return newest_index;
1682}
1683
1684
1685/*
1686 * Set info->backup_root_index to the slot after the newest backup
1687 * (wrapping at BTRFS_NUM_BACKUP_ROOTS), i.e. the oldest slot, which is
1688 * where the next backup is stored. Falls back to 0 if none matches.
1689 */
1690static void find_oldest_super_backup(struct btrfs_fs_info *info,
1691 u64 newest_gen)
1692{
1693 int newest_index = -1;
1694
1695 newest_index = find_newest_super_backup(info, newest_gen);
1696 /* if there was garbage in there, just move along */
1697 if (newest_index == -1) {
1698 info->backup_root_index = 0;
1699 } else {
1700 info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS;
1701 }
1702}
1703
1704/*
1705 * Record the current tree, chunk, extent, fs, dev and csum roots plus
1706 * the super block's byte/device counts in one slot of
1707 * super_for_commit's backup array, then advance backup_root_index.
1708 */
1709static void backup_super_roots(struct btrfs_fs_info *info)
1710{
1711 int next_backup;
1712 struct btrfs_root_backup *root_backup;
1713 int last_backup;
1714
1715 next_backup = info->backup_root_index;
/* slot written by the previous call, one step back in the ring */
1716 last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) %
1717 BTRFS_NUM_BACKUP_ROOTS;
1718
1719 /*
1720 * just overwrite the last backup if we're at the same generation
1721 * this happens only at umount
1722 */
1723 root_backup = info->super_for_commit->super_roots + last_backup;
1724 if (btrfs_backup_tree_root_gen(root_backup) ==
1725 btrfs_header_generation(info->tree_root->node))
1726 next_backup = last_backup;
1727
1728 root_backup = info->super_for_commit->super_roots + next_backup;
1729
1730 /*
1731 * make sure all of our padding and empty slots get zero filled
1732 * regardless of which ones we use today
1733 */
1734 memset(root_backup, 0, sizeof(*root_backup));
1735
1736 info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS;
1737
/* for each root: block start, header generation and header level */
1738 btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start);
1739 btrfs_set_backup_tree_root_gen(root_backup,
1740 btrfs_header_generation(info->tree_root->node));
1741
1742 btrfs_set_backup_tree_root_level(root_backup,
1743 btrfs_header_level(info->tree_root->node));
1744
1745 btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start);
1746 btrfs_set_backup_chunk_root_gen(root_backup,
1747 btrfs_header_generation(info->chunk_root->node));
1748 btrfs_set_backup_chunk_root_level(root_backup,
1749 btrfs_header_level(info->chunk_root->node));
1750
1751 btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start);
1752 btrfs_set_backup_extent_root_gen(root_backup,
1753 btrfs_header_generation(info->extent_root->node));
1754 btrfs_set_backup_extent_root_level(root_backup,
1755 btrfs_header_level(info->extent_root->node));
1756
1757 /*
1758 * we might commit during log recovery, which happens before we set
1759 * the fs_root. Make sure it is valid before we fill it in.
1760 */
1761 if (info->fs_root && info->fs_root->node) {
1762 btrfs_set_backup_fs_root(root_backup,
1763 info->fs_root->node->start);
1764 btrfs_set_backup_fs_root_gen(root_backup,
1765 btrfs_header_generation(info->fs_root->node));
1766 btrfs_set_backup_fs_root_level(root_backup,
1767 btrfs_header_level(info->fs_root->node));
1768 }
1769
1770 btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start);
1771 btrfs_set_backup_dev_root_gen(root_backup,
1772 btrfs_header_generation(info->dev_root->node));
1773 btrfs_set_backup_dev_root_level(root_backup,
1774 btrfs_header_level(info->dev_root->node));
1775
1776 btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start);
1777 btrfs_set_backup_csum_root_gen(root_backup,
1778 btrfs_header_generation(info->csum_root->node));
1779 btrfs_set_backup_csum_root_level(root_backup,
1780 btrfs_header_level(info->csum_root->node));
1781
/* the counters below are read from super_copy, not super_for_commit */
1782 btrfs_set_backup_total_bytes(root_backup,
1783 btrfs_super_total_bytes(info->super_copy));
1784 btrfs_set_backup_bytes_used(root_backup,
1785 btrfs_super_bytes_used(info->super_copy));
1786 btrfs_set_backup_num_devices(root_backup,
1787 btrfs_super_num_devices(info->super_copy));
1788
1789 /*
1790 * if we don't copy this out to the super_copy, it won't get remembered
1791 * for the next commit
1792 */
1793 memcpy(&info->super_copy->super_roots,
1794 &info->super_for_commit->super_roots,
1795 sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS);
1796}
1797
1798/*
1799 * Copy one entry of the root backup array into the in-memory super
1800 * block. The first call (*num_backups_tried == 0) picks the newest
1801 * backup; each later call steps back one slot from *backup_index.
1802 * Both counters are updated for the next call.
1803 *
1804 * Returns 0 on success, -1 if no newest backup exists or all were tried.
1805 */
1806static noinline int next_root_backup(struct btrfs_fs_info *info,
1807 struct btrfs_super_block *super,
1808 int *num_backups_tried, int *backup_index)
1809{
1810 struct btrfs_root_backup *root_backup;
1811 int newest = *backup_index;
1812
1813 if (*num_backups_tried == 0) {
1814 u64 gen = btrfs_super_generation(super);
1815
1816 newest = find_newest_super_backup(info, gen);
1817 if (newest == -1)
1818 return -1;
1819
1820 *backup_index = newest;
1821 *num_backups_tried = 1;
1822 } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) {
1823 /* we've tried all the backups, all done */
1824 return -1;
1825 } else {
1826 /* jump to the next oldest backup */
1827 newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) %
1828 BTRFS_NUM_BACKUP_ROOTS;
1829 *backup_index = newest;
1830 *num_backups_tried += 1;
1831 }
/*
 * NOTE(review): find_newest_super_backup() above reads
 * info->super_copy while this indexes `super`; the open_ctree call in
 * this diff passes fs_info->super_copy as `super`, so both agree there.
 */
1832 root_backup = super->super_roots + newest;
1833
1834 btrfs_set_super_generation(super,
1835 btrfs_backup_tree_root_gen(root_backup));
1836 btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup));
1837 btrfs_set_super_root_level(super,
1838 btrfs_backup_tree_root_level(root_backup));
1839 btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup));
1840
1841 /*
1842 * fixme: the total bytes and num_devices need to match or we should
1843 * need a fsck
1844 */
1845 btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup));
1846 btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup));
1847 return 0;
1848}
1849
1850/* free node and commit_root of the tree/dev/extent/csum roots and NULL them */
1851static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
1852{
1853 free_extent_buffer(info->tree_root->node);
1854 free_extent_buffer(info->tree_root->commit_root);
1855 free_extent_buffer(info->dev_root->node);
1856 free_extent_buffer(info->dev_root->commit_root);
1857 free_extent_buffer(info->extent_root->node);
1858 free_extent_buffer(info->extent_root->commit_root);
1859 free_extent_buffer(info->csum_root->node);
1860 free_extent_buffer(info->csum_root->commit_root);
1861
/* clear the pointers after freeing them */
1862 info->tree_root->node = NULL;
1863 info->tree_root->commit_root = NULL;
1864 info->dev_root->node = NULL;
1865 info->dev_root->commit_root = NULL;
1866 info->extent_root->node = NULL;
1867 info->extent_root->commit_root = NULL;
1868 info->csum_root->node = NULL;
1869 info->csum_root->commit_root = NULL;
1870
/* the chunk root is only released when the caller passes non-zero */
1871 if (chunk_root) {
1872 free_extent_buffer(info->chunk_root->node);
1873 free_extent_buffer(info->chunk_root->commit_root);
1874 info->chunk_root->node = NULL;
1875 info->chunk_root->commit_root = NULL;
1876 }
1877}
1878
1879
1580struct btrfs_root *open_ctree(struct super_block *sb, 1880struct btrfs_root *open_ctree(struct super_block *sb,
1581 struct btrfs_fs_devices *fs_devices, 1881 struct btrfs_fs_devices *fs_devices,
1582 char *options) 1882 char *options)
@@ -1604,6 +1904,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1604 1904
1605 int ret; 1905 int ret;
1606 int err = -EINVAL; 1906 int err = -EINVAL;
1907 int num_backups_tried = 0;
1908 int backup_index = 0;
1607 1909
1608 struct btrfs_super_block *disk_super; 1910 struct btrfs_super_block *disk_super;
1609 1911
@@ -1648,6 +1950,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1648 spin_lock_init(&fs_info->fs_roots_radix_lock); 1950 spin_lock_init(&fs_info->fs_roots_radix_lock);
1649 spin_lock_init(&fs_info->delayed_iput_lock); 1951 spin_lock_init(&fs_info->delayed_iput_lock);
1650 spin_lock_init(&fs_info->defrag_inodes_lock); 1952 spin_lock_init(&fs_info->defrag_inodes_lock);
1953 spin_lock_init(&fs_info->free_chunk_lock);
1651 mutex_init(&fs_info->reloc_mutex); 1954 mutex_init(&fs_info->reloc_mutex);
1652 1955
1653 init_completion(&fs_info->kobj_unregister); 1956 init_completion(&fs_info->kobj_unregister);
@@ -1665,8 +1968,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1665 btrfs_init_block_rsv(&fs_info->trans_block_rsv); 1968 btrfs_init_block_rsv(&fs_info->trans_block_rsv);
1666 btrfs_init_block_rsv(&fs_info->chunk_block_rsv); 1969 btrfs_init_block_rsv(&fs_info->chunk_block_rsv);
1667 btrfs_init_block_rsv(&fs_info->empty_block_rsv); 1970 btrfs_init_block_rsv(&fs_info->empty_block_rsv);
1668 INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); 1971 btrfs_init_block_rsv(&fs_info->delayed_block_rsv);
1669 mutex_init(&fs_info->durable_block_rsv_mutex);
1670 atomic_set(&fs_info->nr_async_submits, 0); 1972 atomic_set(&fs_info->nr_async_submits, 0);
1671 atomic_set(&fs_info->async_delalloc_pages, 0); 1973 atomic_set(&fs_info->async_delalloc_pages, 0);
1672 atomic_set(&fs_info->async_submit_draining, 0); 1974 atomic_set(&fs_info->async_submit_draining, 0);
@@ -1677,6 +1979,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1677 fs_info->metadata_ratio = 0; 1979 fs_info->metadata_ratio = 0;
1678 fs_info->defrag_inodes = RB_ROOT; 1980 fs_info->defrag_inodes = RB_ROOT;
1679 fs_info->trans_no_join = 0; 1981 fs_info->trans_no_join = 0;
1982 fs_info->free_chunk_space = 0;
1983
1984 /* readahead state */
1985 INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
1986 spin_lock_init(&fs_info->reada_lock);
1680 1987
1681 fs_info->thread_pool_size = min_t(unsigned long, 1988 fs_info->thread_pool_size = min_t(unsigned long,
1682 num_online_cpus() + 2, 8); 1989 num_online_cpus() + 2, 8);
@@ -1766,14 +2073,14 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1766 goto fail_alloc; 2073 goto fail_alloc;
1767 } 2074 }
1768 2075
1769 memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); 2076 memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
1770 memcpy(&fs_info->super_for_commit, &fs_info->super_copy, 2077 memcpy(fs_info->super_for_commit, fs_info->super_copy,
1771 sizeof(fs_info->super_for_commit)); 2078 sizeof(*fs_info->super_for_commit));
1772 brelse(bh); 2079 brelse(bh);
1773 2080
1774 memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); 2081 memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
1775 2082
1776 disk_super = &fs_info->super_copy; 2083 disk_super = fs_info->super_copy;
1777 if (!btrfs_super_root(disk_super)) 2084 if (!btrfs_super_root(disk_super))
1778 goto fail_alloc; 2085 goto fail_alloc;
1779 2086
@@ -1783,6 +2090,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1783 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); 2090 btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
1784 2091
1785 /* 2092 /*
2093 * run through our array of backup supers and setup
2094 * our ring pointer to the oldest one
2095 */
2096 generation = btrfs_super_generation(disk_super);
2097 find_oldest_super_backup(fs_info, generation);
2098
2099 /*
1786 * In the long term, we'll store the compression type in the super 2100 * In the long term, we'll store the compression type in the super
1787 * block, and it'll be used for per file compression control. 2101 * block, and it'll be used for per file compression control.
1788 */ 2102 */
@@ -1870,6 +2184,9 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1870 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", 2184 btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
1871 fs_info->thread_pool_size, 2185 fs_info->thread_pool_size,
1872 &fs_info->generic_worker); 2186 &fs_info->generic_worker);
2187 btrfs_init_workers(&fs_info->readahead_workers, "readahead",
2188 fs_info->thread_pool_size,
2189 &fs_info->generic_worker);
1873 2190
1874 /* 2191 /*
1875 * endios are largely parallel and should have a very 2192 * endios are largely parallel and should have a very
@@ -1880,6 +2197,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1880 2197
1881 fs_info->endio_write_workers.idle_thresh = 2; 2198 fs_info->endio_write_workers.idle_thresh = 2;
1882 fs_info->endio_meta_write_workers.idle_thresh = 2; 2199 fs_info->endio_meta_write_workers.idle_thresh = 2;
2200 fs_info->readahead_workers.idle_thresh = 2;
1883 2201
1884 btrfs_start_workers(&fs_info->workers, 1); 2202 btrfs_start_workers(&fs_info->workers, 1);
1885 btrfs_start_workers(&fs_info->generic_worker, 1); 2203 btrfs_start_workers(&fs_info->generic_worker, 1);
@@ -1893,6 +2211,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1893 btrfs_start_workers(&fs_info->endio_freespace_worker, 1); 2211 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1894 btrfs_start_workers(&fs_info->delayed_workers, 1); 2212 btrfs_start_workers(&fs_info->delayed_workers, 1);
1895 btrfs_start_workers(&fs_info->caching_workers, 1); 2213 btrfs_start_workers(&fs_info->caching_workers, 1);
2214 btrfs_start_workers(&fs_info->readahead_workers, 1);
1896 2215
1897 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 2216 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1898 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 2217 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1939,7 +2258,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1939 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { 2258 if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
1940 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", 2259 printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
1941 sb->s_id); 2260 sb->s_id);
1942 goto fail_chunk_root; 2261 goto fail_tree_roots;
1943 } 2262 }
1944 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); 2263 btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
1945 chunk_root->commit_root = btrfs_root_node(chunk_root); 2264 chunk_root->commit_root = btrfs_root_node(chunk_root);
@@ -1954,11 +2273,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1954 if (ret) { 2273 if (ret) {
1955 printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", 2274 printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n",
1956 sb->s_id); 2275 sb->s_id);
1957 goto fail_chunk_root; 2276 goto fail_tree_roots;
1958 } 2277 }
1959 2278
1960 btrfs_close_extra_devices(fs_devices); 2279 btrfs_close_extra_devices(fs_devices);
1961 2280
2281retry_root_backup:
1962 blocksize = btrfs_level_size(tree_root, 2282 blocksize = btrfs_level_size(tree_root,
1963 btrfs_super_root_level(disk_super)); 2283 btrfs_super_root_level(disk_super));
1964 generation = btrfs_super_generation(disk_super); 2284 generation = btrfs_super_generation(disk_super);
@@ -1966,32 +2286,33 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1966 tree_root->node = read_tree_block(tree_root, 2286 tree_root->node = read_tree_block(tree_root,
1967 btrfs_super_root(disk_super), 2287 btrfs_super_root(disk_super),
1968 blocksize, generation); 2288 blocksize, generation);
1969 if (!tree_root->node) 2289 if (!tree_root->node ||
1970 goto fail_chunk_root; 2290 !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
1971 if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) {
1972 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", 2291 printk(KERN_WARNING "btrfs: failed to read tree root on %s\n",
1973 sb->s_id); 2292 sb->s_id);
1974 goto fail_tree_root; 2293
2294 goto recovery_tree_root;
1975 } 2295 }
2296
1976 btrfs_set_root_node(&tree_root->root_item, tree_root->node); 2297 btrfs_set_root_node(&tree_root->root_item, tree_root->node);
1977 tree_root->commit_root = btrfs_root_node(tree_root); 2298 tree_root->commit_root = btrfs_root_node(tree_root);
1978 2299
1979 ret = find_and_setup_root(tree_root, fs_info, 2300 ret = find_and_setup_root(tree_root, fs_info,
1980 BTRFS_EXTENT_TREE_OBJECTID, extent_root); 2301 BTRFS_EXTENT_TREE_OBJECTID, extent_root);
1981 if (ret) 2302 if (ret)
1982 goto fail_tree_root; 2303 goto recovery_tree_root;
1983 extent_root->track_dirty = 1; 2304 extent_root->track_dirty = 1;
1984 2305
1985 ret = find_and_setup_root(tree_root, fs_info, 2306 ret = find_and_setup_root(tree_root, fs_info,
1986 BTRFS_DEV_TREE_OBJECTID, dev_root); 2307 BTRFS_DEV_TREE_OBJECTID, dev_root);
1987 if (ret) 2308 if (ret)
1988 goto fail_extent_root; 2309 goto recovery_tree_root;
1989 dev_root->track_dirty = 1; 2310 dev_root->track_dirty = 1;
1990 2311
1991 ret = find_and_setup_root(tree_root, fs_info, 2312 ret = find_and_setup_root(tree_root, fs_info,
1992 BTRFS_CSUM_TREE_OBJECTID, csum_root); 2313 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1993 if (ret) 2314 if (ret)
1994 goto fail_dev_root; 2315 goto recovery_tree_root;
1995 2316
1996 csum_root->track_dirty = 1; 2317 csum_root->track_dirty = 1;
1997 2318
@@ -2124,22 +2445,13 @@ fail_cleaner:
2124 2445
2125fail_block_groups: 2446fail_block_groups:
2126 btrfs_free_block_groups(fs_info); 2447 btrfs_free_block_groups(fs_info);
2127 free_extent_buffer(csum_root->node); 2448
2128 free_extent_buffer(csum_root->commit_root); 2449fail_tree_roots:
2129fail_dev_root: 2450 free_root_pointers(fs_info, 1);
2130 free_extent_buffer(dev_root->node); 2451
2131 free_extent_buffer(dev_root->commit_root);
2132fail_extent_root:
2133 free_extent_buffer(extent_root->node);
2134 free_extent_buffer(extent_root->commit_root);
2135fail_tree_root:
2136 free_extent_buffer(tree_root->node);
2137 free_extent_buffer(tree_root->commit_root);
2138fail_chunk_root:
2139 free_extent_buffer(chunk_root->node);
2140 free_extent_buffer(chunk_root->commit_root);
2141fail_sb_buffer: 2452fail_sb_buffer:
2142 btrfs_stop_workers(&fs_info->generic_worker); 2453 btrfs_stop_workers(&fs_info->generic_worker);
2454 btrfs_stop_workers(&fs_info->readahead_workers);
2143 btrfs_stop_workers(&fs_info->fixup_workers); 2455 btrfs_stop_workers(&fs_info->fixup_workers);
2144 btrfs_stop_workers(&fs_info->delalloc_workers); 2456 btrfs_stop_workers(&fs_info->delalloc_workers);
2145 btrfs_stop_workers(&fs_info->workers); 2457 btrfs_stop_workers(&fs_info->workers);
@@ -2152,7 +2464,6 @@ fail_sb_buffer:
2152 btrfs_stop_workers(&fs_info->delayed_workers); 2464 btrfs_stop_workers(&fs_info->delayed_workers);
2153 btrfs_stop_workers(&fs_info->caching_workers); 2465 btrfs_stop_workers(&fs_info->caching_workers);
2154fail_alloc: 2466fail_alloc:
2155 kfree(fs_info->delayed_root);
2156fail_iput: 2467fail_iput:
2157 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2468 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2158 iput(fs_info->btree_inode); 2469 iput(fs_info->btree_inode);
@@ -2164,13 +2475,27 @@ fail_bdi:
2164fail_srcu: 2475fail_srcu:
2165 cleanup_srcu_struct(&fs_info->subvol_srcu); 2476 cleanup_srcu_struct(&fs_info->subvol_srcu);
2166fail: 2477fail:
2167 kfree(extent_root); 2478 free_fs_info(fs_info);
2168 kfree(tree_root);
2169 kfree(fs_info);
2170 kfree(chunk_root);
2171 kfree(dev_root);
2172 kfree(csum_root);
2173 return ERR_PTR(err); 2479 return ERR_PTR(err);
2480
2481recovery_tree_root:
2482
2483 if (!btrfs_test_opt(tree_root, RECOVERY))
2484 goto fail_tree_roots;
2485
2486 free_root_pointers(fs_info, 0);
2487
2488 /* don't use the log in recovery mode, it won't be valid */
2489 btrfs_set_super_log_root(disk_super, 0);
2490
2491 /* we can't trust the free space cache either */
2492 btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE);
2493
2494 ret = next_root_backup(fs_info, fs_info->super_copy,
2495 &num_backups_tried, &backup_index);
2496 if (ret == -1)
2497 goto fail_block_groups;
2498 goto retry_root_backup;
2174} 2499}
2175 2500
2176static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) 2501static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate)
@@ -2338,10 +2663,11 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors)
2338 int total_errors = 0; 2663 int total_errors = 0;
2339 u64 flags; 2664 u64 flags;
2340 2665
2341 max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; 2666 max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
2342 do_barriers = !btrfs_test_opt(root, NOBARRIER); 2667 do_barriers = !btrfs_test_opt(root, NOBARRIER);
2668 backup_super_roots(root->fs_info);
2343 2669
2344 sb = &root->fs_info->super_for_commit; 2670 sb = root->fs_info->super_for_commit;
2345 dev_item = &sb->dev_item; 2671 dev_item = &sb->dev_item;
2346 2672
2347 mutex_lock(&root->fs_info->fs_devices->device_list_mutex); 2673 mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
@@ -2545,8 +2871,6 @@ int close_ctree(struct btrfs_root *root)
2545 /* clear out the rbtree of defraggable inodes */ 2871 /* clear out the rbtree of defraggable inodes */
2546 btrfs_run_defrag_inodes(root->fs_info); 2872 btrfs_run_defrag_inodes(root->fs_info);
2547 2873
2548 btrfs_put_block_group_cache(fs_info);
2549
2550 /* 2874 /*
2551 * Here come 2 situations when btrfs is broken to flip readonly: 2875 * Here come 2 situations when btrfs is broken to flip readonly:
2552 * 2876 *
@@ -2572,6 +2896,8 @@ int close_ctree(struct btrfs_root *root)
2572 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 2896 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
2573 } 2897 }
2574 2898
2899 btrfs_put_block_group_cache(fs_info);
2900
2575 kthread_stop(root->fs_info->transaction_kthread); 2901 kthread_stop(root->fs_info->transaction_kthread);
2576 kthread_stop(root->fs_info->cleaner_kthread); 2902 kthread_stop(root->fs_info->cleaner_kthread);
2577 2903
@@ -2603,7 +2929,6 @@ int close_ctree(struct btrfs_root *root)
2603 del_fs_roots(fs_info); 2929 del_fs_roots(fs_info);
2604 2930
2605 iput(fs_info->btree_inode); 2931 iput(fs_info->btree_inode);
2606 kfree(fs_info->delayed_root);
2607 2932
2608 btrfs_stop_workers(&fs_info->generic_worker); 2933 btrfs_stop_workers(&fs_info->generic_worker);
2609 btrfs_stop_workers(&fs_info->fixup_workers); 2934 btrfs_stop_workers(&fs_info->fixup_workers);
@@ -2617,6 +2942,7 @@ int close_ctree(struct btrfs_root *root)
2617 btrfs_stop_workers(&fs_info->submit_workers); 2942 btrfs_stop_workers(&fs_info->submit_workers);
2618 btrfs_stop_workers(&fs_info->delayed_workers); 2943 btrfs_stop_workers(&fs_info->delayed_workers);
2619 btrfs_stop_workers(&fs_info->caching_workers); 2944 btrfs_stop_workers(&fs_info->caching_workers);
2945 btrfs_stop_workers(&fs_info->readahead_workers);
2620 2946
2621 btrfs_close_devices(fs_info->fs_devices); 2947 btrfs_close_devices(fs_info->fs_devices);
2622 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2948 btrfs_mapping_tree_free(&fs_info->mapping_tree);
@@ -2624,12 +2950,7 @@ int close_ctree(struct btrfs_root *root)
2624 bdi_destroy(&fs_info->bdi); 2950 bdi_destroy(&fs_info->bdi);
2625 cleanup_srcu_struct(&fs_info->subvol_srcu); 2951 cleanup_srcu_struct(&fs_info->subvol_srcu);
2626 2952
2627 kfree(fs_info->extent_root); 2953 free_fs_info(fs_info);
2628 kfree(fs_info->tree_root);
2629 kfree(fs_info->chunk_root);
2630 kfree(fs_info->dev_root);
2631 kfree(fs_info->csum_root);
2632 kfree(fs_info);
2633 2954
2634 return 0; 2955 return 0;
2635} 2956}
@@ -2735,7 +3056,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
2735 return ret; 3056 return ret;
2736} 3057}
2737 3058
2738int btree_lock_page_hook(struct page *page) 3059static int btree_lock_page_hook(struct page *page, void *data,
3060 void (*flush_fn)(void *))
2739{ 3061{
2740 struct inode *inode = page->mapping->host; 3062 struct inode *inode = page->mapping->host;
2741 struct btrfs_root *root = BTRFS_I(inode)->root; 3063 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -2752,7 +3074,10 @@ int btree_lock_page_hook(struct page *page)
2752 if (!eb) 3074 if (!eb)
2753 goto out; 3075 goto out;
2754 3076
2755 btrfs_tree_lock(eb); 3077 if (!btrfs_try_tree_write_lock(eb)) {
3078 flush_fn(data);
3079 btrfs_tree_lock(eb);
3080 }
2756 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 3081 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
2757 3082
2758 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3083 if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
@@ -2767,7 +3092,10 @@ int btree_lock_page_hook(struct page *page)
2767 btrfs_tree_unlock(eb); 3092 btrfs_tree_unlock(eb);
2768 free_extent_buffer(eb); 3093 free_extent_buffer(eb);
2769out: 3094out:
2770 lock_page(page); 3095 if (!trylock_page(page)) {
3096 flush_fn(data);
3097 lock_page(page);
3098 }
2771 return 0; 3099 return 0;
2772} 3100}
2773 3101
@@ -3123,6 +3451,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root)
3123static struct extent_io_ops btree_extent_io_ops = { 3451static struct extent_io_ops btree_extent_io_ops = {
3124 .write_cache_pages_lock_hook = btree_lock_page_hook, 3452 .write_cache_pages_lock_hook = btree_lock_page_hook,
3125 .readpage_end_io_hook = btree_readpage_end_io_hook, 3453 .readpage_end_io_hook = btree_readpage_end_io_hook,
3454 .readpage_io_failed_hook = btree_io_failed_hook,
3126 .submit_bio_hook = btree_submit_bio_hook, 3455 .submit_bio_hook = btree_submit_bio_hook,
3127 /* note we're sharing with inode.c for the merge bio hook */ 3456 /* note we're sharing with inode.c for the merge bio hook */
3128 .merge_bio_hook = btrfs_merge_bio_hook, 3457 .merge_bio_hook = btrfs_merge_bio_hook,