diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 632 |
1 files changed, 533 insertions, 99 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 07b3ac662e19..632f8f3cc9db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -256,8 +256,7 @@ void btrfs_csum_final(u32 crc, char *result) | |||
256 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, | 256 | static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, |
257 | int verify) | 257 | int verify) |
258 | { | 258 | { |
259 | u16 csum_size = | 259 | u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy); |
260 | btrfs_super_csum_size(&root->fs_info->super_copy); | ||
261 | char *result = NULL; | 260 | char *result = NULL; |
262 | unsigned long len; | 261 | unsigned long len; |
263 | unsigned long cur_len; | 262 | unsigned long cur_len; |
@@ -367,7 +366,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
367 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | 366 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); |
368 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 367 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
369 | while (1) { | 368 | while (1) { |
370 | ret = read_extent_buffer_pages(io_tree, eb, start, 1, | 369 | ret = read_extent_buffer_pages(io_tree, eb, start, |
370 | WAIT_COMPLETE, | ||
371 | btree_get_extent, mirror_num); | 371 | btree_get_extent, mirror_num); |
372 | if (!ret && | 372 | if (!ret && |
373 | !verify_parent_transid(io_tree, eb, parent_transid)) | 373 | !verify_parent_transid(io_tree, eb, parent_transid)) |
@@ -608,11 +608,48 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
608 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 608 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); |
609 | end = eb->start + end - 1; | 609 | end = eb->start + end - 1; |
610 | err: | 610 | err: |
611 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { | ||
612 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); | ||
613 | btree_readahead_hook(root, eb, eb->start, ret); | ||
614 | } | ||
615 | |||
611 | free_extent_buffer(eb); | 616 | free_extent_buffer(eb); |
612 | out: | 617 | out: |
613 | return ret; | 618 | return ret; |
614 | } | 619 | } |
615 | 620 | ||
621 | static int btree_io_failed_hook(struct bio *failed_bio, | ||
622 | struct page *page, u64 start, u64 end, | ||
623 | int mirror_num, struct extent_state *state) | ||
624 | { /* I/O failure hook; failed_bio, end, mirror_num and state are not used in this body */ | ||
625 | struct extent_io_tree *tree; | ||
626 | unsigned long len; | ||
627 | struct extent_buffer *eb; | ||
628 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
629 | |||
630 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
631 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
632 | goto out; | ||
633 | if (!page->private) | ||
634 | goto out; | ||
635 | |||
636 | len = page->private >> 2; /* the buffer length is taken from page->private shifted right by two */ | ||
637 | WARN_ON(len == 0); | ||
638 | |||
639 | eb = alloc_extent_buffer(tree, start, len, page); /* released by free_extent_buffer() below */ | ||
640 | if (eb == NULL) | ||
641 | goto out; | ||
642 | |||
643 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { /* only flagged buffers notify the readahead hook */ | ||
644 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); | ||
645 | btree_readahead_hook(root, eb, eb->start, -EIO); | ||
646 | } | ||
647 | free_extent_buffer(eb); | ||
648 | |||
649 | out: | ||
650 | return -EIO; /* we fixed nothing: every path, including the early exits, reports -EIO */ | ||
651 | } | ||
652 | |||
616 | static void end_workqueue_bio(struct bio *bio, int err) | 653 | static void end_workqueue_bio(struct bio *bio, int err) |
617 | { | 654 | { |
618 | struct end_io_wq *end_io_wq = bio->bi_private; | 655 | struct end_io_wq *end_io_wq = bio->bi_private; |
@@ -908,7 +945,7 @@ static int btree_readpage(struct file *file, struct page *page) | |||
908 | { | 945 | { |
909 | struct extent_io_tree *tree; | 946 | struct extent_io_tree *tree; |
910 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 947 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
911 | return extent_read_full_page(tree, page, btree_get_extent); | 948 | return extent_read_full_page(tree, page, btree_get_extent, 0); |
912 | } | 949 | } |
913 | 950 | ||
914 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) | 951 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) |
@@ -974,11 +1011,43 @@ int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | |||
974 | if (!buf) | 1011 | if (!buf) |
975 | return 0; | 1012 | return 0; |
976 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, | 1013 | read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree, |
977 | buf, 0, 0, btree_get_extent, 0); | 1014 | buf, 0, WAIT_NONE, btree_get_extent, 0); |
978 | free_extent_buffer(buf); | 1015 | free_extent_buffer(buf); |
979 | return ret; | 1016 | return ret; |
980 | } | 1017 | } |
981 | 1018 | ||
1019 | int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | ||
1020 | int mirror_num, struct extent_buffer **eb) | ||
1021 | { /* flags one tree block READAHEAD and reads it with WAIT_PAGE_LOCK; *eb is written only if the buffer is uptodate */ | ||
1022 | struct extent_buffer *buf = NULL; | ||
1023 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
1024 | struct extent_io_tree *io_tree = &BTRFS_I(btree_inode)->io_tree; | ||
1025 | int ret; | ||
1026 | |||
1027 | buf = btrfs_find_create_tree_block(root, bytenr, blocksize); | ||
1028 | if (!buf) | ||
1029 | return 0; /* no buffer obtained: returns 0 and leaves *eb untouched */ | ||
1030 | |||
1031 | set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags); | ||
1032 | |||
1033 | ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK, | ||
1034 | btree_get_extent, mirror_num); | ||
1035 | if (ret) { | ||
1036 | free_extent_buffer(buf); | ||
1037 | return ret; /* pass the read_extent_buffer_pages() error through unchanged */ | ||
1038 | } | ||
1039 | |||
1040 | if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { | ||
1041 | free_extent_buffer(buf); | ||
1042 | return -EIO; | ||
1043 | } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { | ||
1044 | *eb = buf; /* handed to the caller without calling free_extent_buffer() here */ | ||
1045 | } else { | ||
1046 | free_extent_buffer(buf); | ||
1047 | } | ||
1048 | return 0; /* NOTE(review): also 0 when the buffer is not uptodate and *eb was not set; callers presumably pre-initialize *eb */ | ||
1049 | } | ||
1050 | |||
982 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, | 1051 | struct extent_buffer *btrfs_find_tree_block(struct btrfs_root *root, |
983 | u64 bytenr, u32 blocksize) | 1052 | u64 bytenr, u32 blocksize) |
984 | { | 1053 | { |
@@ -1135,10 +1204,12 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
1135 | 1204 | ||
1136 | generation = btrfs_root_generation(&root->root_item); | 1205 | generation = btrfs_root_generation(&root->root_item); |
1137 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1206 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
1207 | root->commit_root = NULL; | ||
1138 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1208 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1139 | blocksize, generation); | 1209 | blocksize, generation); |
1140 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { | 1210 | if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { |
1141 | free_extent_buffer(root->node); | 1211 | free_extent_buffer(root->node); |
1212 | root->node = NULL; | ||
1142 | return -EIO; | 1213 | return -EIO; |
1143 | } | 1214 | } |
1144 | root->commit_root = btrfs_root_node(root); | 1215 | root->commit_root = btrfs_root_node(root); |
@@ -1577,6 +1648,235 @@ sleep: | |||
1577 | return 0; | 1648 | return 0; |
1578 | } | 1649 | } |
1579 | 1650 | ||
1651 | /* | ||
1652 | * Find the slot in the super_copy root backup array whose tree root | ||
1653 | * generation equals newest_gen. The index of that slot is returned, | ||
1654 | * or -1 if no slot matches. | ||
1655 | * | ||
1656 | * Only an exact match with newest_gen counts; slots are not ranked | ||
1657 | * against each other. When several slots match, the highest index | ||
1658 | * wins, unless both the last slot and slot 0 match (then 0 is used). | ||
1659 | */ | ||
1660 | static int find_newest_super_backup(struct btrfs_fs_info *info, u64 newest_gen) | ||
1661 | { | ||
1662 | u64 cur; | ||
1663 | int newest_index = -1; | ||
1664 | struct btrfs_root_backup *root_backup; | ||
1665 | int i; | ||
1666 | |||
1667 | for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { | ||
1668 | root_backup = info->super_copy->super_roots + i; | ||
1669 | cur = btrfs_backup_tree_root_gen(root_backup); | ||
1670 | if (cur == newest_gen) | ||
1671 | newest_index = i; | ||
1672 | } | ||
1673 | |||
1674 | /* check to see if we actually wrapped around: last slot matched, so re-test slot 0 */ | ||
1675 | if (newest_index == BTRFS_NUM_BACKUP_ROOTS - 1) { | ||
1676 | root_backup = info->super_copy->super_roots; | ||
1677 | cur = btrfs_backup_tree_root_gen(root_backup); | ||
1678 | if (cur == newest_gen) | ||
1679 | newest_index = 0; | ||
1680 | } | ||
1681 | return newest_index; | ||
1682 | } | ||
1683 | |||
1684 | |||
1685 | /* | ||
1686 | * Pick the slot where the next backup entry will be stored: the one | ||
1687 | * right after the slot matching newest_gen (wrapping around), or slot 0 | ||
1688 | * if nothing matches. The result is stored in info->backup_root_index. | ||
1689 | */ | ||
1690 | static void find_oldest_super_backup(struct btrfs_fs_info *info, | ||
1691 | u64 newest_gen) | ||
1692 | { | ||
1693 | int newest_index = -1; | ||
1694 | |||
1695 | newest_index = find_newest_super_backup(info, newest_gen); | ||
1696 | /* if there was garbage in there (no slot matched), just start over at slot 0 */ | ||
1697 | if (newest_index == -1) { | ||
1698 | info->backup_root_index = 0; | ||
1699 | } else { | ||
1700 | info->backup_root_index = (newest_index + 1) % BTRFS_NUM_BACKUP_ROOTS; | ||
1701 | } | ||
1702 | } | ||
1703 | |||
1704 | /* | ||
1705 | * Record the current roots (start, generation, level) and the super | ||
1706 | * block totals in one slot of super_for_commit's backup array, then | ||
1707 | * advance backup_root_index and copy the whole array into super_copy. | ||
1708 | */ | ||
1709 | static void backup_super_roots(struct btrfs_fs_info *info) | ||
1710 | { | ||
1711 | int next_backup; | ||
1712 | struct btrfs_root_backup *root_backup; | ||
1713 | int last_backup; | ||
1714 | |||
1715 | next_backup = info->backup_root_index; | ||
1716 | last_backup = (next_backup + BTRFS_NUM_BACKUP_ROOTS - 1) % | ||
1717 | BTRFS_NUM_BACKUP_ROOTS; | ||
1718 | |||
1719 | /* | ||
1720 | * just overwrite the last backup if we're at the same generation | ||
1721 | * this happens only at umount | ||
1722 | */ | ||
1723 | root_backup = info->super_for_commit->super_roots + last_backup; | ||
1724 | if (btrfs_backup_tree_root_gen(root_backup) == | ||
1725 | btrfs_header_generation(info->tree_root->node)) | ||
1726 | next_backup = last_backup; | ||
1727 | |||
1728 | root_backup = info->super_for_commit->super_roots + next_backup; | ||
1729 | |||
1730 | /* | ||
1731 | * make sure all of our padding and empty slots get zero filled | ||
1732 | * regardless of which ones we use today | ||
1733 | */ | ||
1734 | memset(root_backup, 0, sizeof(*root_backup)); | ||
1735 | |||
1736 | info->backup_root_index = (next_backup + 1) % BTRFS_NUM_BACKUP_ROOTS; | ||
1737 | |||
1738 | btrfs_set_backup_tree_root(root_backup, info->tree_root->node->start); | ||
1739 | btrfs_set_backup_tree_root_gen(root_backup, | ||
1740 | btrfs_header_generation(info->tree_root->node)); | ||
1741 | |||
1742 | btrfs_set_backup_tree_root_level(root_backup, | ||
1743 | btrfs_header_level(info->tree_root->node)); | ||
1744 | |||
1745 | btrfs_set_backup_chunk_root(root_backup, info->chunk_root->node->start); | ||
1746 | btrfs_set_backup_chunk_root_gen(root_backup, | ||
1747 | btrfs_header_generation(info->chunk_root->node)); | ||
1748 | btrfs_set_backup_chunk_root_level(root_backup, | ||
1749 | btrfs_header_level(info->chunk_root->node)); | ||
1750 | |||
1751 | btrfs_set_backup_extent_root(root_backup, info->extent_root->node->start); | ||
1752 | btrfs_set_backup_extent_root_gen(root_backup, | ||
1753 | btrfs_header_generation(info->extent_root->node)); | ||
1754 | btrfs_set_backup_extent_root_level(root_backup, | ||
1755 | btrfs_header_level(info->extent_root->node)); | ||
1756 | |||
1757 | /* | ||
1758 | * we might commit during log recovery, before fs_root is set. If it is | ||
1759 | * not valid, the fs root fields keep the zeros from the memset above. | ||
1760 | */ | ||
1761 | if (info->fs_root && info->fs_root->node) { | ||
1762 | btrfs_set_backup_fs_root(root_backup, | ||
1763 | info->fs_root->node->start); | ||
1764 | btrfs_set_backup_fs_root_gen(root_backup, | ||
1765 | btrfs_header_generation(info->fs_root->node)); | ||
1766 | btrfs_set_backup_fs_root_level(root_backup, | ||
1767 | btrfs_header_level(info->fs_root->node)); | ||
1768 | } | ||
1769 | |||
1770 | btrfs_set_backup_dev_root(root_backup, info->dev_root->node->start); | ||
1771 | btrfs_set_backup_dev_root_gen(root_backup, | ||
1772 | btrfs_header_generation(info->dev_root->node)); | ||
1773 | btrfs_set_backup_dev_root_level(root_backup, | ||
1774 | btrfs_header_level(info->dev_root->node)); | ||
1775 | |||
1776 | btrfs_set_backup_csum_root(root_backup, info->csum_root->node->start); | ||
1777 | btrfs_set_backup_csum_root_gen(root_backup, | ||
1778 | btrfs_header_generation(info->csum_root->node)); | ||
1779 | btrfs_set_backup_csum_root_level(root_backup, | ||
1780 | btrfs_header_level(info->csum_root->node)); | ||
1781 | |||
1782 | btrfs_set_backup_total_bytes(root_backup, | ||
1783 | btrfs_super_total_bytes(info->super_copy)); | ||
1784 | btrfs_set_backup_bytes_used(root_backup, | ||
1785 | btrfs_super_bytes_used(info->super_copy)); | ||
1786 | btrfs_set_backup_num_devices(root_backup, | ||
1787 | btrfs_super_num_devices(info->super_copy)); | ||
1788 | |||
1789 | /* | ||
1790 | * if we don't copy this out to the super_copy, it won't get remembered | ||
1791 | * for the next commit | ||
1792 | */ | ||
1793 | memcpy(&info->super_copy->super_roots, | ||
1794 | &info->super_for_commit->super_roots, | ||
1795 | sizeof(*root_backup) * BTRFS_NUM_BACKUP_ROOTS); | ||
1796 | } | ||
1797 | |||
1798 | /* | ||
1799 | * this copies info out of the root backup array and back into | ||
1800 | * the in-memory super block. It is meant to help iterate through | ||
1801 | * the array, so you send it the number of backups you've already | ||
1802 | * tried and the last backup index you used. | ||
1803 | * Both counters are updated in place. Returns 0 after loading a slot, | ||
1804 | * -1 once all slots were tried or if the first lookup finds no match. | ||
1805 | */ | ||
1806 | static noinline int next_root_backup(struct btrfs_fs_info *info, | ||
1807 | struct btrfs_super_block *super, | ||
1808 | int *num_backups_tried, int *backup_index) | ||
1809 | { | ||
1810 | struct btrfs_root_backup *root_backup; | ||
1811 | int newest = *backup_index; | ||
1812 | |||
1813 | if (*num_backups_tried == 0) { | ||
1814 | u64 gen = btrfs_super_generation(super); | ||
1815 | |||
1816 | newest = find_newest_super_backup(info, gen); /* scans info->super_copy, not the super argument */ | ||
1817 | if (newest == -1) | ||
1818 | return -1; | ||
1819 | |||
1820 | *backup_index = newest; | ||
1821 | *num_backups_tried = 1; | ||
1822 | } else if (*num_backups_tried == BTRFS_NUM_BACKUP_ROOTS) { | ||
1823 | /* we've tried all the backups, all done */ | ||
1824 | return -1; | ||
1825 | } else { | ||
1826 | /* jump to the next oldest backup: one slot back, wrapping around */ | ||
1827 | newest = (*backup_index + BTRFS_NUM_BACKUP_ROOTS - 1) % | ||
1828 | BTRFS_NUM_BACKUP_ROOTS; | ||
1829 | *backup_index = newest; | ||
1830 | *num_backups_tried += 1; | ||
1831 | } | ||
1832 | root_backup = super->super_roots + newest; | ||
1833 | |||
1834 | btrfs_set_super_generation(super, | ||
1835 | btrfs_backup_tree_root_gen(root_backup)); | ||
1836 | btrfs_set_super_root(super, btrfs_backup_tree_root(root_backup)); | ||
1837 | btrfs_set_super_root_level(super, | ||
1838 | btrfs_backup_tree_root_level(root_backup)); | ||
1839 | btrfs_set_super_bytes_used(super, btrfs_backup_bytes_used(root_backup)); | ||
1840 | |||
1841 | /* | ||
1842 | * fixme: the total bytes and num_devices need to match or we should | ||
1843 | * need a fsck | ||
1844 | */ | ||
1845 | btrfs_set_super_total_bytes(super, btrfs_backup_total_bytes(root_backup)); | ||
1846 | btrfs_set_super_num_devices(super, btrfs_backup_num_devices(root_backup)); | ||
1847 | return 0; | ||
1848 | } | ||
1849 | |||
1850 | /* helper to cleanup tree roots: frees node and commit_root of the tree, dev, extent and csum roots and NULLs them; the chunk root too when chunk_root is nonzero */ | ||
1851 | static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root) | ||
1852 | { | ||
1853 | free_extent_buffer(info->tree_root->node); | ||
1854 | free_extent_buffer(info->tree_root->commit_root); | ||
1855 | free_extent_buffer(info->dev_root->node); | ||
1856 | free_extent_buffer(info->dev_root->commit_root); | ||
1857 | free_extent_buffer(info->extent_root->node); | ||
1858 | free_extent_buffer(info->extent_root->commit_root); | ||
1859 | free_extent_buffer(info->csum_root->node); | ||
1860 | free_extent_buffer(info->csum_root->commit_root); | ||
1861 | |||
1862 | info->tree_root->node = NULL; | ||
1863 | info->tree_root->commit_root = NULL; | ||
1864 | info->dev_root->node = NULL; | ||
1865 | info->dev_root->commit_root = NULL; | ||
1866 | info->extent_root->node = NULL; | ||
1867 | info->extent_root->commit_root = NULL; | ||
1868 | info->csum_root->node = NULL; | ||
1869 | info->csum_root->commit_root = NULL; | ||
1870 | |||
1871 | if (chunk_root) { | ||
1872 | free_extent_buffer(info->chunk_root->node); | ||
1873 | free_extent_buffer(info->chunk_root->commit_root); | ||
1874 | info->chunk_root->node = NULL; | ||
1875 | info->chunk_root->commit_root = NULL; | ||
1876 | } | ||
1877 | } | ||
1878 | |||
1879 | |||
1580 | struct btrfs_root *open_ctree(struct super_block *sb, | 1880 | struct btrfs_root *open_ctree(struct super_block *sb, |
1581 | struct btrfs_fs_devices *fs_devices, | 1881 | struct btrfs_fs_devices *fs_devices, |
1582 | char *options) | 1882 | char *options) |
@@ -1590,29 +1890,32 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1590 | u64 features; | 1890 | u64 features; |
1591 | struct btrfs_key location; | 1891 | struct btrfs_key location; |
1592 | struct buffer_head *bh; | 1892 | struct buffer_head *bh; |
1593 | struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), | 1893 | struct btrfs_super_block *disk_super; |
1594 | GFP_NOFS); | ||
1595 | struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root), | ||
1596 | GFP_NOFS); | ||
1597 | struct btrfs_root *tree_root = btrfs_sb(sb); | 1894 | struct btrfs_root *tree_root = btrfs_sb(sb); |
1598 | struct btrfs_fs_info *fs_info = NULL; | 1895 | struct btrfs_fs_info *fs_info = tree_root->fs_info; |
1599 | struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root), | 1896 | struct btrfs_root *extent_root; |
1600 | GFP_NOFS); | 1897 | struct btrfs_root *csum_root; |
1601 | struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root), | 1898 | struct btrfs_root *chunk_root; |
1602 | GFP_NOFS); | 1899 | struct btrfs_root *dev_root; |
1603 | struct btrfs_root *log_tree_root; | 1900 | struct btrfs_root *log_tree_root; |
1604 | |||
1605 | int ret; | 1901 | int ret; |
1606 | int err = -EINVAL; | 1902 | int err = -EINVAL; |
1607 | 1903 | int num_backups_tried = 0; | |
1608 | struct btrfs_super_block *disk_super; | 1904 | int backup_index = 0; |
1609 | 1905 | ||
1610 | if (!extent_root || !tree_root || !tree_root->fs_info || | 1906 | extent_root = fs_info->extent_root = |
1611 | !chunk_root || !dev_root || !csum_root) { | 1907 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); |
1908 | csum_root = fs_info->csum_root = | ||
1909 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1910 | chunk_root = fs_info->chunk_root = | ||
1911 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1912 | dev_root = fs_info->dev_root = | ||
1913 | kzalloc(sizeof(struct btrfs_root), GFP_NOFS); | ||
1914 | |||
1915 | if (!extent_root || !csum_root || !chunk_root || !dev_root) { | ||
1612 | err = -ENOMEM; | 1916 | err = -ENOMEM; |
1613 | goto fail; | 1917 | goto fail; |
1614 | } | 1918 | } |
1615 | fs_info = tree_root->fs_info; | ||
1616 | 1919 | ||
1617 | ret = init_srcu_struct(&fs_info->subvol_srcu); | 1920 | ret = init_srcu_struct(&fs_info->subvol_srcu); |
1618 | if (ret) { | 1921 | if (ret) { |
@@ -1648,15 +1951,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1648 | spin_lock_init(&fs_info->fs_roots_radix_lock); | 1951 | spin_lock_init(&fs_info->fs_roots_radix_lock); |
1649 | spin_lock_init(&fs_info->delayed_iput_lock); | 1952 | spin_lock_init(&fs_info->delayed_iput_lock); |
1650 | spin_lock_init(&fs_info->defrag_inodes_lock); | 1953 | spin_lock_init(&fs_info->defrag_inodes_lock); |
1954 | spin_lock_init(&fs_info->free_chunk_lock); | ||
1651 | mutex_init(&fs_info->reloc_mutex); | 1955 | mutex_init(&fs_info->reloc_mutex); |
1652 | 1956 | ||
1653 | init_completion(&fs_info->kobj_unregister); | 1957 | init_completion(&fs_info->kobj_unregister); |
1654 | fs_info->tree_root = tree_root; | ||
1655 | fs_info->extent_root = extent_root; | ||
1656 | fs_info->csum_root = csum_root; | ||
1657 | fs_info->chunk_root = chunk_root; | ||
1658 | fs_info->dev_root = dev_root; | ||
1659 | fs_info->fs_devices = fs_devices; | ||
1660 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1958 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1661 | INIT_LIST_HEAD(&fs_info->space_info); | 1959 | INIT_LIST_HEAD(&fs_info->space_info); |
1662 | btrfs_mapping_init(&fs_info->mapping_tree); | 1960 | btrfs_mapping_init(&fs_info->mapping_tree); |
@@ -1665,8 +1963,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1665 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | 1963 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); |
1666 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | 1964 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); |
1667 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | 1965 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); |
1668 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | 1966 | btrfs_init_block_rsv(&fs_info->delayed_block_rsv); |
1669 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
1670 | atomic_set(&fs_info->nr_async_submits, 0); | 1967 | atomic_set(&fs_info->nr_async_submits, 0); |
1671 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1968 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1672 | atomic_set(&fs_info->async_submit_draining, 0); | 1969 | atomic_set(&fs_info->async_submit_draining, 0); |
@@ -1677,6 +1974,11 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1677 | fs_info->metadata_ratio = 0; | 1974 | fs_info->metadata_ratio = 0; |
1678 | fs_info->defrag_inodes = RB_ROOT; | 1975 | fs_info->defrag_inodes = RB_ROOT; |
1679 | fs_info->trans_no_join = 0; | 1976 | fs_info->trans_no_join = 0; |
1977 | fs_info->free_chunk_space = 0; | ||
1978 | |||
1979 | /* readahead state */ | ||
1980 | INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT); | ||
1981 | spin_lock_init(&fs_info->reada_lock); | ||
1680 | 1982 | ||
1681 | fs_info->thread_pool_size = min_t(unsigned long, | 1983 | fs_info->thread_pool_size = min_t(unsigned long, |
1682 | num_online_cpus() + 2, 8); | 1984 | num_online_cpus() + 2, 8); |
@@ -1705,7 +2007,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1705 | sb->s_bdi = &fs_info->bdi; | 2007 | sb->s_bdi = &fs_info->bdi; |
1706 | 2008 | ||
1707 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; | 2009 | fs_info->btree_inode->i_ino = BTRFS_BTREE_INODE_OBJECTID; |
1708 | fs_info->btree_inode->i_nlink = 1; | 2010 | set_nlink(fs_info->btree_inode, 1); |
1709 | /* | 2011 | /* |
1710 | * we set the i_size on the btree inode to the max possible int. | 2012 | * we set the i_size on the btree inode to the max possible int. |
1711 | * the real end of the address space is determined by all of | 2013 | * the real end of the address space is determined by all of |
@@ -1766,14 +2068,14 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1766 | goto fail_alloc; | 2068 | goto fail_alloc; |
1767 | } | 2069 | } |
1768 | 2070 | ||
1769 | memcpy(&fs_info->super_copy, bh->b_data, sizeof(fs_info->super_copy)); | 2071 | memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy)); |
1770 | memcpy(&fs_info->super_for_commit, &fs_info->super_copy, | 2072 | memcpy(fs_info->super_for_commit, fs_info->super_copy, |
1771 | sizeof(fs_info->super_for_commit)); | 2073 | sizeof(*fs_info->super_for_commit)); |
1772 | brelse(bh); | 2074 | brelse(bh); |
1773 | 2075 | ||
1774 | memcpy(fs_info->fsid, fs_info->super_copy.fsid, BTRFS_FSID_SIZE); | 2076 | memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE); |
1775 | 2077 | ||
1776 | disk_super = &fs_info->super_copy; | 2078 | disk_super = fs_info->super_copy; |
1777 | if (!btrfs_super_root(disk_super)) | 2079 | if (!btrfs_super_root(disk_super)) |
1778 | goto fail_alloc; | 2080 | goto fail_alloc; |
1779 | 2081 | ||
@@ -1783,6 +2085,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1783 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 2085 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
1784 | 2086 | ||
1785 | /* | 2087 | /* |
2088 | * run through our array of backup supers and setup | ||
2089 | * our ring pointer to the oldest one | ||
2090 | */ | ||
2091 | generation = btrfs_super_generation(disk_super); | ||
2092 | find_oldest_super_backup(fs_info, generation); | ||
2093 | |||
2094 | /* | ||
1786 | * In the long term, we'll store the compression type in the super | 2095 | * In the long term, we'll store the compression type in the super |
1787 | * block, and it'll be used for per file compression control. | 2096 | * block, and it'll be used for per file compression control. |
1788 | */ | 2097 | */ |
@@ -1870,6 +2179,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1870 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | 2179 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", |
1871 | fs_info->thread_pool_size, | 2180 | fs_info->thread_pool_size, |
1872 | &fs_info->generic_worker); | 2181 | &fs_info->generic_worker); |
2182 | btrfs_init_workers(&fs_info->readahead_workers, "readahead", | ||
2183 | fs_info->thread_pool_size, | ||
2184 | &fs_info->generic_worker); | ||
1873 | 2185 | ||
1874 | /* | 2186 | /* |
1875 | * endios are largely parallel and should have a very | 2187 | * endios are largely parallel and should have a very |
@@ -1880,6 +2192,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1880 | 2192 | ||
1881 | fs_info->endio_write_workers.idle_thresh = 2; | 2193 | fs_info->endio_write_workers.idle_thresh = 2; |
1882 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2194 | fs_info->endio_meta_write_workers.idle_thresh = 2; |
2195 | fs_info->readahead_workers.idle_thresh = 2; | ||
1883 | 2196 | ||
1884 | btrfs_start_workers(&fs_info->workers, 1); | 2197 | btrfs_start_workers(&fs_info->workers, 1); |
1885 | btrfs_start_workers(&fs_info->generic_worker, 1); | 2198 | btrfs_start_workers(&fs_info->generic_worker, 1); |
@@ -1893,6 +2206,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1893 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); | 2206 | btrfs_start_workers(&fs_info->endio_freespace_worker, 1); |
1894 | btrfs_start_workers(&fs_info->delayed_workers, 1); | 2207 | btrfs_start_workers(&fs_info->delayed_workers, 1); |
1895 | btrfs_start_workers(&fs_info->caching_workers, 1); | 2208 | btrfs_start_workers(&fs_info->caching_workers, 1); |
2209 | btrfs_start_workers(&fs_info->readahead_workers, 1); | ||
1896 | 2210 | ||
1897 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 2211 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1898 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 2212 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1939,7 +2253,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1939 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { | 2253 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { |
1940 | printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", | 2254 | printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", |
1941 | sb->s_id); | 2255 | sb->s_id); |
1942 | goto fail_chunk_root; | 2256 | goto fail_tree_roots; |
1943 | } | 2257 | } |
1944 | btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); | 2258 | btrfs_set_root_node(&chunk_root->root_item, chunk_root->node); |
1945 | chunk_root->commit_root = btrfs_root_node(chunk_root); | 2259 | chunk_root->commit_root = btrfs_root_node(chunk_root); |
@@ -1954,11 +2268,12 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1954 | if (ret) { | 2268 | if (ret) { |
1955 | printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", | 2269 | printk(KERN_WARNING "btrfs: failed to read chunk tree on %s\n", |
1956 | sb->s_id); | 2270 | sb->s_id); |
1957 | goto fail_chunk_root; | 2271 | goto fail_tree_roots; |
1958 | } | 2272 | } |
1959 | 2273 | ||
1960 | btrfs_close_extra_devices(fs_devices); | 2274 | btrfs_close_extra_devices(fs_devices); |
1961 | 2275 | ||
2276 | retry_root_backup: | ||
1962 | blocksize = btrfs_level_size(tree_root, | 2277 | blocksize = btrfs_level_size(tree_root, |
1963 | btrfs_super_root_level(disk_super)); | 2278 | btrfs_super_root_level(disk_super)); |
1964 | generation = btrfs_super_generation(disk_super); | 2279 | generation = btrfs_super_generation(disk_super); |
@@ -1966,32 +2281,33 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1966 | tree_root->node = read_tree_block(tree_root, | 2281 | tree_root->node = read_tree_block(tree_root, |
1967 | btrfs_super_root(disk_super), | 2282 | btrfs_super_root(disk_super), |
1968 | blocksize, generation); | 2283 | blocksize, generation); |
1969 | if (!tree_root->node) | 2284 | if (!tree_root->node || |
1970 | goto fail_chunk_root; | 2285 | !test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { |
1971 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &tree_root->node->bflags)) { | ||
1972 | printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", | 2286 | printk(KERN_WARNING "btrfs: failed to read tree root on %s\n", |
1973 | sb->s_id); | 2287 | sb->s_id); |
1974 | goto fail_tree_root; | 2288 | |
2289 | goto recovery_tree_root; | ||
1975 | } | 2290 | } |
2291 | |||
1976 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); | 2292 | btrfs_set_root_node(&tree_root->root_item, tree_root->node); |
1977 | tree_root->commit_root = btrfs_root_node(tree_root); | 2293 | tree_root->commit_root = btrfs_root_node(tree_root); |
1978 | 2294 | ||
1979 | ret = find_and_setup_root(tree_root, fs_info, | 2295 | ret = find_and_setup_root(tree_root, fs_info, |
1980 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); | 2296 | BTRFS_EXTENT_TREE_OBJECTID, extent_root); |
1981 | if (ret) | 2297 | if (ret) |
1982 | goto fail_tree_root; | 2298 | goto recovery_tree_root; |
1983 | extent_root->track_dirty = 1; | 2299 | extent_root->track_dirty = 1; |
1984 | 2300 | ||
1985 | ret = find_and_setup_root(tree_root, fs_info, | 2301 | ret = find_and_setup_root(tree_root, fs_info, |
1986 | BTRFS_DEV_TREE_OBJECTID, dev_root); | 2302 | BTRFS_DEV_TREE_OBJECTID, dev_root); |
1987 | if (ret) | 2303 | if (ret) |
1988 | goto fail_extent_root; | 2304 | goto recovery_tree_root; |
1989 | dev_root->track_dirty = 1; | 2305 | dev_root->track_dirty = 1; |
1990 | 2306 | ||
1991 | ret = find_and_setup_root(tree_root, fs_info, | 2307 | ret = find_and_setup_root(tree_root, fs_info, |
1992 | BTRFS_CSUM_TREE_OBJECTID, csum_root); | 2308 | BTRFS_CSUM_TREE_OBJECTID, csum_root); |
1993 | if (ret) | 2309 | if (ret) |
1994 | goto fail_dev_root; | 2310 | goto recovery_tree_root; |
1995 | 2311 | ||
1996 | csum_root->track_dirty = 1; | 2312 | csum_root->track_dirty = 1; |
1997 | 2313 | ||
@@ -2124,22 +2440,13 @@ fail_cleaner: | |||
2124 | 2440 | ||
2125 | fail_block_groups: | 2441 | fail_block_groups: |
2126 | btrfs_free_block_groups(fs_info); | 2442 | btrfs_free_block_groups(fs_info); |
2127 | free_extent_buffer(csum_root->node); | 2443 | |
2128 | free_extent_buffer(csum_root->commit_root); | 2444 | fail_tree_roots: |
2129 | fail_dev_root: | 2445 | free_root_pointers(fs_info, 1); |
2130 | free_extent_buffer(dev_root->node); | 2446 | |
2131 | free_extent_buffer(dev_root->commit_root); | ||
2132 | fail_extent_root: | ||
2133 | free_extent_buffer(extent_root->node); | ||
2134 | free_extent_buffer(extent_root->commit_root); | ||
2135 | fail_tree_root: | ||
2136 | free_extent_buffer(tree_root->node); | ||
2137 | free_extent_buffer(tree_root->commit_root); | ||
2138 | fail_chunk_root: | ||
2139 | free_extent_buffer(chunk_root->node); | ||
2140 | free_extent_buffer(chunk_root->commit_root); | ||
2141 | fail_sb_buffer: | 2447 | fail_sb_buffer: |
2142 | btrfs_stop_workers(&fs_info->generic_worker); | 2448 | btrfs_stop_workers(&fs_info->generic_worker); |
2449 | btrfs_stop_workers(&fs_info->readahead_workers); | ||
2143 | btrfs_stop_workers(&fs_info->fixup_workers); | 2450 | btrfs_stop_workers(&fs_info->fixup_workers); |
2144 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2451 | btrfs_stop_workers(&fs_info->delalloc_workers); |
2145 | btrfs_stop_workers(&fs_info->workers); | 2452 | btrfs_stop_workers(&fs_info->workers); |
@@ -2152,25 +2459,37 @@ fail_sb_buffer: | |||
2152 | btrfs_stop_workers(&fs_info->delayed_workers); | 2459 | btrfs_stop_workers(&fs_info->delayed_workers); |
2153 | btrfs_stop_workers(&fs_info->caching_workers); | 2460 | btrfs_stop_workers(&fs_info->caching_workers); |
2154 | fail_alloc: | 2461 | fail_alloc: |
2155 | kfree(fs_info->delayed_root); | ||
2156 | fail_iput: | 2462 | fail_iput: |
2463 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
2464 | |||
2157 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2465 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2158 | iput(fs_info->btree_inode); | 2466 | iput(fs_info->btree_inode); |
2159 | |||
2160 | btrfs_close_devices(fs_info->fs_devices); | ||
2161 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | ||
2162 | fail_bdi: | 2467 | fail_bdi: |
2163 | bdi_destroy(&fs_info->bdi); | 2468 | bdi_destroy(&fs_info->bdi); |
2164 | fail_srcu: | 2469 | fail_srcu: |
2165 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 2470 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
2166 | fail: | 2471 | fail: |
2167 | kfree(extent_root); | 2472 | btrfs_close_devices(fs_info->fs_devices); |
2168 | kfree(tree_root); | 2473 | free_fs_info(fs_info); |
2169 | kfree(fs_info); | ||
2170 | kfree(chunk_root); | ||
2171 | kfree(dev_root); | ||
2172 | kfree(csum_root); | ||
2173 | return ERR_PTR(err); | 2474 | return ERR_PTR(err); |
2475 | |||
2476 | recovery_tree_root: | ||
2477 | if (!btrfs_test_opt(tree_root, RECOVERY)) | ||
2478 | goto fail_tree_roots; | ||
2479 | |||
2480 | free_root_pointers(fs_info, 0); | ||
2481 | |||
2482 | /* don't use the log in recovery mode, it won't be valid */ | ||
2483 | btrfs_set_super_log_root(disk_super, 0); | ||
2484 | |||
2485 | /* we can't trust the free space cache either */ | ||
2486 | btrfs_set_opt(fs_info->mount_opt, CLEAR_CACHE); | ||
2487 | |||
2488 | ret = next_root_backup(fs_info, fs_info->super_copy, | ||
2489 | &num_backups_tried, &backup_index); | ||
2490 | if (ret == -1) | ||
2491 | goto fail_block_groups; | ||
2492 | goto retry_root_backup; | ||
2174 | } | 2493 | } |
2175 | 2494 | ||
2176 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | 2495 | static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) |
@@ -2254,22 +2573,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2254 | int errors = 0; | 2573 | int errors = 0; |
2255 | u32 crc; | 2574 | u32 crc; |
2256 | u64 bytenr; | 2575 | u64 bytenr; |
2257 | int last_barrier = 0; | ||
2258 | 2576 | ||
2259 | if (max_mirrors == 0) | 2577 | if (max_mirrors == 0) |
2260 | max_mirrors = BTRFS_SUPER_MIRROR_MAX; | 2578 | max_mirrors = BTRFS_SUPER_MIRROR_MAX; |
2261 | 2579 | ||
2262 | /* make sure only the last submit_bh does a barrier */ | ||
2263 | if (do_barriers) { | ||
2264 | for (i = 0; i < max_mirrors; i++) { | ||
2265 | bytenr = btrfs_sb_offset(i); | ||
2266 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= | ||
2267 | device->total_bytes) | ||
2268 | break; | ||
2269 | last_barrier = i; | ||
2270 | } | ||
2271 | } | ||
2272 | |||
2273 | for (i = 0; i < max_mirrors; i++) { | 2580 | for (i = 0; i < max_mirrors; i++) { |
2274 | bytenr = btrfs_sb_offset(i); | 2581 | bytenr = btrfs_sb_offset(i); |
2275 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) | 2582 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) |
@@ -2315,17 +2622,136 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2315 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2622 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2316 | } | 2623 | } |
2317 | 2624 | ||
2318 | if (i == last_barrier && do_barriers) | 2625 | /* |
2319 | ret = submit_bh(WRITE_FLUSH_FUA, bh); | 2626 | * we fua the first super. The others we allow |
2320 | else | 2627 | * to go down lazy. |
2321 | ret = submit_bh(WRITE_SYNC, bh); | 2628 | */ |
2322 | 2629 | ret = submit_bh(WRITE_FUA, bh); | |
2323 | if (ret) | 2630 | if (ret) |
2324 | errors++; | 2631 | errors++; |
2325 | } | 2632 | } |
2326 | return errors < i ? 0 : -1; | 2633 | return errors < i ? 0 : -1; |
2327 | } | 2634 | } |
2328 | 2635 | ||
2636 | /* | ||
2637 | * endio for write_dev_flush; this wakes anyone waiting | ||
2638 | * for the barrier when it is done | ||
2639 | */ | ||
2640 | static void btrfs_end_empty_barrier(struct bio *bio, int err) | ||
2641 | { | ||
2642 | if (err) { | ||
2643 | if (err == -EOPNOTSUPP) | ||
2644 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | ||
2645 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
2646 | } | ||
2647 | if (bio->bi_private) | ||
2648 | complete(bio->bi_private); | ||
2649 | bio_put(bio); | ||
2650 | } | ||
2651 | |||
2652 | /* | ||
2653 | * trigger flushes for one of the devices. If you pass wait == 0, the flushes are | ||
2654 | * sent down. With wait == 1, it waits for the previous flush. | ||
2655 | * | ||
2656 | * any device where the flush fails with EOPNOTSUPP is flagged as not | ||
2657 | * barrier-capable | ||
2658 | */ | ||
2659 | static int write_dev_flush(struct btrfs_device *device, int wait) | ||
2660 | { | ||
2661 | struct bio *bio; | ||
2662 | int ret = 0; | ||
2663 | |||
2664 | if (device->nobarriers) | ||
2665 | return 0; | ||
2666 | |||
2667 | if (wait) { | ||
2668 | bio = device->flush_bio; | ||
2669 | if (!bio) | ||
2670 | return 0; | ||
2671 | |||
2672 | wait_for_completion(&device->flush_wait); | ||
2673 | |||
2674 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) { | ||
2675 | printk("btrfs: disabling barriers on dev %s\n", | ||
2676 | device->name); | ||
2677 | device->nobarriers = 1; | ||
2678 | } | ||
2679 | if (!bio_flagged(bio, BIO_UPTODATE)) { | ||
2680 | ret = -EIO; | ||
2681 | } | ||
2682 | |||
2683 | /* drop the reference from the wait == 0 run */ | ||
2684 | bio_put(bio); | ||
2685 | device->flush_bio = NULL; | ||
2686 | |||
2687 | return ret; | ||
2688 | } | ||
2689 | |||
2690 | /* | ||
2691 | * one reference for us, and we leave it for the | ||
2692 | * caller | ||
2693 | */ | ||
2694 | device->flush_bio = NULL;; | ||
2695 | bio = bio_alloc(GFP_NOFS, 0); | ||
2696 | if (!bio) | ||
2697 | return -ENOMEM; | ||
2698 | |||
2699 | bio->bi_end_io = btrfs_end_empty_barrier; | ||
2700 | bio->bi_bdev = device->bdev; | ||
2701 | init_completion(&device->flush_wait); | ||
2702 | bio->bi_private = &device->flush_wait; | ||
2703 | device->flush_bio = bio; | ||
2704 | |||
2705 | bio_get(bio); | ||
2706 | submit_bio(WRITE_FLUSH, bio); | ||
2707 | |||
2708 | return 0; | ||
2709 | } | ||
2710 | |||
2711 | /* | ||
2712 | * send an empty flush down to each device in parallel, | ||
2713 | * then wait for them | ||
2714 | */ | ||
2715 | static int barrier_all_devices(struct btrfs_fs_info *info) | ||
2716 | { | ||
2717 | struct list_head *head; | ||
2718 | struct btrfs_device *dev; | ||
2719 | int errors = 0; | ||
2720 | int ret; | ||
2721 | |||
2722 | /* send down all the barriers */ | ||
2723 | head = &info->fs_devices->devices; | ||
2724 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
2725 | if (!dev->bdev) { | ||
2726 | errors++; | ||
2727 | continue; | ||
2728 | } | ||
2729 | if (!dev->in_fs_metadata || !dev->writeable) | ||
2730 | continue; | ||
2731 | |||
2732 | ret = write_dev_flush(dev, 0); | ||
2733 | if (ret) | ||
2734 | errors++; | ||
2735 | } | ||
2736 | |||
2737 | /* wait for all the barriers */ | ||
2738 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
2739 | if (!dev->bdev) { | ||
2740 | errors++; | ||
2741 | continue; | ||
2742 | } | ||
2743 | if (!dev->in_fs_metadata || !dev->writeable) | ||
2744 | continue; | ||
2745 | |||
2746 | ret = write_dev_flush(dev, 1); | ||
2747 | if (ret) | ||
2748 | errors++; | ||
2749 | } | ||
2750 | if (errors) | ||
2751 | return -EIO; | ||
2752 | return 0; | ||
2753 | } | ||
2754 | |||
2329 | int write_all_supers(struct btrfs_root *root, int max_mirrors) | 2755 | int write_all_supers(struct btrfs_root *root, int max_mirrors) |
2330 | { | 2756 | { |
2331 | struct list_head *head; | 2757 | struct list_head *head; |
@@ -2338,14 +2764,19 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2338 | int total_errors = 0; | 2764 | int total_errors = 0; |
2339 | u64 flags; | 2765 | u64 flags; |
2340 | 2766 | ||
2341 | max_errors = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; | 2767 | max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1; |
2342 | do_barriers = !btrfs_test_opt(root, NOBARRIER); | 2768 | do_barriers = !btrfs_test_opt(root, NOBARRIER); |
2769 | backup_super_roots(root->fs_info); | ||
2343 | 2770 | ||
2344 | sb = &root->fs_info->super_for_commit; | 2771 | sb = root->fs_info->super_for_commit; |
2345 | dev_item = &sb->dev_item; | 2772 | dev_item = &sb->dev_item; |
2346 | 2773 | ||
2347 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 2774 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
2348 | head = &root->fs_info->fs_devices->devices; | 2775 | head = &root->fs_info->fs_devices->devices; |
2776 | |||
2777 | if (do_barriers) | ||
2778 | barrier_all_devices(root->fs_info); | ||
2779 | |||
2349 | list_for_each_entry_rcu(dev, head, dev_list) { | 2780 | list_for_each_entry_rcu(dev, head, dev_list) { |
2350 | if (!dev->bdev) { | 2781 | if (!dev->bdev) { |
2351 | total_errors++; | 2782 | total_errors++; |
@@ -2545,8 +2976,6 @@ int close_ctree(struct btrfs_root *root) | |||
2545 | /* clear out the rbtree of defraggable inodes */ | 2976 | /* clear out the rbtree of defraggable inodes */ |
2546 | btrfs_run_defrag_inodes(root->fs_info); | 2977 | btrfs_run_defrag_inodes(root->fs_info); |
2547 | 2978 | ||
2548 | btrfs_put_block_group_cache(fs_info); | ||
2549 | |||
2550 | /* | 2979 | /* |
2551 | * Here come 2 situations when btrfs is broken to flip readonly: | 2980 | * Here come 2 situations when btrfs is broken to flip readonly: |
2552 | * | 2981 | * |
@@ -2572,6 +3001,8 @@ int close_ctree(struct btrfs_root *root) | |||
2572 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 3001 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2573 | } | 3002 | } |
2574 | 3003 | ||
3004 | btrfs_put_block_group_cache(fs_info); | ||
3005 | |||
2575 | kthread_stop(root->fs_info->transaction_kthread); | 3006 | kthread_stop(root->fs_info->transaction_kthread); |
2576 | kthread_stop(root->fs_info->cleaner_kthread); | 3007 | kthread_stop(root->fs_info->cleaner_kthread); |
2577 | 3008 | ||
@@ -2603,7 +3034,6 @@ int close_ctree(struct btrfs_root *root) | |||
2603 | del_fs_roots(fs_info); | 3034 | del_fs_roots(fs_info); |
2604 | 3035 | ||
2605 | iput(fs_info->btree_inode); | 3036 | iput(fs_info->btree_inode); |
2606 | kfree(fs_info->delayed_root); | ||
2607 | 3037 | ||
2608 | btrfs_stop_workers(&fs_info->generic_worker); | 3038 | btrfs_stop_workers(&fs_info->generic_worker); |
2609 | btrfs_stop_workers(&fs_info->fixup_workers); | 3039 | btrfs_stop_workers(&fs_info->fixup_workers); |
@@ -2617,6 +3047,7 @@ int close_ctree(struct btrfs_root *root) | |||
2617 | btrfs_stop_workers(&fs_info->submit_workers); | 3047 | btrfs_stop_workers(&fs_info->submit_workers); |
2618 | btrfs_stop_workers(&fs_info->delayed_workers); | 3048 | btrfs_stop_workers(&fs_info->delayed_workers); |
2619 | btrfs_stop_workers(&fs_info->caching_workers); | 3049 | btrfs_stop_workers(&fs_info->caching_workers); |
3050 | btrfs_stop_workers(&fs_info->readahead_workers); | ||
2620 | 3051 | ||
2621 | btrfs_close_devices(fs_info->fs_devices); | 3052 | btrfs_close_devices(fs_info->fs_devices); |
2622 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 3053 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
@@ -2624,12 +3055,7 @@ int close_ctree(struct btrfs_root *root) | |||
2624 | bdi_destroy(&fs_info->bdi); | 3055 | bdi_destroy(&fs_info->bdi); |
2625 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3056 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
2626 | 3057 | ||
2627 | kfree(fs_info->extent_root); | 3058 | free_fs_info(fs_info); |
2628 | kfree(fs_info->tree_root); | ||
2629 | kfree(fs_info->chunk_root); | ||
2630 | kfree(fs_info->dev_root); | ||
2631 | kfree(fs_info->csum_root); | ||
2632 | kfree(fs_info); | ||
2633 | 3059 | ||
2634 | return 0; | 3060 | return 0; |
2635 | } | 3061 | } |
@@ -2735,7 +3161,8 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2735 | return ret; | 3161 | return ret; |
2736 | } | 3162 | } |
2737 | 3163 | ||
2738 | int btree_lock_page_hook(struct page *page) | 3164 | static int btree_lock_page_hook(struct page *page, void *data, |
3165 | void (*flush_fn)(void *)) | ||
2739 | { | 3166 | { |
2740 | struct inode *inode = page->mapping->host; | 3167 | struct inode *inode = page->mapping->host; |
2741 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3168 | struct btrfs_root *root = BTRFS_I(inode)->root; |
@@ -2752,7 +3179,10 @@ int btree_lock_page_hook(struct page *page) | |||
2752 | if (!eb) | 3179 | if (!eb) |
2753 | goto out; | 3180 | goto out; |
2754 | 3181 | ||
2755 | btrfs_tree_lock(eb); | 3182 | if (!btrfs_try_tree_write_lock(eb)) { |
3183 | flush_fn(data); | ||
3184 | btrfs_tree_lock(eb); | ||
3185 | } | ||
2756 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 3186 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2757 | 3187 | ||
2758 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | 3188 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { |
@@ -2767,7 +3197,10 @@ int btree_lock_page_hook(struct page *page) | |||
2767 | btrfs_tree_unlock(eb); | 3197 | btrfs_tree_unlock(eb); |
2768 | free_extent_buffer(eb); | 3198 | free_extent_buffer(eb); |
2769 | out: | 3199 | out: |
2770 | lock_page(page); | 3200 | if (!trylock_page(page)) { |
3201 | flush_fn(data); | ||
3202 | lock_page(page); | ||
3203 | } | ||
2771 | return 0; | 3204 | return 0; |
2772 | } | 3205 | } |
2773 | 3206 | ||
@@ -3123,6 +3556,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3123 | static struct extent_io_ops btree_extent_io_ops = { | 3556 | static struct extent_io_ops btree_extent_io_ops = { |
3124 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3557 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
3125 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3558 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
3559 | .readpage_io_failed_hook = btree_io_failed_hook, | ||
3126 | .submit_bio_hook = btree_submit_bio_hook, | 3560 | .submit_bio_hook = btree_submit_bio_hook, |
3127 | /* note we're sharing with inode.c for the merge bio hook */ | 3561 | /* note we're sharing with inode.c for the merge bio hook */ |
3128 | .merge_bio_hook = btrfs_merge_bio_hook, | 3562 | .merge_bio_hook = btrfs_merge_bio_hook, |