diff options
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r-- | fs/ext4/mballoc.c | 261 |
1 files changed, 202 insertions, 59 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index edf9730ba72e..d2b1bcaf88ec 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -886,18 +886,20 @@ static noinline_for_stack int | |||
886 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 886 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
887 | struct ext4_buddy *e4b) | 887 | struct ext4_buddy *e4b) |
888 | { | 888 | { |
889 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
890 | struct inode *inode = sbi->s_buddy_cache; | ||
891 | int blocks_per_page; | 889 | int blocks_per_page; |
892 | int block; | 890 | int block; |
893 | int pnum; | 891 | int pnum; |
894 | int poff; | 892 | int poff; |
895 | struct page *page; | 893 | struct page *page; |
896 | int ret; | 894 | int ret; |
895 | struct ext4_group_info *grp; | ||
896 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
897 | struct inode *inode = sbi->s_buddy_cache; | ||
897 | 898 | ||
898 | mb_debug("load group %u\n", group); | 899 | mb_debug("load group %u\n", group); |
899 | 900 | ||
900 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | 901 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; |
902 | grp = ext4_get_group_info(sb, group); | ||
901 | 903 | ||
902 | e4b->bd_blkbits = sb->s_blocksize_bits; | 904 | e4b->bd_blkbits = sb->s_blocksize_bits; |
903 | e4b->bd_info = ext4_get_group_info(sb, group); | 905 | e4b->bd_info = ext4_get_group_info(sb, group); |
@@ -905,6 +907,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
905 | e4b->bd_group = group; | 907 | e4b->bd_group = group; |
906 | e4b->bd_buddy_page = NULL; | 908 | e4b->bd_buddy_page = NULL; |
907 | e4b->bd_bitmap_page = NULL; | 909 | e4b->bd_bitmap_page = NULL; |
910 | e4b->alloc_semp = &grp->alloc_sem; | ||
911 | |||
912 | /* Take the read lock on the group alloc | ||
913 | * sem. This would make sure a parallel | ||
914 | * ext4_mb_init_group happening on other | ||
915 | * groups mapped by the page is blocked | ||
916 | * till we are done with allocation | ||
917 | */ | ||
918 | down_read(e4b->alloc_semp); | ||
908 | 919 | ||
909 | /* | 920 | /* |
910 | * the buddy cache inode stores the block bitmap | 921 | * the buddy cache inode stores the block bitmap |
@@ -920,6 +931,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | |||
920 | page = find_get_page(inode->i_mapping, pnum); | 931 | page = find_get_page(inode->i_mapping, pnum); |
921 | if (page == NULL || !PageUptodate(page)) { | 932 | if (page == NULL || !PageUptodate(page)) { |
922 | if (page) | 933 | if (page) |
934 | /* | ||
935 | * drop the page reference and try | ||
936 | * to get the page with lock. If we | ||
937 | * are not uptodate that implies | ||
938 | * somebody just created the page but | ||
939 | * is yet to initialize the same. So | ||
940 | * wait for it to initialize. | ||
941 | */ | ||
923 | page_cache_release(page); | 942 | page_cache_release(page); |
924 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | 943 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); |
925 | if (page) { | 944 | if (page) { |
@@ -985,6 +1004,9 @@ err: | |||
985 | page_cache_release(e4b->bd_buddy_page); | 1004 | page_cache_release(e4b->bd_buddy_page); |
986 | e4b->bd_buddy = NULL; | 1005 | e4b->bd_buddy = NULL; |
987 | e4b->bd_bitmap = NULL; | 1006 | e4b->bd_bitmap = NULL; |
1007 | |||
1008 | /* Done with the buddy cache */ | ||
1009 | up_read(e4b->alloc_semp); | ||
988 | return ret; | 1010 | return ret; |
989 | } | 1011 | } |
990 | 1012 | ||
@@ -994,6 +1016,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b) | |||
994 | page_cache_release(e4b->bd_bitmap_page); | 1016 | page_cache_release(e4b->bd_bitmap_page); |
995 | if (e4b->bd_buddy_page) | 1017 | if (e4b->bd_buddy_page) |
996 | page_cache_release(e4b->bd_buddy_page); | 1018 | page_cache_release(e4b->bd_buddy_page); |
1019 | /* Done with the buddy cache */ | ||
1020 | up_read(e4b->alloc_semp); | ||
997 | } | 1021 | } |
998 | 1022 | ||
999 | 1023 | ||
@@ -1696,6 +1720,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
1696 | return 0; | 1720 | return 0; |
1697 | } | 1721 | } |
1698 | 1722 | ||
1723 | /* | ||
1724 | * Write-lock the group_info alloc_sem of all the groups | ||
1725 | * belonging to the same buddy cache page as @group. This | ||
1726 | * makes sure no other parallel operation on the buddy | ||
1727 | * cache happens while the buddy cache lock is held. | ||
1728 | * Returns the number of groups locked. | ||
1729 | */ | ||
1730 | int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group) | ||
1731 | { | ||
1732 | int i; | ||
1733 | int block, pnum; | ||
1734 | int blocks_per_page; | ||
1735 | int groups_per_page; | ||
1736 | ext4_group_t first_group; | ||
1737 | struct ext4_group_info *grp; | ||
1738 | |||
1739 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1740 | /* | ||
1741 | * the buddy cache inode stores the block bitmap | ||
1742 | * and buddy information in consecutive blocks. | ||
1743 | * So for each group we need two blocks. | ||
1744 | */ | ||
1745 | block = group * 2; | ||
1746 | pnum = block / blocks_per_page; | ||
1747 | first_group = pnum * blocks_per_page / 2; | ||
1748 | |||
1749 | groups_per_page = blocks_per_page >> 1; | ||
1750 | if (groups_per_page == 0) | ||
1751 | groups_per_page = 1; | ||
1752 | /* write-lock every group the page covers */ | ||
1753 | for (i = 0; i < groups_per_page; i++) { | ||
1754 | |||
1755 | if ((first_group + i) >= EXT4_SB(sb)->s_groups_count) | ||
1756 | break; | ||
1757 | grp = ext4_get_group_info(sb, first_group + i); | ||
1758 | /* take each group's allocation | ||
1759 | * semaphore for writing. This makes sure | ||
1760 | * there is no block allocation going on | ||
1761 | * in any of those groups | ||
1762 | */ | ||
1763 | down_write(&grp->alloc_sem); | ||
1764 | } | ||
1765 | return i; | ||
1766 | } | ||
1767 | |||
1768 | void ext4_mb_put_buddy_cache_lock(struct super_block *sb, | ||
1769 | ext4_group_t group, int locked_group) | ||
1770 | { | ||
1771 | int i; | ||
1772 | int block, pnum; | ||
1773 | int blocks_per_page; | ||
1774 | ext4_group_t first_group; | ||
1775 | struct ext4_group_info *grp; | ||
1776 | |||
1777 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1778 | /* | ||
1779 | * the buddy cache inode stores the block bitmap | ||
1780 | * and buddy information in consecutive blocks. | ||
1781 | * So for each group we need two blocks. | ||
1782 | */ | ||
1783 | block = group * 2; | ||
1784 | pnum = block / blocks_per_page; | ||
1785 | first_group = pnum * blocks_per_page / 2; | ||
1786 | /* release the write locks on the first locked_group groups of the page */ | ||
1787 | for (i = 0; i < locked_group; i++) { | ||
1788 | |||
1789 | grp = ext4_get_group_info(sb, first_group + i); | ||
1790 | /* drop the write side of this group's | ||
1791 | * allocation semaphore. locked_group is | ||
1792 | * expected to be the count returned by | ||
1793 | * ext4_mb_get_buddy_cache_lock for @group | ||
1794 | */ | ||
1795 | up_write(&grp->alloc_sem); | ||
1796 | } | ||
1797 | |||
1798 | } | ||
1799 | |||
1800 | static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | ||
1801 | { | ||
1802 | |||
1803 | int ret; | ||
1804 | void *bitmap; | ||
1805 | int blocks_per_page; | ||
1806 | int block, pnum, poff; | ||
1807 | int num_grp_locked = 0; | ||
1808 | struct ext4_group_info *this_grp; | ||
1809 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1810 | struct inode *inode = sbi->s_buddy_cache; | ||
1811 | struct page *page = NULL, *bitmap_page = NULL; | ||
1812 | |||
1813 | mb_debug("init group %lu\n", group); | ||
1814 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
1815 | this_grp = ext4_get_group_info(sb, group); | ||
1816 | /* | ||
1817 | * Write-lock all groups sharing this buddy cache page; this ensures | ||
1818 | * we don't add a group to this buddy cache via resize. NOTE(review): the mb_debug above prints group with %lu while ext4_mb_load_buddy uses %u - confirm against ext4_group_t. | ||
1819 | */ | ||
1820 | num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group); | ||
1821 | if (!EXT4_MB_GRP_NEED_INIT(this_grp)) { | ||
1822 | /* | ||
1823 | * somebody already initialized the group; | ||
1824 | * return success without doing anything | ||
1825 | */ | ||
1826 | ret = 0; | ||
1827 | goto err; | ||
1828 | } | ||
1829 | /* | ||
1830 | * the buddy cache inode stores the block bitmap | ||
1831 | * and buddy information in consecutive blocks. | ||
1832 | * So for each group we need two blocks. | ||
1833 | */ | ||
1834 | block = group * 2; | ||
1835 | pnum = block / blocks_per_page; | ||
1836 | poff = block % blocks_per_page; | ||
1837 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1838 | if (page) { | ||
1839 | BUG_ON(page->mapping != inode->i_mapping); | ||
1840 | ret = ext4_mb_init_cache(page, NULL); | ||
1841 | if (ret) { | ||
1842 | unlock_page(page); | ||
1843 | goto err; | ||
1844 | } | ||
1845 | unlock_page(page); | ||
1846 | } | ||
1847 | if (page == NULL || !PageUptodate(page)) { | ||
1848 | ret = -EIO; | ||
1849 | goto err; | ||
1850 | } | ||
1851 | mark_page_accessed(page); | ||
1852 | bitmap_page = page; | ||
1853 | bitmap = page_address(page) + (poff * sb->s_blocksize); | ||
1854 | |||
1855 | /* init the buddy block, which follows the bitmap block */ | ||
1856 | block++; | ||
1857 | pnum = block / blocks_per_page; | ||
1858 | poff = block % blocks_per_page; | ||
1859 | page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); | ||
1860 | if (page == bitmap_page) { | ||
1861 | /* | ||
1862 | * If both the bitmap and buddy are in | ||
1863 | * the same page we don't need to force | ||
1864 | * init the buddy; just unlock the page | ||
1865 | */ | ||
1866 | unlock_page(page); | ||
1867 | } else if (page) { | ||
1868 | BUG_ON(page->mapping != inode->i_mapping); | ||
1869 | ret = ext4_mb_init_cache(page, bitmap); | ||
1870 | if (ret) { | ||
1871 | unlock_page(page); | ||
1872 | goto err; | ||
1873 | } | ||
1874 | unlock_page(page); | ||
1875 | } | ||
1876 | if (page == NULL || !PageUptodate(page)) { | ||
1877 | ret = -EIO; | ||
1878 | goto err; | ||
1879 | } | ||
1880 | mark_page_accessed(page); | ||
1881 | err: | ||
1882 | ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked); | ||
1883 | if (bitmap_page) | ||
1884 | page_cache_release(bitmap_page); | ||
1885 | if (page) | ||
1886 | page_cache_release(page); | ||
1887 | return ret; | ||
1888 | } | ||
1889 | |||
1699 | static noinline_for_stack int | 1890 | static noinline_for_stack int |
1700 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) | 1891 | ext4_mb_regular_allocator(struct ext4_allocation_context *ac) |
1701 | { | 1892 | { |
@@ -1779,7 +1970,7 @@ repeat: | |||
1779 | group = 0; | 1970 | group = 0; |
1780 | 1971 | ||
1781 | /* quick check to skip empty groups */ | 1972 | /* quick check to skip empty groups */ |
1782 | grp = ext4_get_group_info(ac->ac_sb, group); | 1973 | grp = ext4_get_group_info(sb, group); |
1783 | if (grp->bb_free == 0) | 1974 | if (grp->bb_free == 0) |
1784 | continue; | 1975 | continue; |
1785 | 1976 | ||
@@ -1792,10 +1983,9 @@ repeat: | |||
1792 | * we need full data about the group | 1983 | * we need full data about the group |
1793 | * to make a good selection | 1984 | * to make a good selection |
1794 | */ | 1985 | */ |
1795 | err = ext4_mb_load_buddy(sb, group, &e4b); | 1986 | err = ext4_mb_init_group(sb, group); |
1796 | if (err) | 1987 | if (err) |
1797 | goto out; | 1988 | goto out; |
1798 | ext4_mb_release_desc(&e4b); | ||
1799 | } | 1989 | } |
1800 | 1990 | ||
1801 | /* | 1991 | /* |
@@ -2246,7 +2436,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac) | |||
2246 | 2436 | ||
2247 | 2437 | ||
2248 | /* Create and initialize ext4_group_info data for the given group. */ | 2438 | /* Create and initialize ext4_group_info data for the given group. */ |
2249 | static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2439 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
2250 | struct ext4_group_desc *desc) | 2440 | struct ext4_group_desc *desc) |
2251 | { | 2441 | { |
2252 | int i, len; | 2442 | int i, len; |
@@ -2304,6 +2494,7 @@ static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2304 | } | 2494 | } |
2305 | 2495 | ||
2306 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2496 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2497 | init_rwsem(&meta_group_info[i]->alloc_sem); | ||
2307 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | 2498 | meta_group_info[i]->bb_free_root.rb_node = NULL;; |
2308 | 2499 | ||
2309 | #ifdef DOUBLE_CHECK | 2500 | #ifdef DOUBLE_CHECK |
@@ -2331,54 +2522,6 @@ exit_meta_group_info: | |||
2331 | } /* ext4_mb_add_groupinfo */ | 2522 | } /* ext4_mb_add_groupinfo */ |
2332 | 2523 | ||
2333 | /* | 2524 | /* |
2334 | * Add a group to the existing groups. | ||
2335 | * This function is used for online resize | ||
2336 | */ | ||
2337 | int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group, | ||
2338 | struct ext4_group_desc *desc) | ||
2339 | { | ||
2340 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2341 | struct inode *inode = sbi->s_buddy_cache; | ||
2342 | int blocks_per_page; | ||
2343 | int block; | ||
2344 | int pnum; | ||
2345 | struct page *page; | ||
2346 | int err; | ||
2347 | |||
2348 | /* Add group based on group descriptor*/ | ||
2349 | err = ext4_mb_add_groupinfo(sb, group, desc); | ||
2350 | if (err) | ||
2351 | return err; | ||
2352 | |||
2353 | /* | ||
2354 | * Cache pages containing dynamic mb_alloc datas (buddy and bitmap | ||
2355 | * datas) are set not up to date so that they will be re-initilaized | ||
2356 | * during the next call to ext4_mb_load_buddy | ||
2357 | */ | ||
2358 | |||
2359 | /* Set buddy page as not up to date */ | ||
2360 | blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; | ||
2361 | block = group * 2; | ||
2362 | pnum = block / blocks_per_page; | ||
2363 | page = find_get_page(inode->i_mapping, pnum); | ||
2364 | if (page != NULL) { | ||
2365 | ClearPageUptodate(page); | ||
2366 | page_cache_release(page); | ||
2367 | } | ||
2368 | |||
2369 | /* Set bitmap page as not up to date */ | ||
2370 | block++; | ||
2371 | pnum = block / blocks_per_page; | ||
2372 | page = find_get_page(inode->i_mapping, pnum); | ||
2373 | if (page != NULL) { | ||
2374 | ClearPageUptodate(page); | ||
2375 | page_cache_release(page); | ||
2376 | } | ||
2377 | |||
2378 | return 0; | ||
2379 | } | ||
2380 | |||
2381 | /* | ||
2382 | * Update an existing group. | 2525 | * Update an existing group. |
2383 | * This function is used for online resize | 2526 | * This function is used for online resize |
2384 | */ | 2527 | */ |
@@ -4588,11 +4731,6 @@ do_more: | |||
4588 | err = ext4_journal_get_write_access(handle, gd_bh); | 4731 | err = ext4_journal_get_write_access(handle, gd_bh); |
4589 | if (err) | 4732 | if (err) |
4590 | goto error_return; | 4733 | goto error_return; |
4591 | |||
4592 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4593 | if (err) | ||
4594 | goto error_return; | ||
4595 | |||
4596 | #ifdef AGGRESSIVE_CHECK | 4734 | #ifdef AGGRESSIVE_CHECK |
4597 | { | 4735 | { |
4598 | int i; | 4736 | int i; |
@@ -4606,6 +4744,8 @@ do_more: | |||
4606 | /* We dirtied the bitmap block */ | 4744 | /* We dirtied the bitmap block */ |
4607 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); | 4745 | BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); |
4608 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); | 4746 | err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); |
4747 | if (err) | ||
4748 | goto error_return; | ||
4609 | 4749 | ||
4610 | if (ac) { | 4750 | if (ac) { |
4611 | ac->ac_b_ex.fe_group = block_group; | 4751 | ac->ac_b_ex.fe_group = block_group; |
@@ -4614,6 +4754,9 @@ do_more: | |||
4614 | ext4_mb_store_history(ac); | 4754 | ext4_mb_store_history(ac); |
4615 | } | 4755 | } |
4616 | 4756 | ||
4757 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | ||
4758 | if (err) | ||
4759 | goto error_return; | ||
4617 | if (metadata && ext4_handle_valid(handle)) { | 4760 | if (metadata && ext4_handle_valid(handle)) { |
4618 | /* blocks being freed are metadata. these blocks shouldn't | 4761 | /* blocks being freed are metadata. these blocks shouldn't |
4619 | * be used until this transaction is committed */ | 4762 | * be used until this transaction is committed */ |