author		Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>	2009-01-05 21:36:19 -0500
committer	Theodore Ts'o <tytso@mit.edu>	2009-01-05 21:36:19 -0500
commit		920313a726e04fef0f2c0bcb04ad8229c0e700d8 (patch)
tree		7e7644a2fd48586ec2f455e56525565174798e4a /fs/ext4/mballoc.c
parent		e21675d4b63975d09eb75c443c48ebe663d23e18 (diff)
ext4: Use EXT4_GROUP_INFO_NEED_INIT_BIT during resize
The new groups added during resize are flagged as need_init groups. Make sure we properly initialize these groups. When the block size is smaller than the page size and we are adding new groups, the page may still be marked uptodate even though we haven't initialized the group. While forcing the init of the buddy cache we need to make sure that other groups sharing the same buddy-cache page are not using the cache. group_info->alloc_sem is added to ensure this.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Cc: stable@kernel.org
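To make the locking scheme easier to follow, here is a minimal userspace sketch of the pattern the patch introduces, not the ext4 code itself: the kernel's per-group rw_semaphore (group_info->alloc_sem) is modelled with a POSIX rwlock, and the toy_group_info/toy_load_buddy/toy_init_group names plus the GROUPS_PER_PAGE constant are invented for illustration. Allocation takes the group's lock shared; initializing a group takes the locks of every group mapped by the same buddy-cache page exclusively, so a half-built page is never used for allocation.

/*
 * Illustrative sketch only (assumptions: pthread rwlock stands in for the
 * kernel rw_semaphore; all toy_* names and GROUPS_PER_PAGE are hypothetical).
 * Build with: cc sketch.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

#define NGROUPS         8
#define GROUPS_PER_PAGE 2   /* two groups share one buddy-cache page */

struct toy_group_info {
	int need_init;               /* analogue of EXT4_GROUP_INFO_NEED_INIT_BIT */
	pthread_rwlock_t alloc_sem;  /* analogue of group_info->alloc_sem */
};

static struct toy_group_info groups[NGROUPS];

/* Allocation path: take the group's lock shared, as ext4_mb_load_buddy() does. */
static void toy_load_buddy(int group)
{
	pthread_rwlock_rdlock(&groups[group].alloc_sem);
	/* ... scan the bitmap / buddy data for free blocks ... */
	pthread_rwlock_unlock(&groups[group].alloc_sem);  /* cf. ext4_mb_release_desc() */
}

/*
 * Init path: take the locks of *all* groups mapped by the same buddy-cache
 * page exclusively (cf. ext4_mb_get_buddy_cache_lock()), so no allocation
 * can run against a page whose buddy data is being (re)built.
 */
static void toy_init_group(int group)
{
	int first = (group / GROUPS_PER_PAGE) * GROUPS_PER_PAGE;
	int i;

	for (i = 0; i < GROUPS_PER_PAGE && first + i < NGROUPS; i++)
		pthread_rwlock_wrlock(&groups[first + i].alloc_sem);

	if (groups[group].need_init) {
		/* ... build bitmap and buddy data for the whole page ... */
		groups[group].need_init = 0;
	}

	while (i--)
		pthread_rwlock_unlock(&groups[first + i].alloc_sem);
}

int main(void)
{
	int i;

	for (i = 0; i < NGROUPS; i++) {
		groups[i].need_init = 1;
		pthread_rwlock_init(&groups[i].alloc_sem, NULL);
	}
	toy_init_group(5);   /* e.g. a group added by online resize */
	toy_load_buddy(5);
	printf("group 5 need_init = %d\n", groups[5].need_init);
	return 0;
}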
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--	fs/ext4/mballoc.c	261
1 file changed, 202 insertions, 59 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index edf9730ba72e..d2b1bcaf88ec 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -886,18 +886,20 @@ static noinline_for_stack int
 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
					struct ext4_buddy *e4b)
 {
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct inode *inode = sbi->s_buddy_cache;
 	int blocks_per_page;
 	int block;
 	int pnum;
 	int poff;
 	struct page *page;
 	int ret;
+	struct ext4_group_info *grp;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct inode *inode = sbi->s_buddy_cache;
 
 	mb_debug("load group %u\n", group);
 
 	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	grp = ext4_get_group_info(sb, group);
 
 	e4b->bd_blkbits = sb->s_blocksize_bits;
 	e4b->bd_info = ext4_get_group_info(sb, group);
@@ -905,6 +907,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	e4b->bd_group = group;
 	e4b->bd_buddy_page = NULL;
 	e4b->bd_bitmap_page = NULL;
+	e4b->alloc_semp = &grp->alloc_sem;
+
+	/* Take the read lock on the group alloc
+	 * sem. This would make sure a parallel
+	 * ext4_mb_init_group happening on other
+	 * groups mapped by the page is blocked
+	 * till we are done with allocation
+	 */
+	down_read(e4b->alloc_semp);
 
 	/*
 	 * the buddy cache inode stores the block bitmap
@@ -920,6 +931,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
 	page = find_get_page(inode->i_mapping, pnum);
 	if (page == NULL || !PageUptodate(page)) {
 		if (page)
+			/*
+			 * drop the page reference and try
+			 * to get the page with lock. If we
+			 * are not uptodate that implies
+			 * somebody just created the page but
+			 * is yet to initialize the same. So
+			 * wait for it to initialize.
+			 */
 			page_cache_release(page);
 		page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
 		if (page) {
@@ -985,6 +1004,9 @@ err:
 		page_cache_release(e4b->bd_buddy_page);
 	e4b->bd_buddy = NULL;
 	e4b->bd_bitmap = NULL;
+
+	/* Done with the buddy cache */
+	up_read(e4b->alloc_semp);
 	return ret;
 }
 
@@ -994,6 +1016,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
 		page_cache_release(e4b->bd_bitmap_page);
 	if (e4b->bd_buddy_page)
 		page_cache_release(e4b->bd_buddy_page);
+	/* Done with the buddy cache */
+	up_read(e4b->alloc_semp);
 }
 
 
@@ -1696,6 +1720,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	return 0;
 }
 
+/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen whild holding the buddy cache
+ * lock
+ */
+int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
+{
+	int i;
+	int block, pnum;
+	int blocks_per_page;
+	int groups_per_page;
+	ext4_group_t first_group;
+	struct ext4_group_info *grp;
+
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	first_group = pnum * blocks_per_page / 2;
+
+	groups_per_page = blocks_per_page >> 1;
+	if (groups_per_page == 0)
+		groups_per_page = 1;
+	/* read all groups the page covers into the cache */
+	for (i = 0; i < groups_per_page; i++) {
+
+		if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+			break;
+		grp = ext4_get_group_info(sb, first_group + i);
+		/* take all groups write allocation
+		 * semaphore. This make sure there is
+		 * no block allocation going on in any
+		 * of that groups
+		 */
+		down_write(&grp->alloc_sem);
+	}
+	return i;
+}
+
+void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+					ext4_group_t group, int locked_group)
+{
+	int i;
+	int block, pnum;
+	int blocks_per_page;
+	ext4_group_t first_group;
+	struct ext4_group_info *grp;
+
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	first_group = pnum * blocks_per_page / 2;
+	/* release locks on all the groups */
+	for (i = 0; i < locked_group; i++) {
+
+		grp = ext4_get_group_info(sb, first_group + i);
+		/* take all groups write allocation
+		 * semaphore. This make sure there is
+		 * no block allocation going on in any
+		 * of that groups
+		 */
+		up_write(&grp->alloc_sem);
+	}
+
+}
+
+static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+{
+
+	int ret;
+	void *bitmap;
+	int blocks_per_page;
+	int block, pnum, poff;
+	int num_grp_locked = 0;
+	struct ext4_group_info *this_grp;
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct inode *inode = sbi->s_buddy_cache;
+	struct page *page = NULL, *bitmap_page = NULL;
+
+	mb_debug("init group %lu\n", group);
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	this_grp = ext4_get_group_info(sb, group);
+	/*
+	 * This ensures we don't add group
+	 * to this buddy cache via resize
+	 */
+	num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
+	if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
+		/*
+		 * somebody initialized the group
+		 * return without doing anything
+		 */
+		ret = 0;
+		goto err;
+	}
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	poff = block % blocks_per_page;
+	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	if (page) {
+		BUG_ON(page->mapping != inode->i_mapping);
+		ret = ext4_mb_init_cache(page, NULL);
+		if (ret) {
+			unlock_page(page);
+			goto err;
+		}
+		unlock_page(page);
+	}
+	if (page == NULL || !PageUptodate(page)) {
+		ret = -EIO;
+		goto err;
+	}
+	mark_page_accessed(page);
+	bitmap_page = page;
+	bitmap = page_address(page) + (poff * sb->s_blocksize);
+
+	/* init buddy cache */
+	block++;
+	pnum = block / blocks_per_page;
+	poff = block % blocks_per_page;
+	page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+	if (page == bitmap_page) {
+		/*
+		 * If both the bitmap and buddy are in
+		 * the same page we don't need to force
+		 * init the buddy
+		 */
+		unlock_page(page);
+	} else if (page) {
+		BUG_ON(page->mapping != inode->i_mapping);
+		ret = ext4_mb_init_cache(page, bitmap);
+		if (ret) {
+			unlock_page(page);
+			goto err;
+		}
+		unlock_page(page);
+	}
+	if (page == NULL || !PageUptodate(page)) {
+		ret = -EIO;
+		goto err;
+	}
+	mark_page_accessed(page);
+err:
+	ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
+	if (bitmap_page)
+		page_cache_release(bitmap_page);
+	if (page)
+		page_cache_release(page);
+	return ret;
+}
+
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
@@ -1779,7 +1970,7 @@ repeat:
 			group = 0;
 
 		/* quick check to skip empty groups */
-		grp = ext4_get_group_info(ac->ac_sb, group);
+		grp = ext4_get_group_info(sb, group);
 		if (grp->bb_free == 0)
 			continue;
 
@@ -1792,10 +1983,9 @@ repeat:
 			 * we need full data about the group
 			 * to make a good selection
 			 */
-			err = ext4_mb_load_buddy(sb, group, &e4b);
+			err = ext4_mb_init_group(sb, group);
 			if (err)
 				goto out;
-			ext4_mb_release_desc(&e4b);
 		}
 
 		/*
@@ -2246,7 +2436,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 
 
 /* Create and initialize ext4_group_info data for the given group. */
-static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
+int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 			struct ext4_group_desc *desc)
 {
 	int i, len;
@@ -2304,6 +2494,7 @@ static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 	}
 
 	INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+	init_rwsem(&meta_group_info[i]->alloc_sem);
 	meta_group_info[i]->bb_free_root.rb_node = NULL;;
 
 #ifdef DOUBLE_CHECK
@@ -2331,54 +2522,6 @@ exit_meta_group_info:
 } /* ext4_mb_add_groupinfo */
 
 /*
- * Add a group to the existing groups.
- * This function is used for online resize
- */
-int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
-				struct ext4_group_desc *desc)
-{
-	struct ext4_sb_info *sbi = EXT4_SB(sb);
-	struct inode *inode = sbi->s_buddy_cache;
-	int blocks_per_page;
-	int block;
-	int pnum;
-	struct page *page;
-	int err;
-
-	/* Add group based on group descriptor*/
-	err = ext4_mb_add_groupinfo(sb, group, desc);
-	if (err)
-		return err;
-
-	/*
-	 * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
-	 * datas) are set not up to date so that they will be re-initilaized
-	 * during the next call to ext4_mb_load_buddy
-	 */
-
-	/* Set buddy page as not up to date */
-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-	block = group * 2;
-	pnum = block / blocks_per_page;
-	page = find_get_page(inode->i_mapping, pnum);
-	if (page != NULL) {
-		ClearPageUptodate(page);
-		page_cache_release(page);
-	}
-
-	/* Set bitmap page as not up to date */
-	block++;
-	pnum = block / blocks_per_page;
-	page = find_get_page(inode->i_mapping, pnum);
-	if (page != NULL) {
-		ClearPageUptodate(page);
-		page_cache_release(page);
-	}
-
-	return 0;
-}
-
-/*
  * Update an existing group.
  * This function is used for online resize
  */
@@ -4588,11 +4731,6 @@ do_more:
 	err = ext4_journal_get_write_access(handle, gd_bh);
 	if (err)
 		goto error_return;
-
-	err = ext4_mb_load_buddy(sb, block_group, &e4b);
-	if (err)
-		goto error_return;
-
 #ifdef AGGRESSIVE_CHECK
 	{
 		int i;
@@ -4606,6 +4744,8 @@ do_more:
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+	if (err)
+		goto error_return;
 
 	if (ac) {
 		ac->ac_b_ex.fe_group = block_group;
@@ -4614,6 +4754,9 @@ do_more:
 		ext4_mb_store_history(ac);
 	}
 
+	err = ext4_mb_load_buddy(sb, block_group, &e4b);
+	if (err)
+		goto error_return;
 	if (metadata && ext4_handle_valid(handle)) {
 		/* blocks being freed are metadata. these blocks shouldn't
 		 * be used until this transaction is committed */