aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c261
1 files changed, 202 insertions, 59 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index edf9730ba72e..d2b1bcaf88ec 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -886,18 +886,20 @@ static noinline_for_stack int
886ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 886ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
887 struct ext4_buddy *e4b) 887 struct ext4_buddy *e4b)
888{ 888{
889 struct ext4_sb_info *sbi = EXT4_SB(sb);
890 struct inode *inode = sbi->s_buddy_cache;
891 int blocks_per_page; 889 int blocks_per_page;
892 int block; 890 int block;
893 int pnum; 891 int pnum;
894 int poff; 892 int poff;
895 struct page *page; 893 struct page *page;
896 int ret; 894 int ret;
895 struct ext4_group_info *grp;
896 struct ext4_sb_info *sbi = EXT4_SB(sb);
897 struct inode *inode = sbi->s_buddy_cache;
897 898
898 mb_debug("load group %u\n", group); 899 mb_debug("load group %u\n", group);
899 900
900 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; 901 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
902 grp = ext4_get_group_info(sb, group);
901 903
902 e4b->bd_blkbits = sb->s_blocksize_bits; 904 e4b->bd_blkbits = sb->s_blocksize_bits;
903 e4b->bd_info = ext4_get_group_info(sb, group); 905 e4b->bd_info = ext4_get_group_info(sb, group);
@@ -905,6 +907,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
905 e4b->bd_group = group; 907 e4b->bd_group = group;
906 e4b->bd_buddy_page = NULL; 908 e4b->bd_buddy_page = NULL;
907 e4b->bd_bitmap_page = NULL; 909 e4b->bd_bitmap_page = NULL;
910 e4b->alloc_semp = &grp->alloc_sem;
911
912 /* Take the read lock on the group alloc
913 * sem. This would make sure a parallel
914 * ext4_mb_init_group happening on other
915 * groups mapped by the page is blocked
916 * till we are done with allocation
917 */
918 down_read(e4b->alloc_semp);
908 919
909 /* 920 /*
910 * the buddy cache inode stores the block bitmap 921 * the buddy cache inode stores the block bitmap
@@ -920,6 +931,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
920 page = find_get_page(inode->i_mapping, pnum); 931 page = find_get_page(inode->i_mapping, pnum);
921 if (page == NULL || !PageUptodate(page)) { 932 if (page == NULL || !PageUptodate(page)) {
922 if (page) 933 if (page)
934 /*
935 * drop the page reference and try
936 * to get the page with lock. If we
937 * are not uptodate that implies
938 * somebody just created the page but
939 * is yet to initialize the same. So
940 * wait for it to initialize.
941 */
923 page_cache_release(page); 942 page_cache_release(page);
924 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 943 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
925 if (page) { 944 if (page) {
@@ -985,6 +1004,9 @@ err:
985 page_cache_release(e4b->bd_buddy_page); 1004 page_cache_release(e4b->bd_buddy_page);
986 e4b->bd_buddy = NULL; 1005 e4b->bd_buddy = NULL;
987 e4b->bd_bitmap = NULL; 1006 e4b->bd_bitmap = NULL;
1007
1008 /* Done with the buddy cache */
1009 up_read(e4b->alloc_semp);
988 return ret; 1010 return ret;
989} 1011}
990 1012
@@ -994,6 +1016,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
994 page_cache_release(e4b->bd_bitmap_page); 1016 page_cache_release(e4b->bd_bitmap_page);
995 if (e4b->bd_buddy_page) 1017 if (e4b->bd_buddy_page)
996 page_cache_release(e4b->bd_buddy_page); 1018 page_cache_release(e4b->bd_buddy_page);
1019 /* Done with the buddy cache */
1020 up_read(e4b->alloc_semp);
997} 1021}
998 1022
999 1023
@@ -1696,6 +1720,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1696 return 0; 1720 return 0;
1697} 1721}
1698 1722
1723/*
1724 * lock the group_info alloc_sem of all the groups
1725 * belonging to the same buddy cache page. This
1726 * make sure other parallel operation on the buddy
1727 * cache doesn't happen whild holding the buddy cache
1728 * lock
1729 */
1730int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
1731{
1732 int i;
1733 int block, pnum;
1734 int blocks_per_page;
1735 int groups_per_page;
1736 ext4_group_t first_group;
1737 struct ext4_group_info *grp;
1738
1739 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1740 /*
1741 * the buddy cache inode stores the block bitmap
1742 * and buddy information in consecutive blocks.
1743 * So for each group we need two blocks.
1744 */
1745 block = group * 2;
1746 pnum = block / blocks_per_page;
1747 first_group = pnum * blocks_per_page / 2;
1748
1749 groups_per_page = blocks_per_page >> 1;
1750 if (groups_per_page == 0)
1751 groups_per_page = 1;
1752 /* read all groups the page covers into the cache */
1753 for (i = 0; i < groups_per_page; i++) {
1754
1755 if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
1756 break;
1757 grp = ext4_get_group_info(sb, first_group + i);
1758 /* take all groups write allocation
1759 * semaphore. This make sure there is
1760 * no block allocation going on in any
1761 * of that groups
1762 */
1763 down_write(&grp->alloc_sem);
1764 }
1765 return i;
1766}
1767
1768void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1769 ext4_group_t group, int locked_group)
1770{
1771 int i;
1772 int block, pnum;
1773 int blocks_per_page;
1774 ext4_group_t first_group;
1775 struct ext4_group_info *grp;
1776
1777 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1778 /*
1779 * the buddy cache inode stores the block bitmap
1780 * and buddy information in consecutive blocks.
1781 * So for each group we need two blocks.
1782 */
1783 block = group * 2;
1784 pnum = block / blocks_per_page;
1785 first_group = pnum * blocks_per_page / 2;
1786 /* release locks on all the groups */
1787 for (i = 0; i < locked_group; i++) {
1788
1789 grp = ext4_get_group_info(sb, first_group + i);
1790 /* take all groups write allocation
1791 * semaphore. This make sure there is
1792 * no block allocation going on in any
1793 * of that groups
1794 */
1795 up_write(&grp->alloc_sem);
1796 }
1797
1798}
1799
1800static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1801{
1802
1803 int ret;
1804 void *bitmap;
1805 int blocks_per_page;
1806 int block, pnum, poff;
1807 int num_grp_locked = 0;
1808 struct ext4_group_info *this_grp;
1809 struct ext4_sb_info *sbi = EXT4_SB(sb);
1810 struct inode *inode = sbi->s_buddy_cache;
1811 struct page *page = NULL, *bitmap_page = NULL;
1812
1813 mb_debug("init group %lu\n", group);
1814 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1815 this_grp = ext4_get_group_info(sb, group);
1816 /*
1817 * This ensures we don't add group
1818 * to this buddy cache via resize
1819 */
1820 num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
1821 if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
1822 /*
1823 * somebody initialized the group
1824 * return without doing anything
1825 */
1826 ret = 0;
1827 goto err;
1828 }
1829 /*
1830 * the buddy cache inode stores the block bitmap
1831 * and buddy information in consecutive blocks.
1832 * So for each group we need two blocks.
1833 */
1834 block = group * 2;
1835 pnum = block / blocks_per_page;
1836 poff = block % blocks_per_page;
1837 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1838 if (page) {
1839 BUG_ON(page->mapping != inode->i_mapping);
1840 ret = ext4_mb_init_cache(page, NULL);
1841 if (ret) {
1842 unlock_page(page);
1843 goto err;
1844 }
1845 unlock_page(page);
1846 }
1847 if (page == NULL || !PageUptodate(page)) {
1848 ret = -EIO;
1849 goto err;
1850 }
1851 mark_page_accessed(page);
1852 bitmap_page = page;
1853 bitmap = page_address(page) + (poff * sb->s_blocksize);
1854
1855 /* init buddy cache */
1856 block++;
1857 pnum = block / blocks_per_page;
1858 poff = block % blocks_per_page;
1859 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1860 if (page == bitmap_page) {
1861 /*
1862 * If both the bitmap and buddy are in
1863 * the same page we don't need to force
1864 * init the buddy
1865 */
1866 unlock_page(page);
1867 } else if (page) {
1868 BUG_ON(page->mapping != inode->i_mapping);
1869 ret = ext4_mb_init_cache(page, bitmap);
1870 if (ret) {
1871 unlock_page(page);
1872 goto err;
1873 }
1874 unlock_page(page);
1875 }
1876 if (page == NULL || !PageUptodate(page)) {
1877 ret = -EIO;
1878 goto err;
1879 }
1880 mark_page_accessed(page);
1881err:
1882 ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
1883 if (bitmap_page)
1884 page_cache_release(bitmap_page);
1885 if (page)
1886 page_cache_release(page);
1887 return ret;
1888}
1889
1699static noinline_for_stack int 1890static noinline_for_stack int
1700ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1891ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1701{ 1892{
@@ -1779,7 +1970,7 @@ repeat:
1779 group = 0; 1970 group = 0;
1780 1971
1781 /* quick check to skip empty groups */ 1972 /* quick check to skip empty groups */
1782 grp = ext4_get_group_info(ac->ac_sb, group); 1973 grp = ext4_get_group_info(sb, group);
1783 if (grp->bb_free == 0) 1974 if (grp->bb_free == 0)
1784 continue; 1975 continue;
1785 1976
@@ -1792,10 +1983,9 @@ repeat:
1792 * we need full data about the group 1983 * we need full data about the group
1793 * to make a good selection 1984 * to make a good selection
1794 */ 1985 */
1795 err = ext4_mb_load_buddy(sb, group, &e4b); 1986 err = ext4_mb_init_group(sb, group);
1796 if (err) 1987 if (err)
1797 goto out; 1988 goto out;
1798 ext4_mb_release_desc(&e4b);
1799 } 1989 }
1800 1990
1801 /* 1991 /*
@@ -2246,7 +2436,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
2246 2436
2247 2437
2248/* Create and initialize ext4_group_info data for the given group. */ 2438/* Create and initialize ext4_group_info data for the given group. */
2249static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2439int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2250 struct ext4_group_desc *desc) 2440 struct ext4_group_desc *desc)
2251{ 2441{
2252 int i, len; 2442 int i, len;
@@ -2304,6 +2494,7 @@ static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2304 } 2494 }
2305 2495
2306 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2496 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2497 init_rwsem(&meta_group_info[i]->alloc_sem);
2307 meta_group_info[i]->bb_free_root.rb_node = NULL;; 2498 meta_group_info[i]->bb_free_root.rb_node = NULL;;
2308 2499
2309#ifdef DOUBLE_CHECK 2500#ifdef DOUBLE_CHECK
@@ -2331,54 +2522,6 @@ exit_meta_group_info:
2331} /* ext4_mb_add_groupinfo */ 2522} /* ext4_mb_add_groupinfo */
2332 2523
2333/* 2524/*
2334 * Add a group to the existing groups.
2335 * This function is used for online resize
2336 */
2337int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
2338 struct ext4_group_desc *desc)
2339{
2340 struct ext4_sb_info *sbi = EXT4_SB(sb);
2341 struct inode *inode = sbi->s_buddy_cache;
2342 int blocks_per_page;
2343 int block;
2344 int pnum;
2345 struct page *page;
2346 int err;
2347
2348 /* Add group based on group descriptor*/
2349 err = ext4_mb_add_groupinfo(sb, group, desc);
2350 if (err)
2351 return err;
2352
2353 /*
2354 * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
2355 * datas) are set not up to date so that they will be re-initilaized
2356 * during the next call to ext4_mb_load_buddy
2357 */
2358
2359 /* Set buddy page as not up to date */
2360 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
2361 block = group * 2;
2362 pnum = block / blocks_per_page;
2363 page = find_get_page(inode->i_mapping, pnum);
2364 if (page != NULL) {
2365 ClearPageUptodate(page);
2366 page_cache_release(page);
2367 }
2368
2369 /* Set bitmap page as not up to date */
2370 block++;
2371 pnum = block / blocks_per_page;
2372 page = find_get_page(inode->i_mapping, pnum);
2373 if (page != NULL) {
2374 ClearPageUptodate(page);
2375 page_cache_release(page);
2376 }
2377
2378 return 0;
2379}
2380
2381/*
2382 * Update an existing group. 2525 * Update an existing group.
2383 * This function is used for online resize 2526 * This function is used for online resize
2384 */ 2527 */
@@ -4588,11 +4731,6 @@ do_more:
4588 err = ext4_journal_get_write_access(handle, gd_bh); 4731 err = ext4_journal_get_write_access(handle, gd_bh);
4589 if (err) 4732 if (err)
4590 goto error_return; 4733 goto error_return;
4591
4592 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4593 if (err)
4594 goto error_return;
4595
4596#ifdef AGGRESSIVE_CHECK 4734#ifdef AGGRESSIVE_CHECK
4597 { 4735 {
4598 int i; 4736 int i;
@@ -4606,6 +4744,8 @@ do_more:
4606 /* We dirtied the bitmap block */ 4744 /* We dirtied the bitmap block */
4607 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4745 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
4608 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 4746 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
4747 if (err)
4748 goto error_return;
4609 4749
4610 if (ac) { 4750 if (ac) {
4611 ac->ac_b_ex.fe_group = block_group; 4751 ac->ac_b_ex.fe_group = block_group;
@@ -4614,6 +4754,9 @@ do_more:
4614 ext4_mb_store_history(ac); 4754 ext4_mb_store_history(ac);
4615 } 4755 }
4616 4756
4757 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4758 if (err)
4759 goto error_return;
4617 if (metadata && ext4_handle_valid(handle)) { 4760 if (metadata && ext4_handle_valid(handle)) {
4618 /* blocks being freed are metadata. these blocks shouldn't 4761 /* blocks being freed are metadata. these blocks shouldn't
4619 * be used until this transaction is committed */ 4762 * be used until this transaction is committed */