diff options
| -rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
| -rw-r--r-- | fs/ext4/mballoc.c | 70 |
2 files changed, 58 insertions, 13 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bf938cf7c5f0..d266003cac3e 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -1678,6 +1678,7 @@ struct ext4_group_info { | |||
| 1678 | ext4_grpblk_t bb_first_free; /* first free block */ | 1678 | ext4_grpblk_t bb_first_free; /* first free block */ |
| 1679 | ext4_grpblk_t bb_free; /* total free blocks */ | 1679 | ext4_grpblk_t bb_free; /* total free blocks */ |
| 1680 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ | 1680 | ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ |
| 1681 | ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ | ||
| 1681 | struct list_head bb_prealloc_list; | 1682 | struct list_head bb_prealloc_list; |
| 1682 | #ifdef DOUBLE_CHECK | 1683 | #ifdef DOUBLE_CHECK |
| 1683 | void *bb_bitmap; | 1684 | void *bb_bitmap; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4f2d3a9d4e21..aa499fe11687 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -658,6 +658,27 @@ static void ext4_mb_mark_free_simple(struct super_block *sb, | |||
| 658 | } | 658 | } |
| 659 | } | 659 | } |
| 660 | 660 | ||
| 661 | /* | ||
| 662 | * Cache the order of the largest free extent we have available in this block | ||
| 663 | * group. | ||
| 664 | */ | ||
| 665 | static void | ||
| 666 | mb_set_largest_free_order(struct super_block *sb, struct ext4_group_info *grp) | ||
| 667 | { | ||
| 668 | int i; | ||
| 669 | int bits; | ||
| 670 | |||
| 671 | grp->bb_largest_free_order = -1; /* uninit */ | ||
| 672 | |||
| 673 | bits = sb->s_blocksize_bits + 1; | ||
| 674 | for (i = bits; i >= 0; i--) { | ||
| 675 | if (grp->bb_counters[i] > 0) { | ||
| 676 | grp->bb_largest_free_order = i; | ||
| 677 | break; | ||
| 678 | } | ||
| 679 | } | ||
| 680 | } | ||
| 681 | |||
| 661 | static noinline_for_stack | 682 | static noinline_for_stack |
| 662 | void ext4_mb_generate_buddy(struct super_block *sb, | 683 | void ext4_mb_generate_buddy(struct super_block *sb, |
| 663 | void *buddy, void *bitmap, ext4_group_t group) | 684 | void *buddy, void *bitmap, ext4_group_t group) |
| @@ -700,6 +721,7 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
| 700 | */ | 721 | */ |
| 701 | grp->bb_free = free; | 722 | grp->bb_free = free; |
| 702 | } | 723 | } |
| 724 | mb_set_largest_free_order(sb, grp); | ||
| 703 | 725 | ||
| 704 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); | 726 | clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); |
| 705 | 727 | ||
| @@ -725,6 +747,9 @@ void ext4_mb_generate_buddy(struct super_block *sb, | |||
| 725 | * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. | 747 | * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks. |
| 726 | * So it can have information regarding groups_per_page which | 748 | * So it can have information regarding groups_per_page which |
| 727 | * is blocks_per_page/2 | 749 | * is blocks_per_page/2 |
| 750 | * | ||
| 751 | * Locking note: This routine takes the block group lock of all groups | ||
| 752 | * for this page; do not hold this lock when calling this routine! | ||
| 728 | */ | 753 | */ |
| 729 | 754 | ||
| 730 | static int ext4_mb_init_cache(struct page *page, char *incore) | 755 | static int ext4_mb_init_cache(struct page *page, char *incore) |
| @@ -910,6 +935,11 @@ out: | |||
| 910 | return err; | 935 | return err; |
| 911 | } | 936 | } |
| 912 | 937 | ||
| 938 | /* | ||
| 939 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | ||
| 940 | * block group lock of all groups for this page; do not hold the BG lock when | ||
| 941 | * calling this routine! | ||
| 942 | */ | ||
| 913 | static noinline_for_stack | 943 | static noinline_for_stack |
| 914 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) | 944 | int ext4_mb_init_group(struct super_block *sb, ext4_group_t group) |
| 915 | { | 945 | { |
| @@ -1004,6 +1034,11 @@ err: | |||
| 1004 | return ret; | 1034 | return ret; |
| 1005 | } | 1035 | } |
| 1006 | 1036 | ||
| 1037 | /* | ||
| 1038 | * Locking note: This routine calls ext4_mb_init_cache(), which takes the | ||
| 1039 | * block group lock of all groups for this page; do not hold the BG lock when | ||
| 1040 | * calling this routine! | ||
| 1041 | */ | ||
| 1007 | static noinline_for_stack int | 1042 | static noinline_for_stack int |
| 1008 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, | 1043 | ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, |
| 1009 | struct ext4_buddy *e4b) | 1044 | struct ext4_buddy *e4b) |
| @@ -1299,6 +1334,7 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b, | |||
| 1299 | buddy = buddy2; | 1334 | buddy = buddy2; |
| 1300 | } while (1); | 1335 | } while (1); |
| 1301 | } | 1336 | } |
| 1337 | mb_set_largest_free_order(sb, e4b->bd_info); | ||
| 1302 | mb_check_buddy(e4b); | 1338 | mb_check_buddy(e4b); |
| 1303 | } | 1339 | } |
| 1304 | 1340 | ||
| @@ -1427,6 +1463,7 @@ static int mb_mark_used(struct ext4_buddy *e4b, struct ext4_free_extent *ex) | |||
| 1427 | e4b->bd_info->bb_counters[ord]++; | 1463 | e4b->bd_info->bb_counters[ord]++; |
| 1428 | e4b->bd_info->bb_counters[ord]++; | 1464 | e4b->bd_info->bb_counters[ord]++; |
| 1429 | } | 1465 | } |
| 1466 | mb_set_largest_free_order(e4b->bd_sb, e4b->bd_info); | ||
| 1430 | 1467 | ||
| 1431 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); | 1468 | mb_set_bits(EXT4_MB_BITMAP(e4b), ex->fe_start, len0); |
| 1432 | mb_check_buddy(e4b); | 1469 | mb_check_buddy(e4b); |
| @@ -1821,16 +1858,22 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
| 1821 | } | 1858 | } |
| 1822 | } | 1859 | } |
| 1823 | 1860 | ||
| 1861 | /* This is now called BEFORE we load the buddy bitmap. */ | ||
| 1824 | static int ext4_mb_good_group(struct ext4_allocation_context *ac, | 1862 | static int ext4_mb_good_group(struct ext4_allocation_context *ac, |
| 1825 | ext4_group_t group, int cr) | 1863 | ext4_group_t group, int cr) |
| 1826 | { | 1864 | { |
| 1827 | unsigned free, fragments; | 1865 | unsigned free, fragments; |
| 1828 | unsigned i, bits; | ||
| 1829 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); | 1866 | int flex_size = ext4_flex_bg_size(EXT4_SB(ac->ac_sb)); |
| 1830 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); | 1867 | struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group); |
| 1831 | 1868 | ||
| 1832 | BUG_ON(cr < 0 || cr >= 4); | 1869 | BUG_ON(cr < 0 || cr >= 4); |
| 1833 | BUG_ON(EXT4_MB_GRP_NEED_INIT(grp)); | 1870 | |
| 1871 | /* We only do this if the grp has never been initialized */ | ||
| 1872 | if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { | ||
| 1873 | int ret = ext4_mb_init_group(ac->ac_sb, group); | ||
| 1874 | if (ret) | ||
| 1875 | return 0; | ||
| 1876 | } | ||
| 1834 | 1877 | ||
| 1835 | free = grp->bb_free; | 1878 | free = grp->bb_free; |
| 1836 | fragments = grp->bb_fragments; | 1879 | fragments = grp->bb_fragments; |
| @@ -1843,17 +1886,16 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac, | |||
| 1843 | case 0: | 1886 | case 0: |
| 1844 | BUG_ON(ac->ac_2order == 0); | 1887 | BUG_ON(ac->ac_2order == 0); |
| 1845 | 1888 | ||
| 1889 | if (grp->bb_largest_free_order < ac->ac_2order) | ||
| 1890 | return 0; | ||
| 1891 | |||
| 1846 | /* Avoid using the first bg of a flexgroup for data files */ | 1892 | /* Avoid using the first bg of a flexgroup for data files */ |
| 1847 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && | 1893 | if ((ac->ac_flags & EXT4_MB_HINT_DATA) && |
| 1848 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && | 1894 | (flex_size >= EXT4_FLEX_SIZE_DIR_ALLOC_SCHEME) && |
| 1849 | ((group % flex_size) == 0)) | 1895 | ((group % flex_size) == 0)) |
| 1850 | return 0; | 1896 | return 0; |
| 1851 | 1897 | ||
| 1852 | bits = ac->ac_sb->s_blocksize_bits + 1; | 1898 | return 1; |
| 1853 | for (i = ac->ac_2order; i <= bits; i++) | ||
| 1854 | if (grp->bb_counters[i] > 0) | ||
| 1855 | return 1; | ||
| 1856 | break; | ||
| 1857 | case 1: | 1899 | case 1: |
| 1858 | if ((free / fragments) >= ac->ac_g_ex.fe_len) | 1900 | if ((free / fragments) >= ac->ac_g_ex.fe_len) |
| 1859 | return 1; | 1901 | return 1; |
| @@ -2024,14 +2066,11 @@ repeat: | |||
| 2024 | group = ac->ac_g_ex.fe_group; | 2066 | group = ac->ac_g_ex.fe_group; |
| 2025 | 2067 | ||
| 2026 | for (i = 0; i < ngroups; group++, i++) { | 2068 | for (i = 0; i < ngroups; group++, i++) { |
| 2027 | struct ext4_group_info *grp; | ||
| 2028 | |||
| 2029 | if (group == ngroups) | 2069 | if (group == ngroups) |
| 2030 | group = 0; | 2070 | group = 0; |
| 2031 | 2071 | ||
| 2032 | /* quick check to skip empty groups */ | 2072 | /* This now checks without needing the buddy page */ |
| 2033 | grp = ext4_get_group_info(sb, group); | 2073 | if (!ext4_mb_good_group(ac, group, cr)) |
| 2034 | if (grp->bb_free == 0) | ||
| 2035 | continue; | 2074 | continue; |
| 2036 | 2075 | ||
| 2037 | err = ext4_mb_load_buddy(sb, group, &e4b); | 2076 | err = ext4_mb_load_buddy(sb, group, &e4b); |
| @@ -2039,8 +2078,12 @@ repeat: | |||
| 2039 | goto out; | 2078 | goto out; |
| 2040 | 2079 | ||
| 2041 | ext4_lock_group(sb, group); | 2080 | ext4_lock_group(sb, group); |
| 2081 | |||
| 2082 | /* | ||
| 2083 | * We need to check again after locking the | ||
| 2084 | * block group | ||
| 2085 | */ | ||
| 2042 | if (!ext4_mb_good_group(ac, group, cr)) { | 2086 | if (!ext4_mb_good_group(ac, group, cr)) { |
| 2043 | /* someone did allocation from this group */ | ||
| 2044 | ext4_unlock_group(sb, group); | 2087 | ext4_unlock_group(sb, group); |
| 2045 | ext4_mb_unload_buddy(&e4b); | 2088 | ext4_mb_unload_buddy(&e4b); |
| 2046 | continue; | 2089 | continue; |
| @@ -2253,6 +2296,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
| 2253 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2296 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
| 2254 | init_rwsem(&meta_group_info[i]->alloc_sem); | 2297 | init_rwsem(&meta_group_info[i]->alloc_sem); |
| 2255 | meta_group_info[i]->bb_free_root = RB_ROOT; | 2298 | meta_group_info[i]->bb_free_root = RB_ROOT; |
| 2299 | meta_group_info[i]->bb_largest_free_order = -1; /* uninit */ | ||
| 2256 | 2300 | ||
| 2257 | #ifdef DOUBLE_CHECK | 2301 | #ifdef DOUBLE_CHECK |
| 2258 | { | 2302 | { |
