aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/mballoc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/mballoc.c')
-rw-r--r--fs/ext4/mballoc.c726
1 files changed, 233 insertions, 493 deletions
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index cd258463e2a9..bba12824defa 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -22,6 +22,7 @@
22 */ 22 */
23 23
24#include "mballoc.h" 24#include "mballoc.h"
25#include <linux/debugfs.h>
25#include <trace/events/ext4.h> 26#include <trace/events/ext4.h>
26 27
27/* 28/*
@@ -622,13 +623,13 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file,
622 623
623/* FIXME!! need more doc */ 624/* FIXME!! need more doc */
624static void ext4_mb_mark_free_simple(struct super_block *sb, 625static void ext4_mb_mark_free_simple(struct super_block *sb,
625 void *buddy, unsigned first, int len, 626 void *buddy, ext4_grpblk_t first, ext4_grpblk_t len,
626 struct ext4_group_info *grp) 627 struct ext4_group_info *grp)
627{ 628{
628 struct ext4_sb_info *sbi = EXT4_SB(sb); 629 struct ext4_sb_info *sbi = EXT4_SB(sb);
629 unsigned short min; 630 ext4_grpblk_t min;
630 unsigned short max; 631 ext4_grpblk_t max;
631 unsigned short chunk; 632 ext4_grpblk_t chunk;
632 unsigned short border; 633 unsigned short border;
633 634
634 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb)); 635 BUG_ON(len > EXT4_BLOCKS_PER_GROUP(sb));
@@ -662,10 +663,10 @@ void ext4_mb_generate_buddy(struct super_block *sb,
662 void *buddy, void *bitmap, ext4_group_t group) 663 void *buddy, void *bitmap, ext4_group_t group)
663{ 664{
664 struct ext4_group_info *grp = ext4_get_group_info(sb, group); 665 struct ext4_group_info *grp = ext4_get_group_info(sb, group);
665 unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); 666 ext4_grpblk_t max = EXT4_BLOCKS_PER_GROUP(sb);
666 unsigned short i = 0; 667 ext4_grpblk_t i = 0;
667 unsigned short first; 668 ext4_grpblk_t first;
668 unsigned short len; 669 ext4_grpblk_t len;
669 unsigned free = 0; 670 unsigned free = 0;
670 unsigned fragments = 0; 671 unsigned fragments = 0;
671 unsigned long long period = get_cycles(); 672 unsigned long long period = get_cycles();
@@ -743,7 +744,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
743 char *data; 744 char *data;
744 char *bitmap; 745 char *bitmap;
745 746
746 mb_debug("init page %lu\n", page->index); 747 mb_debug(1, "init page %lu\n", page->index);
747 748
748 inode = page->mapping->host; 749 inode = page->mapping->host;
749 sb = inode->i_sb; 750 sb = inode->i_sb;
@@ -822,7 +823,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
822 set_bitmap_uptodate(bh[i]); 823 set_bitmap_uptodate(bh[i]);
823 bh[i]->b_end_io = end_buffer_read_sync; 824 bh[i]->b_end_io = end_buffer_read_sync;
824 submit_bh(READ, bh[i]); 825 submit_bh(READ, bh[i]);
825 mb_debug("read bitmap for group %u\n", first_group + i); 826 mb_debug(1, "read bitmap for group %u\n", first_group + i);
826 } 827 }
827 828
828 /* wait for I/O completion */ 829 /* wait for I/O completion */
@@ -862,12 +863,13 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
862 if ((first_block + i) & 1) { 863 if ((first_block + i) & 1) {
863 /* this is block of buddy */ 864 /* this is block of buddy */
864 BUG_ON(incore == NULL); 865 BUG_ON(incore == NULL);
865 mb_debug("put buddy for group %u in page %lu/%x\n", 866 mb_debug(1, "put buddy for group %u in page %lu/%x\n",
866 group, page->index, i * blocksize); 867 group, page->index, i * blocksize);
867 grinfo = ext4_get_group_info(sb, group); 868 grinfo = ext4_get_group_info(sb, group);
868 grinfo->bb_fragments = 0; 869 grinfo->bb_fragments = 0;
869 memset(grinfo->bb_counters, 0, 870 memset(grinfo->bb_counters, 0,
870 sizeof(unsigned short)*(sb->s_blocksize_bits+2)); 871 sizeof(*grinfo->bb_counters) *
872 (sb->s_blocksize_bits+2));
871 /* 873 /*
872 * incore got set to the group block bitmap below 874 * incore got set to the group block bitmap below
873 */ 875 */
@@ -878,7 +880,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
878 } else { 880 } else {
879 /* this is block of bitmap */ 881 /* this is block of bitmap */
880 BUG_ON(incore != NULL); 882 BUG_ON(incore != NULL);
881 mb_debug("put bitmap for group %u in page %lu/%x\n", 883 mb_debug(1, "put bitmap for group %u in page %lu/%x\n",
882 group, page->index, i * blocksize); 884 group, page->index, i * blocksize);
883 885
884 /* see comments in ext4_mb_put_pa() */ 886 /* see comments in ext4_mb_put_pa() */
@@ -908,6 +910,100 @@ out:
908 return err; 910 return err;
909} 911}
910 912
913static noinline_for_stack
914int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
915{
916
917 int ret = 0;
918 void *bitmap;
919 int blocks_per_page;
920 int block, pnum, poff;
921 int num_grp_locked = 0;
922 struct ext4_group_info *this_grp;
923 struct ext4_sb_info *sbi = EXT4_SB(sb);
924 struct inode *inode = sbi->s_buddy_cache;
925 struct page *page = NULL, *bitmap_page = NULL;
926
927 mb_debug(1, "init group %u\n", group);
928 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
929 this_grp = ext4_get_group_info(sb, group);
930 /*
931 * This ensures that we don't reinit the buddy cache
932 * page which map to the group from which we are already
933 * allocating. If we are looking at the buddy cache we would
934 * have taken a reference using ext4_mb_load_buddy and that
935 * would have taken the alloc_sem lock.
936 */
937 num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
938 if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
939 /*
940 * somebody initialized the group
941 * return without doing anything
942 */
943 ret = 0;
944 goto err;
945 }
946 /*
947 * the buddy cache inode stores the block bitmap
948 * and buddy information in consecutive blocks.
949 * So for each group we need two blocks.
950 */
951 block = group * 2;
952 pnum = block / blocks_per_page;
953 poff = block % blocks_per_page;
954 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
955 if (page) {
956 BUG_ON(page->mapping != inode->i_mapping);
957 ret = ext4_mb_init_cache(page, NULL);
958 if (ret) {
959 unlock_page(page);
960 goto err;
961 }
962 unlock_page(page);
963 }
964 if (page == NULL || !PageUptodate(page)) {
965 ret = -EIO;
966 goto err;
967 }
968 mark_page_accessed(page);
969 bitmap_page = page;
970 bitmap = page_address(page) + (poff * sb->s_blocksize);
971
972 /* init buddy cache */
973 block++;
974 pnum = block / blocks_per_page;
975 poff = block % blocks_per_page;
976 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
977 if (page == bitmap_page) {
978 /*
979 * If both the bitmap and buddy are in
980 * the same page we don't need to force
981 * init the buddy
982 */
983 unlock_page(page);
984 } else if (page) {
985 BUG_ON(page->mapping != inode->i_mapping);
986 ret = ext4_mb_init_cache(page, bitmap);
987 if (ret) {
988 unlock_page(page);
989 goto err;
990 }
991 unlock_page(page);
992 }
993 if (page == NULL || !PageUptodate(page)) {
994 ret = -EIO;
995 goto err;
996 }
997 mark_page_accessed(page);
998err:
999 ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
1000 if (bitmap_page)
1001 page_cache_release(bitmap_page);
1002 if (page)
1003 page_cache_release(page);
1004 return ret;
1005}
1006
911static noinline_for_stack int 1007static noinline_for_stack int
912ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group, 1008ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
913 struct ext4_buddy *e4b) 1009 struct ext4_buddy *e4b)
@@ -922,7 +1018,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
922 struct ext4_sb_info *sbi = EXT4_SB(sb); 1018 struct ext4_sb_info *sbi = EXT4_SB(sb);
923 struct inode *inode = sbi->s_buddy_cache; 1019 struct inode *inode = sbi->s_buddy_cache;
924 1020
925 mb_debug("load group %u\n", group); 1021 mb_debug(1, "load group %u\n", group);
926 1022
927 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize; 1023 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
928 grp = ext4_get_group_info(sb, group); 1024 grp = ext4_get_group_info(sb, group);
@@ -941,8 +1037,26 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
941 * groups mapped by the page is blocked 1037 * groups mapped by the page is blocked
942 * till we are done with allocation 1038 * till we are done with allocation
943 */ 1039 */
1040repeat_load_buddy:
944 down_read(e4b->alloc_semp); 1041 down_read(e4b->alloc_semp);
945 1042
1043 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1044 /* we need to check for group need init flag
1045 * with alloc_semp held so that we can be sure
1046 * that new blocks didn't get added to the group
1047 * when we are loading the buddy cache
1048 */
1049 up_read(e4b->alloc_semp);
1050 /*
1051 * we need full data about the group
1052 * to make a good selection
1053 */
1054 ret = ext4_mb_init_group(sb, group);
1055 if (ret)
1056 return ret;
1057 goto repeat_load_buddy;
1058 }
1059
946 /* 1060 /*
947 * the buddy cache inode stores the block bitmap 1061 * the buddy cache inode stores the block bitmap
948 * and buddy information in consecutive blocks. 1062 * and buddy information in consecutive blocks.
@@ -1360,7 +1474,7 @@ static void ext4_mb_use_best_found(struct ext4_allocation_context *ac,
1360 ac->alloc_semp = e4b->alloc_semp; 1474 ac->alloc_semp = e4b->alloc_semp;
1361 e4b->alloc_semp = NULL; 1475 e4b->alloc_semp = NULL;
1362 /* store last allocated for subsequent stream allocation */ 1476 /* store last allocated for subsequent stream allocation */
1363 if ((ac->ac_flags & EXT4_MB_HINT_DATA)) { 1477 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1364 spin_lock(&sbi->s_md_lock); 1478 spin_lock(&sbi->s_md_lock);
1365 sbi->s_mb_last_group = ac->ac_f_ex.fe_group; 1479 sbi->s_mb_last_group = ac->ac_f_ex.fe_group;
1366 sbi->s_mb_last_start = ac->ac_f_ex.fe_start; 1480 sbi->s_mb_last_start = ac->ac_f_ex.fe_start;
@@ -1837,97 +1951,6 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
1837 1951
1838} 1952}
1839 1953
1840static noinline_for_stack
1841int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
1842{
1843
1844 int ret;
1845 void *bitmap;
1846 int blocks_per_page;
1847 int block, pnum, poff;
1848 int num_grp_locked = 0;
1849 struct ext4_group_info *this_grp;
1850 struct ext4_sb_info *sbi = EXT4_SB(sb);
1851 struct inode *inode = sbi->s_buddy_cache;
1852 struct page *page = NULL, *bitmap_page = NULL;
1853
1854 mb_debug("init group %lu\n", group);
1855 blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
1856 this_grp = ext4_get_group_info(sb, group);
1857 /*
1858 * This ensures we don't add group
1859 * to this buddy cache via resize
1860 */
1861 num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, group);
1862 if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
1863 /*
1864 * somebody initialized the group
1865 * return without doing anything
1866 */
1867 ret = 0;
1868 goto err;
1869 }
1870 /*
1871 * the buddy cache inode stores the block bitmap
1872 * and buddy information in consecutive blocks.
1873 * So for each group we need two blocks.
1874 */
1875 block = group * 2;
1876 pnum = block / blocks_per_page;
1877 poff = block % blocks_per_page;
1878 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1879 if (page) {
1880 BUG_ON(page->mapping != inode->i_mapping);
1881 ret = ext4_mb_init_cache(page, NULL);
1882 if (ret) {
1883 unlock_page(page);
1884 goto err;
1885 }
1886 unlock_page(page);
1887 }
1888 if (page == NULL || !PageUptodate(page)) {
1889 ret = -EIO;
1890 goto err;
1891 }
1892 mark_page_accessed(page);
1893 bitmap_page = page;
1894 bitmap = page_address(page) + (poff * sb->s_blocksize);
1895
1896 /* init buddy cache */
1897 block++;
1898 pnum = block / blocks_per_page;
1899 poff = block % blocks_per_page;
1900 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1901 if (page == bitmap_page) {
1902 /*
1903 * If both the bitmap and buddy are in
1904 * the same page we don't need to force
1905 * init the buddy
1906 */
1907 unlock_page(page);
1908 } else if (page) {
1909 BUG_ON(page->mapping != inode->i_mapping);
1910 ret = ext4_mb_init_cache(page, bitmap);
1911 if (ret) {
1912 unlock_page(page);
1913 goto err;
1914 }
1915 unlock_page(page);
1916 }
1917 if (page == NULL || !PageUptodate(page)) {
1918 ret = -EIO;
1919 goto err;
1920 }
1921 mark_page_accessed(page);
1922err:
1923 ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
1924 if (bitmap_page)
1925 page_cache_release(bitmap_page);
1926 if (page)
1927 page_cache_release(page);
1928 return ret;
1929}
1930
1931static noinline_for_stack int 1954static noinline_for_stack int
1932ext4_mb_regular_allocator(struct ext4_allocation_context *ac) 1955ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1933{ 1956{
@@ -1938,11 +1961,14 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1938 struct ext4_sb_info *sbi; 1961 struct ext4_sb_info *sbi;
1939 struct super_block *sb; 1962 struct super_block *sb;
1940 struct ext4_buddy e4b; 1963 struct ext4_buddy e4b;
1941 loff_t size, isize;
1942 1964
1943 sb = ac->ac_sb; 1965 sb = ac->ac_sb;
1944 sbi = EXT4_SB(sb); 1966 sbi = EXT4_SB(sb);
1945 ngroups = ext4_get_groups_count(sb); 1967 ngroups = ext4_get_groups_count(sb);
1968 /* non-extent files are limited to low blocks/groups */
1969 if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL))
1970 ngroups = sbi->s_blockfile_groups;
1971
1946 BUG_ON(ac->ac_status == AC_STATUS_FOUND); 1972 BUG_ON(ac->ac_status == AC_STATUS_FOUND);
1947 1973
1948 /* first, try the goal */ 1974 /* first, try the goal */
@@ -1974,20 +2000,16 @@ ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
1974 } 2000 }
1975 2001
1976 bsbits = ac->ac_sb->s_blocksize_bits; 2002 bsbits = ac->ac_sb->s_blocksize_bits;
1977 /* if stream allocation is enabled, use global goal */
1978 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
1979 isize = i_size_read(ac->ac_inode) >> bsbits;
1980 if (size < isize)
1981 size = isize;
1982 2003
1983 if (size < sbi->s_mb_stream_request && 2004 /* if stream allocation is enabled, use global goal */
1984 (ac->ac_flags & EXT4_MB_HINT_DATA)) { 2005 if (ac->ac_flags & EXT4_MB_STREAM_ALLOC) {
1985 /* TBD: may be hot point */ 2006 /* TBD: may be hot point */
1986 spin_lock(&sbi->s_md_lock); 2007 spin_lock(&sbi->s_md_lock);
1987 ac->ac_g_ex.fe_group = sbi->s_mb_last_group; 2008 ac->ac_g_ex.fe_group = sbi->s_mb_last_group;
1988 ac->ac_g_ex.fe_start = sbi->s_mb_last_start; 2009 ac->ac_g_ex.fe_start = sbi->s_mb_last_start;
1989 spin_unlock(&sbi->s_md_lock); 2010 spin_unlock(&sbi->s_md_lock);
1990 } 2011 }
2012
1991 /* Let's just scan groups to find more-less suitable blocks */ 2013 /* Let's just scan groups to find more-less suitable blocks */
1992 cr = ac->ac_2order ? 0 : 1; 2014 cr = ac->ac_2order ? 0 : 1;
1993 /* 2015 /*
@@ -2015,27 +2037,6 @@ repeat:
2015 if (grp->bb_free == 0) 2037 if (grp->bb_free == 0)
2016 continue; 2038 continue;
2017 2039
2018 /*
2019 * if the group is already init we check whether it is
2020 * a good group and if not we don't load the buddy
2021 */
2022 if (EXT4_MB_GRP_NEED_INIT(grp)) {
2023 /*
2024 * we need full data about the group
2025 * to make a good selection
2026 */
2027 err = ext4_mb_init_group(sb, group);
2028 if (err)
2029 goto out;
2030 }
2031
2032 /*
2033 * If the particular group doesn't satisfy our
2034 * criteria we continue with the next group
2035 */
2036 if (!ext4_mb_good_group(ac, group, cr))
2037 continue;
2038
2039 err = ext4_mb_load_buddy(sb, group, &e4b); 2040 err = ext4_mb_load_buddy(sb, group, &e4b);
2040 if (err) 2041 if (err)
2041 goto out; 2042 goto out;
@@ -2095,207 +2096,6 @@ out:
2095 return err; 2096 return err;
2096} 2097}
2097 2098
2098#ifdef EXT4_MB_HISTORY
2099struct ext4_mb_proc_session {
2100 struct ext4_mb_history *history;
2101 struct super_block *sb;
2102 int start;
2103 int max;
2104};
2105
2106static void *ext4_mb_history_skip_empty(struct ext4_mb_proc_session *s,
2107 struct ext4_mb_history *hs,
2108 int first)
2109{
2110 if (hs == s->history + s->max)
2111 hs = s->history;
2112 if (!first && hs == s->history + s->start)
2113 return NULL;
2114 while (hs->orig.fe_len == 0) {
2115 hs++;
2116 if (hs == s->history + s->max)
2117 hs = s->history;
2118 if (hs == s->history + s->start)
2119 return NULL;
2120 }
2121 return hs;
2122}
2123
2124static void *ext4_mb_seq_history_start(struct seq_file *seq, loff_t *pos)
2125{
2126 struct ext4_mb_proc_session *s = seq->private;
2127 struct ext4_mb_history *hs;
2128 int l = *pos;
2129
2130 if (l == 0)
2131 return SEQ_START_TOKEN;
2132 hs = ext4_mb_history_skip_empty(s, s->history + s->start, 1);
2133 if (!hs)
2134 return NULL;
2135 while (--l && (hs = ext4_mb_history_skip_empty(s, ++hs, 0)) != NULL);
2136 return hs;
2137}
2138
2139static void *ext4_mb_seq_history_next(struct seq_file *seq, void *v,
2140 loff_t *pos)
2141{
2142 struct ext4_mb_proc_session *s = seq->private;
2143 struct ext4_mb_history *hs = v;
2144
2145 ++*pos;
2146 if (v == SEQ_START_TOKEN)
2147 return ext4_mb_history_skip_empty(s, s->history + s->start, 1);
2148 else
2149 return ext4_mb_history_skip_empty(s, ++hs, 0);
2150}
2151
2152static int ext4_mb_seq_history_show(struct seq_file *seq, void *v)
2153{
2154 char buf[25], buf2[25], buf3[25], *fmt;
2155 struct ext4_mb_history *hs = v;
2156
2157 if (v == SEQ_START_TOKEN) {
2158 seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s "
2159 "%-5s %-2s %-5s %-5s %-5s %-6s\n",
2160 "pid", "inode", "original", "goal", "result", "found",
2161 "grps", "cr", "flags", "merge", "tail", "broken");
2162 return 0;
2163 }
2164
2165 if (hs->op == EXT4_MB_HISTORY_ALLOC) {
2166 fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u "
2167 "%-5u %-5s %-5u %-6u\n";
2168 sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
2169 hs->result.fe_start, hs->result.fe_len,
2170 hs->result.fe_logical);
2171 sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
2172 hs->orig.fe_start, hs->orig.fe_len,
2173 hs->orig.fe_logical);
2174 sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group,
2175 hs->goal.fe_start, hs->goal.fe_len,
2176 hs->goal.fe_logical);
2177 seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2,
2178 hs->found, hs->groups, hs->cr, hs->flags,
2179 hs->merged ? "M" : "", hs->tail,
2180 hs->buddy ? 1 << hs->buddy : 0);
2181 } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) {
2182 fmt = "%-5u %-8u %-23s %-23s %-23s\n";
2183 sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
2184 hs->result.fe_start, hs->result.fe_len,
2185 hs->result.fe_logical);
2186 sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
2187 hs->orig.fe_start, hs->orig.fe_len,
2188 hs->orig.fe_logical);
2189 seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2);
2190 } else if (hs->op == EXT4_MB_HISTORY_DISCARD) {
2191 sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
2192 hs->result.fe_start, hs->result.fe_len);
2193 seq_printf(seq, "%-5u %-8u %-23s discard\n",
2194 hs->pid, hs->ino, buf2);
2195 } else if (hs->op == EXT4_MB_HISTORY_FREE) {
2196 sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
2197 hs->result.fe_start, hs->result.fe_len);
2198 seq_printf(seq, "%-5u %-8u %-23s free\n",
2199 hs->pid, hs->ino, buf2);
2200 }
2201 return 0;
2202}
2203
2204static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v)
2205{
2206}
2207
2208static struct seq_operations ext4_mb_seq_history_ops = {
2209 .start = ext4_mb_seq_history_start,
2210 .next = ext4_mb_seq_history_next,
2211 .stop = ext4_mb_seq_history_stop,
2212 .show = ext4_mb_seq_history_show,
2213};
2214
2215static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
2216{
2217 struct super_block *sb = PDE(inode)->data;
2218 struct ext4_sb_info *sbi = EXT4_SB(sb);
2219 struct ext4_mb_proc_session *s;
2220 int rc;
2221 int size;
2222
2223 if (unlikely(sbi->s_mb_history == NULL))
2224 return -ENOMEM;
2225 s = kmalloc(sizeof(*s), GFP_KERNEL);
2226 if (s == NULL)
2227 return -ENOMEM;
2228 s->sb = sb;
2229 size = sizeof(struct ext4_mb_history) * sbi->s_mb_history_max;
2230 s->history = kmalloc(size, GFP_KERNEL);
2231 if (s->history == NULL) {
2232 kfree(s);
2233 return -ENOMEM;
2234 }
2235
2236 spin_lock(&sbi->s_mb_history_lock);
2237 memcpy(s->history, sbi->s_mb_history, size);
2238 s->max = sbi->s_mb_history_max;
2239 s->start = sbi->s_mb_history_cur % s->max;
2240 spin_unlock(&sbi->s_mb_history_lock);
2241
2242 rc = seq_open(file, &ext4_mb_seq_history_ops);
2243 if (rc == 0) {
2244 struct seq_file *m = (struct seq_file *)file->private_data;
2245 m->private = s;
2246 } else {
2247 kfree(s->history);
2248 kfree(s);
2249 }
2250 return rc;
2251
2252}
2253
2254static int ext4_mb_seq_history_release(struct inode *inode, struct file *file)
2255{
2256 struct seq_file *seq = (struct seq_file *)file->private_data;
2257 struct ext4_mb_proc_session *s = seq->private;
2258 kfree(s->history);
2259 kfree(s);
2260 return seq_release(inode, file);
2261}
2262
2263static ssize_t ext4_mb_seq_history_write(struct file *file,
2264 const char __user *buffer,
2265 size_t count, loff_t *ppos)
2266{
2267 struct seq_file *seq = (struct seq_file *)file->private_data;
2268 struct ext4_mb_proc_session *s = seq->private;
2269 struct super_block *sb = s->sb;
2270 char str[32];
2271 int value;
2272
2273 if (count >= sizeof(str)) {
2274 printk(KERN_ERR "EXT4-fs: %s string too long, max %u bytes\n",
2275 "mb_history", (int)sizeof(str));
2276 return -EOVERFLOW;
2277 }
2278
2279 if (copy_from_user(str, buffer, count))
2280 return -EFAULT;
2281
2282 value = simple_strtol(str, NULL, 0);
2283 if (value < 0)
2284 return -ERANGE;
2285 EXT4_SB(sb)->s_mb_history_filter = value;
2286
2287 return count;
2288}
2289
2290static struct file_operations ext4_mb_seq_history_fops = {
2291 .owner = THIS_MODULE,
2292 .open = ext4_mb_seq_history_open,
2293 .read = seq_read,
2294 .write = ext4_mb_seq_history_write,
2295 .llseek = seq_lseek,
2296 .release = ext4_mb_seq_history_release,
2297};
2298
2299static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) 2099static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2300{ 2100{
2301 struct super_block *sb = seq->private; 2101 struct super_block *sb = seq->private;
@@ -2328,7 +2128,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
2328 struct ext4_buddy e4b; 2128 struct ext4_buddy e4b;
2329 struct sg { 2129 struct sg {
2330 struct ext4_group_info info; 2130 struct ext4_group_info info;
2331 unsigned short counters[16]; 2131 ext4_grpblk_t counters[16];
2332 } sg; 2132 } sg;
2333 2133
2334 group--; 2134 group--;
@@ -2366,7 +2166,7 @@ static void ext4_mb_seq_groups_stop(struct seq_file *seq, void *v)
2366{ 2166{
2367} 2167}
2368 2168
2369static struct seq_operations ext4_mb_seq_groups_ops = { 2169static const struct seq_operations ext4_mb_seq_groups_ops = {
2370 .start = ext4_mb_seq_groups_start, 2170 .start = ext4_mb_seq_groups_start,
2371 .next = ext4_mb_seq_groups_next, 2171 .next = ext4_mb_seq_groups_next,
2372 .stop = ext4_mb_seq_groups_stop, 2172 .stop = ext4_mb_seq_groups_stop,
@@ -2387,7 +2187,7 @@ static int ext4_mb_seq_groups_open(struct inode *inode, struct file *file)
2387 2187
2388} 2188}
2389 2189
2390static struct file_operations ext4_mb_seq_groups_fops = { 2190static const struct file_operations ext4_mb_seq_groups_fops = {
2391 .owner = THIS_MODULE, 2191 .owner = THIS_MODULE,
2392 .open = ext4_mb_seq_groups_open, 2192 .open = ext4_mb_seq_groups_open,
2393 .read = seq_read, 2193 .read = seq_read,
@@ -2395,82 +2195,6 @@ static struct file_operations ext4_mb_seq_groups_fops = {
2395 .release = seq_release, 2195 .release = seq_release,
2396}; 2196};
2397 2197
2398static void ext4_mb_history_release(struct super_block *sb)
2399{
2400 struct ext4_sb_info *sbi = EXT4_SB(sb);
2401
2402 if (sbi->s_proc != NULL) {
2403 remove_proc_entry("mb_groups", sbi->s_proc);
2404 if (sbi->s_mb_history_max)
2405 remove_proc_entry("mb_history", sbi->s_proc);
2406 }
2407 kfree(sbi->s_mb_history);
2408}
2409
2410static void ext4_mb_history_init(struct super_block *sb)
2411{
2412 struct ext4_sb_info *sbi = EXT4_SB(sb);
2413 int i;
2414
2415 if (sbi->s_proc != NULL) {
2416 if (sbi->s_mb_history_max)
2417 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2418 &ext4_mb_seq_history_fops, sb);
2419 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2420 &ext4_mb_seq_groups_fops, sb);
2421 }
2422
2423 sbi->s_mb_history_cur = 0;
2424 spin_lock_init(&sbi->s_mb_history_lock);
2425 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
2426 sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
2427 /* if we can't allocate history, then we simple won't use it */
2428}
2429
2430static noinline_for_stack void
2431ext4_mb_store_history(struct ext4_allocation_context *ac)
2432{
2433 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2434 struct ext4_mb_history h;
2435
2436 if (sbi->s_mb_history == NULL)
2437 return;
2438
2439 if (!(ac->ac_op & sbi->s_mb_history_filter))
2440 return;
2441
2442 h.op = ac->ac_op;
2443 h.pid = current->pid;
2444 h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0;
2445 h.orig = ac->ac_o_ex;
2446 h.result = ac->ac_b_ex;
2447 h.flags = ac->ac_flags;
2448 h.found = ac->ac_found;
2449 h.groups = ac->ac_groups_scanned;
2450 h.cr = ac->ac_criteria;
2451 h.tail = ac->ac_tail;
2452 h.buddy = ac->ac_buddy;
2453 h.merged = 0;
2454 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
2455 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
2456 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
2457 h.merged = 1;
2458 h.goal = ac->ac_g_ex;
2459 h.result = ac->ac_f_ex;
2460 }
2461
2462 spin_lock(&sbi->s_mb_history_lock);
2463 memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h));
2464 if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max)
2465 sbi->s_mb_history_cur = 0;
2466 spin_unlock(&sbi->s_mb_history_lock);
2467}
2468
2469#else
2470#define ext4_mb_history_release(sb)
2471#define ext4_mb_history_init(sb)
2472#endif
2473
2474 2198
2475/* Create and initialize ext4_group_info data for the given group. */ 2199/* Create and initialize ext4_group_info data for the given group. */
2476int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2200int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
@@ -2532,7 +2256,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2532 2256
2533 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); 2257 INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
2534 init_rwsem(&meta_group_info[i]->alloc_sem); 2258 init_rwsem(&meta_group_info[i]->alloc_sem);
2535 meta_group_info[i]->bb_free_root.rb_node = NULL;; 2259 meta_group_info[i]->bb_free_root.rb_node = NULL;
2536 2260
2537#ifdef DOUBLE_CHECK 2261#ifdef DOUBLE_CHECK
2538 { 2262 {
@@ -2558,26 +2282,15 @@ exit_meta_group_info:
2558 return -ENOMEM; 2282 return -ENOMEM;
2559} /* ext4_mb_add_groupinfo */ 2283} /* ext4_mb_add_groupinfo */
2560 2284
2561/*
2562 * Update an existing group.
2563 * This function is used for online resize
2564 */
2565void ext4_mb_update_group_info(struct ext4_group_info *grp, ext4_grpblk_t add)
2566{
2567 grp->bb_free += add;
2568}
2569
2570static int ext4_mb_init_backend(struct super_block *sb) 2285static int ext4_mb_init_backend(struct super_block *sb)
2571{ 2286{
2572 ext4_group_t ngroups = ext4_get_groups_count(sb); 2287 ext4_group_t ngroups = ext4_get_groups_count(sb);
2573 ext4_group_t i; 2288 ext4_group_t i;
2574 int metalen;
2575 struct ext4_sb_info *sbi = EXT4_SB(sb); 2289 struct ext4_sb_info *sbi = EXT4_SB(sb);
2576 struct ext4_super_block *es = sbi->s_es; 2290 struct ext4_super_block *es = sbi->s_es;
2577 int num_meta_group_infos; 2291 int num_meta_group_infos;
2578 int num_meta_group_infos_max; 2292 int num_meta_group_infos_max;
2579 int array_size; 2293 int array_size;
2580 struct ext4_group_info **meta_group_info;
2581 struct ext4_group_desc *desc; 2294 struct ext4_group_desc *desc;
2582 2295
2583 /* This is the number of blocks used by GDT */ 2296 /* This is the number of blocks used by GDT */
@@ -2622,22 +2335,6 @@ static int ext4_mb_init_backend(struct super_block *sb)
2622 goto err_freesgi; 2335 goto err_freesgi;
2623 } 2336 }
2624 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; 2337 EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
2625
2626 metalen = sizeof(*meta_group_info) << EXT4_DESC_PER_BLOCK_BITS(sb);
2627 for (i = 0; i < num_meta_group_infos; i++) {
2628 if ((i + 1) == num_meta_group_infos)
2629 metalen = sizeof(*meta_group_info) *
2630 (ngroups -
2631 (i << EXT4_DESC_PER_BLOCK_BITS(sb)));
2632 meta_group_info = kmalloc(metalen, GFP_KERNEL);
2633 if (meta_group_info == NULL) {
2634 printk(KERN_ERR "EXT4-fs: can't allocate mem for a "
2635 "buddy group\n");
2636 goto err_freemeta;
2637 }
2638 sbi->s_group_info[i] = meta_group_info;
2639 }
2640
2641 for (i = 0; i < ngroups; i++) { 2338 for (i = 0; i < ngroups; i++) {
2642 desc = ext4_get_group_desc(sb, i, NULL); 2339 desc = ext4_get_group_desc(sb, i, NULL);
2643 if (desc == NULL) { 2340 if (desc == NULL) {
@@ -2655,7 +2352,6 @@ err_freebuddy:
2655 while (i-- > 0) 2352 while (i-- > 0)
2656 kfree(ext4_get_group_info(sb, i)); 2353 kfree(ext4_get_group_info(sb, i));
2657 i = num_meta_group_infos; 2354 i = num_meta_group_infos;
2658err_freemeta:
2659 while (i-- > 0) 2355 while (i-- > 0)
2660 kfree(sbi->s_group_info[i]); 2356 kfree(sbi->s_group_info[i]);
2661 iput(sbi->s_buddy_cache); 2357 iput(sbi->s_buddy_cache);
@@ -2672,14 +2368,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2672 unsigned max; 2368 unsigned max;
2673 int ret; 2369 int ret;
2674 2370
2675 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); 2371 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
2676 2372
2677 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); 2373 sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
2678 if (sbi->s_mb_offsets == NULL) { 2374 if (sbi->s_mb_offsets == NULL) {
2679 return -ENOMEM; 2375 return -ENOMEM;
2680 } 2376 }
2681 2377
2682 i = (sb->s_blocksize_bits + 2) * sizeof(unsigned int); 2378 i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
2683 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); 2379 sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
2684 if (sbi->s_mb_maxs == NULL) { 2380 if (sbi->s_mb_maxs == NULL) {
2685 kfree(sbi->s_mb_offsets); 2381 kfree(sbi->s_mb_offsets);
@@ -2717,7 +2413,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2717 sbi->s_mb_stats = MB_DEFAULT_STATS; 2413 sbi->s_mb_stats = MB_DEFAULT_STATS;
2718 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; 2414 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2719 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; 2415 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2720 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2721 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2416 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2722 2417
2723 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2418 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
@@ -2735,12 +2430,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2735 spin_lock_init(&lg->lg_prealloc_lock); 2430 spin_lock_init(&lg->lg_prealloc_lock);
2736 } 2431 }
2737 2432
2738 ext4_mb_history_init(sb); 2433 if (sbi->s_proc)
2434 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2435 &ext4_mb_seq_groups_fops, sb);
2739 2436
2740 if (sbi->s_journal) 2437 if (sbi->s_journal)
2741 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2438 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
2742
2743 printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
2744 return 0; 2439 return 0;
2745} 2440}
2746 2441
@@ -2758,7 +2453,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
2758 kmem_cache_free(ext4_pspace_cachep, pa); 2453 kmem_cache_free(ext4_pspace_cachep, pa);
2759 } 2454 }
2760 if (count) 2455 if (count)
2761 mb_debug("mballoc: %u PAs left\n", count); 2456 mb_debug(1, "mballoc: %u PAs left\n", count);
2762 2457
2763} 2458}
2764 2459
@@ -2817,7 +2512,8 @@ int ext4_mb_release(struct super_block *sb)
2817 } 2512 }
2818 2513
2819 free_percpu(sbi->s_locality_groups); 2514 free_percpu(sbi->s_locality_groups);
2820 ext4_mb_history_release(sb); 2515 if (sbi->s_proc)
2516 remove_proc_entry("mb_groups", sbi->s_proc);
2821 2517
2822 return 0; 2518 return 0;
2823} 2519}
@@ -2839,7 +2535,7 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2839 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2535 list_for_each_safe(l, ltmp, &txn->t_private_list) {
2840 entry = list_entry(l, struct ext4_free_data, list); 2536 entry = list_entry(l, struct ext4_free_data, list);
2841 2537
2842 mb_debug("gonna free %u blocks in group %u (0x%p):", 2538 mb_debug(1, "gonna free %u blocks in group %u (0x%p):",
2843 entry->count, entry->group, entry); 2539 entry->count, entry->group, entry);
2844 2540
2845 err = ext4_mb_load_buddy(sb, entry->group, &e4b); 2541 err = ext4_mb_load_buddy(sb, entry->group, &e4b);
@@ -2874,9 +2570,43 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2874 ext4_mb_release_desc(&e4b); 2570 ext4_mb_release_desc(&e4b);
2875 } 2571 }
2876 2572
2877 mb_debug("freed %u blocks in %u structures\n", count, count2); 2573 mb_debug(1, "freed %u blocks in %u structures\n", count, count2);
2574}
2575
2576#ifdef CONFIG_EXT4_DEBUG
2577u8 mb_enable_debug __read_mostly;
2578
2579static struct dentry *debugfs_dir;
2580static struct dentry *debugfs_debug;
2581
2582static void __init ext4_create_debugfs_entry(void)
2583{
2584 debugfs_dir = debugfs_create_dir("ext4", NULL);
2585 if (debugfs_dir)
2586 debugfs_debug = debugfs_create_u8("mballoc-debug",
2587 S_IRUGO | S_IWUSR,
2588 debugfs_dir,
2589 &mb_enable_debug);
2590}
2591
2592static void ext4_remove_debugfs_entry(void)
2593{
2594 debugfs_remove(debugfs_debug);
2595 debugfs_remove(debugfs_dir);
2878} 2596}
2879 2597
2598#else
2599
2600static void __init ext4_create_debugfs_entry(void)
2601{
2602}
2603
2604static void ext4_remove_debugfs_entry(void)
2605{
2606}
2607
2608#endif
2609
2880int __init init_ext4_mballoc(void) 2610int __init init_ext4_mballoc(void)
2881{ 2611{
2882 ext4_pspace_cachep = 2612 ext4_pspace_cachep =
@@ -2904,6 +2634,7 @@ int __init init_ext4_mballoc(void)
2904 kmem_cache_destroy(ext4_ac_cachep); 2634 kmem_cache_destroy(ext4_ac_cachep);
2905 return -ENOMEM; 2635 return -ENOMEM;
2906 } 2636 }
2637 ext4_create_debugfs_entry();
2907 return 0; 2638 return 0;
2908} 2639}
2909 2640
@@ -2917,6 +2648,7 @@ void exit_ext4_mballoc(void)
2917 kmem_cache_destroy(ext4_pspace_cachep); 2648 kmem_cache_destroy(ext4_pspace_cachep);
2918 kmem_cache_destroy(ext4_ac_cachep); 2649 kmem_cache_destroy(ext4_ac_cachep);
2919 kmem_cache_destroy(ext4_free_ext_cachep); 2650 kmem_cache_destroy(ext4_free_ext_cachep);
2651 ext4_remove_debugfs_entry();
2920} 2652}
2921 2653
2922 2654
@@ -3061,7 +2793,7 @@ static void ext4_mb_normalize_group_request(struct ext4_allocation_context *ac)
3061 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe; 2793 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_stripe;
3062 else 2794 else
3063 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc; 2795 ac->ac_g_ex.fe_len = EXT4_SB(sb)->s_mb_group_prealloc;
3064 mb_debug("#%u: goal %u blocks for locality group\n", 2796 mb_debug(1, "#%u: goal %u blocks for locality group\n",
3065 current->pid, ac->ac_g_ex.fe_len); 2797 current->pid, ac->ac_g_ex.fe_len);
3066} 2798}
3067 2799
@@ -3180,23 +2912,18 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3180 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end || 2912 BUG_ON(!(ac->ac_o_ex.fe_logical >= pa_end ||
3181 ac->ac_o_ex.fe_logical < pa->pa_lstart)); 2913 ac->ac_o_ex.fe_logical < pa->pa_lstart));
3182 2914
3183 /* skip PA normalized request doesn't overlap with */ 2915 /* skip PAs this normalized request doesn't overlap with */
3184 if (pa->pa_lstart >= end) { 2916 if (pa->pa_lstart >= end || pa_end <= start) {
3185 spin_unlock(&pa->pa_lock);
3186 continue;
3187 }
3188 if (pa_end <= start) {
3189 spin_unlock(&pa->pa_lock); 2917 spin_unlock(&pa->pa_lock);
3190 continue; 2918 continue;
3191 } 2919 }
3192 BUG_ON(pa->pa_lstart <= start && pa_end >= end); 2920 BUG_ON(pa->pa_lstart <= start && pa_end >= end);
3193 2921
2922 /* adjust start or end to be adjacent to this pa */
3194 if (pa_end <= ac->ac_o_ex.fe_logical) { 2923 if (pa_end <= ac->ac_o_ex.fe_logical) {
3195 BUG_ON(pa_end < start); 2924 BUG_ON(pa_end < start);
3196 start = pa_end; 2925 start = pa_end;
3197 } 2926 } else if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3198
3199 if (pa->pa_lstart > ac->ac_o_ex.fe_logical) {
3200 BUG_ON(pa->pa_lstart > end); 2927 BUG_ON(pa->pa_lstart > end);
3201 end = pa->pa_lstart; 2928 end = pa->pa_lstart;
3202 } 2929 }
@@ -3251,7 +2978,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3251 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL; 2978 ac->ac_flags |= EXT4_MB_HINT_TRY_GOAL;
3252 } 2979 }
3253 2980
3254 mb_debug("goal: %u(was %u) blocks at %u\n", (unsigned) size, 2981 mb_debug(1, "goal: %u(was %u) blocks at %u\n", (unsigned) size,
3255 (unsigned) orig_size, (unsigned) start); 2982 (unsigned) orig_size, (unsigned) start);
3256} 2983}
3257 2984
@@ -3272,7 +2999,10 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3272 atomic_inc(&sbi->s_bal_breaks); 2999 atomic_inc(&sbi->s_bal_breaks);
3273 } 3000 }
3274 3001
3275 ext4_mb_store_history(ac); 3002 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3003 trace_ext4_mballoc_alloc(ac);
3004 else
3005 trace_ext4_mballoc_prealloc(ac);
3276} 3006}
3277 3007
3278/* 3008/*
@@ -3300,7 +3030,7 @@ static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
3300 BUG_ON(pa->pa_free < len); 3030 BUG_ON(pa->pa_free < len);
3301 pa->pa_free -= len; 3031 pa->pa_free -= len;
3302 3032
3303 mb_debug("use %llu/%u from inode pa %p\n", start, len, pa); 3033 mb_debug(1, "use %llu/%u from inode pa %p\n", start, len, pa);
3304} 3034}
3305 3035
3306/* 3036/*
@@ -3324,7 +3054,7 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac,
3324 * in on-disk bitmap -- see ext4_mb_release_context() 3054 * in on-disk bitmap -- see ext4_mb_release_context()
3325 * Other CPUs are prevented from allocating from this pa by lg_mutex 3055 * Other CPUs are prevented from allocating from this pa by lg_mutex
3326 */ 3056 */
3327 mb_debug("use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa); 3057 mb_debug(1, "use %u/%u from group pa %p\n", pa->pa_lstart-len, len, pa);
3328} 3058}
3329 3059
3330/* 3060/*
@@ -3382,6 +3112,11 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac)
3382 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len) 3112 ac->ac_o_ex.fe_logical >= pa->pa_lstart + pa->pa_len)
3383 continue; 3113 continue;
3384 3114
3115 /* non-extent files can't have physical blocks past 2^32 */
3116 if (!(EXT4_I(ac->ac_inode)->i_flags & EXT4_EXTENTS_FL) &&
3117 pa->pa_pstart + pa->pa_len > EXT4_MAX_BLOCK_FILE_PHYS)
3118 continue;
3119
3385 /* found preallocated blocks, use them */ 3120 /* found preallocated blocks, use them */
3386 spin_lock(&pa->pa_lock); 3121 spin_lock(&pa->pa_lock);
3387 if (pa->pa_deleted == 0 && pa->pa_free) { 3122 if (pa->pa_deleted == 0 && pa->pa_free) {
@@ -3503,7 +3238,7 @@ void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
3503 preallocated += len; 3238 preallocated += len;
3504 count++; 3239 count++;
3505 } 3240 }
3506 mb_debug("prellocated %u for group %u\n", preallocated, group); 3241 mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
3507} 3242}
3508 3243
3509static void ext4_mb_pa_callback(struct rcu_head *head) 3244static void ext4_mb_pa_callback(struct rcu_head *head)
@@ -3638,7 +3373,7 @@ ext4_mb_new_inode_pa(struct ext4_allocation_context *ac)
3638 pa->pa_deleted = 0; 3373 pa->pa_deleted = 0;
3639 pa->pa_type = MB_INODE_PA; 3374 pa->pa_type = MB_INODE_PA;
3640 3375
3641 mb_debug("new inode pa %p: %llu/%u for %u\n", pa, 3376 mb_debug(1, "new inode pa %p: %llu/%u for %u\n", pa,
3642 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3377 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3643 trace_ext4_mb_new_inode_pa(ac, pa); 3378 trace_ext4_mb_new_inode_pa(ac, pa);
3644 3379
@@ -3698,7 +3433,7 @@ ext4_mb_new_group_pa(struct ext4_allocation_context *ac)
3698 pa->pa_deleted = 0; 3433 pa->pa_deleted = 0;
3699 pa->pa_type = MB_GROUP_PA; 3434 pa->pa_type = MB_GROUP_PA;
3700 3435
3701 mb_debug("new group pa %p: %llu/%u for %u\n", pa, 3436 mb_debug(1, "new group pa %p: %llu/%u for %u\n", pa,
3702 pa->pa_pstart, pa->pa_len, pa->pa_lstart); 3437 pa->pa_pstart, pa->pa_len, pa->pa_lstart);
3703 trace_ext4_mb_new_group_pa(ac, pa); 3438 trace_ext4_mb_new_group_pa(ac, pa);
3704 3439
@@ -3767,7 +3502,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3767 if (ac) { 3502 if (ac) {
3768 ac->ac_sb = sb; 3503 ac->ac_sb = sb;
3769 ac->ac_inode = pa->pa_inode; 3504 ac->ac_inode = pa->pa_inode;
3770 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3771 } 3505 }
3772 3506
3773 while (bit < end) { 3507 while (bit < end) {
@@ -3777,7 +3511,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3777 next = mb_find_next_bit(bitmap_bh->b_data, end, bit); 3511 next = mb_find_next_bit(bitmap_bh->b_data, end, bit);
3778 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit + 3512 start = group * EXT4_BLOCKS_PER_GROUP(sb) + bit +
3779 le32_to_cpu(sbi->s_es->s_first_data_block); 3513 le32_to_cpu(sbi->s_es->s_first_data_block);
3780 mb_debug(" free preallocated %u/%u in group %u\n", 3514 mb_debug(1, " free preallocated %u/%u in group %u\n",
3781 (unsigned) start, (unsigned) next - bit, 3515 (unsigned) start, (unsigned) next - bit,
3782 (unsigned) group); 3516 (unsigned) group);
3783 free += next - bit; 3517 free += next - bit;
@@ -3787,7 +3521,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3787 ac->ac_b_ex.fe_start = bit; 3521 ac->ac_b_ex.fe_start = bit;
3788 ac->ac_b_ex.fe_len = next - bit; 3522 ac->ac_b_ex.fe_len = next - bit;
3789 ac->ac_b_ex.fe_logical = 0; 3523 ac->ac_b_ex.fe_logical = 0;
3790 ext4_mb_store_history(ac); 3524 trace_ext4_mballoc_discard(ac);
3791 } 3525 }
3792 3526
3793 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, 3527 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
@@ -3822,9 +3556,6 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3822 ext4_group_t group; 3556 ext4_group_t group;
3823 ext4_grpblk_t bit; 3557 ext4_grpblk_t bit;
3824 3558
3825 if (ac)
3826 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3827
3828 trace_ext4_mb_release_group_pa(ac, pa); 3559 trace_ext4_mb_release_group_pa(ac, pa);
3829 BUG_ON(pa->pa_deleted == 0); 3560 BUG_ON(pa->pa_deleted == 0);
3830 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3561 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
@@ -3839,7 +3570,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3839 ac->ac_b_ex.fe_start = bit; 3570 ac->ac_b_ex.fe_start = bit;
3840 ac->ac_b_ex.fe_len = pa->pa_len; 3571 ac->ac_b_ex.fe_len = pa->pa_len;
3841 ac->ac_b_ex.fe_logical = 0; 3572 ac->ac_b_ex.fe_logical = 0;
3842 ext4_mb_store_history(ac); 3573 trace_ext4_mballoc_discard(ac);
3843 } 3574 }
3844 3575
3845 return 0; 3576 return 0;
@@ -3868,7 +3599,7 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
3868 int busy = 0; 3599 int busy = 0;
3869 int free = 0; 3600 int free = 0;
3870 3601
3871 mb_debug("discard preallocation for group %u\n", group); 3602 mb_debug(1, "discard preallocation for group %u\n", group);
3872 3603
3873 if (list_empty(&grp->bb_prealloc_list)) 3604 if (list_empty(&grp->bb_prealloc_list))
3874 return 0; 3605 return 0;
@@ -3992,7 +3723,7 @@ void ext4_discard_preallocations(struct inode *inode)
3992 return; 3723 return;
3993 } 3724 }
3994 3725
3995 mb_debug("discard preallocation for inode %lu\n", inode->i_ino); 3726 mb_debug(1, "discard preallocation for inode %lu\n", inode->i_ino);
3996 trace_ext4_discard_preallocations(inode); 3727 trace_ext4_discard_preallocations(inode);
3997 3728
3998 INIT_LIST_HEAD(&list); 3729 INIT_LIST_HEAD(&list);
@@ -4097,7 +3828,7 @@ static void ext4_mb_return_to_preallocation(struct inode *inode,
4097{ 3828{
4098 BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list)); 3829 BUG_ON(!list_empty(&EXT4_I(inode)->i_prealloc_list));
4099} 3830}
4100#ifdef MB_DEBUG 3831#ifdef CONFIG_EXT4_DEBUG
4101static void ext4_mb_show_ac(struct ext4_allocation_context *ac) 3832static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4102{ 3833{
4103 struct super_block *sb = ac->ac_sb; 3834 struct super_block *sb = ac->ac_sb;
@@ -4139,14 +3870,14 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac)
4139 ext4_get_group_no_and_offset(sb, pa->pa_pstart, 3870 ext4_get_group_no_and_offset(sb, pa->pa_pstart,
4140 NULL, &start); 3871 NULL, &start);
4141 spin_unlock(&pa->pa_lock); 3872 spin_unlock(&pa->pa_lock);
4142 printk(KERN_ERR "PA:%lu:%d:%u \n", i, 3873 printk(KERN_ERR "PA:%u:%d:%u \n", i,
4143 start, pa->pa_len); 3874 start, pa->pa_len);
4144 } 3875 }
4145 ext4_unlock_group(sb, i); 3876 ext4_unlock_group(sb, i);
4146 3877
4147 if (grp->bb_free == 0) 3878 if (grp->bb_free == 0)
4148 continue; 3879 continue;
4149 printk(KERN_ERR "%lu: %d/%d \n", 3880 printk(KERN_ERR "%u: %d/%d \n",
4150 i, grp->bb_free, grp->bb_fragments); 3881 i, grp->bb_free, grp->bb_fragments);
4151 } 3882 }
4152 printk(KERN_ERR "\n"); 3883 printk(KERN_ERR "\n");
@@ -4174,16 +3905,26 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4174 if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) 3905 if (!(ac->ac_flags & EXT4_MB_HINT_DATA))
4175 return; 3906 return;
4176 3907
3908 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY))
3909 return;
3910
4177 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len; 3911 size = ac->ac_o_ex.fe_logical + ac->ac_o_ex.fe_len;
4178 isize = i_size_read(ac->ac_inode) >> bsbits; 3912 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4179 size = max(size, isize); 3913 >> bsbits;
4180 3914
4181 /* don't use group allocation for large files */ 3915 if ((size == isize) &&
4182 if (size >= sbi->s_mb_stream_request) 3916 !ext4_fs_is_busy(sbi) &&
3917 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
3918 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4183 return; 3919 return;
3920 }
4184 3921
4185 if (unlikely(ac->ac_flags & EXT4_MB_HINT_GOAL_ONLY)) 3922 /* don't use group allocation for large files */
3923 size = max(size, isize);
3924 if (size >= sbi->s_mb_stream_request) {
3925 ac->ac_flags |= EXT4_MB_STREAM_ALLOC;
4186 return; 3926 return;
3927 }
4187 3928
4188 BUG_ON(ac->ac_lg != NULL); 3929 BUG_ON(ac->ac_lg != NULL);
4189 /* 3930 /*
@@ -4246,7 +3987,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4246 * locality group. this is a policy, actually */ 3987 * locality group. this is a policy, actually */
4247 ext4_mb_group_or_file(ac); 3988 ext4_mb_group_or_file(ac);
4248 3989
4249 mb_debug("init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, " 3990 mb_debug(1, "init ac: %u blocks @ %u, goal %u, flags %x, 2^%d, "
4250 "left: %u/%u, right %u/%u to %swritable\n", 3991 "left: %u/%u, right %u/%u to %swritable\n",
4251 (unsigned) ar->len, (unsigned) ar->logical, 3992 (unsigned) ar->len, (unsigned) ar->logical,
4252 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, 3993 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
@@ -4268,7 +4009,7 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
4268 struct ext4_prealloc_space *pa, *tmp; 4009 struct ext4_prealloc_space *pa, *tmp;
4269 struct ext4_allocation_context *ac; 4010 struct ext4_allocation_context *ac;
4270 4011
4271 mb_debug("discard locality group preallocation\n"); 4012 mb_debug(1, "discard locality group preallocation\n");
4272 4013
4273 INIT_LIST_HEAD(&discard_list); 4014 INIT_LIST_HEAD(&discard_list);
4274 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4015 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
@@ -4720,7 +4461,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4720 4461
4721 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4722 if (ac) { 4463 if (ac) {
4723 ac->ac_op = EXT4_MB_HISTORY_FREE;
4724 ac->ac_inode = inode; 4464 ac->ac_inode = inode;
4725 ac->ac_sb = sb; 4465 ac->ac_sb = sb;
4726 } 4466 }
@@ -4787,7 +4527,7 @@ do_more:
4787 ac->ac_b_ex.fe_group = block_group; 4527 ac->ac_b_ex.fe_group = block_group;
4788 ac->ac_b_ex.fe_start = bit; 4528 ac->ac_b_ex.fe_start = bit;
4789 ac->ac_b_ex.fe_len = count; 4529 ac->ac_b_ex.fe_len = count;
4790 ext4_mb_store_history(ac); 4530 trace_ext4_mballoc_free(ac);
4791 } 4531 }
4792 4532
4793 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4533 err = ext4_mb_load_buddy(sb, block_group, &e4b);