aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTheodore Ts'o <tytso@mit.edu>2009-09-30 00:32:42 -0400
committerTheodore Ts'o <tytso@mit.edu>2009-09-30 00:32:42 -0400
commit296c355cd6443d89fa251885a8d78778fe111dc4 (patch)
tree5cf7c8b115617dc3829a16a5969894d37b73173c
parent90576c0b9a0b5323fc4bd7f23f49be0d234f36d1 (diff)
ext4: Use tracepoints for mb_history trace file
The /proc/fs/ext4/<dev>/mb_history was maintained manually, and had a number of problems: it required a largish amount of memory to be allocated for each ext4 filesystem, and the s_mb_history_lock introduced a CPU contention problem. By ripping out the mb_history code and replacing it with ftrace tracepoints, and we get more functionality: timestamps, event filtering, the ability to correlate mballoc history with other ext4 tracepoints, etc. Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r--Documentation/filesystems/proc.txt1
-rw-r--r--fs/ext4/ext4.h14
-rw-r--r--fs/ext4/mballoc.c301
-rw-r--r--fs/ext4/mballoc.h33
-rw-r--r--fs/ext4/super.c18
-rw-r--r--include/trace/events/ext4.h163
6 files changed, 182 insertions, 348 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index b5aee7838a00..2c48f945546b 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -1113,7 +1113,6 @@ Table 1-12: Files in /proc/fs/ext4/<devname>
1113.............................................................................. 1113..............................................................................
1114 File Content 1114 File Content
1115 mb_groups details of multiblock allocator buddy cache of free blocks 1115 mb_groups details of multiblock allocator buddy cache of free blocks
1116 mb_history multiblock allocation history
1117.............................................................................. 1116..............................................................................
1118 1117
1119 1118
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b491576e11c3..c508cf7be75c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -65,6 +65,12 @@ typedef __u32 ext4_lblk_t;
65/* data type for block group number */ 65/* data type for block group number */
66typedef unsigned int ext4_group_t; 66typedef unsigned int ext4_group_t;
67 67
68/*
69 * Flags used in mballoc's allocation_context flags field.
70 *
71 * Also used to show what's going on for debugging purposes when the
72 * flag field is exported via the traceport interface
73 */
68 74
69/* prefer goal again. length */ 75/* prefer goal again. length */
70#define EXT4_MB_HINT_MERGE 0x0001 76#define EXT4_MB_HINT_MERGE 0x0001
@@ -971,14 +977,6 @@ struct ext4_sb_info {
971 unsigned long s_mb_last_group; 977 unsigned long s_mb_last_group;
972 unsigned long s_mb_last_start; 978 unsigned long s_mb_last_start;
973 979
974 /* history to debug policy */
975 struct ext4_mb_history *s_mb_history;
976 int s_mb_history_cur;
977 int s_mb_history_max;
978 int s_mb_history_num;
979 spinlock_t s_mb_history_lock;
980 int s_mb_history_filter;
981
982 /* stats for buddy allocator */ 980 /* stats for buddy allocator */
983 spinlock_t s_mb_pa_lock; 981 spinlock_t s_mb_pa_lock;
984 atomic_t s_bal_reqs; /* number of reqs with len > 1 */ 982 atomic_t s_bal_reqs; /* number of reqs with len > 1 */
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 3e2320e66721..bba12824defa 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2096,207 +2096,6 @@ out:
2096 return err; 2096 return err;
2097} 2097}
2098 2098
2099#ifdef EXT4_MB_HISTORY
2100struct ext4_mb_proc_session {
2101 struct ext4_mb_history *history;
2102 struct super_block *sb;
2103 int start;
2104 int max;
2105};
2106
2107static void *ext4_mb_history_skip_empty(struct ext4_mb_proc_session *s,
2108 struct ext4_mb_history *hs,
2109 int first)
2110{
2111 if (hs == s->history + s->max)
2112 hs = s->history;
2113 if (!first && hs == s->history + s->start)
2114 return NULL;
2115 while (hs->orig.fe_len == 0) {
2116 hs++;
2117 if (hs == s->history + s->max)
2118 hs = s->history;
2119 if (hs == s->history + s->start)
2120 return NULL;
2121 }
2122 return hs;
2123}
2124
2125static void *ext4_mb_seq_history_start(struct seq_file *seq, loff_t *pos)
2126{
2127 struct ext4_mb_proc_session *s = seq->private;
2128 struct ext4_mb_history *hs;
2129 int l = *pos;
2130
2131 if (l == 0)
2132 return SEQ_START_TOKEN;
2133 hs = ext4_mb_history_skip_empty(s, s->history + s->start, 1);
2134 if (!hs)
2135 return NULL;
2136 while (--l && (hs = ext4_mb_history_skip_empty(s, ++hs, 0)) != NULL);
2137 return hs;
2138}
2139
2140static void *ext4_mb_seq_history_next(struct seq_file *seq, void *v,
2141 loff_t *pos)
2142{
2143 struct ext4_mb_proc_session *s = seq->private;
2144 struct ext4_mb_history *hs = v;
2145
2146 ++*pos;
2147 if (v == SEQ_START_TOKEN)
2148 return ext4_mb_history_skip_empty(s, s->history + s->start, 1);
2149 else
2150 return ext4_mb_history_skip_empty(s, ++hs, 0);
2151}
2152
2153static int ext4_mb_seq_history_show(struct seq_file *seq, void *v)
2154{
2155 char buf[25], buf2[25], buf3[25], *fmt;
2156 struct ext4_mb_history *hs = v;
2157
2158 if (v == SEQ_START_TOKEN) {
2159 seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s "
2160 "%-5s %-2s %-6s %-5s %-5s %-6s\n",
2161 "pid", "inode", "original", "goal", "result", "found",
2162 "grps", "cr", "flags", "merge", "tail", "broken");
2163 return 0;
2164 }
2165
2166 if (hs->op == EXT4_MB_HISTORY_ALLOC) {
2167 fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u "
2168 "0x%04x %-5s %-5u %-6u\n";
2169 sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
2170 hs->result.fe_start, hs->result.fe_len,
2171 hs->result.fe_logical);
2172 sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
2173 hs->orig.fe_start, hs->orig.fe_len,
2174 hs->orig.fe_logical);
2175 sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group,
2176 hs->goal.fe_start, hs->goal.fe_len,
2177 hs->goal.fe_logical);
2178 seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2,
2179 hs->found, hs->groups, hs->cr, hs->flags,
2180 hs->merged ? "M" : "", hs->tail,
2181 hs->buddy ? 1 << hs->buddy : 0);
2182 } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) {
2183 fmt = "%-5u %-8u %-23s %-23s %-23s\n";
2184 sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group,
2185 hs->result.fe_start, hs->result.fe_len,
2186 hs->result.fe_logical);
2187 sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group,
2188 hs->orig.fe_start, hs->orig.fe_len,
2189 hs->orig.fe_logical);
2190 seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2);
2191 } else if (hs->op == EXT4_MB_HISTORY_DISCARD) {
2192 sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
2193 hs->result.fe_start, hs->result.fe_len);
2194 seq_printf(seq, "%-5u %-8u %-23s discard\n",
2195 hs->pid, hs->ino, buf2);
2196 } else if (hs->op == EXT4_MB_HISTORY_FREE) {
2197 sprintf(buf2, "%u/%d/%u", hs->result.fe_group,
2198 hs->result.fe_start, hs->result.fe_len);
2199 seq_printf(seq, "%-5u %-8u %-23s free\n",
2200 hs->pid, hs->ino, buf2);
2201 }
2202 return 0;
2203}
2204
2205static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v)
2206{
2207}
2208
2209static const struct seq_operations ext4_mb_seq_history_ops = {
2210 .start = ext4_mb_seq_history_start,
2211 .next = ext4_mb_seq_history_next,
2212 .stop = ext4_mb_seq_history_stop,
2213 .show = ext4_mb_seq_history_show,
2214};
2215
2216static int ext4_mb_seq_history_open(struct inode *inode, struct file *file)
2217{
2218 struct super_block *sb = PDE(inode)->data;
2219 struct ext4_sb_info *sbi = EXT4_SB(sb);
2220 struct ext4_mb_proc_session *s;
2221 int rc;
2222 int size;
2223
2224 if (unlikely(sbi->s_mb_history == NULL))
2225 return -ENOMEM;
2226 s = kmalloc(sizeof(*s), GFP_KERNEL);
2227 if (s == NULL)
2228 return -ENOMEM;
2229 s->sb = sb;
2230 size = sizeof(struct ext4_mb_history) * sbi->s_mb_history_max;
2231 s->history = kmalloc(size, GFP_KERNEL);
2232 if (s->history == NULL) {
2233 kfree(s);
2234 return -ENOMEM;
2235 }
2236
2237 spin_lock(&sbi->s_mb_history_lock);
2238 memcpy(s->history, sbi->s_mb_history, size);
2239 s->max = sbi->s_mb_history_max;
2240 s->start = sbi->s_mb_history_cur % s->max;
2241 spin_unlock(&sbi->s_mb_history_lock);
2242
2243 rc = seq_open(file, &ext4_mb_seq_history_ops);
2244 if (rc == 0) {
2245 struct seq_file *m = (struct seq_file *)file->private_data;
2246 m->private = s;
2247 } else {
2248 kfree(s->history);
2249 kfree(s);
2250 }
2251 return rc;
2252
2253}
2254
2255static int ext4_mb_seq_history_release(struct inode *inode, struct file *file)
2256{
2257 struct seq_file *seq = (struct seq_file *)file->private_data;
2258 struct ext4_mb_proc_session *s = seq->private;
2259 kfree(s->history);
2260 kfree(s);
2261 return seq_release(inode, file);
2262}
2263
2264static ssize_t ext4_mb_seq_history_write(struct file *file,
2265 const char __user *buffer,
2266 size_t count, loff_t *ppos)
2267{
2268 struct seq_file *seq = (struct seq_file *)file->private_data;
2269 struct ext4_mb_proc_session *s = seq->private;
2270 struct super_block *sb = s->sb;
2271 char str[32];
2272 int value;
2273
2274 if (count >= sizeof(str)) {
2275 printk(KERN_ERR "EXT4-fs: %s string too long, max %u bytes\n",
2276 "mb_history", (int)sizeof(str));
2277 return -EOVERFLOW;
2278 }
2279
2280 if (copy_from_user(str, buffer, count))
2281 return -EFAULT;
2282
2283 value = simple_strtol(str, NULL, 0);
2284 if (value < 0)
2285 return -ERANGE;
2286 EXT4_SB(sb)->s_mb_history_filter = value;
2287
2288 return count;
2289}
2290
2291static const struct file_operations ext4_mb_seq_history_fops = {
2292 .owner = THIS_MODULE,
2293 .open = ext4_mb_seq_history_open,
2294 .read = seq_read,
2295 .write = ext4_mb_seq_history_write,
2296 .llseek = seq_lseek,
2297 .release = ext4_mb_seq_history_release,
2298};
2299
2300static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) 2099static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
2301{ 2100{
2302 struct super_block *sb = seq->private; 2101 struct super_block *sb = seq->private;
@@ -2396,82 +2195,6 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
2396 .release = seq_release, 2195 .release = seq_release,
2397}; 2196};
2398 2197
2399static void ext4_mb_history_release(struct super_block *sb)
2400{
2401 struct ext4_sb_info *sbi = EXT4_SB(sb);
2402
2403 if (sbi->s_proc != NULL) {
2404 remove_proc_entry("mb_groups", sbi->s_proc);
2405 if (sbi->s_mb_history_max)
2406 remove_proc_entry("mb_history", sbi->s_proc);
2407 }
2408 kfree(sbi->s_mb_history);
2409}
2410
2411static void ext4_mb_history_init(struct super_block *sb)
2412{
2413 struct ext4_sb_info *sbi = EXT4_SB(sb);
2414 int i;
2415
2416 if (sbi->s_proc != NULL) {
2417 if (sbi->s_mb_history_max)
2418 proc_create_data("mb_history", S_IRUGO, sbi->s_proc,
2419 &ext4_mb_seq_history_fops, sb);
2420 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2421 &ext4_mb_seq_groups_fops, sb);
2422 }
2423
2424 sbi->s_mb_history_cur = 0;
2425 spin_lock_init(&sbi->s_mb_history_lock);
2426 i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history);
2427 sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL;
2428 /* if we can't allocate history, then we simple won't use it */
2429}
2430
2431static noinline_for_stack void
2432ext4_mb_store_history(struct ext4_allocation_context *ac)
2433{
2434 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
2435 struct ext4_mb_history h;
2436
2437 if (sbi->s_mb_history == NULL)
2438 return;
2439
2440 if (!(ac->ac_op & sbi->s_mb_history_filter))
2441 return;
2442
2443 h.op = ac->ac_op;
2444 h.pid = current->pid;
2445 h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0;
2446 h.orig = ac->ac_o_ex;
2447 h.result = ac->ac_b_ex;
2448 h.flags = ac->ac_flags;
2449 h.found = ac->ac_found;
2450 h.groups = ac->ac_groups_scanned;
2451 h.cr = ac->ac_criteria;
2452 h.tail = ac->ac_tail;
2453 h.buddy = ac->ac_buddy;
2454 h.merged = 0;
2455 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
2456 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
2457 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
2458 h.merged = 1;
2459 h.goal = ac->ac_g_ex;
2460 h.result = ac->ac_f_ex;
2461 }
2462
2463 spin_lock(&sbi->s_mb_history_lock);
2464 memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h));
2465 if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max)
2466 sbi->s_mb_history_cur = 0;
2467 spin_unlock(&sbi->s_mb_history_lock);
2468}
2469
2470#else
2471#define ext4_mb_history_release(sb)
2472#define ext4_mb_history_init(sb)
2473#endif
2474
2475 2198
2476/* Create and initialize ext4_group_info data for the given group. */ 2199/* Create and initialize ext4_group_info data for the given group. */
2477int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2200int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
@@ -2690,7 +2413,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2690 sbi->s_mb_stats = MB_DEFAULT_STATS; 2413 sbi->s_mb_stats = MB_DEFAULT_STATS;
2691 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; 2414 sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD;
2692 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; 2415 sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS;
2693 sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT;
2694 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; 2416 sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC;
2695 2417
2696 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2418 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
@@ -2708,7 +2430,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
2708 spin_lock_init(&lg->lg_prealloc_lock); 2430 spin_lock_init(&lg->lg_prealloc_lock);
2709 } 2431 }
2710 2432
2711 ext4_mb_history_init(sb); 2433 if (sbi->s_proc)
2434 proc_create_data("mb_groups", S_IRUGO, sbi->s_proc,
2435 &ext4_mb_seq_groups_fops, sb);
2712 2436
2713 if (sbi->s_journal) 2437 if (sbi->s_journal)
2714 sbi->s_journal->j_commit_callback = release_blocks_on_commit; 2438 sbi->s_journal->j_commit_callback = release_blocks_on_commit;
@@ -2788,7 +2512,8 @@ int ext4_mb_release(struct super_block *sb)
2788 } 2512 }
2789 2513
2790 free_percpu(sbi->s_locality_groups); 2514 free_percpu(sbi->s_locality_groups);
2791 ext4_mb_history_release(sb); 2515 if (sbi->s_proc)
2516 remove_proc_entry("mb_groups", sbi->s_proc);
2792 2517
2793 return 0; 2518 return 0;
2794} 2519}
@@ -3274,7 +2999,10 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3274 atomic_inc(&sbi->s_bal_breaks); 2999 atomic_inc(&sbi->s_bal_breaks);
3275 } 3000 }
3276 3001
3277 ext4_mb_store_history(ac); 3002 if (ac->ac_op == EXT4_MB_HISTORY_ALLOC)
3003 trace_ext4_mballoc_alloc(ac);
3004 else
3005 trace_ext4_mballoc_prealloc(ac);
3278} 3006}
3279 3007
3280/* 3008/*
@@ -3774,7 +3502,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3774 if (ac) { 3502 if (ac) {
3775 ac->ac_sb = sb; 3503 ac->ac_sb = sb;
3776 ac->ac_inode = pa->pa_inode; 3504 ac->ac_inode = pa->pa_inode;
3777 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3778 } 3505 }
3779 3506
3780 while (bit < end) { 3507 while (bit < end) {
@@ -3794,7 +3521,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
3794 ac->ac_b_ex.fe_start = bit; 3521 ac->ac_b_ex.fe_start = bit;
3795 ac->ac_b_ex.fe_len = next - bit; 3522 ac->ac_b_ex.fe_len = next - bit;
3796 ac->ac_b_ex.fe_logical = 0; 3523 ac->ac_b_ex.fe_logical = 0;
3797 ext4_mb_store_history(ac); 3524 trace_ext4_mballoc_discard(ac);
3798 } 3525 }
3799 3526
3800 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, 3527 trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit,
@@ -3829,9 +3556,6 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3829 ext4_group_t group; 3556 ext4_group_t group;
3830 ext4_grpblk_t bit; 3557 ext4_grpblk_t bit;
3831 3558
3832 if (ac)
3833 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
3834
3835 trace_ext4_mb_release_group_pa(ac, pa); 3559 trace_ext4_mb_release_group_pa(ac, pa);
3836 BUG_ON(pa->pa_deleted == 0); 3560 BUG_ON(pa->pa_deleted == 0);
3837 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3561 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
@@ -3846,7 +3570,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3846 ac->ac_b_ex.fe_start = bit; 3570 ac->ac_b_ex.fe_start = bit;
3847 ac->ac_b_ex.fe_len = pa->pa_len; 3571 ac->ac_b_ex.fe_len = pa->pa_len;
3848 ac->ac_b_ex.fe_logical = 0; 3572 ac->ac_b_ex.fe_logical = 0;
3849 ext4_mb_store_history(ac); 3573 trace_ext4_mballoc_discard(ac);
3850 } 3574 }
3851 3575
3852 return 0; 3576 return 0;
@@ -4737,7 +4461,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4737 4461
4738 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4739 if (ac) { 4463 if (ac) {
4740 ac->ac_op = EXT4_MB_HISTORY_FREE;
4741 ac->ac_inode = inode; 4464 ac->ac_inode = inode;
4742 ac->ac_sb = sb; 4465 ac->ac_sb = sb;
4743 } 4466 }
@@ -4804,7 +4527,7 @@ do_more:
4804 ac->ac_b_ex.fe_group = block_group; 4527 ac->ac_b_ex.fe_group = block_group;
4805 ac->ac_b_ex.fe_start = bit; 4528 ac->ac_b_ex.fe_start = bit;
4806 ac->ac_b_ex.fe_len = count; 4529 ac->ac_b_ex.fe_len = count;
4807 ext4_mb_store_history(ac); 4530 trace_ext4_mballoc_free(ac);
4808 } 4531 }
4809 4532
4810 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4533 err = ext4_mb_load_buddy(sb, block_group, &e4b);
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 14f25f253112..0ca811061bc7 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -52,18 +52,8 @@ extern u8 mb_enable_debug;
52#define mb_debug(n, fmt, a...) 52#define mb_debug(n, fmt, a...)
53#endif 53#endif
54 54
55/*
56 * with EXT4_MB_HISTORY mballoc stores last N allocations in memory
57 * and you can monitor it in /proc/fs/ext4/<dev>/mb_history
58 */
59#define EXT4_MB_HISTORY
60#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ 55#define EXT4_MB_HISTORY_ALLOC 1 /* allocation */
61#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ 56#define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */
62#define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */
63#define EXT4_MB_HISTORY_FREE 8 /* free */
64
65#define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \
66 EXT4_MB_HISTORY_PREALLOC)
67 57
68/* 58/*
69 * How long mballoc can look for a best extent (in found extents) 59 * How long mballoc can look for a best extent (in found extents)
@@ -217,22 +207,6 @@ struct ext4_allocation_context {
217#define AC_STATUS_FOUND 2 207#define AC_STATUS_FOUND 2
218#define AC_STATUS_BREAK 3 208#define AC_STATUS_BREAK 3
219 209
220struct ext4_mb_history {
221 struct ext4_free_extent orig; /* orig allocation */
222 struct ext4_free_extent goal; /* goal allocation */
223 struct ext4_free_extent result; /* result allocation */
224 unsigned pid;
225 unsigned ino;
226 __u16 found; /* how many extents have been found */
227 __u16 groups; /* how many groups have been scanned */
228 __u16 tail; /* what tail broke some buddy */
229 __u16 buddy; /* buddy the tail ^^^ broke */
230 __u16 flags;
231 __u8 cr:3; /* which phase the result extent was found at */
232 __u8 op:4;
233 __u8 merged:1;
234};
235
236struct ext4_buddy { 210struct ext4_buddy {
237 struct page *bd_buddy_page; 211 struct page *bd_buddy_page;
238 void *bd_buddy; 212 void *bd_buddy;
@@ -247,13 +221,6 @@ struct ext4_buddy {
247#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) 221#define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap)
248#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) 222#define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy)
249 223
250#ifndef EXT4_MB_HISTORY
251static inline void ext4_mb_store_history(struct ext4_allocation_context *ac)
252{
253 return;
254}
255#endif
256
257#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) 224#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
258 225
259static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, 226static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb,
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e5b206a043a5..12e726a7073f 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -50,13 +50,6 @@
50#define CREATE_TRACE_POINTS 50#define CREATE_TRACE_POINTS
51#include <trace/events/ext4.h> 51#include <trace/events/ext4.h>
52 52
53static int default_mb_history_length = 1000;
54
55module_param_named(default_mb_history_length, default_mb_history_length,
56 int, 0644);
57MODULE_PARM_DESC(default_mb_history_length,
58 "Default number of entries saved for mb_history");
59
60struct proc_dir_entry *ext4_proc_root; 53struct proc_dir_entry *ext4_proc_root;
61static struct kset *ext4_kset; 54static struct kset *ext4_kset;
62 55
@@ -1079,7 +1072,7 @@ enum {
1079 Opt_journal_update, Opt_journal_dev, 1072 Opt_journal_update, Opt_journal_dev,
1080 Opt_journal_checksum, Opt_journal_async_commit, 1073 Opt_journal_checksum, Opt_journal_async_commit,
1081 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1074 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1082 Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, 1075 Opt_data_err_abort, Opt_data_err_ignore,
1083 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1076 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1084 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1077 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
1085 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1078 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize,
@@ -1126,7 +1119,6 @@ static const match_table_t tokens = {
1126 {Opt_data_writeback, "data=writeback"}, 1119 {Opt_data_writeback, "data=writeback"},
1127 {Opt_data_err_abort, "data_err=abort"}, 1120 {Opt_data_err_abort, "data_err=abort"},
1128 {Opt_data_err_ignore, "data_err=ignore"}, 1121 {Opt_data_err_ignore, "data_err=ignore"},
1129 {Opt_mb_history_length, "mb_history_length=%u"},
1130 {Opt_offusrjquota, "usrjquota="}, 1122 {Opt_offusrjquota, "usrjquota="},
1131 {Opt_usrjquota, "usrjquota=%s"}, 1123 {Opt_usrjquota, "usrjquota=%s"},
1132 {Opt_offgrpjquota, "grpjquota="}, 1124 {Opt_offgrpjquota, "grpjquota="},
@@ -1367,13 +1359,6 @@ static int parse_options(char *options, struct super_block *sb,
1367 case Opt_data_err_ignore: 1359 case Opt_data_err_ignore:
1368 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); 1360 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1369 break; 1361 break;
1370 case Opt_mb_history_length:
1371 if (match_int(&args[0], &option))
1372 return 0;
1373 if (option < 0)
1374 return 0;
1375 sbi->s_mb_history_max = option;
1376 break;
1377#ifdef CONFIG_QUOTA 1362#ifdef CONFIG_QUOTA
1378 case Opt_usrjquota: 1363 case Opt_usrjquota:
1379 qtype = USRQUOTA; 1364 qtype = USRQUOTA;
@@ -2435,7 +2420,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2435 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; 2420 sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2436 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; 2421 sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2437 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; 2422 sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2438 sbi->s_mb_history_max = default_mb_history_length;
2439 2423
2440 set_opt(sbi->s_mount_opt, BARRIER); 2424 set_opt(sbi->s_mount_opt, BARRIER);
2441 2425
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 7c6bbb7198a3..b8320256dc5d 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -743,6 +743,169 @@ TRACE_EVENT(ext4_alloc_da_blocks,
743 __entry->data_blocks, __entry->meta_blocks) 743 __entry->data_blocks, __entry->meta_blocks)
744); 744);
745 745
746TRACE_EVENT(ext4_mballoc_alloc,
747 TP_PROTO(struct ext4_allocation_context *ac),
748
749 TP_ARGS(ac),
750
751 TP_STRUCT__entry(
752 __field( dev_t, dev )
753 __field( ino_t, ino )
754 __field( __u16, found )
755 __field( __u16, groups )
756 __field( __u16, buddy )
757 __field( __u16, flags )
758 __field( __u16, tail )
759 __field( __u8, cr )
760 __field( __u32, orig_logical )
761 __field( int, orig_start )
762 __field( __u32, orig_group )
763 __field( int, orig_len )
764 __field( __u32, goal_logical )
765 __field( int, goal_start )
766 __field( __u32, goal_group )
767 __field( int, goal_len )
768 __field( __u32, result_logical )
769 __field( int, result_start )
770 __field( __u32, result_group )
771 __field( int, result_len )
772 ),
773
774 TP_fast_assign(
775 __entry->dev = ac->ac_inode->i_sb->s_dev;
776 __entry->ino = ac->ac_inode->i_ino;
777 __entry->found = ac->ac_found;
778 __entry->flags = ac->ac_flags;
779 __entry->groups = ac->ac_groups_scanned;
780 __entry->buddy = ac->ac_buddy;
781 __entry->tail = ac->ac_tail;
782 __entry->cr = ac->ac_criteria;
783 __entry->orig_logical = ac->ac_o_ex.fe_logical;
784 __entry->orig_start = ac->ac_o_ex.fe_start;
785 __entry->orig_group = ac->ac_o_ex.fe_group;
786 __entry->orig_len = ac->ac_o_ex.fe_len;
787 __entry->goal_logical = ac->ac_g_ex.fe_logical;
788 __entry->goal_start = ac->ac_g_ex.fe_start;
789 __entry->goal_group = ac->ac_g_ex.fe_group;
790 __entry->goal_len = ac->ac_g_ex.fe_len;
791 __entry->result_logical = ac->ac_f_ex.fe_logical;
792 __entry->result_start = ac->ac_f_ex.fe_start;
793 __entry->result_group = ac->ac_f_ex.fe_group;
794 __entry->result_len = ac->ac_f_ex.fe_len;
795 ),
796
797 TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
798 "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x "
799 "tail %u broken %u",
800 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
801 __entry->orig_group, __entry->orig_start,
802 __entry->orig_len, __entry->orig_logical,
803 __entry->goal_group, __entry->goal_start,
804 __entry->goal_len, __entry->goal_logical,
805 __entry->result_group, __entry->result_start,
806 __entry->result_len, __entry->result_logical,
807 __entry->found, __entry->groups, __entry->cr,
808 __entry->flags, __entry->tail,
809 __entry->buddy ? 1 << __entry->buddy : 0)
810);
811
812TRACE_EVENT(ext4_mballoc_prealloc,
813 TP_PROTO(struct ext4_allocation_context *ac),
814
815 TP_ARGS(ac),
816
817 TP_STRUCT__entry(
818 __field( dev_t, dev )
819 __field( ino_t, ino )
820 __field( __u32, orig_logical )
821 __field( int, orig_start )
822 __field( __u32, orig_group )
823 __field( int, orig_len )
824 __field( __u32, result_logical )
825 __field( int, result_start )
826 __field( __u32, result_group )
827 __field( int, result_len )
828 ),
829
830 TP_fast_assign(
831 __entry->dev = ac->ac_inode->i_sb->s_dev;
832 __entry->ino = ac->ac_inode->i_ino;
833 __entry->orig_logical = ac->ac_o_ex.fe_logical;
834 __entry->orig_start = ac->ac_o_ex.fe_start;
835 __entry->orig_group = ac->ac_o_ex.fe_group;
836 __entry->orig_len = ac->ac_o_ex.fe_len;
837 __entry->result_logical = ac->ac_b_ex.fe_logical;
838 __entry->result_start = ac->ac_b_ex.fe_start;
839 __entry->result_group = ac->ac_b_ex.fe_group;
840 __entry->result_len = ac->ac_b_ex.fe_len;
841 ),
842
843 TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
844 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
845 __entry->orig_group, __entry->orig_start,
846 __entry->orig_len, __entry->orig_logical,
847 __entry->result_group, __entry->result_start,
848 __entry->result_len, __entry->result_logical)
849);
850
851TRACE_EVENT(ext4_mballoc_discard,
852 TP_PROTO(struct ext4_allocation_context *ac),
853
854 TP_ARGS(ac),
855
856 TP_STRUCT__entry(
857 __field( dev_t, dev )
858 __field( ino_t, ino )
859 __field( __u32, result_logical )
860 __field( int, result_start )
861 __field( __u32, result_group )
862 __field( int, result_len )
863 ),
864
865 TP_fast_assign(
866 __entry->dev = ac->ac_inode->i_sb->s_dev;
867 __entry->ino = ac->ac_inode->i_ino;
868 __entry->result_logical = ac->ac_b_ex.fe_logical;
869 __entry->result_start = ac->ac_b_ex.fe_start;
870 __entry->result_group = ac->ac_b_ex.fe_group;
871 __entry->result_len = ac->ac_b_ex.fe_len;
872 ),
873
874 TP_printk("dev %s inode %lu extent %u/%d/%u@%u ",
875 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
876 __entry->result_group, __entry->result_start,
877 __entry->result_len, __entry->result_logical)
878);
879
880TRACE_EVENT(ext4_mballoc_free,
881 TP_PROTO(struct ext4_allocation_context *ac),
882
883 TP_ARGS(ac),
884
885 TP_STRUCT__entry(
886 __field( dev_t, dev )
887 __field( ino_t, ino )
888 __field( __u32, result_logical )
889 __field( int, result_start )
890 __field( __u32, result_group )
891 __field( int, result_len )
892 ),
893
894 TP_fast_assign(
895 __entry->dev = ac->ac_inode->i_sb->s_dev;
896 __entry->ino = ac->ac_inode->i_ino;
897 __entry->result_logical = ac->ac_b_ex.fe_logical;
898 __entry->result_start = ac->ac_b_ex.fe_start;
899 __entry->result_group = ac->ac_b_ex.fe_group;
900 __entry->result_len = ac->ac_b_ex.fe_len;
901 ),
902
903 TP_printk("dev %s inode %lu extent %u/%d/%u@%u ",
904 jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
905 __entry->result_group, __entry->result_start,
906 __entry->result_len, __entry->result_logical)
907);
908
746#endif /* _TRACE_EXT4_H */ 909#endif /* _TRACE_EXT4_H */
747 910
748/* This part must be outside protection */ 911/* This part must be outside protection */