diff options
author | Theodore Ts'o <tytso@mit.edu> | 2009-09-30 00:32:42 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2009-09-30 00:32:42 -0400 |
commit | 296c355cd6443d89fa251885a8d78778fe111dc4 (patch) | |
tree | 5cf7c8b115617dc3829a16a5969894d37b73173c | |
parent | 90576c0b9a0b5323fc4bd7f23f49be0d234f36d1 (diff) |
ext4: Use tracepoints for mb_history trace file
The /proc/fs/ext4/<dev>/mb_history was maintained manually, and had a
number of problems: it required a largish amount of memory to be
allocated for each ext4 filesystem, and the s_mb_history_lock
introduced a CPU contention problem.
By ripping out the mb_history code and replacing it with ftrace
tracepoints, we get more functionality: timestamps, event
filtering, the ability to correlate mballoc history with other ext4
tracepoints, etc.
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | Documentation/filesystems/proc.txt | 1 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 14 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 301 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 33 | ||||
-rw-r--r-- | fs/ext4/super.c | 18 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 163 |
6 files changed, 182 insertions, 348 deletions
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index b5aee7838a00..2c48f945546b 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt | |||
@@ -1113,7 +1113,6 @@ Table 1-12: Files in /proc/fs/ext4/<devname> | |||
1113 | .............................................................................. | 1113 | .............................................................................. |
1114 | File Content | 1114 | File Content |
1115 | mb_groups details of multiblock allocator buddy cache of free blocks | 1115 | mb_groups details of multiblock allocator buddy cache of free blocks |
1116 | mb_history multiblock allocation history | ||
1117 | .............................................................................. | 1116 | .............................................................................. |
1118 | 1117 | ||
1119 | 1118 | ||
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b491576e11c3..c508cf7be75c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -65,6 +65,12 @@ typedef __u32 ext4_lblk_t; | |||
65 | /* data type for block group number */ | 65 | /* data type for block group number */ |
66 | typedef unsigned int ext4_group_t; | 66 | typedef unsigned int ext4_group_t; |
67 | 67 | ||
68 | /* | ||
69 | * Flags used in mballoc's allocation_context flags field. | ||
70 | * | ||
71 | * Also used to show what's going on for debugging purposes when the | ||
72 | * flag field is exported via the tracepoint interface | |
73 | */ | ||
68 | 74 | ||
69 | /* prefer goal again. length */ | 75 | /* prefer goal again. length */ |
70 | #define EXT4_MB_HINT_MERGE 0x0001 | 76 | #define EXT4_MB_HINT_MERGE 0x0001 |
@@ -971,14 +977,6 @@ struct ext4_sb_info { | |||
971 | unsigned long s_mb_last_group; | 977 | unsigned long s_mb_last_group; |
972 | unsigned long s_mb_last_start; | 978 | unsigned long s_mb_last_start; |
973 | 979 | ||
974 | /* history to debug policy */ | ||
975 | struct ext4_mb_history *s_mb_history; | ||
976 | int s_mb_history_cur; | ||
977 | int s_mb_history_max; | ||
978 | int s_mb_history_num; | ||
979 | spinlock_t s_mb_history_lock; | ||
980 | int s_mb_history_filter; | ||
981 | |||
982 | /* stats for buddy allocator */ | 980 | /* stats for buddy allocator */ |
983 | spinlock_t s_mb_pa_lock; | 981 | spinlock_t s_mb_pa_lock; |
984 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ | 982 | atomic_t s_bal_reqs; /* number of reqs with len > 1 */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 3e2320e66721..bba12824defa 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2096,207 +2096,6 @@ out: | |||
2096 | return err; | 2096 | return err; |
2097 | } | 2097 | } |
2098 | 2098 | ||
2099 | #ifdef EXT4_MB_HISTORY | ||
2100 | struct ext4_mb_proc_session { | ||
2101 | struct ext4_mb_history *history; | ||
2102 | struct super_block *sb; | ||
2103 | int start; | ||
2104 | int max; | ||
2105 | }; | ||
2106 | |||
2107 | static void *ext4_mb_history_skip_empty(struct ext4_mb_proc_session *s, | ||
2108 | struct ext4_mb_history *hs, | ||
2109 | int first) | ||
2110 | { | ||
2111 | if (hs == s->history + s->max) | ||
2112 | hs = s->history; | ||
2113 | if (!first && hs == s->history + s->start) | ||
2114 | return NULL; | ||
2115 | while (hs->orig.fe_len == 0) { | ||
2116 | hs++; | ||
2117 | if (hs == s->history + s->max) | ||
2118 | hs = s->history; | ||
2119 | if (hs == s->history + s->start) | ||
2120 | return NULL; | ||
2121 | } | ||
2122 | return hs; | ||
2123 | } | ||
2124 | |||
2125 | static void *ext4_mb_seq_history_start(struct seq_file *seq, loff_t *pos) | ||
2126 | { | ||
2127 | struct ext4_mb_proc_session *s = seq->private; | ||
2128 | struct ext4_mb_history *hs; | ||
2129 | int l = *pos; | ||
2130 | |||
2131 | if (l == 0) | ||
2132 | return SEQ_START_TOKEN; | ||
2133 | hs = ext4_mb_history_skip_empty(s, s->history + s->start, 1); | ||
2134 | if (!hs) | ||
2135 | return NULL; | ||
2136 | while (--l && (hs = ext4_mb_history_skip_empty(s, ++hs, 0)) != NULL); | ||
2137 | return hs; | ||
2138 | } | ||
2139 | |||
2140 | static void *ext4_mb_seq_history_next(struct seq_file *seq, void *v, | ||
2141 | loff_t *pos) | ||
2142 | { | ||
2143 | struct ext4_mb_proc_session *s = seq->private; | ||
2144 | struct ext4_mb_history *hs = v; | ||
2145 | |||
2146 | ++*pos; | ||
2147 | if (v == SEQ_START_TOKEN) | ||
2148 | return ext4_mb_history_skip_empty(s, s->history + s->start, 1); | ||
2149 | else | ||
2150 | return ext4_mb_history_skip_empty(s, ++hs, 0); | ||
2151 | } | ||
2152 | |||
2153 | static int ext4_mb_seq_history_show(struct seq_file *seq, void *v) | ||
2154 | { | ||
2155 | char buf[25], buf2[25], buf3[25], *fmt; | ||
2156 | struct ext4_mb_history *hs = v; | ||
2157 | |||
2158 | if (v == SEQ_START_TOKEN) { | ||
2159 | seq_printf(seq, "%-5s %-8s %-23s %-23s %-23s %-5s " | ||
2160 | "%-5s %-2s %-6s %-5s %-5s %-6s\n", | ||
2161 | "pid", "inode", "original", "goal", "result", "found", | ||
2162 | "grps", "cr", "flags", "merge", "tail", "broken"); | ||
2163 | return 0; | ||
2164 | } | ||
2165 | |||
2166 | if (hs->op == EXT4_MB_HISTORY_ALLOC) { | ||
2167 | fmt = "%-5u %-8u %-23s %-23s %-23s %-5u %-5u %-2u " | ||
2168 | "0x%04x %-5s %-5u %-6u\n"; | ||
2169 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, | ||
2170 | hs->result.fe_start, hs->result.fe_len, | ||
2171 | hs->result.fe_logical); | ||
2172 | sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, | ||
2173 | hs->orig.fe_start, hs->orig.fe_len, | ||
2174 | hs->orig.fe_logical); | ||
2175 | sprintf(buf3, "%u/%d/%u@%u", hs->goal.fe_group, | ||
2176 | hs->goal.fe_start, hs->goal.fe_len, | ||
2177 | hs->goal.fe_logical); | ||
2178 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, buf3, buf2, | ||
2179 | hs->found, hs->groups, hs->cr, hs->flags, | ||
2180 | hs->merged ? "M" : "", hs->tail, | ||
2181 | hs->buddy ? 1 << hs->buddy : 0); | ||
2182 | } else if (hs->op == EXT4_MB_HISTORY_PREALLOC) { | ||
2183 | fmt = "%-5u %-8u %-23s %-23s %-23s\n"; | ||
2184 | sprintf(buf2, "%u/%d/%u@%u", hs->result.fe_group, | ||
2185 | hs->result.fe_start, hs->result.fe_len, | ||
2186 | hs->result.fe_logical); | ||
2187 | sprintf(buf, "%u/%d/%u@%u", hs->orig.fe_group, | ||
2188 | hs->orig.fe_start, hs->orig.fe_len, | ||
2189 | hs->orig.fe_logical); | ||
2190 | seq_printf(seq, fmt, hs->pid, hs->ino, buf, "", buf2); | ||
2191 | } else if (hs->op == EXT4_MB_HISTORY_DISCARD) { | ||
2192 | sprintf(buf2, "%u/%d/%u", hs->result.fe_group, | ||
2193 | hs->result.fe_start, hs->result.fe_len); | ||
2194 | seq_printf(seq, "%-5u %-8u %-23s discard\n", | ||
2195 | hs->pid, hs->ino, buf2); | ||
2196 | } else if (hs->op == EXT4_MB_HISTORY_FREE) { | ||
2197 | sprintf(buf2, "%u/%d/%u", hs->result.fe_group, | ||
2198 | hs->result.fe_start, hs->result.fe_len); | ||
2199 | seq_printf(seq, "%-5u %-8u %-23s free\n", | ||
2200 | hs->pid, hs->ino, buf2); | ||
2201 | } | ||
2202 | return 0; | ||
2203 | } | ||
2204 | |||
2205 | static void ext4_mb_seq_history_stop(struct seq_file *seq, void *v) | ||
2206 | { | ||
2207 | } | ||
2208 | |||
2209 | static const struct seq_operations ext4_mb_seq_history_ops = { | ||
2210 | .start = ext4_mb_seq_history_start, | ||
2211 | .next = ext4_mb_seq_history_next, | ||
2212 | .stop = ext4_mb_seq_history_stop, | ||
2213 | .show = ext4_mb_seq_history_show, | ||
2214 | }; | ||
2215 | |||
2216 | static int ext4_mb_seq_history_open(struct inode *inode, struct file *file) | ||
2217 | { | ||
2218 | struct super_block *sb = PDE(inode)->data; | ||
2219 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2220 | struct ext4_mb_proc_session *s; | ||
2221 | int rc; | ||
2222 | int size; | ||
2223 | |||
2224 | if (unlikely(sbi->s_mb_history == NULL)) | ||
2225 | return -ENOMEM; | ||
2226 | s = kmalloc(sizeof(*s), GFP_KERNEL); | ||
2227 | if (s == NULL) | ||
2228 | return -ENOMEM; | ||
2229 | s->sb = sb; | ||
2230 | size = sizeof(struct ext4_mb_history) * sbi->s_mb_history_max; | ||
2231 | s->history = kmalloc(size, GFP_KERNEL); | ||
2232 | if (s->history == NULL) { | ||
2233 | kfree(s); | ||
2234 | return -ENOMEM; | ||
2235 | } | ||
2236 | |||
2237 | spin_lock(&sbi->s_mb_history_lock); | ||
2238 | memcpy(s->history, sbi->s_mb_history, size); | ||
2239 | s->max = sbi->s_mb_history_max; | ||
2240 | s->start = sbi->s_mb_history_cur % s->max; | ||
2241 | spin_unlock(&sbi->s_mb_history_lock); | ||
2242 | |||
2243 | rc = seq_open(file, &ext4_mb_seq_history_ops); | ||
2244 | if (rc == 0) { | ||
2245 | struct seq_file *m = (struct seq_file *)file->private_data; | ||
2246 | m->private = s; | ||
2247 | } else { | ||
2248 | kfree(s->history); | ||
2249 | kfree(s); | ||
2250 | } | ||
2251 | return rc; | ||
2252 | |||
2253 | } | ||
2254 | |||
2255 | static int ext4_mb_seq_history_release(struct inode *inode, struct file *file) | ||
2256 | { | ||
2257 | struct seq_file *seq = (struct seq_file *)file->private_data; | ||
2258 | struct ext4_mb_proc_session *s = seq->private; | ||
2259 | kfree(s->history); | ||
2260 | kfree(s); | ||
2261 | return seq_release(inode, file); | ||
2262 | } | ||
2263 | |||
2264 | static ssize_t ext4_mb_seq_history_write(struct file *file, | ||
2265 | const char __user *buffer, | ||
2266 | size_t count, loff_t *ppos) | ||
2267 | { | ||
2268 | struct seq_file *seq = (struct seq_file *)file->private_data; | ||
2269 | struct ext4_mb_proc_session *s = seq->private; | ||
2270 | struct super_block *sb = s->sb; | ||
2271 | char str[32]; | ||
2272 | int value; | ||
2273 | |||
2274 | if (count >= sizeof(str)) { | ||
2275 | printk(KERN_ERR "EXT4-fs: %s string too long, max %u bytes\n", | ||
2276 | "mb_history", (int)sizeof(str)); | ||
2277 | return -EOVERFLOW; | ||
2278 | } | ||
2279 | |||
2280 | if (copy_from_user(str, buffer, count)) | ||
2281 | return -EFAULT; | ||
2282 | |||
2283 | value = simple_strtol(str, NULL, 0); | ||
2284 | if (value < 0) | ||
2285 | return -ERANGE; | ||
2286 | EXT4_SB(sb)->s_mb_history_filter = value; | ||
2287 | |||
2288 | return count; | ||
2289 | } | ||
2290 | |||
2291 | static const struct file_operations ext4_mb_seq_history_fops = { | ||
2292 | .owner = THIS_MODULE, | ||
2293 | .open = ext4_mb_seq_history_open, | ||
2294 | .read = seq_read, | ||
2295 | .write = ext4_mb_seq_history_write, | ||
2296 | .llseek = seq_lseek, | ||
2297 | .release = ext4_mb_seq_history_release, | ||
2298 | }; | ||
2299 | |||
2300 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) | 2099 | static void *ext4_mb_seq_groups_start(struct seq_file *seq, loff_t *pos) |
2301 | { | 2100 | { |
2302 | struct super_block *sb = seq->private; | 2101 | struct super_block *sb = seq->private; |
@@ -2396,82 +2195,6 @@ static const struct file_operations ext4_mb_seq_groups_fops = { | |||
2396 | .release = seq_release, | 2195 | .release = seq_release, |
2397 | }; | 2196 | }; |
2398 | 2197 | ||
2399 | static void ext4_mb_history_release(struct super_block *sb) | ||
2400 | { | ||
2401 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2402 | |||
2403 | if (sbi->s_proc != NULL) { | ||
2404 | remove_proc_entry("mb_groups", sbi->s_proc); | ||
2405 | if (sbi->s_mb_history_max) | ||
2406 | remove_proc_entry("mb_history", sbi->s_proc); | ||
2407 | } | ||
2408 | kfree(sbi->s_mb_history); | ||
2409 | } | ||
2410 | |||
2411 | static void ext4_mb_history_init(struct super_block *sb) | ||
2412 | { | ||
2413 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
2414 | int i; | ||
2415 | |||
2416 | if (sbi->s_proc != NULL) { | ||
2417 | if (sbi->s_mb_history_max) | ||
2418 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, | ||
2419 | &ext4_mb_seq_history_fops, sb); | ||
2420 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | ||
2421 | &ext4_mb_seq_groups_fops, sb); | ||
2422 | } | ||
2423 | |||
2424 | sbi->s_mb_history_cur = 0; | ||
2425 | spin_lock_init(&sbi->s_mb_history_lock); | ||
2426 | i = sbi->s_mb_history_max * sizeof(struct ext4_mb_history); | ||
2427 | sbi->s_mb_history = i ? kzalloc(i, GFP_KERNEL) : NULL; | ||
2428 | /* if we can't allocate history, then we simply won't use it */ | |
2429 | } | ||
2430 | |||
2431 | static noinline_for_stack void | ||
2432 | ext4_mb_store_history(struct ext4_allocation_context *ac) | ||
2433 | { | ||
2434 | struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); | ||
2435 | struct ext4_mb_history h; | ||
2436 | |||
2437 | if (sbi->s_mb_history == NULL) | ||
2438 | return; | ||
2439 | |||
2440 | if (!(ac->ac_op & sbi->s_mb_history_filter)) | ||
2441 | return; | ||
2442 | |||
2443 | h.op = ac->ac_op; | ||
2444 | h.pid = current->pid; | ||
2445 | h.ino = ac->ac_inode ? ac->ac_inode->i_ino : 0; | ||
2446 | h.orig = ac->ac_o_ex; | ||
2447 | h.result = ac->ac_b_ex; | ||
2448 | h.flags = ac->ac_flags; | ||
2449 | h.found = ac->ac_found; | ||
2450 | h.groups = ac->ac_groups_scanned; | ||
2451 | h.cr = ac->ac_criteria; | ||
2452 | h.tail = ac->ac_tail; | ||
2453 | h.buddy = ac->ac_buddy; | ||
2454 | h.merged = 0; | ||
2455 | if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) { | ||
2456 | if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && | ||
2457 | ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) | ||
2458 | h.merged = 1; | ||
2459 | h.goal = ac->ac_g_ex; | ||
2460 | h.result = ac->ac_f_ex; | ||
2461 | } | ||
2462 | |||
2463 | spin_lock(&sbi->s_mb_history_lock); | ||
2464 | memcpy(sbi->s_mb_history + sbi->s_mb_history_cur, &h, sizeof(h)); | ||
2465 | if (++sbi->s_mb_history_cur >= sbi->s_mb_history_max) | ||
2466 | sbi->s_mb_history_cur = 0; | ||
2467 | spin_unlock(&sbi->s_mb_history_lock); | ||
2468 | } | ||
2469 | |||
2470 | #else | ||
2471 | #define ext4_mb_history_release(sb) | ||
2472 | #define ext4_mb_history_init(sb) | ||
2473 | #endif | ||
2474 | |||
2475 | 2198 | ||
2476 | /* Create and initialize ext4_group_info data for the given group. */ | 2199 | /* Create and initialize ext4_group_info data for the given group. */ |
2477 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | 2200 | int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, |
@@ -2690,7 +2413,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2690 | sbi->s_mb_stats = MB_DEFAULT_STATS; | 2413 | sbi->s_mb_stats = MB_DEFAULT_STATS; |
2691 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; | 2414 | sbi->s_mb_stream_request = MB_DEFAULT_STREAM_THRESHOLD; |
2692 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; | 2415 | sbi->s_mb_order2_reqs = MB_DEFAULT_ORDER2_REQS; |
2693 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | ||
2694 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2416 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
2695 | 2417 | ||
2696 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); | 2418 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
@@ -2708,7 +2430,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2708 | spin_lock_init(&lg->lg_prealloc_lock); | 2430 | spin_lock_init(&lg->lg_prealloc_lock); |
2709 | } | 2431 | } |
2710 | 2432 | ||
2711 | ext4_mb_history_init(sb); | 2433 | if (sbi->s_proc) |
2434 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, | ||
2435 | &ext4_mb_seq_groups_fops, sb); | ||
2712 | 2436 | ||
2713 | if (sbi->s_journal) | 2437 | if (sbi->s_journal) |
2714 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | 2438 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
@@ -2788,7 +2512,8 @@ int ext4_mb_release(struct super_block *sb) | |||
2788 | } | 2512 | } |
2789 | 2513 | ||
2790 | free_percpu(sbi->s_locality_groups); | 2514 | free_percpu(sbi->s_locality_groups); |
2791 | ext4_mb_history_release(sb); | 2515 | if (sbi->s_proc) |
2516 | remove_proc_entry("mb_groups", sbi->s_proc); | ||
2792 | 2517 | ||
2793 | return 0; | 2518 | return 0; |
2794 | } | 2519 | } |
@@ -3274,7 +2999,10 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac) | |||
3274 | atomic_inc(&sbi->s_bal_breaks); | 2999 | atomic_inc(&sbi->s_bal_breaks); |
3275 | } | 3000 | } |
3276 | 3001 | ||
3277 | ext4_mb_store_history(ac); | 3002 | if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) |
3003 | trace_ext4_mballoc_alloc(ac); | ||
3004 | else | ||
3005 | trace_ext4_mballoc_prealloc(ac); | ||
3278 | } | 3006 | } |
3279 | 3007 | ||
3280 | /* | 3008 | /* |
@@ -3774,7 +3502,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3774 | if (ac) { | 3502 | if (ac) { |
3775 | ac->ac_sb = sb; | 3503 | ac->ac_sb = sb; |
3776 | ac->ac_inode = pa->pa_inode; | 3504 | ac->ac_inode = pa->pa_inode; |
3777 | ac->ac_op = EXT4_MB_HISTORY_DISCARD; | ||
3778 | } | 3505 | } |
3779 | 3506 | ||
3780 | while (bit < end) { | 3507 | while (bit < end) { |
@@ -3794,7 +3521,7 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh, | |||
3794 | ac->ac_b_ex.fe_start = bit; | 3521 | ac->ac_b_ex.fe_start = bit; |
3795 | ac->ac_b_ex.fe_len = next - bit; | 3522 | ac->ac_b_ex.fe_len = next - bit; |
3796 | ac->ac_b_ex.fe_logical = 0; | 3523 | ac->ac_b_ex.fe_logical = 0; |
3797 | ext4_mb_store_history(ac); | 3524 | trace_ext4_mballoc_discard(ac); |
3798 | } | 3525 | } |
3799 | 3526 | ||
3800 | trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, | 3527 | trace_ext4_mb_release_inode_pa(ac, pa, grp_blk_start + bit, |
@@ -3829,9 +3556,6 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3829 | ext4_group_t group; | 3556 | ext4_group_t group; |
3830 | ext4_grpblk_t bit; | 3557 | ext4_grpblk_t bit; |
3831 | 3558 | ||
3832 | if (ac) | ||
3833 | ac->ac_op = EXT4_MB_HISTORY_DISCARD; | ||
3834 | |||
3835 | trace_ext4_mb_release_group_pa(ac, pa); | 3559 | trace_ext4_mb_release_group_pa(ac, pa); |
3836 | BUG_ON(pa->pa_deleted == 0); | 3560 | BUG_ON(pa->pa_deleted == 0); |
3837 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3561 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
@@ -3846,7 +3570,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3846 | ac->ac_b_ex.fe_start = bit; | 3570 | ac->ac_b_ex.fe_start = bit; |
3847 | ac->ac_b_ex.fe_len = pa->pa_len; | 3571 | ac->ac_b_ex.fe_len = pa->pa_len; |
3848 | ac->ac_b_ex.fe_logical = 0; | 3572 | ac->ac_b_ex.fe_logical = 0; |
3849 | ext4_mb_store_history(ac); | 3573 | trace_ext4_mballoc_discard(ac); |
3850 | } | 3574 | } |
3851 | 3575 | ||
3852 | return 0; | 3576 | return 0; |
@@ -4737,7 +4461,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4737 | 4461 | ||
4738 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); | 4462 | ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); |
4739 | if (ac) { | 4463 | if (ac) { |
4740 | ac->ac_op = EXT4_MB_HISTORY_FREE; | ||
4741 | ac->ac_inode = inode; | 4464 | ac->ac_inode = inode; |
4742 | ac->ac_sb = sb; | 4465 | ac->ac_sb = sb; |
4743 | } | 4466 | } |
@@ -4804,7 +4527,7 @@ do_more: | |||
4804 | ac->ac_b_ex.fe_group = block_group; | 4527 | ac->ac_b_ex.fe_group = block_group; |
4805 | ac->ac_b_ex.fe_start = bit; | 4528 | ac->ac_b_ex.fe_start = bit; |
4806 | ac->ac_b_ex.fe_len = count; | 4529 | ac->ac_b_ex.fe_len = count; |
4807 | ext4_mb_store_history(ac); | 4530 | trace_ext4_mballoc_free(ac); |
4808 | } | 4531 | } |
4809 | 4532 | ||
4810 | err = ext4_mb_load_buddy(sb, block_group, &e4b); | 4533 | err = ext4_mb_load_buddy(sb, block_group, &e4b); |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 14f25f253112..0ca811061bc7 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -52,18 +52,8 @@ extern u8 mb_enable_debug; | |||
52 | #define mb_debug(n, fmt, a...) | 52 | #define mb_debug(n, fmt, a...) |
53 | #endif | 53 | #endif |
54 | 54 | ||
55 | /* | ||
56 | * with EXT4_MB_HISTORY mballoc stores last N allocations in memory | ||
57 | * and you can monitor it in /proc/fs/ext4/<dev>/mb_history | ||
58 | */ | ||
59 | #define EXT4_MB_HISTORY | ||
60 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ | 55 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ |
61 | #define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ | 56 | #define EXT4_MB_HISTORY_PREALLOC 2 /* preallocated blocks used */ |
62 | #define EXT4_MB_HISTORY_DISCARD 4 /* preallocation discarded */ | ||
63 | #define EXT4_MB_HISTORY_FREE 8 /* free */ | ||
64 | |||
65 | #define EXT4_MB_HISTORY_DEFAULT (EXT4_MB_HISTORY_ALLOC | \ | ||
66 | EXT4_MB_HISTORY_PREALLOC) | ||
67 | 57 | ||
68 | /* | 58 | /* |
69 | * How long mballoc can look for a best extent (in found extents) | 59 | * How long mballoc can look for a best extent (in found extents) |
@@ -217,22 +207,6 @@ struct ext4_allocation_context { | |||
217 | #define AC_STATUS_FOUND 2 | 207 | #define AC_STATUS_FOUND 2 |
218 | #define AC_STATUS_BREAK 3 | 208 | #define AC_STATUS_BREAK 3 |
219 | 209 | ||
220 | struct ext4_mb_history { | ||
221 | struct ext4_free_extent orig; /* orig allocation */ | ||
222 | struct ext4_free_extent goal; /* goal allocation */ | ||
223 | struct ext4_free_extent result; /* result allocation */ | ||
224 | unsigned pid; | ||
225 | unsigned ino; | ||
226 | __u16 found; /* how many extents have been found */ | ||
227 | __u16 groups; /* how many groups have been scanned */ | ||
228 | __u16 tail; /* what tail broke some buddy */ | ||
229 | __u16 buddy; /* buddy the tail ^^^ broke */ | ||
230 | __u16 flags; | ||
231 | __u8 cr:3; /* which phase the result extent was found at */ | ||
232 | __u8 op:4; | ||
233 | __u8 merged:1; | ||
234 | }; | ||
235 | |||
236 | struct ext4_buddy { | 210 | struct ext4_buddy { |
237 | struct page *bd_buddy_page; | 211 | struct page *bd_buddy_page; |
238 | void *bd_buddy; | 212 | void *bd_buddy; |
@@ -247,13 +221,6 @@ struct ext4_buddy { | |||
247 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) | 221 | #define EXT4_MB_BITMAP(e4b) ((e4b)->bd_bitmap) |
248 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) | 222 | #define EXT4_MB_BUDDY(e4b) ((e4b)->bd_buddy) |
249 | 223 | ||
250 | #ifndef EXT4_MB_HISTORY | ||
251 | static inline void ext4_mb_store_history(struct ext4_allocation_context *ac) | ||
252 | { | ||
253 | return; | ||
254 | } | ||
255 | #endif | ||
256 | |||
257 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 224 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
258 | 225 | ||
259 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, | 226 | static inline ext4_fsblk_t ext4_grp_offs_to_block(struct super_block *sb, |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e5b206a043a5..12e726a7073f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -50,13 +50,6 @@ | |||
50 | #define CREATE_TRACE_POINTS | 50 | #define CREATE_TRACE_POINTS |
51 | #include <trace/events/ext4.h> | 51 | #include <trace/events/ext4.h> |
52 | 52 | ||
53 | static int default_mb_history_length = 1000; | ||
54 | |||
55 | module_param_named(default_mb_history_length, default_mb_history_length, | ||
56 | int, 0644); | ||
57 | MODULE_PARM_DESC(default_mb_history_length, | ||
58 | "Default number of entries saved for mb_history"); | ||
59 | |||
60 | struct proc_dir_entry *ext4_proc_root; | 53 | struct proc_dir_entry *ext4_proc_root; |
61 | static struct kset *ext4_kset; | 54 | static struct kset *ext4_kset; |
62 | 55 | ||
@@ -1079,7 +1072,7 @@ enum { | |||
1079 | Opt_journal_update, Opt_journal_dev, | 1072 | Opt_journal_update, Opt_journal_dev, |
1080 | Opt_journal_checksum, Opt_journal_async_commit, | 1073 | Opt_journal_checksum, Opt_journal_async_commit, |
1081 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 1074 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
1082 | Opt_data_err_abort, Opt_data_err_ignore, Opt_mb_history_length, | 1075 | Opt_data_err_abort, Opt_data_err_ignore, |
1083 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 1076 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
1084 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 1077 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
1085 | Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, | 1078 | Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, |
@@ -1126,7 +1119,6 @@ static const match_table_t tokens = { | |||
1126 | {Opt_data_writeback, "data=writeback"}, | 1119 | {Opt_data_writeback, "data=writeback"}, |
1127 | {Opt_data_err_abort, "data_err=abort"}, | 1120 | {Opt_data_err_abort, "data_err=abort"}, |
1128 | {Opt_data_err_ignore, "data_err=ignore"}, | 1121 | {Opt_data_err_ignore, "data_err=ignore"}, |
1129 | {Opt_mb_history_length, "mb_history_length=%u"}, | ||
1130 | {Opt_offusrjquota, "usrjquota="}, | 1122 | {Opt_offusrjquota, "usrjquota="}, |
1131 | {Opt_usrjquota, "usrjquota=%s"}, | 1123 | {Opt_usrjquota, "usrjquota=%s"}, |
1132 | {Opt_offgrpjquota, "grpjquota="}, | 1124 | {Opt_offgrpjquota, "grpjquota="}, |
@@ -1367,13 +1359,6 @@ static int parse_options(char *options, struct super_block *sb, | |||
1367 | case Opt_data_err_ignore: | 1359 | case Opt_data_err_ignore: |
1368 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | 1360 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); |
1369 | break; | 1361 | break; |
1370 | case Opt_mb_history_length: | ||
1371 | if (match_int(&args[0], &option)) | ||
1372 | return 0; | ||
1373 | if (option < 0) | ||
1374 | return 0; | ||
1375 | sbi->s_mb_history_max = option; | ||
1376 | break; | ||
1377 | #ifdef CONFIG_QUOTA | 1362 | #ifdef CONFIG_QUOTA |
1378 | case Opt_usrjquota: | 1363 | case Opt_usrjquota: |
1379 | qtype = USRQUOTA; | 1364 | qtype = USRQUOTA; |
@@ -2435,7 +2420,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2435 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; | 2420 | sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; |
2436 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; | 2421 | sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; |
2437 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; | 2422 | sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; |
2438 | sbi->s_mb_history_max = default_mb_history_length; | ||
2439 | 2423 | ||
2440 | set_opt(sbi->s_mount_opt, BARRIER); | 2424 | set_opt(sbi->s_mount_opt, BARRIER); |
2441 | 2425 | ||
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 7c6bbb7198a3..b8320256dc5d 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
@@ -743,6 +743,169 @@ TRACE_EVENT(ext4_alloc_da_blocks, | |||
743 | __entry->data_blocks, __entry->meta_blocks) | 743 | __entry->data_blocks, __entry->meta_blocks) |
744 | ); | 744 | ); |
745 | 745 | ||
746 | TRACE_EVENT(ext4_mballoc_alloc, | ||
747 | TP_PROTO(struct ext4_allocation_context *ac), | ||
748 | |||
749 | TP_ARGS(ac), | ||
750 | |||
751 | TP_STRUCT__entry( | ||
752 | __field( dev_t, dev ) | ||
753 | __field( ino_t, ino ) | ||
754 | __field( __u16, found ) | ||
755 | __field( __u16, groups ) | ||
756 | __field( __u16, buddy ) | ||
757 | __field( __u16, flags ) | ||
758 | __field( __u16, tail ) | ||
759 | __field( __u8, cr ) | ||
760 | __field( __u32, orig_logical ) | ||
761 | __field( int, orig_start ) | ||
762 | __field( __u32, orig_group ) | ||
763 | __field( int, orig_len ) | ||
764 | __field( __u32, goal_logical ) | ||
765 | __field( int, goal_start ) | ||
766 | __field( __u32, goal_group ) | ||
767 | __field( int, goal_len ) | ||
768 | __field( __u32, result_logical ) | ||
769 | __field( int, result_start ) | ||
770 | __field( __u32, result_group ) | ||
771 | __field( int, result_len ) | ||
772 | ), | ||
773 | |||
774 | TP_fast_assign( | ||
775 | __entry->dev = ac->ac_inode->i_sb->s_dev; | ||
776 | __entry->ino = ac->ac_inode->i_ino; | ||
777 | __entry->found = ac->ac_found; | ||
778 | __entry->flags = ac->ac_flags; | ||
779 | __entry->groups = ac->ac_groups_scanned; | ||
780 | __entry->buddy = ac->ac_buddy; | ||
781 | __entry->tail = ac->ac_tail; | ||
782 | __entry->cr = ac->ac_criteria; | ||
783 | __entry->orig_logical = ac->ac_o_ex.fe_logical; | ||
784 | __entry->orig_start = ac->ac_o_ex.fe_start; | ||
785 | __entry->orig_group = ac->ac_o_ex.fe_group; | ||
786 | __entry->orig_len = ac->ac_o_ex.fe_len; | ||
787 | __entry->goal_logical = ac->ac_g_ex.fe_logical; | ||
788 | __entry->goal_start = ac->ac_g_ex.fe_start; | ||
789 | __entry->goal_group = ac->ac_g_ex.fe_group; | ||
790 | __entry->goal_len = ac->ac_g_ex.fe_len; | ||
791 | __entry->result_logical = ac->ac_f_ex.fe_logical; | ||
792 | __entry->result_start = ac->ac_f_ex.fe_start; | ||
793 | __entry->result_group = ac->ac_f_ex.fe_group; | ||
794 | __entry->result_len = ac->ac_f_ex.fe_len; | ||
795 | ), | ||
796 | |||
797 | TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " | ||
798 | "result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x " | ||
799 | "tail %u broken %u", | ||
800 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, | ||
801 | __entry->orig_group, __entry->orig_start, | ||
802 | __entry->orig_len, __entry->orig_logical, | ||
803 | __entry->goal_group, __entry->goal_start, | ||
804 | __entry->goal_len, __entry->goal_logical, | ||
805 | __entry->result_group, __entry->result_start, | ||
806 | __entry->result_len, __entry->result_logical, | ||
807 | __entry->found, __entry->groups, __entry->cr, | ||
808 | __entry->flags, __entry->tail, | ||
809 | __entry->buddy ? 1 << __entry->buddy : 0) | ||
810 | ); | ||
811 | |||
812 | TRACE_EVENT(ext4_mballoc_prealloc, | ||
813 | TP_PROTO(struct ext4_allocation_context *ac), | ||
814 | |||
815 | TP_ARGS(ac), | ||
816 | |||
817 | TP_STRUCT__entry( | ||
818 | __field( dev_t, dev ) | ||
819 | __field( ino_t, ino ) | ||
820 | __field( __u32, orig_logical ) | ||
821 | __field( int, orig_start ) | ||
822 | __field( __u32, orig_group ) | ||
823 | __field( int, orig_len ) | ||
824 | __field( __u32, result_logical ) | ||
825 | __field( int, result_start ) | ||
826 | __field( __u32, result_group ) | ||
827 | __field( int, result_len ) | ||
828 | ), | ||
829 | |||
830 | TP_fast_assign( | ||
831 | __entry->dev = ac->ac_inode->i_sb->s_dev; | ||
832 | __entry->ino = ac->ac_inode->i_ino; | ||
833 | __entry->orig_logical = ac->ac_o_ex.fe_logical; | ||
834 | __entry->orig_start = ac->ac_o_ex.fe_start; | ||
835 | __entry->orig_group = ac->ac_o_ex.fe_group; | ||
836 | __entry->orig_len = ac->ac_o_ex.fe_len; | ||
837 | __entry->result_logical = ac->ac_b_ex.fe_logical; | ||
838 | __entry->result_start = ac->ac_b_ex.fe_start; | ||
839 | __entry->result_group = ac->ac_b_ex.fe_group; | ||
840 | __entry->result_len = ac->ac_b_ex.fe_len; | ||
841 | ), | ||
842 | |||
843 | TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u", | ||
844 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, | ||
845 | __entry->orig_group, __entry->orig_start, | ||
846 | __entry->orig_len, __entry->orig_logical, | ||
847 | __entry->result_group, __entry->result_start, | ||
848 | __entry->result_len, __entry->result_logical) | ||
849 | ); | ||
850 | |||
851 | TRACE_EVENT(ext4_mballoc_discard, | ||
852 | TP_PROTO(struct ext4_allocation_context *ac), | ||
853 | |||
854 | TP_ARGS(ac), | ||
855 | |||
856 | TP_STRUCT__entry( | ||
857 | __field( dev_t, dev ) | ||
858 | __field( ino_t, ino ) | ||
859 | __field( __u32, result_logical ) | ||
860 | __field( int, result_start ) | ||
861 | __field( __u32, result_group ) | ||
862 | __field( int, result_len ) | ||
863 | ), | ||
864 | |||
865 | TP_fast_assign( | ||
866 | __entry->dev = ac->ac_inode->i_sb->s_dev; | ||
867 | __entry->ino = ac->ac_inode->i_ino; | ||
868 | __entry->result_logical = ac->ac_b_ex.fe_logical; | ||
869 | __entry->result_start = ac->ac_b_ex.fe_start; | ||
870 | __entry->result_group = ac->ac_b_ex.fe_group; | ||
871 | __entry->result_len = ac->ac_b_ex.fe_len; | ||
872 | ), | ||
873 | |||
874 | TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", | ||
875 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, | ||
876 | __entry->result_group, __entry->result_start, | ||
877 | __entry->result_len, __entry->result_logical) | ||
878 | ); | ||
879 | |||
880 | TRACE_EVENT(ext4_mballoc_free, | ||
881 | TP_PROTO(struct ext4_allocation_context *ac), | ||
882 | |||
883 | TP_ARGS(ac), | ||
884 | |||
885 | TP_STRUCT__entry( | ||
886 | __field( dev_t, dev ) | ||
887 | __field( ino_t, ino ) | ||
888 | __field( __u32, result_logical ) | ||
889 | __field( int, result_start ) | ||
890 | __field( __u32, result_group ) | ||
891 | __field( int, result_len ) | ||
892 | ), | ||
893 | |||
894 | TP_fast_assign( | ||
895 | __entry->dev = ac->ac_inode->i_sb->s_dev; | ||
896 | __entry->ino = ac->ac_inode->i_ino; | ||
897 | __entry->result_logical = ac->ac_b_ex.fe_logical; | ||
898 | __entry->result_start = ac->ac_b_ex.fe_start; | ||
899 | __entry->result_group = ac->ac_b_ex.fe_group; | ||
900 | __entry->result_len = ac->ac_b_ex.fe_len; | ||
901 | ), | ||
902 | |||
903 | TP_printk("dev %s inode %lu extent %u/%d/%u@%u ", | ||
904 | jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino, | ||
905 | __entry->result_group, __entry->result_start, | ||
906 | __entry->result_len, __entry->result_logical) | ||
907 | ); | ||
908 | |||
746 | #endif /* _TRACE_EXT4_H */ | 909 | #endif /* _TRACE_EXT4_H */ |
747 | 910 | ||
748 | /* This part must be outside protection */ | 911 | /* This part must be outside protection */ |