diff options
author | Yan, Zheng <zheng.z.yan@intel.com> | 2013-05-03 02:15:52 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2013-05-03 02:15:52 -0400 |
commit | e30b5dca15dea86aa697f9d58ff646294fe80d3d (patch) | |
tree | 549d1ff3f8e5ecb9675a2937113aa99e64ef3ea1 /fs/ext4/extents_status.c | |
parent | 0d606e2c9fccdd4e67febf1e2da500e1bfe9e045 (diff) |
ext4: fix fio regression
We (Linux Kernel Performance project) found a regression introduced
by commit:
f7fec032aa ext4: track all extent status in extent status tree
The commit causes about a 20% performance decrease in the fio random write
test. The profiler shows that rb_next() uses a lot of CPU time. The call
stack is:
rb_next
ext4_es_find_delayed_extent
ext4_map_blocks
_ext4_get_block
ext4_get_block_write
__blockdev_direct_IO
ext4_direct_IO
generic_file_direct_write
__generic_file_aio_write
ext4_file_write
aio_rw_vect_retry
aio_run_iocb
do_io_submit
sys_io_submit
system_call_fastpath
io_submit
td_io_getevents
io_u_queued_complete
thread_main
main
__libc_start_main
The cause is that ext4_es_find_delayed_extent() doesn't have an
upper bound; it keeps searching until a delayed extent is found.
When there are a lot of non-delayed entries in the extent status
tree, ext4_es_find_delayed_extent() may use a lot of CPU time.
Reported-by: LKP project <lkp@linux.intel.com>
Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Cc: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/extents_status.c')
-rw-r--r-- | fs/ext4/extents_status.c | 17 |
1 files changed, 12 insertions, 5 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index fe3337a85ede..e6941e622d31 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root, | |||
232 | } | 232 | } |
233 | 233 | ||
234 | /* | 234 | /* |
235 | * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk | 235 | * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering |
236 | * if it exists, otherwise, the next extent after @es->lblk. | 236 | * @es->lblk if it exists, otherwise, the next extent after @es->lblk. |
237 | * | 237 | * |
238 | * @inode: the inode which owns delayed extents | 238 | * @inode: the inode which owns delayed extents |
239 | * @lblk: the offset where we start to search | 239 | * @lblk: the offset where we start to search |
240 | * @end: the offset where we stop to search | ||
240 | * @es: delayed extent that we found | 241 | * @es: delayed extent that we found |
241 | */ | 242 | */ |
242 | void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, | 243 | void ext4_es_find_delayed_extent_range(struct inode *inode, |
244 | ext4_lblk_t lblk, ext4_lblk_t end, | ||
243 | struct extent_status *es) | 245 | struct extent_status *es) |
244 | { | 246 | { |
245 | struct ext4_es_tree *tree = NULL; | 247 | struct ext4_es_tree *tree = NULL; |
@@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, | |||
247 | struct rb_node *node; | 249 | struct rb_node *node; |
248 | 250 | ||
249 | BUG_ON(es == NULL); | 251 | BUG_ON(es == NULL); |
250 | trace_ext4_es_find_delayed_extent_enter(inode, lblk); | 252 | BUG_ON(end < lblk); |
253 | trace_ext4_es_find_delayed_extent_range_enter(inode, lblk); | ||
251 | 254 | ||
252 | read_lock(&EXT4_I(inode)->i_es_lock); | 255 | read_lock(&EXT4_I(inode)->i_es_lock); |
253 | tree = &EXT4_I(inode)->i_es_tree; | 256 | tree = &EXT4_I(inode)->i_es_tree; |
@@ -270,6 +273,10 @@ out: | |||
270 | if (es1 && !ext4_es_is_delayed(es1)) { | 273 | if (es1 && !ext4_es_is_delayed(es1)) { |
271 | while ((node = rb_next(&es1->rb_node)) != NULL) { | 274 | while ((node = rb_next(&es1->rb_node)) != NULL) { |
272 | es1 = rb_entry(node, struct extent_status, rb_node); | 275 | es1 = rb_entry(node, struct extent_status, rb_node); |
276 | if (es1->es_lblk > end) { | ||
277 | es1 = NULL; | ||
278 | break; | ||
279 | } | ||
273 | if (ext4_es_is_delayed(es1)) | 280 | if (ext4_es_is_delayed(es1)) |
274 | break; | 281 | break; |
275 | } | 282 | } |
@@ -285,7 +292,7 @@ out: | |||
285 | read_unlock(&EXT4_I(inode)->i_es_lock); | 292 | read_unlock(&EXT4_I(inode)->i_es_lock); |
286 | 293 | ||
287 | ext4_es_lru_add(inode); | 294 | ext4_es_lru_add(inode); |
288 | trace_ext4_es_find_delayed_extent_exit(inode, es); | 295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
289 | } | 296 | } |
290 | 297 | ||
291 | static struct extent_status * | 298 | static struct extent_status * |