aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorYan, Zheng <zheng.z.yan@intel.com>2013-05-03 02:15:52 -0400
committerTheodore Ts'o <tytso@mit.edu>2013-05-03 02:15:52 -0400
commite30b5dca15dea86aa697f9d58ff646294fe80d3d (patch)
tree549d1ff3f8e5ecb9675a2937113aa99e64ef3ea1 /fs
parent0d606e2c9fccdd4e67febf1e2da500e1bfe9e045 (diff)
ext4: fix fio regression
We (Linux Kernel Performance project) found a regression introduced by commit: f7fec032aa ext4: track all extent status in extent status tree The commit causes about 20% performance decrease in fio random write test. Profiler shows that rb_next() uses a lot of CPU time. The call stack is: rb_next ext4_es_find_delayed_extent ext4_map_blocks _ext4_get_block ext4_get_block_write __blockdev_direct_IO ext4_direct_IO generic_file_direct_write __generic_file_aio_write ext4_file_write aio_rw_vect_retry aio_run_iocb do_io_submit sys_io_submit system_call_fastpath io_submit td_io_getevents io_u_queued_complete thread_main main __libc_start_main The cause is that ext4_es_find_delayed_extent() doesn't have an upper bound, it keeps searching until a delayed extent is found. When there are a lots of non-delayed entries in the extent state tree, ext4_es_find_delayed_extent() may uses a lot of CPU time. Reported-by: LKP project <lkp@linux.intel.com> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com> Signed-off-by: Zheng Liu <wenqing.lz@taobao.com> Cc: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/extents.c9
-rw-r--r--fs/ext4/extents_status.c17
-rw-r--r--fs/ext4/extents_status.h3
-rw-r--r--fs/ext4/file.c4
4 files changed, 21 insertions, 12 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 107936db244e..bc0f1910b9cf 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3642,7 +3642,7 @@ int ext4_find_delalloc_range(struct inode *inode,
3642{ 3642{
3643 struct extent_status es; 3643 struct extent_status es;
3644 3644
3645 ext4_es_find_delayed_extent(inode, lblk_start, &es); 3645 ext4_es_find_delayed_extent_range(inode, lblk_start, lblk_end, &es);
3646 if (es.es_len == 0) 3646 if (es.es_len == 0)
3647 return 0; /* there is no delay extent in this tree */ 3647 return 0; /* there is no delay extent in this tree */
3648 else if (es.es_lblk <= lblk_start && 3648 else if (es.es_lblk <= lblk_start &&
@@ -4608,9 +4608,10 @@ static int ext4_find_delayed_extent(struct inode *inode,
4608 struct extent_status es; 4608 struct extent_status es;
4609 ext4_lblk_t block, next_del; 4609 ext4_lblk_t block, next_del;
4610 4610
4611 ext4_es_find_delayed_extent(inode, newes->es_lblk, &es);
4612
4613 if (newes->es_pblk == 0) { 4611 if (newes->es_pblk == 0) {
4612 ext4_es_find_delayed_extent_range(inode, newes->es_lblk,
4613 newes->es_lblk + newes->es_len - 1, &es);
4614
4614 /* 4615 /*
4615 * No extent in extent-tree contains block @newes->es_pblk, 4616 * No extent in extent-tree contains block @newes->es_pblk,
4616 * then the block may stay in 1)a hole or 2)delayed-extent. 4617 * then the block may stay in 1)a hole or 2)delayed-extent.
@@ -4630,7 +4631,7 @@ static int ext4_find_delayed_extent(struct inode *inode,
4630 } 4631 }
4631 4632
4632 block = newes->es_lblk + newes->es_len; 4633 block = newes->es_lblk + newes->es_len;
4633 ext4_es_find_delayed_extent(inode, block, &es); 4634 ext4_es_find_delayed_extent_range(inode, block, EXT_MAX_BLOCKS, &es);
4634 if (es.es_len == 0) 4635 if (es.es_len == 0)
4635 next_del = EXT_MAX_BLOCKS; 4636 next_del = EXT_MAX_BLOCKS;
4636 else 4637 else
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index fe3337a85ede..e6941e622d31 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -232,14 +232,16 @@ static struct extent_status *__es_tree_search(struct rb_root *root,
232} 232}
233 233
234/* 234/*
235 * ext4_es_find_delayed_extent: find the 1st delayed extent covering @es->lblk 235 * ext4_es_find_delayed_extent_range: find the 1st delayed extent covering
236 * if it exists, otherwise, the next extent after @es->lblk. 236 * @es->lblk if it exists, otherwise, the next extent after @es->lblk.
237 * 237 *
238 * @inode: the inode which owns delayed extents 238 * @inode: the inode which owns delayed extents
239 * @lblk: the offset where we start to search 239 * @lblk: the offset where we start to search
240 * @end: the offset where we stop to search
240 * @es: delayed extent that we found 241 * @es: delayed extent that we found
241 */ 242 */
242void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, 243void ext4_es_find_delayed_extent_range(struct inode *inode,
244 ext4_lblk_t lblk, ext4_lblk_t end,
243 struct extent_status *es) 245 struct extent_status *es)
244{ 246{
245 struct ext4_es_tree *tree = NULL; 247 struct ext4_es_tree *tree = NULL;
@@ -247,7 +249,8 @@ void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
247 struct rb_node *node; 249 struct rb_node *node;
248 250
249 BUG_ON(es == NULL); 251 BUG_ON(es == NULL);
250 trace_ext4_es_find_delayed_extent_enter(inode, lblk); 252 BUG_ON(end < lblk);
253 trace_ext4_es_find_delayed_extent_range_enter(inode, lblk);
251 254
252 read_lock(&EXT4_I(inode)->i_es_lock); 255 read_lock(&EXT4_I(inode)->i_es_lock);
253 tree = &EXT4_I(inode)->i_es_tree; 256 tree = &EXT4_I(inode)->i_es_tree;
@@ -270,6 +273,10 @@ out:
270 if (es1 && !ext4_es_is_delayed(es1)) { 273 if (es1 && !ext4_es_is_delayed(es1)) {
271 while ((node = rb_next(&es1->rb_node)) != NULL) { 274 while ((node = rb_next(&es1->rb_node)) != NULL) {
272 es1 = rb_entry(node, struct extent_status, rb_node); 275 es1 = rb_entry(node, struct extent_status, rb_node);
276 if (es1->es_lblk > end) {
277 es1 = NULL;
278 break;
279 }
273 if (ext4_es_is_delayed(es1)) 280 if (ext4_es_is_delayed(es1))
274 break; 281 break;
275 } 282 }
@@ -285,7 +292,7 @@ out:
285 read_unlock(&EXT4_I(inode)->i_es_lock); 292 read_unlock(&EXT4_I(inode)->i_es_lock);
286 293
287 ext4_es_lru_add(inode); 294 ext4_es_lru_add(inode);
288 trace_ext4_es_find_delayed_extent_exit(inode, es); 295 trace_ext4_es_find_delayed_extent_range_exit(inode, es);
289} 296}
290 297
291static struct extent_status * 298static struct extent_status *
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index d8e2d4dc311e..f740eb03b707 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -62,7 +62,8 @@ extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
62 unsigned long long status); 62 unsigned long long status);
63extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, 63extern int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
64 ext4_lblk_t len); 64 ext4_lblk_t len);
65extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk, 65extern void ext4_es_find_delayed_extent_range(struct inode *inode,
66 ext4_lblk_t lblk, ext4_lblk_t end,
66 struct extent_status *es); 67 struct extent_status *es);
67extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 68extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
68 struct extent_status *es); 69 struct extent_status *es);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 64848b595b24..1bc4523541d6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -464,7 +464,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
464 * If there is a delay extent at this offset, 464 * If there is a delay extent at this offset,
465 * it will be as a data. 465 * it will be as a data.
466 */ 466 */
467 ext4_es_find_delayed_extent(inode, last, &es); 467 ext4_es_find_delayed_extent_range(inode, last, last, &es);
468 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { 468 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
469 if (last != start) 469 if (last != start)
470 dataoff = last << blkbits; 470 dataoff = last << blkbits;
@@ -547,7 +547,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
547 * If there is a delay extent at this offset, 547 * If there is a delay extent at this offset,
548 * we will skip this extent. 548 * we will skip this extent.
549 */ 549 */
550 ext4_es_find_delayed_extent(inode, last, &es); 550 ext4_es_find_delayed_extent_range(inode, last, last, &es);
551 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { 551 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
552 last = es.es_lblk + es.es_len; 552 last = es.es_lblk + es.es_len;
553 holeoff = last << blkbits; 553 holeoff = last << blkbits;