aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author Dmitry Monakhov <dmonakhov@openvz.org> 2014-12-02 18:08:53 -0500
committer Theodore Ts'o <tytso@mit.edu> 2014-12-02 18:08:53 -0500
commit 14516bb7bb6ffbd49f35389f9ece3b2045ba5815 (patch)
tree 23cbe197595d9dc8534dd1dbab7e1237fbb74226 /fs/ext4
parent d952d69e268f833c85c0bafee9f67f9dba85044b (diff)
ext4: fix suboptimal seek_{data,hole} extents traversal
It is ridiculous practice to scan an inode block by block; this technique is applicable only to old indirect files. It takes a significant amount of time for really large files. Let's reuse ext4_fiemap, which already traverses the inode tree in the most optimal manner. TESTCASE: ftruncate64(fd, 0); ftruncate64(fd, 1ULL << 40); /* lseek will spin very long time */ lseek64(fd, 0, SEEK_DATA); lseek64(fd, 0, SEEK_HOLE); Original report: https://lkml.org/lkml/2014/10/16/620 Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org> Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/extents.c 4
-rw-r--r-- fs/ext4/file.c 220
2 files changed, 108 insertions, 116 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bed43081720f..e5d3eadf47b1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5166,8 +5166,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
5166 5166
5167 /* fallback to generic here if not in extents fmt */ 5167 /* fallback to generic here if not in extents fmt */
5168 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 5168 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
5169 return generic_block_fiemap(inode, fieinfo, start, len, 5169 return __generic_block_fiemap(inode, fieinfo, start, len,
5170 ext4_get_block); 5170 ext4_get_block);
5171 5171
5172 if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) 5172 if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS))
5173 return -EBADR; 5173 return -EBADR;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 8131be8c0af3..513c12cf444c 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -273,24 +273,19 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
273 * we determine this extent as a data or a hole according to whether the 273 * we determine this extent as a data or a hole according to whether the
274 * page cache has data or not. 274 * page cache has data or not.
275 */ 275 */
276static int ext4_find_unwritten_pgoff(struct inode *inode, 276static int ext4_find_unwritten_pgoff(struct inode *inode, int whence,
277 int whence, 277 loff_t endoff, loff_t *offset)
278 struct ext4_map_blocks *map,
279 loff_t *offset)
280{ 278{
281 struct pagevec pvec; 279 struct pagevec pvec;
282 unsigned int blkbits;
283 pgoff_t index; 280 pgoff_t index;
284 pgoff_t end; 281 pgoff_t end;
285 loff_t endoff;
286 loff_t startoff; 282 loff_t startoff;
287 loff_t lastoff; 283 loff_t lastoff;
288 int found = 0; 284 int found = 0;
289 285
290 blkbits = inode->i_sb->s_blocksize_bits;
291 startoff = *offset; 286 startoff = *offset;
292 lastoff = startoff; 287 lastoff = startoff;
293 endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; 288
294 289
295 index = startoff >> PAGE_CACHE_SHIFT; 290 index = startoff >> PAGE_CACHE_SHIFT;
296 end = endoff >> PAGE_CACHE_SHIFT; 291 end = endoff >> PAGE_CACHE_SHIFT;
@@ -408,147 +403,144 @@ out:
408static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) 403static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
409{ 404{
410 struct inode *inode = file->f_mapping->host; 405 struct inode *inode = file->f_mapping->host;
411 struct ext4_map_blocks map; 406 struct fiemap_extent_info fie;
412 struct extent_status es; 407 struct fiemap_extent ext[2];
413 ext4_lblk_t start, last, end; 408 loff_t next;
414 loff_t dataoff, isize; 409 int i, ret = 0;
415 int blkbits;
416 int ret = 0;
417 410
418 mutex_lock(&inode->i_mutex); 411 mutex_lock(&inode->i_mutex);
419 412 if (offset >= inode->i_size) {
420 isize = i_size_read(inode);
421 if (offset >= isize) {
422 mutex_unlock(&inode->i_mutex); 413 mutex_unlock(&inode->i_mutex);
423 return -ENXIO; 414 return -ENXIO;
424 } 415 }
425 416 fie.fi_flags = 0;
426 blkbits = inode->i_sb->s_blocksize_bits; 417 fie.fi_extents_max = 2;
427 start = offset >> blkbits; 418 fie.fi_extents_start = (struct fiemap_extent __user *) &ext;
428 last = start; 419 while (1) {
429 end = isize >> blkbits; 420 mm_segment_t old_fs = get_fs();
430 dataoff = offset; 421
431 422 fie.fi_extents_mapped = 0;
432 do { 423 memset(ext, 0, sizeof(*ext) * fie.fi_extents_max);
433 map.m_lblk = last; 424
434 map.m_len = end - last + 1; 425 set_fs(get_ds());
435 ret = ext4_map_blocks(NULL, inode, &map, 0); 426 ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
436 if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { 427 set_fs(old_fs);
437 if (last != start) 428 if (ret)
438 dataoff = (loff_t)last << blkbits;
439 break; 429 break;
440 }
441 430
442 /* 431 /* No extents found, EOF */
443 * If there is a delay extent at this offset, 432 if (!fie.fi_extents_mapped) {
444 * it will be as a data. 433 ret = -ENXIO;
445 */
446 ext4_es_find_delayed_extent_range(inode, last, last, &es);
447 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
448 if (last != start)
449 dataoff = (loff_t)last << blkbits;
450 break; 434 break;
451 } 435 }
436 for (i = 0; i < fie.fi_extents_mapped; i++) {
437 next = (loff_t)(ext[i].fe_length + ext[i].fe_logical);
452 438
453 /* 439 if (offset < (loff_t)ext[i].fe_logical)
454 * If there is a unwritten extent at this offset, 440 offset = (loff_t)ext[i].fe_logical;
455 * it will be as a data or a hole according to page 441 /*
456 * cache that has data or not. 442 * If extent is not unwritten, then it contains valid
457 */ 443 * data, mapped or delayed.
458 if (map.m_flags & EXT4_MAP_UNWRITTEN) { 444 */
459 int unwritten; 445 if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN))
460 unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA, 446 goto out;
461 &map, &dataoff);
462 if (unwritten)
463 break;
464 }
465 447
466 last++; 448 /*
467 dataoff = (loff_t)last << blkbits; 449 * If there is a unwritten extent at this offset,
468 } while (last <= end); 450 * it will be as a data or a hole according to page
451 * cache that has data or not.
452 */
453 if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
454 next, &offset))
455 goto out;
469 456
457 if (ext[i].fe_flags & FIEMAP_EXTENT_LAST) {
458 ret = -ENXIO;
459 goto out;
460 }
461 offset = next;
462 }
463 }
464 if (offset > inode->i_size)
465 offset = inode->i_size;
466out:
470 mutex_unlock(&inode->i_mutex); 467 mutex_unlock(&inode->i_mutex);
468 if (ret)
469 return ret;
471 470
472 if (dataoff > isize) 471 return vfs_setpos(file, offset, maxsize);
473 return -ENXIO;
474
475 return vfs_setpos(file, dataoff, maxsize);
476} 472}
477 473
478/* 474/*
479 * ext4_seek_hole() retrieves the offset for SEEK_HOLE. 475 * ext4_seek_hole() retrieves the offset for SEEK_HOLE
480 */ 476 */
481static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) 477static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
482{ 478{
483 struct inode *inode = file->f_mapping->host; 479 struct inode *inode = file->f_mapping->host;
484 struct ext4_map_blocks map; 480 struct fiemap_extent_info fie;
485 struct extent_status es; 481 struct fiemap_extent ext[2];
486 ext4_lblk_t start, last, end; 482 loff_t next;
487 loff_t holeoff, isize; 483 int i, ret = 0;
488 int blkbits;
489 int ret = 0;
490 484
491 mutex_lock(&inode->i_mutex); 485 mutex_lock(&inode->i_mutex);
492 486 if (offset >= inode->i_size) {
493 isize = i_size_read(inode);
494 if (offset >= isize) {
495 mutex_unlock(&inode->i_mutex); 487 mutex_unlock(&inode->i_mutex);
496 return -ENXIO; 488 return -ENXIO;
497 } 489 }
498 490
499 blkbits = inode->i_sb->s_blocksize_bits; 491 fie.fi_flags = 0;
500 start = offset >> blkbits; 492 fie.fi_extents_max = 2;
501 last = start; 493 fie.fi_extents_start = (struct fiemap_extent __user *)&ext;
502 end = isize >> blkbits; 494 while (1) {
503 holeoff = offset; 495 mm_segment_t old_fs = get_fs();
504 496
505 do { 497 fie.fi_extents_mapped = 0;
506 map.m_lblk = last; 498 memset(ext, 0, sizeof(*ext));
507 map.m_len = end - last + 1;
508 ret = ext4_map_blocks(NULL, inode, &map, 0);
509 if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
510 last += ret;
511 holeoff = (loff_t)last << blkbits;
512 continue;
513 }
514 499
515 /* 500 set_fs(get_ds());
516 * If there is a delay extent at this offset, 501 ret = ext4_fiemap(inode, &fie, offset, maxsize - offset);
517 * we will skip this extent. 502 set_fs(old_fs);
518 */ 503 if (ret)
519 ext4_es_find_delayed_extent_range(inode, last, last, &es); 504 break;
520 if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
521 last = es.es_lblk + es.es_len;
522 holeoff = (loff_t)last << blkbits;
523 continue;
524 }
525 505
526 /* 506 /* No extents found */
527 * If there is a unwritten extent at this offset, 507 if (!fie.fi_extents_mapped)
528 * it will be as a data or a hole according to page 508 break;
529 * cache that has data or not. 509
530 */ 510 for (i = 0; i < fie.fi_extents_mapped; i++) {
531 if (map.m_flags & EXT4_MAP_UNWRITTEN) { 511 next = (loff_t)(ext[i].fe_logical + ext[i].fe_length);
532 int unwritten; 512 /*
533 unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE, 513 * If extent is not unwritten, then it contains valid
534 &map, &holeoff); 514 * data, mapped or delayed.
535 if (!unwritten) { 515 */
536 last += ret; 516 if (!(ext[i].fe_flags & FIEMAP_EXTENT_UNWRITTEN)) {
537 holeoff = (loff_t)last << blkbits; 517 if (offset < (loff_t)ext[i].fe_logical)
518 goto out;
519 offset = next;
538 continue; 520 continue;
539 } 521 }
540 } 522 /*
541 523 * If there is a unwritten extent at this offset,
542 /* find a hole */ 524 * it will be as a data or a hole according to page
543 break; 525 * cache that has data or not.
544 } while (last <= end); 526 */
527 if (ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
528 next, &offset))
529 goto out;
545 530
531 offset = next;
532 if (ext[i].fe_flags & FIEMAP_EXTENT_LAST)
533 goto out;
534 }
535 }
536 if (offset > inode->i_size)
537 offset = inode->i_size;
538out:
546 mutex_unlock(&inode->i_mutex); 539 mutex_unlock(&inode->i_mutex);
540 if (ret)
541 return ret;
547 542
548 if (holeoff > isize) 543 return vfs_setpos(file, offset, maxsize);
549 holeoff = isize;
550
551 return vfs_setpos(file, holeoff, maxsize);
552} 544}
553 545
554/* 546/*