aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> 2008-10-16 10:10:36 -0400
committer: Theodore Ts'o <tytso@mit.edu> 2008-10-16 10:10:36 -0400
commit: 22208dedbd7626e5fc4339c417f8d24cc21f79d7 (patch)
tree: d03fc8524dc7cb2f18c2038ffe4d2146903f630e /fs
parent: 17bc6c30cf6bfffd816bdc53682dd46fc34a2cf4 (diff)
ext4: Fix file fragmentation during large file write.
The range_cyclic writeback mode uses the address_space writeback_index as the start index for writeback. With delayed allocation we were updating writeback_index wrongly, resulting in a highly fragmented file. This patch reduces the number of extents from 4000 to 27 for a 3GB file.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ext4/inode.c 91
1 files changed, 57 insertions, 34 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4ee3f0692eeb..27fc6b951221 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1648 int ret = 0, err, nr_pages, i; 1648 int ret = 0, err, nr_pages, i;
1649 unsigned long index, end; 1649 unsigned long index, end;
1650 struct pagevec pvec; 1650 struct pagevec pvec;
1651 long pages_skipped;
1651 1652
1652 BUG_ON(mpd->next_page <= mpd->first_page); 1653 BUG_ON(mpd->next_page <= mpd->first_page);
1653 pagevec_init(&pvec, 0); 1654 pagevec_init(&pvec, 0);
@@ -1655,7 +1656,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1655 end = mpd->next_page - 1; 1656 end = mpd->next_page - 1;
1656 1657
1657 while (index <= end) { 1658 while (index <= end) {
1658 /* XXX: optimize tail */
1659 /* 1659 /*
1660 * We can use PAGECACHE_TAG_DIRTY lookup here because 1660 * We can use PAGECACHE_TAG_DIRTY lookup here because
1661 * even though we have cleared the dirty flag on the page 1661 * even though we have cleared the dirty flag on the page
@@ -1673,8 +1673,13 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1673 for (i = 0; i < nr_pages; i++) { 1673 for (i = 0; i < nr_pages; i++) {
1674 struct page *page = pvec.pages[i]; 1674 struct page *page = pvec.pages[i];
1675 1675
1676 pages_skipped = mpd->wbc->pages_skipped;
1676 err = mapping->a_ops->writepage(page, mpd->wbc); 1677 err = mapping->a_ops->writepage(page, mpd->wbc);
1677 if (!err) 1678 if (!err && (pages_skipped == mpd->wbc->pages_skipped))
1679 /*
1680 * have successfully written the page
1681 * without skipping the same
1682 */
1678 mpd->pages_written++; 1683 mpd->pages_written++;
1679 /* 1684 /*
1680 * In error case, we have to continue because 1685 * In error case, we have to continue because
@@ -2110,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
2110 struct writeback_control *wbc, 2115 struct writeback_control *wbc,
2111 struct mpage_da_data *mpd) 2116 struct mpage_da_data *mpd)
2112{ 2117{
2113 long to_write;
2114 int ret; 2118 int ret;
2115 2119
2116 if (!mpd->get_block) 2120 if (!mpd->get_block)
@@ -2125,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
2125 mpd->pages_written = 0; 2129 mpd->pages_written = 0;
2126 mpd->retval = 0; 2130 mpd->retval = 0;
2127 2131
2128 to_write = wbc->nr_to_write;
2129
2130 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); 2132 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2131
2132 /* 2133 /*
2133 * Handle last extent of pages 2134 * Handle last extent of pages
2134 */ 2135 */
2135 if (!mpd->io_done && mpd->next_page != mpd->first_page) { 2136 if (!mpd->io_done && mpd->next_page != mpd->first_page) {
2136 if (mpage_da_map_blocks(mpd) == 0) 2137 if (mpage_da_map_blocks(mpd) == 0)
2137 mpage_da_submit_io(mpd); 2138 mpage_da_submit_io(mpd);
2138 }
2139 2139
2140 wbc->nr_to_write = to_write - mpd->pages_written; 2140 mpd->io_done = 1;
2141 ret = MPAGE_DA_EXTENT_TAIL;
2142 }
2143 wbc->nr_to_write -= mpd->pages_written;
2141 return ret; 2144 return ret;
2142} 2145}
2143 2146
@@ -2366,11 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2366static int ext4_da_writepages(struct address_space *mapping, 2369static int ext4_da_writepages(struct address_space *mapping,
2367 struct writeback_control *wbc) 2370 struct writeback_control *wbc)
2368{ 2371{
2372 pgoff_t index;
2373 int range_whole = 0;
2369 handle_t *handle = NULL; 2374 handle_t *handle = NULL;
2370 struct mpage_da_data mpd; 2375 struct mpage_da_data mpd;
2371 struct inode *inode = mapping->host; 2376 struct inode *inode = mapping->host;
2377 int no_nrwrite_index_update;
2378 long pages_written = 0, pages_skipped;
2372 int needed_blocks, ret = 0, nr_to_writebump = 0; 2379 int needed_blocks, ret = 0, nr_to_writebump = 0;
2373 long to_write, pages_skipped = 0;
2374 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2380 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2375 2381
2376 /* 2382 /*
@@ -2390,16 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
2390 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; 2396 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
2391 wbc->nr_to_write = sbi->s_mb_stream_request; 2397 wbc->nr_to_write = sbi->s_mb_stream_request;
2392 } 2398 }
2399 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2400 range_whole = 1;
2393 2401
2394 2402 if (wbc->range_cyclic)
2395 pages_skipped = wbc->pages_skipped; 2403 index = mapping->writeback_index;
2404 else
2405 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2396 2406
2397 mpd.wbc = wbc; 2407 mpd.wbc = wbc;
2398 mpd.inode = mapping->host; 2408 mpd.inode = mapping->host;
2399 2409
2400restart_loop: 2410 /*
2401 to_write = wbc->nr_to_write; 2411 * we don't want write_cache_pages to update
2402 while (!ret && to_write > 0) { 2412 * nr_to_write and writeback_index
2413 */
2414 no_nrwrite_index_update = wbc->no_nrwrite_index_update;
2415 wbc->no_nrwrite_index_update = 1;
2416 pages_skipped = wbc->pages_skipped;
2417
2418 while (!ret && wbc->nr_to_write > 0) {
2403 2419
2404 /* 2420 /*
2405 * we insert one extent at a time. So we need 2421 * we insert one extent at a time. So we need
@@ -2420,46 +2436,53 @@ restart_loop:
2420 dump_stack(); 2436 dump_stack();
2421 goto out_writepages; 2437 goto out_writepages;
2422 } 2438 }
2423 to_write -= wbc->nr_to_write;
2424
2425 mpd.get_block = ext4_da_get_block_write; 2439 mpd.get_block = ext4_da_get_block_write;
2426 ret = mpage_da_writepages(mapping, wbc, &mpd); 2440 ret = mpage_da_writepages(mapping, wbc, &mpd);
2427 2441
2428 ext4_journal_stop(handle); 2442 ext4_journal_stop(handle);
2429 2443
2430 if (mpd.retval == -ENOSPC) 2444 if (mpd.retval == -ENOSPC) {
2445 /* commit the transaction which would
2446 * free blocks released in the transaction
2447 * and try again
2448 */
2431 jbd2_journal_force_commit_nested(sbi->s_journal); 2449 jbd2_journal_force_commit_nested(sbi->s_journal);
2432 2450 wbc->pages_skipped = pages_skipped;
2433 /* reset the retry count */ 2451 ret = 0;
2434 if (ret == MPAGE_DA_EXTENT_TAIL) { 2452 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
2435 /* 2453 /*
2436 * got one extent now try with 2454 * got one extent now try with
2437 * rest of the pages 2455 * rest of the pages
2438 */ 2456 */
2439 to_write += wbc->nr_to_write; 2457 pages_written += mpd.pages_written;
2458 wbc->pages_skipped = pages_skipped;
2440 ret = 0; 2459 ret = 0;
2441 } else if (wbc->nr_to_write) { 2460 } else if (wbc->nr_to_write)
2442 /* 2461 /*
2443 * There is no more writeout needed 2462 * There is no more writeout needed
2444 * or we requested for a noblocking writeout 2463 * or we requested for a noblocking writeout
2445 * and we found the device congested 2464 * and we found the device congested
2446 */ 2465 */
2447 to_write += wbc->nr_to_write;
2448 break; 2466 break;
2449 }
2450 wbc->nr_to_write = to_write;
2451 }
2452
2453 if (!wbc->range_cyclic && (pages_skipped != wbc->pages_skipped)) {
2454 /* We skipped pages in this loop */
2455 wbc->nr_to_write = to_write +
2456 wbc->pages_skipped - pages_skipped;
2457 wbc->pages_skipped = pages_skipped;
2458 goto restart_loop;
2459 } 2467 }
2468 if (pages_skipped != wbc->pages_skipped)
2469 printk(KERN_EMERG "This should not happen leaving %s "
2470 "with nr_to_write = %ld ret = %d\n",
2471 __func__, wbc->nr_to_write, ret);
2472
2473 /* Update index */
2474 index += pages_written;
2475 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2476 /*
2477 * set the writeback_index so that range_cyclic
2478 * mode will write it back later
2479 */
2480 mapping->writeback_index = index;
2460 2481
2461out_writepages: 2482out_writepages:
2462 wbc->nr_to_write = to_write - nr_to_writebump; 2483 if (!no_nrwrite_index_update)
2484 wbc->no_nrwrite_index_update = 0;
2485 wbc->nr_to_write -= nr_to_writebump;
2463 return ret; 2486 return ret;
2464} 2487}
2465 2488