about | summary | refs | log | tree | commit | diff | stats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2008-10-17 18:08:11 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2008-10-17 18:08:11 -0400
commit 58617d5e59663d2edea03bd03cb74279827611bb (patch)
tree 1b472f0ab43ae08fef5dea30b95592a005385686 /fs/ext4/inode.c
parent 26e9a397774a0e94efbb8a0bf4a952c28d808cab (diff)
parent f287a1a56130be5fdb96a4a62d1290bd064f308e (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Remove automatic enabling of the HUGE_FILE feature flag
  ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback
  ext4: Update Documentation/filesystems/ext4.txt
  ext4: Remove unused mount options: nomballoc, mballoc, nocheck
  ext4: Remove compile warnings when building w/o CONFIG_PROC_FS
  ext4: Add missing newlines to printk messages
  ext4: Fix file fragmentation during large file write.
  vfs: Add no_nrwrite_index_update writeback control flag
  vfs: Remove the range_cont writeback mode.
  ext4: Use tag dirty lookup during mpage_da_submit_io
  ext4: let the block device know when unused blocks can be discarded
  ext4: Don't reuse released data blocks until transaction commits
  ext4: Use an rbtree for tracking blocks freed during transaction.
  ext4: Do mballoc init before doing filesystem recovery
  ext4: Free ext4_prealloc_space using kmem_cache_free
  ext4: Fix Kconfig typo for ext4dev
  ext4: Remove an old reference to ext4dev in Makefile comment
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 143
1 files changed, 76 insertions, 67 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b4ec9decfd1..8dbf6953845b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1648 int ret = 0, err, nr_pages, i; 1648 int ret = 0, err, nr_pages, i;
1649 unsigned long index, end; 1649 unsigned long index, end;
1650 struct pagevec pvec; 1650 struct pagevec pvec;
1651 long pages_skipped;
1651 1652
1652 BUG_ON(mpd->next_page <= mpd->first_page); 1653 BUG_ON(mpd->next_page <= mpd->first_page);
1653 pagevec_init(&pvec, 0); 1654 pagevec_init(&pvec, 0);
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
1655 end = mpd->next_page - 1; 1656 end = mpd->next_page - 1;
1656 1657
1657 while (index <= end) { 1658 while (index <= end) {
1658 /* XXX: optimize tail */ 1659 /*
1659 nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); 1660 * We can use PAGECACHE_TAG_DIRTY lookup here because
1661 * even though we have cleared the dirty flag on the page
1662 * We still keep the page in the radix tree with tag
1663 * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
1664 * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
1665 * which is called via the below writepage callback.
1666 */
1667 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1668 PAGECACHE_TAG_DIRTY,
1669 min(end - index,
1670 (pgoff_t)PAGEVEC_SIZE-1) + 1);
1660 if (nr_pages == 0) 1671 if (nr_pages == 0)
1661 break; 1672 break;
1662 for (i = 0; i < nr_pages; i++) { 1673 for (i = 0; i < nr_pages; i++) {
1663 struct page *page = pvec.pages[i]; 1674 struct page *page = pvec.pages[i];
1664 1675
1665 index = page->index; 1676 pages_skipped = mpd->wbc->pages_skipped;
1666 if (index > end)
1667 break;
1668 index++;
1669
1670 err = mapping->a_ops->writepage(page, mpd->wbc); 1677 err = mapping->a_ops->writepage(page, mpd->wbc);
1671 if (!err) 1678 if (!err && (pages_skipped == mpd->wbc->pages_skipped))
1679 /*
1680 * have successfully written the page
1681 * without skipping the same
1682 */
1672 mpd->pages_written++; 1683 mpd->pages_written++;
1673 /* 1684 /*
1674 * In error case, we have to continue because 1685 * In error case, we have to continue because
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
2104 struct writeback_control *wbc, 2115 struct writeback_control *wbc,
2105 struct mpage_da_data *mpd) 2116 struct mpage_da_data *mpd)
2106{ 2117{
2107 long to_write;
2108 int ret; 2118 int ret;
2109 2119
2110 if (!mpd->get_block) 2120 if (!mpd->get_block)
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
2119 mpd->pages_written = 0; 2129 mpd->pages_written = 0;
2120 mpd->retval = 0; 2130 mpd->retval = 0;
2121 2131
2122 to_write = wbc->nr_to_write;
2123
2124 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); 2132 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
2125
2126 /* 2133 /*
2127 * Handle last extent of pages 2134 * Handle last extent of pages
2128 */ 2135 */
2129 if (!mpd->io_done && mpd->next_page != mpd->first_page) { 2136 if (!mpd->io_done && mpd->next_page != mpd->first_page) {
2130 if (mpage_da_map_blocks(mpd) == 0) 2137 if (mpage_da_map_blocks(mpd) == 0)
2131 mpage_da_submit_io(mpd); 2138 mpage_da_submit_io(mpd);
2132 }
2133 2139
2134 wbc->nr_to_write = to_write - mpd->pages_written; 2140 mpd->io_done = 1;
2141 ret = MPAGE_DA_EXTENT_TAIL;
2142 }
2143 wbc->nr_to_write -= mpd->pages_written;
2135 return ret; 2144 return ret;
2136} 2145}
2137 2146
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2360static int ext4_da_writepages(struct address_space *mapping, 2369static int ext4_da_writepages(struct address_space *mapping,
2361 struct writeback_control *wbc) 2370 struct writeback_control *wbc)
2362{ 2371{
2372 pgoff_t index;
2373 int range_whole = 0;
2363 handle_t *handle = NULL; 2374 handle_t *handle = NULL;
2364 loff_t range_start = 0;
2365 struct mpage_da_data mpd; 2375 struct mpage_da_data mpd;
2366 struct inode *inode = mapping->host; 2376 struct inode *inode = mapping->host;
2377 int no_nrwrite_index_update;
2378 long pages_written = 0, pages_skipped;
2367 int needed_blocks, ret = 0, nr_to_writebump = 0; 2379 int needed_blocks, ret = 0, nr_to_writebump = 0;
2368 long to_write, pages_skipped = 0;
2369 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); 2380 struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
2370 2381
2371 /* 2382 /*
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
2385 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; 2396 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
2386 wbc->nr_to_write = sbi->s_mb_stream_request; 2397 wbc->nr_to_write = sbi->s_mb_stream_request;
2387 } 2398 }
2399 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2400 range_whole = 1;
2388 2401
2389 if (!wbc->range_cyclic) 2402 if (wbc->range_cyclic)
2390 /* 2403 index = mapping->writeback_index;
2391 * If range_cyclic is not set force range_cont 2404 else
2392 * and save the old writeback_index 2405 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2393 */
2394 wbc->range_cont = 1;
2395
2396 range_start = wbc->range_start;
2397 pages_skipped = wbc->pages_skipped;
2398 2406
2399 mpd.wbc = wbc; 2407 mpd.wbc = wbc;
2400 mpd.inode = mapping->host; 2408 mpd.inode = mapping->host;
2401 2409
2402restart_loop: 2410 /*
2403 to_write = wbc->nr_to_write; 2411 * we don't want write_cache_pages to update
2404 while (!ret && to_write > 0) { 2412 * nr_to_write and writeback_index
2413 */
2414 no_nrwrite_index_update = wbc->no_nrwrite_index_update;
2415 wbc->no_nrwrite_index_update = 1;
2416 pages_skipped = wbc->pages_skipped;
2417
2418 while (!ret && wbc->nr_to_write > 0) {
2405 2419
2406 /* 2420 /*
2407 * we insert one extent at a time. So we need 2421 * we insert one extent at a time. So we need
@@ -2422,48 +2436,53 @@ restart_loop:
2422 dump_stack(); 2436 dump_stack();
2423 goto out_writepages; 2437 goto out_writepages;
2424 } 2438 }
2425 to_write -= wbc->nr_to_write;
2426
2427 mpd.get_block = ext4_da_get_block_write; 2439 mpd.get_block = ext4_da_get_block_write;
2428 ret = mpage_da_writepages(mapping, wbc, &mpd); 2440 ret = mpage_da_writepages(mapping, wbc, &mpd);
2429 2441
2430 ext4_journal_stop(handle); 2442 ext4_journal_stop(handle);
2431 2443
2432 if (mpd.retval == -ENOSPC) 2444 if (mpd.retval == -ENOSPC) {
2445 /* commit the transaction which would
2446 * free blocks released in the transaction
2447 * and try again
2448 */
2433 jbd2_journal_force_commit_nested(sbi->s_journal); 2449 jbd2_journal_force_commit_nested(sbi->s_journal);
2434 2450 wbc->pages_skipped = pages_skipped;
2435 /* reset the retry count */ 2451 ret = 0;
2436 if (ret == MPAGE_DA_EXTENT_TAIL) { 2452 } else if (ret == MPAGE_DA_EXTENT_TAIL) {
2437 /* 2453 /*
2438 * got one extent now try with 2454 * got one extent now try with
2439 * rest of the pages 2455 * rest of the pages
2440 */ 2456 */
2441 to_write += wbc->nr_to_write; 2457 pages_written += mpd.pages_written;
2458 wbc->pages_skipped = pages_skipped;
2442 ret = 0; 2459 ret = 0;
2443 } else if (wbc->nr_to_write) { 2460 } else if (wbc->nr_to_write)
2444 /* 2461 /*
2445 * There is no more writeout needed 2462 * There is no more writeout needed
2446 * or we requested for a noblocking writeout 2463 * or we requested for a noblocking writeout
2447 * and we found the device congested 2464 * and we found the device congested
2448 */ 2465 */
2449 to_write += wbc->nr_to_write;
2450 break; 2466 break;
2451 }
2452 wbc->nr_to_write = to_write;
2453 }
2454
2455 if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
2456 /* We skipped pages in this loop */
2457 wbc->range_start = range_start;
2458 wbc->nr_to_write = to_write +
2459 wbc->pages_skipped - pages_skipped;
2460 wbc->pages_skipped = pages_skipped;
2461 goto restart_loop;
2462 } 2467 }
2468 if (pages_skipped != wbc->pages_skipped)
2469 printk(KERN_EMERG "This should not happen leaving %s "
2470 "with nr_to_write = %ld ret = %d\n",
2471 __func__, wbc->nr_to_write, ret);
2472
2473 /* Update index */
2474 index += pages_written;
2475 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2476 /*
2477 * set the writeback_index so that range_cyclic
2478 * mode will write it back later
2479 */
2480 mapping->writeback_index = index;
2463 2481
2464out_writepages: 2482out_writepages:
2465 wbc->nr_to_write = to_write - nr_to_writebump; 2483 if (!no_nrwrite_index_update)
2466 wbc->range_start = range_start; 2484 wbc->no_nrwrite_index_update = 0;
2485 wbc->nr_to_write -= nr_to_writebump;
2467 return ret; 2486 return ret;
2468} 2487}
2469 2488
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
4175 struct inode *inode = &(ei->vfs_inode); 4194 struct inode *inode = &(ei->vfs_inode);
4176 u64 i_blocks = inode->i_blocks; 4195 u64 i_blocks = inode->i_blocks;
4177 struct super_block *sb = inode->i_sb; 4196 struct super_block *sb = inode->i_sb;
4178 int err = 0;
4179 4197
4180 if (i_blocks <= ~0U) { 4198 if (i_blocks <= ~0U) {
4181 /* 4199 /*
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
4185 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4203 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4186 raw_inode->i_blocks_high = 0; 4204 raw_inode->i_blocks_high = 0;
4187 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 4205 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
4188 } else if (i_blocks <= 0xffffffffffffULL) { 4206 return 0;
4207 }
4208 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
4209 return -EFBIG;
4210
4211 if (i_blocks <= 0xffffffffffffULL) {
4189 /* 4212 /*
4190 * i_blocks can be represented in a 48 bit variable 4213 * i_blocks can be represented in a 48 bit variable
4191 * as multiple of 512 bytes 4214 * as multiple of 512 bytes
4192 */ 4215 */
4193 err = ext4_update_rocompat_feature(handle, sb,
4194 EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
4195 if (err)
4196 goto err_out;
4197 /* i_block is stored in the split 48 bit fields */
4198 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4216 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4199 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 4217 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4200 ei->i_flags &= ~EXT4_HUGE_FILE_FL; 4218 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
4201 } else { 4219 } else {
4202 /*
4203 * i_blocks should be represented in a 48 bit variable
4204 * as multiple of file system block size
4205 */
4206 err = ext4_update_rocompat_feature(handle, sb,
4207 EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
4208 if (err)
4209 goto err_out;
4210 ei->i_flags |= EXT4_HUGE_FILE_FL; 4220 ei->i_flags |= EXT4_HUGE_FILE_FL;
4211 /* i_block is stored in file system block size */ 4221 /* i_block is stored in file system block size */
4212 i_blocks = i_blocks >> (inode->i_blkbits - 9); 4222 i_blocks = i_blocks >> (inode->i_blkbits - 9);
4213 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); 4223 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
4214 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); 4224 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
4215 } 4225 }
4216err_out: 4226 return 0;
4217 return err;
4218} 4227}
4219 4228
4220/* 4229/*