author     Linus Torvalds <torvalds@linux-foundation.org>   2008-10-17 18:08:11 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2008-10-17 18:08:11 -0400
commit     58617d5e59663d2edea03bd03cb74279827611bb (patch)
tree       1b472f0ab43ae08fef5dea30b95592a005385686
parent     26e9a397774a0e94efbb8a0bf4a952c28d808cab (diff)
parent     f287a1a56130be5fdb96a4a62d1290bd064f308e (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Remove automatic enabling of the HUGE_FILE feature flag
  ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback
  ext4: Update Documentation/filesystems/ext4.txt
  ext4: Remove unused mount options: nomballoc, mballoc, nocheck
  ext4: Remove compile warnings when building w/o CONFIG_PROC_FS
  ext4: Add missing newlines to printk messages
  ext4: Fix file fragmentation during large file write.
  vfs: Add no_nrwrite_index_update writeback control flag
  vfs: Remove the range_cont writeback mode.
  ext4: Use tag dirty lookup during mpage_da_submit_io
  ext4: let the block device know when unused blocks can be discarded
  ext4: Don't reuse released data blocks until transaction commits
  ext4: Use an rbtree for tracking blocks freed during transaction.
  ext4: Do mballoc init before doing filesystem recovery
  ext4: Free ext4_prealloc_space using kmem_cache_free
  ext4: Fix Kconfig typo for ext4dev
  ext4: Remove an old reference to ext4dev in Makefile comment
-rw-r--r--  Documentation/filesystems/ext4.txt  |  32
-rw-r--r--  fs/Kconfig                          |   2
-rw-r--r--  fs/Makefile                         |   2
-rw-r--r--  fs/ext4/balloc.c                    |  12
-rw-r--r--  fs/ext4/ext4.h                      |   1
-rw-r--r--  fs/ext4/ext4_sb.h                   |   3
-rw-r--r--  fs/ext4/inode.c                     | 143
-rw-r--r--  fs/ext4/mballoc.c                   | 263
-rw-r--r--  fs/ext4/mballoc.h                   |  31
-rw-r--r--  fs/ext4/super.c                     | 132
-rw-r--r--  fs/jbd2/commit.c                    |   3
-rw-r--r--  fs/jbd2/transaction.c               |   1
-rw-r--r--  include/linux/jbd2.h                |   9
-rw-r--r--  include/linux/writeback.h           |  10
-rw-r--r--  mm/page-writeback.c                 |  12
15 files changed, 320 insertions, 336 deletions
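
As background for the diff below: the central mechanism this merge introduces is a per-transaction private list of freed extents plus a jbd2 commit callback (t_private_list, j_commit_callback and release_blocks_on_commit in the patch). Freed blocks are queued on the running transaction and only released for reuse once jbd2 reports that the commit has finished. The stand-alone C program below is only a simplified model of that callback pattern, not kernel code; every name in it (journal, transaction, free_data, free_blocks, commit) is a made-up stand-in, and locking, rb-tree merging and error handling are deliberately omitted.

#include <stdio.h>
#include <stdlib.h>

/* One freed extent queued on a transaction (cf. struct ext4_free_data). */
struct free_data {
        unsigned long start_blk;
        unsigned long count;
        struct free_data *next;
};

struct transaction {
        struct free_data *private_list;         /* cf. t_private_list */
};

struct journal {
        /* cf. j_commit_callback: invoked once the commit has finished */
        void (*commit_callback)(struct journal *, struct transaction *);
};

/* cf. release_blocks_on_commit(): only now may the freed blocks be reused. */
static void release_blocks_on_commit(struct journal *j, struct transaction *txn)
{
        struct free_data *e = txn->private_list, *next;

        (void)j;
        while (e) {
                next = e->next;
                printf("blocks %lu..%lu are now reusable\n",
                       e->start_blk, e->start_blk + e->count - 1);
                free(e);
                e = next;
        }
        txn->private_list = NULL;
}

static void free_blocks(struct transaction *txn, unsigned long start,
                        unsigned long count)
{
        /* Defer the release: queue the extent on the running transaction. */
        struct free_data *e = malloc(sizeof(*e));

        e->start_blk = start;
        e->count = count;
        e->next = txn->private_list;
        txn->private_list = e;
}

static void commit(struct journal *j, struct transaction *txn)
{
        /* ... journal blocks written and flushed ... then notify the fs. */
        if (j->commit_callback)
                j->commit_callback(j, txn);
}

int main(void)
{
        struct journal j = { .commit_callback = release_blocks_on_commit };
        struct transaction txn = { .private_list = NULL };

        free_blocks(&txn, 1000, 8);     /* stays unavailable until commit */
        free_blocks(&txn, 2048, 16);
        commit(&j, &txn);               /* callback makes the blocks reusable */
        return 0;
}
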
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index eb154ef36c2a..174eaff7ded9 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -2,19 +2,24 @@
 Ext4 Filesystem
 ===============
 
-This is a development version of the ext4 filesystem, an advanced level
-of the ext3 filesystem which incorporates scalability and reliability
-enhancements for supporting large filesystems (64 bit) in keeping with
-increasing disk capacities and state-of-the-art feature requirements.
+Ext4 is an an advanced level of the ext3 filesystem which incorporates
+scalability and reliability enhancements for supporting large filesystems
+(64 bit) in keeping with increasing disk capacities and state-of-the-art
+feature requirements.
 
 Mailing list: linux-ext4@vger.kernel.org
+Web site:     http://ext4.wiki.kernel.org
 
 
 1. Quick usage instructions:
 ===========================
 
+Note: More extensive information for getting started with ext4 can be
+      found at the ext4 wiki site at the URL:
+      http://ext4.wiki.kernel.org/index.php/Ext4_Howto
+
   - Compile and install the latest version of e2fsprogs (as of this
-    writing version 1.41) from:
+    writing version 1.41.3) from:
 
     http://sourceforge.net/project/showfiles.php?group_id=2406
 
@@ -36,11 +41,9 @@ Mailing list: linux-ext4@vger.kernel.org
 
     # mke2fs -t ext4 /dev/hda1
 
-  Or configure an existing ext3 filesystem to support extents and set
-  the test_fs flag to indicate that it's ok for an in-development
-  filesystem to touch this filesystem:
+  Or to configure an existing ext3 filesystem to support extents:
 
-    # tune2fs -O extents -E test_fs /dev/hda1
+    # tune2fs -O extents /dev/hda1
 
   If the filesystem was created with 128 byte inodes, it can be
   converted to use 256 byte for greater efficiency via:
@@ -104,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap.
 The big performance win will come with mballoc, delalloc and flex_bg
 grouping of bitmaps and inode tables. Some test results available here:
 
- - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html
- - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html
+ - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html
 
 3. Options
 ==========
@@ -214,9 +217,6 @@ noreservation
 bsddf             (*)   Make 'df' act like BSD.
 minixdf                 Make 'df' act like Minix.
 
-check=none              Don't do extra checking of bitmaps on mount.
-nocheck
-
 debug                   Extra debugging information is sent to syslog.
 
 errors=remount-ro(*)    Remount the filesystem read-only on an error.
@@ -253,8 +253,6 @@ nobh (a) cache disk block mapping information
                         "nobh" option tries to avoid associating buffer
                         heads (supported only for "writeback" mode).
 
-mballoc           (*)   Use the multiple block allocator for block allocation
-nomballoc               disabled multiple block allocator for block allocation.
 stripe=n                Number of filesystem blocks that mballoc will try
                         to use for allocation size and alignment. For RAID5/6
                         systems this should be the number of data
diff --git a/fs/Kconfig b/fs/Kconfig
index 9e9d70c02a07..d0a1174fb516 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -160,7 +160,7 @@ config EXT4_FS
           filesystem initially.
 
           To compile this file system support as a module, choose M here. The
-          module will be called ext4dev.
+          module will be called ext4.
 
           If unsure, say N.
 
diff --git a/fs/Makefile b/fs/Makefile
index d0c69f57e5bf..2168c902d5ca 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM) += dlm/
 # Do not add any filesystems before this line
 obj-$(CONFIG_REISERFS_FS)      += reiserfs/
 obj-$(CONFIG_EXT3_FS)          += ext3/ # Before ext2 so root fs can be ext3
-obj-$(CONFIG_EXT4_FS)          += ext4/ # Before ext2 so root fs can be ext4dev
+obj-$(CONFIG_EXT4_FS)          += ext4/ # Before ext2 so root fs can be ext4
 obj-$(CONFIG_JBD)              += jbd/
 obj-$(CONFIG_JBD2)             += jbd2/
 obj-$(CONFIG_EXT2_FS)          += ext2/
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd2ece228827..b9821be709bd 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 
         /* this isn't the right place to decide whether block is metadata
          * inode.c/extents.c knows better, but for safety ... */
-        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
-                        ext4_should_journal_data(inode))
+        if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+                metadata = 1;
+
+        /* We need to make sure we don't reuse
+         * block released untill the transaction commit.
+         * writeback mode have weak data consistency so
+         * don't force data as metadata when freeing block
+         * for writeback mode.
+         */
+        if (metadata == 0 && !ext4_should_writeback_data(inode))
                 metadata = 1;
 
         sb = inode->i_sb;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6690a41cdd9f..4880cc3e6727 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,7 +511,6 @@ do { \
 /*
  * Mount flags
  */
-#define EXT4_MOUNT_CHECK                0x00001 /* Do mount-time checks */
 #define EXT4_MOUNT_OLDALLOC             0x00002 /* Don't use the new Orlov allocator */
 #define EXT4_MOUNT_GRPID                0x00004 /* Create files with directory's group */
 #define EXT4_MOUNT_DEBUG                0x00008 /* Some debugging messages */
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h
index 6a0b40d43264..445fde603df8 100644
--- a/fs/ext4/ext4_sb.h
+++ b/fs/ext4/ext4_sb.h
@@ -99,9 +99,6 @@ struct ext4_sb_info {
         struct inode *s_buddy_cache;
         long s_blocks_reserved;
         spinlock_t s_reserve_lock;
-        struct list_head s_active_transaction;
-        struct list_head s_closed_transaction;
-        struct list_head s_committed_transaction;
         spinlock_t s_md_lock;
         tid_t s_last_transaction;
         unsigned short *s_mb_offsets, *s_mb_maxs;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9b4ec9decfd1..8dbf6953845b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
         int ret = 0, err, nr_pages, i;
         unsigned long index, end;
         struct pagevec pvec;
+        long pages_skipped;
 
         BUG_ON(mpd->next_page <= mpd->first_page);
         pagevec_init(&pvec, 0);
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
         end = mpd->next_page - 1;
 
         while (index <= end) {
-                /* XXX: optimize tail */
-                nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
+                /*
+                 * We can use PAGECACHE_TAG_DIRTY lookup here because
+                 * even though we have cleared the dirty flag on the page
+                 * We still keep the page in the radix tree with tag
+                 * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io.
+                 * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback
+                 * which is called via the below writepage callback.
+                 */
+                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
+                                        PAGECACHE_TAG_DIRTY,
+                                        min(end - index,
+                                        (pgoff_t)PAGEVEC_SIZE-1) + 1);
                 if (nr_pages == 0)
                         break;
                 for (i = 0; i < nr_pages; i++) {
                         struct page *page = pvec.pages[i];
 
-                        index = page->index;
-                        if (index > end)
-                                break;
-                        index++;
-
+                        pages_skipped = mpd->wbc->pages_skipped;
                         err = mapping->a_ops->writepage(page, mpd->wbc);
-                        if (!err)
+                        if (!err && (pages_skipped == mpd->wbc->pages_skipped))
+                                /*
+                                 * have successfully written the page
+                                 * without skipping the same
+                                 */
                                 mpd->pages_written++;
                         /*
                          * In error case, we have to continue because
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping,
                                struct writeback_control *wbc,
                                struct mpage_da_data *mpd)
 {
-        long to_write;
         int ret;
 
         if (!mpd->get_block)
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping,
         mpd->pages_written = 0;
         mpd->retval = 0;
 
-        to_write = wbc->nr_to_write;
-
         ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd);
-
         /*
          * Handle last extent of pages
          */
         if (!mpd->io_done && mpd->next_page != mpd->first_page) {
                 if (mpage_da_map_blocks(mpd) == 0)
                         mpage_da_submit_io(mpd);
-        }
 
-        wbc->nr_to_write = to_write - mpd->pages_written;
+                mpd->io_done = 1;
+                ret = MPAGE_DA_EXTENT_TAIL;
+        }
+        wbc->nr_to_write -= mpd->pages_written;
         return ret;
 }
 
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 static int ext4_da_writepages(struct address_space *mapping,
                               struct writeback_control *wbc)
 {
+        pgoff_t index;
+        int range_whole = 0;
         handle_t *handle = NULL;
-        loff_t range_start = 0;
         struct mpage_da_data mpd;
         struct inode *inode = mapping->host;
+        int no_nrwrite_index_update;
+        long pages_written = 0, pages_skipped;
         int needed_blocks, ret = 0, nr_to_writebump = 0;
-        long to_write, pages_skipped = 0;
         struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
 
         /*
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping,
                 nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write;
                 wbc->nr_to_write = sbi->s_mb_stream_request;
         }
+        if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+                range_whole = 1;
 
-        if (!wbc->range_cyclic)
-                /*
-                 * If range_cyclic is not set force range_cont
-                 * and save the old writeback_index
-                 */
-                wbc->range_cont = 1;
-
-        range_start = wbc->range_start;
-        pages_skipped = wbc->pages_skipped;
+        if (wbc->range_cyclic)
+                index = mapping->writeback_index;
+        else
+                index = wbc->range_start >> PAGE_CACHE_SHIFT;
 
         mpd.wbc = wbc;
         mpd.inode = mapping->host;
 
-restart_loop:
-        to_write = wbc->nr_to_write;
-        while (!ret && to_write > 0) {
+        /*
+         * we don't want write_cache_pages to update
+         * nr_to_write and writeback_index
+         */
+        no_nrwrite_index_update = wbc->no_nrwrite_index_update;
+        wbc->no_nrwrite_index_update = 1;
+        pages_skipped = wbc->pages_skipped;
+
+        while (!ret && wbc->nr_to_write > 0) {
 
                 /*
                  * we insert one extent at a time. So we need
@@ -2422,48 +2436,53 @@ restart_loop:
                         dump_stack();
                         goto out_writepages;
                 }
-                to_write -= wbc->nr_to_write;
-
                 mpd.get_block = ext4_da_get_block_write;
                 ret = mpage_da_writepages(mapping, wbc, &mpd);
 
                 ext4_journal_stop(handle);
 
-                if (mpd.retval == -ENOSPC)
+                if (mpd.retval == -ENOSPC) {
+                        /* commit the transaction which would
+                         * free blocks released in the transaction
+                         * and try again
+                         */
                         jbd2_journal_force_commit_nested(sbi->s_journal);
-
-                /* reset the retry count */
-                if (ret == MPAGE_DA_EXTENT_TAIL) {
+                        wbc->pages_skipped = pages_skipped;
+                        ret = 0;
+                } else if (ret == MPAGE_DA_EXTENT_TAIL) {
                         /*
                          * got one extent now try with
                          * rest of the pages
                          */
-                        to_write += wbc->nr_to_write;
+                        pages_written += mpd.pages_written;
+                        wbc->pages_skipped = pages_skipped;
                         ret = 0;
-                } else if (wbc->nr_to_write) {
+                } else if (wbc->nr_to_write)
                         /*
                          * There is no more writeout needed
                          * or we requested for a noblocking writeout
                          * and we found the device congested
                          */
-                        to_write += wbc->nr_to_write;
                         break;
-                }
-                wbc->nr_to_write = to_write;
-        }
-
-        if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) {
-                /* We skipped pages in this loop */
-                wbc->range_start = range_start;
-                wbc->nr_to_write = to_write +
-                                wbc->pages_skipped - pages_skipped;
-                wbc->pages_skipped = pages_skipped;
-                goto restart_loop;
-        }
+        }
+        if (pages_skipped != wbc->pages_skipped)
+                printk(KERN_EMERG "This should not happen leaving %s "
+                                "with nr_to_write = %ld ret = %d\n",
+                                __func__, wbc->nr_to_write, ret);
+
+        /* Update index */
+        index += pages_written;
+        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+                /*
+                 * set the writeback_index so that range_cyclic
+                 * mode will write it back later
+                 */
+                mapping->writeback_index = index;
 
 out_writepages:
-        wbc->nr_to_write = to_write - nr_to_writebump;
-        wbc->range_start = range_start;
+        if (!no_nrwrite_index_update)
+                wbc->no_nrwrite_index_update = 0;
+        wbc->nr_to_write -= nr_to_writebump;
         return ret;
 }
 
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle,
         struct inode *inode = &(ei->vfs_inode);
         u64 i_blocks = inode->i_blocks;
         struct super_block *sb = inode->i_sb;
-        int err = 0;
 
         if (i_blocks <= ~0U) {
                 /*
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle,
                 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
                 raw_inode->i_blocks_high = 0;
                 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
-        } else if (i_blocks <= 0xffffffffffffULL) {
+                return 0;
+        }
+        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE))
+                return -EFBIG;
+
+        if (i_blocks <= 0xffffffffffffULL) {
                 /*
                  * i_blocks can be represented in a 48 bit variable
                  * as multiple of 512 bytes
                  */
-                err = ext4_update_rocompat_feature(handle, sb,
-                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-                if (err)
-                        goto err_out;
-                /* i_block is stored in the split 48 bit fields */
                 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
                 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
                 ei->i_flags &= ~EXT4_HUGE_FILE_FL;
         } else {
-                /*
-                 * i_blocks should be represented in a 48 bit variable
-                 * as multiple of file system block size
-                 */
-                err = ext4_update_rocompat_feature(handle, sb,
-                                EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
-                if (err)
-                        goto err_out;
                 ei->i_flags |= EXT4_HUGE_FILE_FL;
                 /* i_block is stored in file system block size */
                 i_blocks = i_blocks >> (inode->i_blkbits - 9);
                 raw_inode->i_blocks_lo = cpu_to_le32(i_blocks);
                 raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32);
         }
-err_out:
-        return err;
+        return 0;
 }
 
 /*
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b580714f0d85..dfe17a134052 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
         }
 
         INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+        meta_group_info[i]->bb_free_root.rb_node = NULL;;
 
 #ifdef DOUBLE_CHECK
         {
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         }
 
         spin_lock_init(&sbi->s_md_lock);
-        INIT_LIST_HEAD(&sbi->s_active_transaction);
-        INIT_LIST_HEAD(&sbi->s_closed_transaction);
-        INIT_LIST_HEAD(&sbi->s_committed_transaction);
         spin_lock_init(&sbi->s_bal_lock);
 
         sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN;
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
         ext4_mb_init_per_dev_proc(sb);
         ext4_mb_history_init(sb);
 
+        sbi->s_journal->j_commit_callback = release_blocks_on_commit;
+
         printk(KERN_INFO "EXT4-fs: mballoc enabled\n");
         return 0;
 }
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp)
                 pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list);
                 list_del(&pa->pa_group_list);
                 count++;
-                kfree(pa);
+                kmem_cache_free(ext4_pspace_cachep, pa);
         }
         if (count)
                 mb_debug("mballoc: %u PAs left\n", count);
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb)
         struct ext4_group_info *grinfo;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
 
-        /* release freed, non-committed blocks */
-        spin_lock(&sbi->s_md_lock);
-        list_splice_init(&sbi->s_closed_transaction,
-                        &sbi->s_committed_transaction);
-        list_splice_init(&sbi->s_active_transaction,
-                        &sbi->s_committed_transaction);
-        spin_unlock(&sbi->s_md_lock);
-        ext4_mb_free_committed_blocks(sb);
-
         if (sbi->s_group_info) {
                 for (i = 0; i < sbi->s_groups_count; i++) {
                         grinfo = ext4_get_group_info(sb, i);
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb)
         return 0;
 }
 
-static noinline_for_stack void
-ext4_mb_free_committed_blocks(struct super_block *sb)
+/*
+ * This function is called by the jbd2 layer once the commit has finished,
+ * so we know we can free the blocks that were released with that commit.
+ */
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
 {
-        struct ext4_sb_info *sbi = EXT4_SB(sb);
-        int err;
-        int i;
-        int count = 0;
-        int count2 = 0;
-        struct ext4_free_metadata *md;
+        struct super_block *sb = journal->j_private;
         struct ext4_buddy e4b;
+        struct ext4_group_info *db;
+        int err, count = 0, count2 = 0;
+        struct ext4_free_data *entry;
+        ext4_fsblk_t discard_block;
+        struct list_head *l, *ltmp;
 
-        if (list_empty(&sbi->s_committed_transaction))
-                return;
-
-        /* there is committed blocks to be freed yet */
-        do {
-                /* get next array of blocks */
-                md = NULL;
-                spin_lock(&sbi->s_md_lock);
-                if (!list_empty(&sbi->s_committed_transaction)) {
-                        md = list_entry(sbi->s_committed_transaction.next,
-                                        struct ext4_free_metadata, list);
-                        list_del(&md->list);
-                }
-                spin_unlock(&sbi->s_md_lock);
-
-                if (md == NULL)
-                        break;
+        list_for_each_safe(l, ltmp, &txn->t_private_list) {
+                entry = list_entry(l, struct ext4_free_data, list);
 
                 mb_debug("gonna free %u blocks in group %lu (0x%p):",
-                         md->num, md->group, md);
+                         entry->count, entry->group, entry);
 
-                err = ext4_mb_load_buddy(sb, md->group, &e4b);
+                err = ext4_mb_load_buddy(sb, entry->group, &e4b);
                 /* we expect to find existing buddy because it's pinned */
                 BUG_ON(err != 0);
 
+                db = e4b.bd_info;
                 /* there are blocks to put in buddy to make them really free */
-                count += md->num;
+                count += entry->count;
                 count2++;
-                ext4_lock_group(sb, md->group);
-                for (i = 0; i < md->num; i++) {
-                        mb_debug(" %u", md->blocks[i]);
-                        mb_free_blocks(NULL, &e4b, md->blocks[i], 1);
+                ext4_lock_group(sb, entry->group);
+                /* Take it out of per group rb tree */
+                rb_erase(&entry->node, &(db->bb_free_root));
+                mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count);
+
+                if (!db->bb_free_root.rb_node) {
+                        /* No more items in the per group rb tree
+                         * balance refcounts from ext4_mb_free_metadata()
+                         */
+                        page_cache_release(e4b.bd_buddy_page);
+                        page_cache_release(e4b.bd_bitmap_page);
                 }
-                mb_debug("\n");
-                ext4_unlock_group(sb, md->group);
-
-                /* balance refcounts from ext4_mb_free_metadata() */
-                page_cache_release(e4b.bd_buddy_page);
-                page_cache_release(e4b.bd_bitmap_page);
-
-                kfree(md);
+                ext4_unlock_group(sb, entry->group);
+                discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb)
+                        + entry->start_blk
+                        + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
+                trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id,
+                        (unsigned long long) discard_block, entry->count);
+                sb_issue_discard(sb, discard_block, entry->count);
+
+                kmem_cache_free(ext4_free_ext_cachep, entry);
                 ext4_mb_release_desc(&e4b);
-
-        } while (md);
+        }
 
         mb_debug("freed %u blocks in %u structures\n", count, count2);
 }
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb)
 
 static int ext4_mb_init_per_dev_proc(struct super_block *sb)
 {
+#ifdef CONFIG_PROC_FS
         mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
         struct proc_dir_entry *proc;
@@ -2735,10 +2723,14 @@ err_out:
         remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
         remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
         return -ENOMEM;
+#else
+        return 0;
+#endif
 }
 
 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
 {
+#ifdef CONFIG_PROC_FS
         struct ext4_sb_info *sbi = EXT4_SB(sb);
 
         if (sbi->s_proc == NULL)
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb)
         remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc);
         remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc);
         remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc);
-
+#endif
         return 0;
 }
 
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void)
                 kmem_cache_destroy(ext4_pspace_cachep);
                 return -ENOMEM;
         }
+
+        ext4_free_ext_cachep =
+                kmem_cache_create("ext4_free_block_extents",
+                                     sizeof(struct ext4_free_data),
+                                     0, SLAB_RECLAIM_ACCOUNT, NULL);
+        if (ext4_free_ext_cachep == NULL) {
+                kmem_cache_destroy(ext4_pspace_cachep);
+                kmem_cache_destroy(ext4_ac_cachep);
+                return -ENOMEM;
+        }
         return 0;
 }
 
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void)
         /* XXX: synchronize_rcu(); */
         kmem_cache_destroy(ext4_pspace_cachep);
         kmem_cache_destroy(ext4_ac_cachep);
+        kmem_cache_destroy(ext4_free_ext_cachep);
 }
 
 
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
                 goto out1;
         }
 
-        ext4_mb_poll_new_transaction(sb, handle);
-
         *errp = ext4_mb_initialize_context(ac, ar);
         if (*errp) {
                 ar->len = 0;
@@ -4384,35 +4385,20 @@ out1:
 
         return block;
 }
-static void ext4_mb_poll_new_transaction(struct super_block *sb,
-                                                handle_t *handle)
-{
-        struct ext4_sb_info *sbi = EXT4_SB(sb);
-
-        if (sbi->s_last_transaction == handle->h_transaction->t_tid)
-                return;
-
-        /* new transaction! time to close last one and free blocks for
-         * committed transaction. we know that only transaction can be
-         * active, so previos transaction can be being logged and we
-         * know that transaction before previous is known to be already
-         * logged. this means that now we may free blocks freed in all
-         * transactions before previous one. hope I'm clear enough ... */
 
-        spin_lock(&sbi->s_md_lock);
-        if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
-                mb_debug("new transaction %lu, old %lu\n",
-                                (unsigned long) handle->h_transaction->t_tid,
-                                (unsigned long) sbi->s_last_transaction);
-                list_splice_init(&sbi->s_closed_transaction,
-                                &sbi->s_committed_transaction);
-                list_splice_init(&sbi->s_active_transaction,
-                                &sbi->s_closed_transaction);
-                sbi->s_last_transaction = handle->h_transaction->t_tid;
-        }
-        spin_unlock(&sbi->s_md_lock);
-
-        ext4_mb_free_committed_blocks(sb);
-}
+/*
+ * We can merge two free data extents only if the physical blocks
+ * are contiguous, AND the extents were freed by the same transaction,
+ * AND the blocks are associated with the same group.
+ */
+static int can_merge(struct ext4_free_data *entry1,
+                        struct ext4_free_data *entry2)
+{
+        if ((entry1->t_tid == entry2->t_tid) &&
+            (entry1->group == entry2->group) &&
+            ((entry1->start_blk + entry1->count) == entry2->start_blk))
+                return 1;
+        return 0;
+}
 
 static noinline_for_stack int
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
         struct ext4_group_info *db = e4b->bd_info;
         struct super_block *sb = e4b->bd_sb;
         struct ext4_sb_info *sbi = EXT4_SB(sb);
-        struct ext4_free_metadata *md;
-        int i;
+        struct ext4_free_data *entry, *new_entry;
+        struct rb_node **n = &db->bb_free_root.rb_node, *node;
+        struct rb_node *parent = NULL, *new_node;
+
 
         BUG_ON(e4b->bd_bitmap_page == NULL);
         BUG_ON(e4b->bd_buddy_page == NULL);
 
+        new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS);
+        new_entry->start_blk = block;
+        new_entry->group = group;
+        new_entry->count = count;
+        new_entry->t_tid = handle->h_transaction->t_tid;
+        new_node = &new_entry->node;
+
         ext4_lock_group(sb, group);
-        for (i = 0; i < count; i++) {
-                md = db->bb_md_cur;
-                if (md && db->bb_tid != handle->h_transaction->t_tid) {
-                        db->bb_md_cur = NULL;
-                        md = NULL;
+        if (!*n) {
+                /* first free block exent. We need to
+                   protect buddy cache from being freed,
+                 * otherwise we'll refresh it from
+                 * on-disk bitmap and lose not-yet-available
+                 * blocks */
+                page_cache_get(e4b->bd_buddy_page);
+                page_cache_get(e4b->bd_bitmap_page);
+        }
+        while (*n) {
+                parent = *n;
+                entry = rb_entry(parent, struct ext4_free_data, node);
+                if (block < entry->start_blk)
+                        n = &(*n)->rb_left;
+                else if (block >= (entry->start_blk + entry->count))
+                        n = &(*n)->rb_right;
+                else {
+                        ext4_error(sb, __func__,
+                            "Double free of blocks %d (%d %d)\n",
+                            block, entry->start_blk, entry->count);
+                        return 0;
                 }
+        }
 
-                if (md == NULL) {
-                        ext4_unlock_group(sb, group);
-                        md = kmalloc(sizeof(*md), GFP_NOFS);
-                        if (md == NULL)
-                                return -ENOMEM;
-                        md->num = 0;
-                        md->group = group;
-
-                        ext4_lock_group(sb, group);
-                        if (db->bb_md_cur == NULL) {
-                                spin_lock(&sbi->s_md_lock);
-                                list_add(&md->list, &sbi->s_active_transaction);
-                                spin_unlock(&sbi->s_md_lock);
-                                /* protect buddy cache from being freed,
-                                 * otherwise we'll refresh it from
-                                 * on-disk bitmap and lose not-yet-available
-                                 * blocks */
-                                page_cache_get(e4b->bd_buddy_page);
-                                page_cache_get(e4b->bd_bitmap_page);
-                                db->bb_md_cur = md;
-                                db->bb_tid = handle->h_transaction->t_tid;
-                                mb_debug("new md 0x%p for group %lu\n",
-                                         md, md->group);
-                        } else {
-                                kfree(md);
-                                md = db->bb_md_cur;
-                        }
+        rb_link_node(new_node, parent, n);
+        rb_insert_color(new_node, &db->bb_free_root);
+
+        /* Now try to see the extent can be merged to left and right */
+        node = rb_prev(new_node);
+        if (node) {
+                entry = rb_entry(node, struct ext4_free_data, node);
+                if (can_merge(entry, new_entry)) {
+                        new_entry->start_blk = entry->start_blk;
+                        new_entry->count += entry->count;
+                        rb_erase(node, &(db->bb_free_root));
+                        spin_lock(&sbi->s_md_lock);
+                        list_del(&entry->list);
+                        spin_unlock(&sbi->s_md_lock);
+                        kmem_cache_free(ext4_free_ext_cachep, entry);
                 }
+        }
 
-                BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS);
-                md->blocks[md->num] = block + i;
-                md->num++;
-                if (md->num == EXT4_BB_MAX_BLOCKS) {
-                        /* no more space, put full container on a sb's list */
-                        db->bb_md_cur = NULL;
+        node = rb_next(new_node);
+        if (node) {
+                entry = rb_entry(node, struct ext4_free_data, node);
+                if (can_merge(new_entry, entry)) {
+                        new_entry->count += entry->count;
+                        rb_erase(node, &(db->bb_free_root));
+                        spin_lock(&sbi->s_md_lock);
+                        list_del(&entry->list);
+                        spin_unlock(&sbi->s_md_lock);
+                        kmem_cache_free(ext4_free_ext_cachep, entry);
                 }
         }
+        /* Add the extent to transaction's private list */
+        spin_lock(&sbi->s_md_lock);
+        list_add(&new_entry->list, &handle->h_transaction->t_private_list);
+        spin_unlock(&sbi->s_md_lock);
         ext4_unlock_group(sb, group);
         return 0;
 }
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
 
         *freed = 0;
 
-        ext4_mb_poll_new_transaction(sb, handle);
-
         sbi = EXT4_SB(sb);
         es = EXT4_SB(sb)->s_es;
         if (block < le32_to_cpu(es->s_first_data_block) ||
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index b3b4828f8b89..b5dff1fff1e5 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -18,6 +18,8 @@
 #include <linux/pagemap.h>
 #include <linux/seq_file.h>
 #include <linux/version.h>
+#include <linux/blkdev.h>
+#include <linux/marker.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "group.h"
@@ -98,23 +100,29 @@
 
 static struct kmem_cache *ext4_pspace_cachep;
 static struct kmem_cache *ext4_ac_cachep;
+static struct kmem_cache *ext4_free_ext_cachep;
 
-#ifdef EXT4_BB_MAX_BLOCKS
-#undef EXT4_BB_MAX_BLOCKS
-#endif
-#define EXT4_BB_MAX_BLOCKS      30
+struct ext4_free_data {
+        /* this links the free block information from group_info */
+        struct rb_node node;
 
-struct ext4_free_metadata {
-        ext4_group_t group;
-        unsigned short num;
-        ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS];
+        /* this links the free block information from ext4_sb_info */
         struct list_head list;
+
+        /* group which free block extent belongs */
+        ext4_group_t group;
+
+        /* free block extent */
+        ext4_grpblk_t start_blk;
+        ext4_grpblk_t count;
+
+        /* transaction which freed this extent */
+        tid_t t_tid;
 };
 
 struct ext4_group_info {
         unsigned long bb_state;
-        unsigned long bb_tid;
-        struct ext4_free_metadata *bb_md_cur;
+        struct rb_root bb_free_root;
         unsigned short bb_first_free;
         unsigned short bb_free;
         unsigned short bb_fragments;
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
 
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
                                         ext4_group_t group);
-static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
-static void ext4_mb_free_committed_blocks(struct super_block *);
 static void ext4_mb_return_to_preallocation(struct inode *inode,
                                         struct ext4_buddy *e4b, sector_t block,
                                         int count);
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *,
                         struct super_block *, struct ext4_prealloc_space *pa);
 static int ext4_mb_init_per_dev_proc(struct super_block *sb);
 static int ext4_mb_destroy_per_dev_proc(struct super_block *sb);
+static void release_blocks_on_commit(journal_t *journal, transaction_t *txn);
 
 
 static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dea8f13c2fd9..9b2b2bc4ec17 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb)
          */
 }
 
-int ext4_update_compat_feature(handle_t *handle,
-                                        struct super_block *sb, __u32 compat)
-{
-        int err = 0;
-        if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) {
-                err = ext4_journal_get_write_access(handle,
-                                EXT4_SB(sb)->s_sbh);
-                if (err)
-                        return err;
-                EXT4_SET_COMPAT_FEATURE(sb, compat);
-                sb->s_dirt = 1;
-                handle->h_sync = 1;
-                BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                "call ext4_journal_dirty_met adata");
-                err = ext4_journal_dirty_metadata(handle,
-                                EXT4_SB(sb)->s_sbh);
-        }
-        return err;
-}
-
-int ext4_update_rocompat_feature(handle_t *handle,
-                                        struct super_block *sb, __u32 rocompat)
-{
-        int err = 0;
-        if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) {
-                err = ext4_journal_get_write_access(handle,
-                                EXT4_SB(sb)->s_sbh);
-                if (err)
-                        return err;
-                EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat);
-                sb->s_dirt = 1;
-                handle->h_sync = 1;
-                BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                "call ext4_journal_dirty_met adata");
-                err = ext4_journal_dirty_metadata(handle,
-                                EXT4_SB(sb)->s_sbh);
-        }
-        return err;
-}
-
-int ext4_update_incompat_feature(handle_t *handle,
-                                        struct super_block *sb, __u32 incompat)
-{
-        int err = 0;
-        if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) {
-                err = ext4_journal_get_write_access(handle,
-                                EXT4_SB(sb)->s_sbh);
-                if (err)
-                        return err;
-                EXT4_SET_INCOMPAT_FEATURE(sb, incompat);
-                sb->s_dirt = 1;
-                handle->h_sync = 1;
-                BUFFER_TRACE(EXT4_SB(sb)->s_sbh,
-                                "call ext4_journal_dirty_met adata");
-                err = ext4_journal_dirty_metadata(handle,
-                                EXT4_SB(sb)->s_sbh);
-        }
-        return err;
-}
-
 /*
  * Open the external journal device
  */
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = {
 enum {
         Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
         Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro,
-        Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+        Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
         Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
         Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
         Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
@@ -915,7 +855,7 @@ enum {
         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
         Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version,
-        Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc,
+        Opt_stripe, Opt_delalloc, Opt_nodelalloc,
         Opt_inode_readahead_blks
 };
 
@@ -933,8 +873,6 @@ static const match_table_t tokens = {
         {Opt_err_panic, "errors=panic"},
         {Opt_err_ro, "errors=remount-ro"},
         {Opt_nouid32, "nouid32"},
-        {Opt_nocheck, "nocheck"},
-        {Opt_nocheck, "check=none"},
         {Opt_debug, "debug"},
         {Opt_oldalloc, "oldalloc"},
         {Opt_orlov, "orlov"},
@@ -973,8 +911,6 @@ static const match_table_t tokens = {
         {Opt_extents, "extents"},
         {Opt_noextents, "noextents"},
         {Opt_i_version, "i_version"},
-        {Opt_mballoc, "mballoc"},
-        {Opt_nomballoc, "nomballoc"},
         {Opt_stripe, "stripe=%u"},
         {Opt_resize, "resize"},
         {Opt_delalloc, "delalloc"},
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb,
                 case Opt_nouid32:
                         set_opt(sbi->s_mount_opt, NO_UID32);
                         break;
-                case Opt_nocheck:
-                        clear_opt(sbi->s_mount_opt, CHECK);
-                        break;
                 case Opt_debug:
                         set_opt(sbi->s_mount_opt, DEBUG);
                         break;
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb)
                 if (block_bitmap < first_block || block_bitmap > last_block) {
                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                                "Block bitmap for group %lu not in group "
-                               "(block %llu)!", i, block_bitmap);
+                               "(block %llu)!\n", i, block_bitmap);
                         return 0;
                 }
                 inode_bitmap = ext4_inode_bitmap(sb, gdp);
                 if (inode_bitmap < first_block || inode_bitmap > last_block) {
                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                                "Inode bitmap for group %lu not in group "
-                               "(block %llu)!", i, inode_bitmap);
+                               "(block %llu)!\n", i, inode_bitmap);
                         return 0;
                 }
                 inode_table = ext4_inode_table(sb, gdp);
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb)
                     inode_table + sbi->s_itb_per_group - 1 > last_block) {
                         printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
                                "Inode table for group %lu not in group "
-                               "(block %llu)!", i, inode_table);
+                               "(block %llu)!\n", i, inode_table);
                         return 0;
                 }
                 spin_lock(sb_bgl_lock(sbi, i));
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb,
  *
  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
  */
-static loff_t ext4_max_size(int blkbits)
+static loff_t ext4_max_size(int blkbits, int has_huge_files)
 {
         loff_t res;
         loff_t upper_limit = MAX_LFS_FILESIZE;
 
         /* small i_blocks in vfs inode? */
-        if (sizeof(blkcnt_t) < sizeof(u64)) {
+        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
                 /*
                  * CONFIG_LSF is not enabled implies the inode
                  * i_block represent total blocks in 512 bytes
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits)
  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
  * We need to be 1 filesystem block less than the 2^48 sector limit.
  */
-static loff_t ext4_max_bitmap_size(int bits)
+static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
 {
         loff_t res = EXT4_NDIR_BLOCKS;
         int meta_blocks;
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits)
          * total number of 512 bytes blocks of the file
          */
 
-        if (sizeof(blkcnt_t) < sizeof(u64)) {
+        if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
                 /*
-                 * CONFIG_LSF is not enabled implies the inode
-                 * i_block represent total blocks in 512 bytes
-                 * 32 == size of vfs inode i_blocks * 8
+                 * !has_huge_files or CONFIG_LSF is not enabled
+                 * implies the inode i_block represent total blocks in
+                 * 512 bytes 32 == size of vfs inode i_blocks * 8
                  */
                 upper_limit = (1LL << 32) - 1;
 
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
         int blocksize;
         int db_count;
         int i;
-        int needs_recovery;
+        int needs_recovery, has_huge_files;
         __le32 features;
         __u64 blocks_count;
         int err;
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        sb->s_id, le32_to_cpu(features));
                 goto failed_mount;
         }
-        if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) {
+        has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
+                                    EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
+        if (has_huge_files) {
                 /*
                  * Large file size enabled file system can only be
                  * mount if kernel is build with CONFIG_LSF
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                 }
         }
 
-        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits);
-        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits);
+        sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
+                                                       has_huge_files);
+        sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
 
         if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
                 sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                        "available.\n");
         }
 
+        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+                printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
+                                "requested data journaling mode\n");
+                clear_opt(sbi->s_mount_opt, DELALLOC);
+        } else if (test_opt(sb, DELALLOC))
+                printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
+
+        ext4_ext_init(sb);
+        err = ext4_mb_init(sb, needs_recovery);
+        if (err) {
+                printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
+                       err);
+                goto failed_mount4;
+        }
+
         /*
          * akpm: core read_super() calls in here with the superblock locked.
          * That deadlocks, because orphan cleanup needs to lock the superblock
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered":
                "writeback");
 
-        if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
-                printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
-                                "requested data journaling mode\n");
-                clear_opt(sbi->s_mount_opt, DELALLOC);
-        } else if (test_opt(sb, DELALLOC))
-                printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
-
-        ext4_ext_init(sb);
-        err = ext4_mb_init(sb, needs_recovery);
-        if (err) {
-                printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
-                       err);
-                goto failed_mount4;
-        }
-
         lock_kernel();
         return 0;
 
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 0abe02c4242a..8b119e16aa36 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -995,6 +995,9 @@ restart_loop:
         }
         spin_unlock(&journal->j_list_lock);
 
+        if (journal->j_commit_callback)
+                journal->j_commit_callback(journal, commit_transaction);
+
         trace_mark(jbd2_end_commit, "dev %s transaction %d head %d",
                    journal->j_devname, commit_transaction->t_tid,
                    journal->j_tail_sequence);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e5d540588fa9..39b7805a599a 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
         transaction->t_expires = jiffies + journal->j_commit_interval;
         spin_lock_init(&transaction->t_handle_lock);
         INIT_LIST_HEAD(&transaction->t_inode_list);
+        INIT_LIST_HEAD(&transaction->t_private_list);
 
         /* Set up the commit timer for the new transaction. */
         journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 463d6f10b64f..c7d106ef22e2 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -641,6 +641,11 @@ struct transaction_s
          */
         int t_handle_count;
 
+        /*
+         * For use by the filesystem to store fs-specific data
+         * structures associated with the transaction
+         */
+        struct list_head t_private_list;
 };
 
 struct transaction_run_stats_s {
@@ -935,6 +940,10 @@ struct journal_s
 
         pid_t                   j_last_sync_writer;
 
+        /* This function is called when a transaction is closed */
+        void                    (*j_commit_callback)(journal_t *,
+                                                     transaction_t *);
+
         /*
          * Journal statistics
          */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 12b15c561a1f..e585657e9831 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -63,7 +63,15 @@ struct writeback_control {
         unsigned for_writepages:1;      /* This is a writepages() call */
         unsigned range_cyclic:1;        /* range_start is cyclic */
         unsigned more_io:1;             /* more io to be dispatched */
-        unsigned range_cont:1;
+        /*
+         * write_cache_pages() won't update wbc->nr_to_write and
+         * mapping->writeback_index if no_nrwrite_index_update
+         * is set.  write_cache_pages() may write more than we
+         * requested and we want to make sure nr_to_write and
+         * writeback_index are updated in a consistent manner
+         * so we use a single control to update them
+         */
+        unsigned no_nrwrite_index_update:1;
 };
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c130a137c129..b40f6d5f8fe9 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -876,6 +876,7 @@ int write_cache_pages(struct address_space *mapping,
         pgoff_t end;            /* Inclusive */
         int scanned = 0;
         int range_whole = 0;
+        long nr_to_write = wbc->nr_to_write;
 
         if (wbc->nonblocking && bdi_write_congested(bdi)) {
                 wbc->encountered_congestion = 1;
@@ -939,7 +940,7 @@ retry:
                                 unlock_page(page);
                                 ret = 0;
                         }
-                        if (ret || (--(wbc->nr_to_write) <= 0))
+                        if (ret || (--nr_to_write <= 0))
                                 done = 1;
                         if (wbc->nonblocking && bdi_write_congested(bdi)) {
                                 wbc->encountered_congestion = 1;
@@ -958,11 +959,12 @@ retry:
                 index = 0;
                 goto retry;
         }
-        if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
-                mapping->writeback_index = index;
+        if (!wbc->no_nrwrite_index_update) {
+                if (wbc->range_cyclic || (range_whole && nr_to_write > 0))
+                        mapping->writeback_index = index;
+                wbc->nr_to_write = nr_to_write;
+        }
 
-        if (wbc->range_cont)
-                wbc->range_start = index << PAGE_CACHE_SHIFT;
         return ret;
 }
 EXPORT_SYMBOL(write_cache_pages);