diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-17 18:08:11 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-17 18:08:11 -0400 |
commit | 58617d5e59663d2edea03bd03cb74279827611bb (patch) | |
tree | 1b472f0ab43ae08fef5dea30b95592a005385686 | |
parent | 26e9a397774a0e94efbb8a0bf4a952c28d808cab (diff) | |
parent | f287a1a56130be5fdb96a4a62d1290bd064f308e (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
ext4: Remove automatic enabling of the HUGE_FILE feature flag
ext4: Replace hackish ext4_mb_poll_new_transaction with commit callback
ext4: Update Documentation/filesystems/ext4.txt
ext4: Remove unused mount options: nomballoc, mballoc, nocheck
ext4: Remove compile warnings when building w/o CONFIG_PROC_FS
ext4: Add missing newlines to printk messages
ext4: Fix file fragmentation during large file write.
vfs: Add no_nrwrite_index_update writeback control flag
vfs: Remove the range_cont writeback mode.
ext4: Use tag dirty lookup during mpage_da_submit_io
ext4: let the block device know when unused blocks can be discarded
ext4: Don't reuse released data blocks until transaction commits
ext4: Use an rbtree for tracking blocks freed during transaction.
ext4: Do mballoc init before doing filesystem recovery
ext4: Free ext4_prealloc_space using kmem_cache_free
ext4: Fix Kconfig typo for ext4dev
ext4: Remove an old reference to ext4dev in Makefile comment
-rw-r--r-- | Documentation/filesystems/ext4.txt | 32 | ||||
-rw-r--r-- | fs/Kconfig | 2 | ||||
-rw-r--r-- | fs/Makefile | 2 | ||||
-rw-r--r-- | fs/ext4/balloc.c | 12 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 1 | ||||
-rw-r--r-- | fs/ext4/ext4_sb.h | 3 | ||||
-rw-r--r-- | fs/ext4/inode.c | 143 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 263 | ||||
-rw-r--r-- | fs/ext4/mballoc.h | 31 | ||||
-rw-r--r-- | fs/ext4/super.c | 132 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 3 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 1 | ||||
-rw-r--r-- | include/linux/jbd2.h | 9 | ||||
-rw-r--r-- | include/linux/writeback.h | 10 | ||||
-rw-r--r-- | mm/page-writeback.c | 12 |
15 files changed, 320 insertions, 336 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index eb154ef36c2a..174eaff7ded9 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -2,19 +2,24 @@ | |||
2 | Ext4 Filesystem | 2 | Ext4 Filesystem |
3 | =============== | 3 | =============== |
4 | 4 | ||
5 | This is a development version of the ext4 filesystem, an advanced level | 5 | Ext4 is an advanced level of the ext3 filesystem which incorporates |
6 | of the ext3 filesystem which incorporates scalability and reliability | 6 | scalability and reliability enhancements for supporting large filesystems |
7 | enhancements for supporting large filesystems (64 bit) in keeping with | 7 | (64 bit) in keeping with increasing disk capacities and state-of-the-art |
8 | increasing disk capacities and state-of-the-art feature requirements. | 8 | feature requirements. |
9 | 9 | ||
10 | Mailing list: linux-ext4@vger.kernel.org | 10 | Mailing list: linux-ext4@vger.kernel.org |
11 | Web site: http://ext4.wiki.kernel.org | ||
11 | 12 | ||
12 | 13 | ||
13 | 1. Quick usage instructions: | 14 | 1. Quick usage instructions: |
14 | =========================== | 15 | =========================== |
15 | 16 | ||
17 | Note: More extensive information for getting started with ext4 can be | ||
18 | found at the ext4 wiki site at the URL: | ||
19 | http://ext4.wiki.kernel.org/index.php/Ext4_Howto | ||
20 | |||
16 | - Compile and install the latest version of e2fsprogs (as of this | 21 | - Compile and install the latest version of e2fsprogs (as of this |
17 | writing version 1.41) from: | 22 | writing version 1.41.3) from: |
18 | 23 | ||
19 | http://sourceforge.net/project/showfiles.php?group_id=2406 | 24 | http://sourceforge.net/project/showfiles.php?group_id=2406 |
20 | 25 | ||
@@ -36,11 +41,9 @@ Mailing list: linux-ext4@vger.kernel.org | |||
36 | 41 | ||
37 | # mke2fs -t ext4 /dev/hda1 | 42 | # mke2fs -t ext4 /dev/hda1 |
38 | 43 | ||
39 | Or configure an existing ext3 filesystem to support extents and set | 44 | Or to configure an existing ext3 filesystem to support extents: |
40 | the test_fs flag to indicate that it's ok for an in-development | ||
41 | filesystem to touch this filesystem: | ||
42 | 45 | ||
43 | # tune2fs -O extents -E test_fs /dev/hda1 | 46 | # tune2fs -O extents /dev/hda1 |
44 | 47 | ||
45 | If the filesystem was created with 128 byte inodes, it can be | 48 | If the filesystem was created with 128 byte inodes, it can be |
46 | converted to use 256 byte for greater efficiency via: | 49 | converted to use 256 byte for greater efficiency via: |
@@ -104,8 +107,8 @@ exist yet so I'm not sure they're in the near-term roadmap. | |||
104 | The big performance win will come with mballoc, delalloc and flex_bg | 107 | The big performance win will come with mballoc, delalloc and flex_bg |
105 | grouping of bitmaps and inode tables. Some test results available here: | 108 | grouping of bitmaps and inode tables. Some test results available here: |
106 | 109 | ||
107 | - http://www.bullopensource.org/ext4/20080530/ffsb-write-2.6.26-rc2.html | 110 | - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-write-2.6.27-rc1.html |
108 | - http://www.bullopensource.org/ext4/20080530/ffsb-readwrite-2.6.26-rc2.html | 111 | - http://www.bullopensource.org/ext4/20080818-ffsb/ffsb-readwrite-2.6.27-rc1.html |
109 | 112 | ||
110 | 3. Options | 113 | 3. Options |
111 | ========== | 114 | ========== |
@@ -214,9 +217,6 @@ noreservation | |||
214 | bsddf (*) Make 'df' act like BSD. | 217 | bsddf (*) Make 'df' act like BSD. |
215 | minixdf Make 'df' act like Minix. | 218 | minixdf Make 'df' act like Minix. |
216 | 219 | ||
217 | check=none Don't do extra checking of bitmaps on mount. | ||
218 | nocheck | ||
219 | |||
220 | debug Extra debugging information is sent to syslog. | 220 | debug Extra debugging information is sent to syslog. |
221 | 221 | ||
222 | errors=remount-ro(*) Remount the filesystem read-only on an error. | 222 | errors=remount-ro(*) Remount the filesystem read-only on an error. |
@@ -253,8 +253,6 @@ nobh (a) cache disk block mapping information | |||
253 | "nobh" option tries to avoid associating buffer | 253 | "nobh" option tries to avoid associating buffer |
254 | heads (supported only for "writeback" mode). | 254 | heads (supported only for "writeback" mode). |
255 | 255 | ||
256 | mballoc (*) Use the multiple block allocator for block allocation | ||
257 | nomballoc disabled multiple block allocator for block allocation. | ||
258 | stripe=n Number of filesystem blocks that mballoc will try | 256 | stripe=n Number of filesystem blocks that mballoc will try |
259 | to use for allocation size and alignment. For RAID5/6 | 257 | to use for allocation size and alignment. For RAID5/6 |
260 | systems this should be the number of data | 258 | systems this should be the number of data |
diff --git a/fs/Kconfig b/fs/Kconfig index 9e9d70c02a07..d0a1174fb516 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -160,7 +160,7 @@ config EXT4_FS | |||
160 | filesystem initially. | 160 | filesystem initially. |
161 | 161 | ||
162 | To compile this file system support as a module, choose M here. The | 162 | To compile this file system support as a module, choose M here. The |
163 | module will be called ext4dev. | 163 | module will be called ext4. |
164 | 164 | ||
165 | If unsure, say N. | 165 | If unsure, say N. |
166 | 166 | ||
diff --git a/fs/Makefile b/fs/Makefile index d0c69f57e5bf..2168c902d5ca 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -71,7 +71,7 @@ obj-$(CONFIG_DLM) += dlm/ | |||
71 | # Do not add any filesystems before this line | 71 | # Do not add any filesystems before this line |
72 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 72 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
73 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 73 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
74 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4dev | 74 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4 |
75 | obj-$(CONFIG_JBD) += jbd/ | 75 | obj-$(CONFIG_JBD) += jbd/ |
76 | obj-$(CONFIG_JBD2) += jbd2/ | 76 | obj-$(CONFIG_JBD2) += jbd2/ |
77 | obj-$(CONFIG_EXT2_FS) += ext2/ | 77 | obj-$(CONFIG_EXT2_FS) += ext2/ |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index bd2ece228827..b9821be709bd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -568,8 +568,16 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
568 | 568 | ||
569 | /* this isn't the right place to decide whether block is metadata | 569 | /* this isn't the right place to decide whether block is metadata |
570 | * inode.c/extents.c knows better, but for safety ... */ | 570 | * inode.c/extents.c knows better, but for safety ... */ |
571 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || | 571 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
572 | ext4_should_journal_data(inode)) | 572 | metadata = 1; |
573 | |||
574 | /* We need to make sure we don't reuse | ||
575 | * a released block until the transaction commits. | ||
576 | * Writeback mode has weak data consistency, so | ||
577 | * don't force data as metadata when freeing a block | ||
578 | * in writeback mode. | ||
579 | */ | ||
580 | if (metadata == 0 && !ext4_should_writeback_data(inode)) | ||
573 | metadata = 1; | 581 | metadata = 1; |
574 | 582 | ||
575 | sb = inode->i_sb; | 583 | sb = inode->i_sb; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6690a41cdd9f..4880cc3e6727 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -511,7 +511,6 @@ do { \ | |||
511 | /* | 511 | /* |
512 | * Mount flags | 512 | * Mount flags |
513 | */ | 513 | */ |
514 | #define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ | ||
515 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ | 514 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ |
516 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ | 515 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ |
517 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ | 516 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6a0b40d43264..445fde603df8 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
@@ -99,9 +99,6 @@ struct ext4_sb_info { | |||
99 | struct inode *s_buddy_cache; | 99 | struct inode *s_buddy_cache; |
100 | long s_blocks_reserved; | 100 | long s_blocks_reserved; |
101 | spinlock_t s_reserve_lock; | 101 | spinlock_t s_reserve_lock; |
102 | struct list_head s_active_transaction; | ||
103 | struct list_head s_closed_transaction; | ||
104 | struct list_head s_committed_transaction; | ||
105 | spinlock_t s_md_lock; | 102 | spinlock_t s_md_lock; |
106 | tid_t s_last_transaction; | 103 | tid_t s_last_transaction; |
107 | unsigned short *s_mb_offsets, *s_mb_maxs; | 104 | unsigned short *s_mb_offsets, *s_mb_maxs; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9b4ec9decfd1..8dbf6953845b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1648,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1648 | int ret = 0, err, nr_pages, i; | 1648 | int ret = 0, err, nr_pages, i; |
1649 | unsigned long index, end; | 1649 | unsigned long index, end; |
1650 | struct pagevec pvec; | 1650 | struct pagevec pvec; |
1651 | long pages_skipped; | ||
1651 | 1652 | ||
1652 | BUG_ON(mpd->next_page <= mpd->first_page); | 1653 | BUG_ON(mpd->next_page <= mpd->first_page); |
1653 | pagevec_init(&pvec, 0); | 1654 | pagevec_init(&pvec, 0); |
@@ -1655,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1655 | end = mpd->next_page - 1; | 1656 | end = mpd->next_page - 1; |
1656 | 1657 | ||
1657 | while (index <= end) { | 1658 | while (index <= end) { |
1658 | /* XXX: optimize tail */ | 1659 | /* |
1659 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 1660 | * We can use PAGECACHE_TAG_DIRTY lookup here because |
1661 | * even though we have cleared the dirty flag on the page, | ||
1662 | * we still keep the page in the radix tree with tag | ||
1663 | * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io. | ||
1664 | * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback | ||
1665 | * which is called via the below writepage callback. | ||
1666 | */ | ||
1667 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
1668 | PAGECACHE_TAG_DIRTY, | ||
1669 | min(end - index, | ||
1670 | (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
1660 | if (nr_pages == 0) | 1671 | if (nr_pages == 0) |
1661 | break; | 1672 | break; |
1662 | for (i = 0; i < nr_pages; i++) { | 1673 | for (i = 0; i < nr_pages; i++) { |
1663 | struct page *page = pvec.pages[i]; | 1674 | struct page *page = pvec.pages[i]; |
1664 | 1675 | ||
1665 | index = page->index; | 1676 | pages_skipped = mpd->wbc->pages_skipped; |
1666 | if (index > end) | ||
1667 | break; | ||
1668 | index++; | ||
1669 | |||
1670 | err = mapping->a_ops->writepage(page, mpd->wbc); | 1677 | err = mapping->a_ops->writepage(page, mpd->wbc); |
1671 | if (!err) | 1678 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) |
1679 | /* | ||
1680 | * have successfully written the page | ||
1681 | * without skipping the same | ||
1682 | */ | ||
1672 | mpd->pages_written++; | 1683 | mpd->pages_written++; |
1673 | /* | 1684 | /* |
1674 | * In error case, we have to continue because | 1685 | * In error case, we have to continue because |
@@ -2104,7 +2115,6 @@ static int mpage_da_writepages(struct address_space *mapping, | |||
2104 | struct writeback_control *wbc, | 2115 | struct writeback_control *wbc, |
2105 | struct mpage_da_data *mpd) | 2116 | struct mpage_da_data *mpd) |
2106 | { | 2117 | { |
2107 | long to_write; | ||
2108 | int ret; | 2118 | int ret; |
2109 | 2119 | ||
2110 | if (!mpd->get_block) | 2120 | if (!mpd->get_block) |
@@ -2119,19 +2129,18 @@ static int mpage_da_writepages(struct address_space *mapping, | |||
2119 | mpd->pages_written = 0; | 2129 | mpd->pages_written = 0; |
2120 | mpd->retval = 0; | 2130 | mpd->retval = 0; |
2121 | 2131 | ||
2122 | to_write = wbc->nr_to_write; | ||
2123 | |||
2124 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); | 2132 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); |
2125 | |||
2126 | /* | 2133 | /* |
2127 | * Handle last extent of pages | 2134 | * Handle last extent of pages |
2128 | */ | 2135 | */ |
2129 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { | 2136 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { |
2130 | if (mpage_da_map_blocks(mpd) == 0) | 2137 | if (mpage_da_map_blocks(mpd) == 0) |
2131 | mpage_da_submit_io(mpd); | 2138 | mpage_da_submit_io(mpd); |
2132 | } | ||
2133 | 2139 | ||
2134 | wbc->nr_to_write = to_write - mpd->pages_written; | 2140 | mpd->io_done = 1; |
2141 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2142 | } | ||
2143 | wbc->nr_to_write -= mpd->pages_written; | ||
2135 | return ret; | 2144 | return ret; |
2136 | } | 2145 | } |
2137 | 2146 | ||
@@ -2360,12 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
2360 | static int ext4_da_writepages(struct address_space *mapping, | 2369 | static int ext4_da_writepages(struct address_space *mapping, |
2361 | struct writeback_control *wbc) | 2370 | struct writeback_control *wbc) |
2362 | { | 2371 | { |
2372 | pgoff_t index; | ||
2373 | int range_whole = 0; | ||
2363 | handle_t *handle = NULL; | 2374 | handle_t *handle = NULL; |
2364 | loff_t range_start = 0; | ||
2365 | struct mpage_da_data mpd; | 2375 | struct mpage_da_data mpd; |
2366 | struct inode *inode = mapping->host; | 2376 | struct inode *inode = mapping->host; |
2377 | int no_nrwrite_index_update; | ||
2378 | long pages_written = 0, pages_skipped; | ||
2367 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2379 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
2368 | long to_write, pages_skipped = 0; | ||
2369 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2380 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
2370 | 2381 | ||
2371 | /* | 2382 | /* |
@@ -2385,23 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2385 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | 2396 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; |
2386 | wbc->nr_to_write = sbi->s_mb_stream_request; | 2397 | wbc->nr_to_write = sbi->s_mb_stream_request; |
2387 | } | 2398 | } |
2399 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
2400 | range_whole = 1; | ||
2388 | 2401 | ||
2389 | if (!wbc->range_cyclic) | 2402 | if (wbc->range_cyclic) |
2390 | /* | 2403 | index = mapping->writeback_index; |
2391 | * If range_cyclic is not set force range_cont | 2404 | else |
2392 | * and save the old writeback_index | 2405 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
2393 | */ | ||
2394 | wbc->range_cont = 1; | ||
2395 | |||
2396 | range_start = wbc->range_start; | ||
2397 | pages_skipped = wbc->pages_skipped; | ||
2398 | 2406 | ||
2399 | mpd.wbc = wbc; | 2407 | mpd.wbc = wbc; |
2400 | mpd.inode = mapping->host; | 2408 | mpd.inode = mapping->host; |
2401 | 2409 | ||
2402 | restart_loop: | 2410 | /* |
2403 | to_write = wbc->nr_to_write; | 2411 | * we don't want write_cache_pages to update |
2404 | while (!ret && to_write > 0) { | 2412 | * nr_to_write and writeback_index |
2413 | */ | ||
2414 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
2415 | wbc->no_nrwrite_index_update = 1; | ||
2416 | pages_skipped = wbc->pages_skipped; | ||
2417 | |||
2418 | while (!ret && wbc->nr_to_write > 0) { | ||
2405 | 2419 | ||
2406 | /* | 2420 | /* |
2407 | * we insert one extent at a time. So we need | 2421 | * we insert one extent at a time. So we need |
@@ -2422,48 +2436,53 @@ restart_loop: | |||
2422 | dump_stack(); | 2436 | dump_stack(); |
2423 | goto out_writepages; | 2437 | goto out_writepages; |
2424 | } | 2438 | } |
2425 | to_write -= wbc->nr_to_write; | ||
2426 | |||
2427 | mpd.get_block = ext4_da_get_block_write; | 2439 | mpd.get_block = ext4_da_get_block_write; |
2428 | ret = mpage_da_writepages(mapping, wbc, &mpd); | 2440 | ret = mpage_da_writepages(mapping, wbc, &mpd); |
2429 | 2441 | ||
2430 | ext4_journal_stop(handle); | 2442 | ext4_journal_stop(handle); |
2431 | 2443 | ||
2432 | if (mpd.retval == -ENOSPC) | 2444 | if (mpd.retval == -ENOSPC) { |
2445 | /* commit the transaction which would | ||
2446 | * free blocks released in the transaction | ||
2447 | * and try again | ||
2448 | */ | ||
2433 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2449 | jbd2_journal_force_commit_nested(sbi->s_journal); |
2434 | 2450 | wbc->pages_skipped = pages_skipped; | |
2435 | /* reset the retry count */ | 2451 | ret = 0; |
2436 | if (ret == MPAGE_DA_EXTENT_TAIL) { | 2452 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
2437 | /* | 2453 | /* |
2438 | * got one extent now try with | 2454 | * got one extent now try with |
2439 | * rest of the pages | 2455 | * rest of the pages |
2440 | */ | 2456 | */ |
2441 | to_write += wbc->nr_to_write; | 2457 | pages_written += mpd.pages_written; |
2458 | wbc->pages_skipped = pages_skipped; | ||
2442 | ret = 0; | 2459 | ret = 0; |
2443 | } else if (wbc->nr_to_write) { | 2460 | } else if (wbc->nr_to_write) |
2444 | /* | 2461 | /* |
2445 | * There is no more writeout needed | 2462 | * There is no more writeout needed |
2446 | * or we requested for a noblocking writeout | 2463 | * or we requested for a noblocking writeout |
2447 | * and we found the device congested | 2464 | * and we found the device congested |
2448 | */ | 2465 | */ |
2449 | to_write += wbc->nr_to_write; | ||
2450 | break; | 2466 | break; |
2451 | } | ||
2452 | wbc->nr_to_write = to_write; | ||
2453 | } | ||
2454 | |||
2455 | if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { | ||
2456 | /* We skipped pages in this loop */ | ||
2457 | wbc->range_start = range_start; | ||
2458 | wbc->nr_to_write = to_write + | ||
2459 | wbc->pages_skipped - pages_skipped; | ||
2460 | wbc->pages_skipped = pages_skipped; | ||
2461 | goto restart_loop; | ||
2462 | } | 2467 | } |
2468 | if (pages_skipped != wbc->pages_skipped) | ||
2469 | printk(KERN_EMERG "This should not happen leaving %s " | ||
2470 | "with nr_to_write = %ld ret = %d\n", | ||
2471 | __func__, wbc->nr_to_write, ret); | ||
2472 | |||
2473 | /* Update index */ | ||
2474 | index += pages_written; | ||
2475 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
2476 | /* | ||
2477 | * set the writeback_index so that range_cyclic | ||
2478 | * mode will write it back later | ||
2479 | */ | ||
2480 | mapping->writeback_index = index; | ||
2463 | 2481 | ||
2464 | out_writepages: | 2482 | out_writepages: |
2465 | wbc->nr_to_write = to_write - nr_to_writebump; | 2483 | if (!no_nrwrite_index_update) |
2466 | wbc->range_start = range_start; | 2484 | wbc->no_nrwrite_index_update = 0; |
2485 | wbc->nr_to_write -= nr_to_writebump; | ||
2467 | return ret; | 2486 | return ret; |
2468 | } | 2487 | } |
2469 | 2488 | ||
@@ -4175,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4175 | struct inode *inode = &(ei->vfs_inode); | 4194 | struct inode *inode = &(ei->vfs_inode); |
4176 | u64 i_blocks = inode->i_blocks; | 4195 | u64 i_blocks = inode->i_blocks; |
4177 | struct super_block *sb = inode->i_sb; | 4196 | struct super_block *sb = inode->i_sb; |
4178 | int err = 0; | ||
4179 | 4197 | ||
4180 | if (i_blocks <= ~0U) { | 4198 | if (i_blocks <= ~0U) { |
4181 | /* | 4199 | /* |
@@ -4185,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
4185 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4203 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4186 | raw_inode->i_blocks_high = 0; | 4204 | raw_inode->i_blocks_high = 0; |
4187 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4205 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
4188 | } else if (i_blocks <= 0xffffffffffffULL) { | 4206 | return 0; |
4207 | } | ||
4208 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) | ||
4209 | return -EFBIG; | ||
4210 | |||
4211 | if (i_blocks <= 0xffffffffffffULL) { | ||
4189 | /* | 4212 | /* |
4190 | * i_blocks can be represented in a 48 bit variable | 4213 | * i_blocks can be represented in a 48 bit variable |
4191 | * as multiple of 512 bytes | 4214 | * as multiple of 512 bytes |
4192 | */ | 4215 | */ |
4193 | err = ext4_update_rocompat_feature(handle, sb, | ||
4194 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
4195 | if (err) | ||
4196 | goto err_out; | ||
4197 | /* i_block is stored in the split 48 bit fields */ | ||
4198 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4216 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4199 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4217 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
4200 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4218 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
4201 | } else { | 4219 | } else { |
4202 | /* | ||
4203 | * i_blocks should be represented in a 48 bit variable | ||
4204 | * as multiple of file system block size | ||
4205 | */ | ||
4206 | err = ext4_update_rocompat_feature(handle, sb, | ||
4207 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
4208 | if (err) | ||
4209 | goto err_out; | ||
4210 | ei->i_flags |= EXT4_HUGE_FILE_FL; | 4220 | ei->i_flags |= EXT4_HUGE_FILE_FL; |
4211 | /* i_block is stored in file system block size */ | 4221 | /* i_block is stored in file system block size */ |
4212 | i_blocks = i_blocks >> (inode->i_blkbits - 9); | 4222 | i_blocks = i_blocks >> (inode->i_blkbits - 9); |
4213 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4223 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
4214 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4224 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
4215 | } | 4225 | } |
4216 | err_out: | 4226 | return 0; |
4217 | return err; | ||
4218 | } | 4227 | } |
4219 | 4228 | ||
4220 | /* | 4229 | /* |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index b580714f0d85..dfe17a134052 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2300,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
2300 | } | 2300 | } |
2301 | 2301 | ||
2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
2303 | meta_group_info[i]->bb_free_root.rb_node = NULL;; | ||
2303 | 2304 | ||
2304 | #ifdef DOUBLE_CHECK | 2305 | #ifdef DOUBLE_CHECK |
2305 | { | 2306 | { |
@@ -2522,9 +2523,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2522 | } | 2523 | } |
2523 | 2524 | ||
2524 | spin_lock_init(&sbi->s_md_lock); | 2525 | spin_lock_init(&sbi->s_md_lock); |
2525 | INIT_LIST_HEAD(&sbi->s_active_transaction); | ||
2526 | INIT_LIST_HEAD(&sbi->s_closed_transaction); | ||
2527 | INIT_LIST_HEAD(&sbi->s_committed_transaction); | ||
2528 | spin_lock_init(&sbi->s_bal_lock); | 2526 | spin_lock_init(&sbi->s_bal_lock); |
2529 | 2527 | ||
2530 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; | 2528 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; |
@@ -2553,6 +2551,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
2553 | ext4_mb_init_per_dev_proc(sb); | 2551 | ext4_mb_init_per_dev_proc(sb); |
2554 | ext4_mb_history_init(sb); | 2552 | ext4_mb_history_init(sb); |
2555 | 2553 | ||
2554 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; | ||
2555 | |||
2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); | 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); |
2557 | return 0; | 2557 | return 0; |
2558 | } | 2558 | } |
@@ -2568,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
2568 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); | 2568 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); |
2569 | list_del(&pa->pa_group_list); | 2569 | list_del(&pa->pa_group_list); |
2570 | count++; | 2570 | count++; |
2571 | kfree(pa); | 2571 | kmem_cache_free(ext4_pspace_cachep, pa); |
2572 | } | 2572 | } |
2573 | if (count) | 2573 | if (count) |
2574 | mb_debug("mballoc: %u PAs left\n", count); | 2574 | mb_debug("mballoc: %u PAs left\n", count); |
@@ -2582,15 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
2582 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2584 | 2584 | ||
2585 | /* release freed, non-committed blocks */ | ||
2586 | spin_lock(&sbi->s_md_lock); | ||
2587 | list_splice_init(&sbi->s_closed_transaction, | ||
2588 | &sbi->s_committed_transaction); | ||
2589 | list_splice_init(&sbi->s_active_transaction, | ||
2590 | &sbi->s_committed_transaction); | ||
2591 | spin_unlock(&sbi->s_md_lock); | ||
2592 | ext4_mb_free_committed_blocks(sb); | ||
2593 | |||
2594 | if (sbi->s_group_info) { | 2585 | if (sbi->s_group_info) { |
2595 | for (i = 0; i < sbi->s_groups_count; i++) { | 2586 | for (i = 0; i < sbi->s_groups_count; i++) { |
2596 | grinfo = ext4_get_group_info(sb, i); | 2587 | grinfo = ext4_get_group_info(sb, i); |
@@ -2644,61 +2635,57 @@ int ext4_mb_release(struct super_block *sb) | |||
2644 | return 0; | 2635 | return 0; |
2645 | } | 2636 | } |
2646 | 2637 | ||
2647 | static noinline_for_stack void | 2638 | /* |
2648 | ext4_mb_free_committed_blocks(struct super_block *sb) | 2639 | * This function is called by the jbd2 layer once the commit has finished, |
2640 | * so we know we can free the blocks that were released with that commit. | ||
2641 | */ | ||
2642 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | ||
2649 | { | 2643 | { |
2650 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2644 | struct super_block *sb = journal->j_private; |
2651 | int err; | ||
2652 | int i; | ||
2653 | int count = 0; | ||
2654 | int count2 = 0; | ||
2655 | struct ext4_free_metadata *md; | ||
2656 | struct ext4_buddy e4b; | 2645 | struct ext4_buddy e4b; |
2646 | struct ext4_group_info *db; | ||
2647 | int err, count = 0, count2 = 0; | ||
2648 | struct ext4_free_data *entry; | ||
2649 | ext4_fsblk_t discard_block; | ||
2650 | struct list_head *l, *ltmp; | ||
2657 | 2651 | ||
2658 | if (list_empty(&sbi->s_committed_transaction)) | 2652 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
2659 | return; | 2653 | entry = list_entry(l, struct ext4_free_data, list); |
2660 | |||
2661 | /* there is committed blocks to be freed yet */ | ||
2662 | do { | ||
2663 | /* get next array of blocks */ | ||
2664 | md = NULL; | ||
2665 | spin_lock(&sbi->s_md_lock); | ||
2666 | if (!list_empty(&sbi->s_committed_transaction)) { | ||
2667 | md = list_entry(sbi->s_committed_transaction.next, | ||
2668 | struct ext4_free_metadata, list); | ||
2669 | list_del(&md->list); | ||
2670 | } | ||
2671 | spin_unlock(&sbi->s_md_lock); | ||
2672 | |||
2673 | if (md == NULL) | ||
2674 | break; | ||
2675 | 2654 | ||
2676 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
2677 | md->num, md->group, md); | 2656 | entry->count, entry->group, entry); |
2678 | 2657 | ||
2679 | err = ext4_mb_load_buddy(sb, md->group, &e4b); | 2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
2680 | /* we expect to find existing buddy because it's pinned */ | 2659 | /* we expect to find existing buddy because it's pinned */ |
2681 | BUG_ON(err != 0); | 2660 | BUG_ON(err != 0); |
2682 | 2661 | ||
2662 | db = e4b.bd_info; | ||
2683 | /* there are blocks to put in buddy to make them really free */ | 2663 | /* there are blocks to put in buddy to make them really free */ |
2684 | count += md->num; | 2664 | count += entry->count; |
2685 | count2++; | 2665 | count2++; |
2686 | ext4_lock_group(sb, md->group); | 2666 | ext4_lock_group(sb, entry->group); |
2687 | for (i = 0; i < md->num; i++) { | 2667 | /* Take it out of per group rb tree */ |
2688 | mb_debug(" %u", md->blocks[i]); | 2668 | rb_erase(&entry->node, &(db->bb_free_root)); |
2689 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2669 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
2670 | |||
2671 | if (!db->bb_free_root.rb_node) { | ||
2672 | /* No more items in the per group rb tree | ||
2673 | * balance refcounts from ext4_mb_free_metadata() | ||
2674 | */ | ||
2675 | page_cache_release(e4b.bd_buddy_page); | ||
2676 | page_cache_release(e4b.bd_bitmap_page); | ||
2690 | } | 2677 | } |
2691 | mb_debug("\n"); | 2678 | ext4_unlock_group(sb, entry->group); |
2692 | ext4_unlock_group(sb, md->group); | 2679 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) |
2693 | 2680 | + entry->start_blk | |
2694 | /* balance refcounts from ext4_mb_free_metadata() */ | 2681 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
2695 | page_cache_release(e4b.bd_buddy_page); | 2682 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, |
2696 | page_cache_release(e4b.bd_bitmap_page); | 2683 | (unsigned long long) discard_block, entry->count); |
2697 | 2684 | sb_issue_discard(sb, discard_block, entry->count); | |
2698 | kfree(md); | 2685 | |
2686 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
2699 | ext4_mb_release_desc(&e4b); | 2687 | ext4_mb_release_desc(&e4b); |
2700 | 2688 | } | |
2701 | } while (md); | ||
2702 | 2689 | ||
2703 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2690 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
2704 | } | 2691 | } |
@@ -2712,6 +2699,7 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
2712 | 2699 | ||
2713 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2700 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
2714 | { | 2701 | { |
2702 | #ifdef CONFIG_PROC_FS | ||
2715 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2703 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
2716 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2704 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2717 | struct proc_dir_entry *proc; | 2705 | struct proc_dir_entry *proc; |
@@ -2735,10 +2723,14 @@ err_out: | |||
2735 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); | 2723 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2736 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); | 2724 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2737 | return -ENOMEM; | 2725 | return -ENOMEM; |
2726 | #else | ||
2727 | return 0; | ||
2728 | #endif | ||
2738 | } | 2729 | } |
2739 | 2730 | ||
2740 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2731 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
2741 | { | 2732 | { |
2733 | #ifdef CONFIG_PROC_FS | ||
2742 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2734 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2743 | 2735 | ||
2744 | if (sbi->s_proc == NULL) | 2736 | if (sbi->s_proc == NULL) |
@@ -2750,7 +2742,7 @@ static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | |||
2750 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); | 2742 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
2751 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); | 2743 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
2752 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); | 2744 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
2753 | 2745 | #endif | |
2754 | return 0; | 2746 | return 0; |
2755 | } | 2747 | } |
2756 | 2748 | ||
@@ -2771,6 +2763,16 @@ int __init init_ext4_mballoc(void) | |||
2771 | kmem_cache_destroy(ext4_pspace_cachep); | 2763 | kmem_cache_destroy(ext4_pspace_cachep); |
2772 | return -ENOMEM; | 2764 | return -ENOMEM; |
2773 | } | 2765 | } |
2766 | |||
2767 | ext4_free_ext_cachep = | ||
2768 | kmem_cache_create("ext4_free_block_extents", | ||
2769 | sizeof(struct ext4_free_data), | ||
2770 | 0, SLAB_RECLAIM_ACCOUNT, NULL); | ||
2771 | if (ext4_free_ext_cachep == NULL) { | ||
2772 | kmem_cache_destroy(ext4_pspace_cachep); | ||
2773 | kmem_cache_destroy(ext4_ac_cachep); | ||
2774 | return -ENOMEM; | ||
2775 | } | ||
2774 | return 0; | 2776 | return 0; |
2775 | } | 2777 | } |
2776 | 2778 | ||
@@ -2779,6 +2781,7 @@ void exit_ext4_mballoc(void) | |||
2779 | /* XXX: synchronize_rcu(); */ | 2781 | /* XXX: synchronize_rcu(); */ |
2780 | kmem_cache_destroy(ext4_pspace_cachep); | 2782 | kmem_cache_destroy(ext4_pspace_cachep); |
2781 | kmem_cache_destroy(ext4_ac_cachep); | 2783 | kmem_cache_destroy(ext4_ac_cachep); |
2784 | kmem_cache_destroy(ext4_free_ext_cachep); | ||
2782 | } | 2785 | } |
2783 | 2786 | ||
2784 | 2787 | ||
@@ -4324,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
4324 | goto out1; | 4327 | goto out1; |
4325 | } | 4328 | } |
4326 | 4329 | ||
4327 | ext4_mb_poll_new_transaction(sb, handle); | ||
4328 | |||
4329 | *errp = ext4_mb_initialize_context(ac, ar); | 4330 | *errp = ext4_mb_initialize_context(ac, ar); |
4330 | if (*errp) { | 4331 | if (*errp) { |
4331 | ar->len = 0; | 4332 | ar->len = 0; |
@@ -4384,35 +4385,20 @@ out1: | |||
4384 | 4385 | ||
4385 | return block; | 4386 | return block; |
4386 | } | 4387 | } |
4387 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | ||
4388 | handle_t *handle) | ||
4389 | { | ||
4390 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
4391 | |||
4392 | if (sbi->s_last_transaction == handle->h_transaction->t_tid) | ||
4393 | return; | ||
4394 | |||
4395 | /* new transaction! time to close last one and free blocks for | ||
4396 | * committed transaction. we know that only transaction can be | ||
4397 | * active, so previos transaction can be being logged and we | ||
4398 | * know that transaction before previous is known to be already | ||
4399 | * logged. this means that now we may free blocks freed in all | ||
4400 | * transactions before previous one. hope I'm clear enough ... */ | ||
4401 | 4388 | ||
4402 | spin_lock(&sbi->s_md_lock); | 4389 | /* |
4403 | if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | 4390 | * We can merge two free data extents only if the physical blocks |
4404 | mb_debug("new transaction %lu, old %lu\n", | 4391 | * are contiguous, AND the extents were freed by the same transaction, |
4405 | (unsigned long) handle->h_transaction->t_tid, | 4392 | * AND the blocks are associated with the same group. |
4406 | (unsigned long) sbi->s_last_transaction); | 4393 | */ |
4407 | list_splice_init(&sbi->s_closed_transaction, | 4394 | static int can_merge(struct ext4_free_data *entry1, |
4408 | &sbi->s_committed_transaction); | 4395 | struct ext4_free_data *entry2) |
4409 | list_splice_init(&sbi->s_active_transaction, | 4396 | { |
4410 | &sbi->s_closed_transaction); | 4397 | if ((entry1->t_tid == entry2->t_tid) && |
4411 | sbi->s_last_transaction = handle->h_transaction->t_tid; | 4398 | (entry1->group == entry2->group) && |
4412 | } | 4399 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) |
4413 | spin_unlock(&sbi->s_md_lock); | 4400 | return 1; |
4414 | 4401 | return 0; | |
4415 | ext4_mb_free_committed_blocks(sb); | ||
4416 | } | 4402 | } |
4417 | 4403 | ||
4418 | static noinline_for_stack int | 4404 | static noinline_for_stack int |
@@ -4422,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
4422 | struct ext4_group_info *db = e4b->bd_info; | 4408 | struct ext4_group_info *db = e4b->bd_info; |
4423 | struct super_block *sb = e4b->bd_sb; | 4409 | struct super_block *sb = e4b->bd_sb; |
4424 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4410 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
4425 | struct ext4_free_metadata *md; | 4411 | struct ext4_free_data *entry, *new_entry; |
4426 | int i; | 4412 | struct rb_node **n = &db->bb_free_root.rb_node, *node; |
4413 | struct rb_node *parent = NULL, *new_node; | ||
4414 | |||
4427 | 4415 | ||
4428 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4416 | BUG_ON(e4b->bd_bitmap_page == NULL); |
4429 | BUG_ON(e4b->bd_buddy_page == NULL); | 4417 | BUG_ON(e4b->bd_buddy_page == NULL); |
4430 | 4418 | ||
4419 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
4420 | new_entry->start_blk = block; | ||
4421 | new_entry->group = group; | ||
4422 | new_entry->count = count; | ||
4423 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
4424 | new_node = &new_entry->node; | ||
4425 | |||
4431 | ext4_lock_group(sb, group); | 4426 | ext4_lock_group(sb, group); |
4432 | for (i = 0; i < count; i++) { | 4427 | if (!*n) { |
4433 | md = db->bb_md_cur; | 4428 | /* first free block exent. We need to |
4434 | if (md && db->bb_tid != handle->h_transaction->t_tid) { | 4429 | protect buddy cache from being freed, |
4435 | db->bb_md_cur = NULL; | 4430 | * otherwise we'll refresh it from |
4436 | md = NULL; | 4431 | * on-disk bitmap and lose not-yet-available |
4432 | * blocks */ | ||
4433 | page_cache_get(e4b->bd_buddy_page); | ||
4434 | page_cache_get(e4b->bd_bitmap_page); | ||
4435 | } | ||
4436 | while (*n) { | ||
4437 | parent = *n; | ||
4438 | entry = rb_entry(parent, struct ext4_free_data, node); | ||
4439 | if (block < entry->start_blk) | ||
4440 | n = &(*n)->rb_left; | ||
4441 | else if (block >= (entry->start_blk + entry->count)) | ||
4442 | n = &(*n)->rb_right; | ||
4443 | else { | ||
4444 | ext4_error(sb, __func__, | ||
4445 | "Double free of blocks %d (%d %d)\n", | ||
4446 | block, entry->start_blk, entry->count); | ||
4447 | return 0; | ||
4437 | } | 4448 | } |
4449 | } | ||
4438 | 4450 | ||
4439 | if (md == NULL) { | 4451 | rb_link_node(new_node, parent, n); |
4440 | ext4_unlock_group(sb, group); | 4452 | rb_insert_color(new_node, &db->bb_free_root); |
4441 | md = kmalloc(sizeof(*md), GFP_NOFS); | 4453 | |
4442 | if (md == NULL) | 4454 | /* Now try to see the extent can be merged to left and right */ |
4443 | return -ENOMEM; | 4455 | node = rb_prev(new_node); |
4444 | md->num = 0; | 4456 | if (node) { |
4445 | md->group = group; | 4457 | entry = rb_entry(node, struct ext4_free_data, node); |
4446 | 4458 | if (can_merge(entry, new_entry)) { | |
4447 | ext4_lock_group(sb, group); | 4459 | new_entry->start_blk = entry->start_blk; |
4448 | if (db->bb_md_cur == NULL) { | 4460 | new_entry->count += entry->count; |
4449 | spin_lock(&sbi->s_md_lock); | 4461 | rb_erase(node, &(db->bb_free_root)); |
4450 | list_add(&md->list, &sbi->s_active_transaction); | 4462 | spin_lock(&sbi->s_md_lock); |
4451 | spin_unlock(&sbi->s_md_lock); | 4463 | list_del(&entry->list); |
4452 | /* protect buddy cache from being freed, | 4464 | spin_unlock(&sbi->s_md_lock); |
4453 | * otherwise we'll refresh it from | 4465 | kmem_cache_free(ext4_free_ext_cachep, entry); |
4454 | * on-disk bitmap and lose not-yet-available | ||
4455 | * blocks */ | ||
4456 | page_cache_get(e4b->bd_buddy_page); | ||
4457 | page_cache_get(e4b->bd_bitmap_page); | ||
4458 | db->bb_md_cur = md; | ||
4459 | db->bb_tid = handle->h_transaction->t_tid; | ||
4460 | mb_debug("new md 0x%p for group %lu\n", | ||
4461 | md, md->group); | ||
4462 | } else { | ||
4463 | kfree(md); | ||
4464 | md = db->bb_md_cur; | ||
4465 | } | ||
4466 | } | 4466 | } |
4467 | } | ||
4467 | 4468 | ||
4468 | BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | 4469 | node = rb_next(new_node); |
4469 | md->blocks[md->num] = block + i; | 4470 | if (node) { |
4470 | md->num++; | 4471 | entry = rb_entry(node, struct ext4_free_data, node); |
4471 | if (md->num == EXT4_BB_MAX_BLOCKS) { | 4472 | if (can_merge(new_entry, entry)) { |
4472 | /* no more space, put full container on a sb's list */ | 4473 | new_entry->count += entry->count; |
4473 | db->bb_md_cur = NULL; | 4474 | rb_erase(node, &(db->bb_free_root)); |
4475 | spin_lock(&sbi->s_md_lock); | ||
4476 | list_del(&entry->list); | ||
4477 | spin_unlock(&sbi->s_md_lock); | ||
4478 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
4474 | } | 4479 | } |
4475 | } | 4480 | } |
4481 | /* Add the extent to transaction's private list */ | ||
4482 | spin_lock(&sbi->s_md_lock); | ||
4483 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | ||
4484 | spin_unlock(&sbi->s_md_lock); | ||
4476 | ext4_unlock_group(sb, group); | 4485 | ext4_unlock_group(sb, group); |
4477 | return 0; | 4486 | return 0; |
4478 | } | 4487 | } |
@@ -4500,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
4500 | 4509 | ||
4501 | *freed = 0; | 4510 | *freed = 0; |
4502 | 4511 | ||
4503 | ext4_mb_poll_new_transaction(sb, handle); | ||
4504 | |||
4505 | sbi = EXT4_SB(sb); | 4512 | sbi = EXT4_SB(sb); |
4506 | es = EXT4_SB(sb)->s_es; | 4513 | es = EXT4_SB(sb)->s_es; |
4507 | if (block < le32_to_cpu(es->s_first_data_block) || | 4514 | if (block < le32_to_cpu(es->s_first_data_block) || |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index b3b4828f8b89..b5dff1fff1e5 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
20 | #include <linux/version.h> | 20 | #include <linux/version.h> |
21 | #include <linux/blkdev.h> | ||
22 | #include <linux/marker.h> | ||
21 | #include "ext4_jbd2.h" | 23 | #include "ext4_jbd2.h" |
22 | #include "ext4.h" | 24 | #include "ext4.h" |
23 | #include "group.h" | 25 | #include "group.h" |
@@ -98,23 +100,29 @@ | |||
98 | 100 | ||
99 | static struct kmem_cache *ext4_pspace_cachep; | 101 | static struct kmem_cache *ext4_pspace_cachep; |
100 | static struct kmem_cache *ext4_ac_cachep; | 102 | static struct kmem_cache *ext4_ac_cachep; |
103 | static struct kmem_cache *ext4_free_ext_cachep; | ||
101 | 104 | ||
102 | #ifdef EXT4_BB_MAX_BLOCKS | 105 | struct ext4_free_data { |
103 | #undef EXT4_BB_MAX_BLOCKS | 106 | /* this links the free block information from group_info */ |
104 | #endif | 107 | struct rb_node node; |
105 | #define EXT4_BB_MAX_BLOCKS 30 | ||
106 | 108 | ||
107 | struct ext4_free_metadata { | 109 | /* this links the free block information from ext4_sb_info */ |
108 | ext4_group_t group; | ||
109 | unsigned short num; | ||
110 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | ||
111 | struct list_head list; | 110 | struct list_head list; |
111 | |||
112 | /* group which free block extent belongs */ | ||
113 | ext4_group_t group; | ||
114 | |||
115 | /* free block extent */ | ||
116 | ext4_grpblk_t start_blk; | ||
117 | ext4_grpblk_t count; | ||
118 | |||
119 | /* transaction which freed this extent */ | ||
120 | tid_t t_tid; | ||
112 | }; | 121 | }; |
113 | 122 | ||
114 | struct ext4_group_info { | 123 | struct ext4_group_info { |
115 | unsigned long bb_state; | 124 | unsigned long bb_state; |
116 | unsigned long bb_tid; | 125 | struct rb_root bb_free_root; |
117 | struct ext4_free_metadata *bb_md_cur; | ||
118 | unsigned short bb_first_free; | 126 | unsigned short bb_first_free; |
119 | unsigned short bb_free; | 127 | unsigned short bb_free; |
120 | unsigned short bb_fragments; | 128 | unsigned short bb_fragments; |
@@ -261,8 +269,6 @@ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | |||
261 | 269 | ||
262 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 270 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
263 | ext4_group_t group); | 271 | ext4_group_t group); |
264 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | ||
265 | static void ext4_mb_free_committed_blocks(struct super_block *); | ||
266 | static void ext4_mb_return_to_preallocation(struct inode *inode, | 272 | static void ext4_mb_return_to_preallocation(struct inode *inode, |
267 | struct ext4_buddy *e4b, sector_t block, | 273 | struct ext4_buddy *e4b, sector_t block, |
268 | int count); | 274 | int count); |
@@ -270,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *, | |||
270 | struct super_block *, struct ext4_prealloc_space *pa); | 276 | struct super_block *, struct ext4_prealloc_space *pa); |
271 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | 277 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
272 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | 278 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
279 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | ||
273 | 280 | ||
274 | 281 | ||
275 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 282 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index dea8f13c2fd9..9b2b2bc4ec17 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -374,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
374 | */ | 374 | */ |
375 | } | 375 | } |
376 | 376 | ||
377 | int ext4_update_compat_feature(handle_t *handle, | ||
378 | struct super_block *sb, __u32 compat) | ||
379 | { | ||
380 | int err = 0; | ||
381 | if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) { | ||
382 | err = ext4_journal_get_write_access(handle, | ||
383 | EXT4_SB(sb)->s_sbh); | ||
384 | if (err) | ||
385 | return err; | ||
386 | EXT4_SET_COMPAT_FEATURE(sb, compat); | ||
387 | sb->s_dirt = 1; | ||
388 | handle->h_sync = 1; | ||
389 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
390 | "call ext4_journal_dirty_met adata"); | ||
391 | err = ext4_journal_dirty_metadata(handle, | ||
392 | EXT4_SB(sb)->s_sbh); | ||
393 | } | ||
394 | return err; | ||
395 | } | ||
396 | |||
397 | int ext4_update_rocompat_feature(handle_t *handle, | ||
398 | struct super_block *sb, __u32 rocompat) | ||
399 | { | ||
400 | int err = 0; | ||
401 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) { | ||
402 | err = ext4_journal_get_write_access(handle, | ||
403 | EXT4_SB(sb)->s_sbh); | ||
404 | if (err) | ||
405 | return err; | ||
406 | EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat); | ||
407 | sb->s_dirt = 1; | ||
408 | handle->h_sync = 1; | ||
409 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
410 | "call ext4_journal_dirty_met adata"); | ||
411 | err = ext4_journal_dirty_metadata(handle, | ||
412 | EXT4_SB(sb)->s_sbh); | ||
413 | } | ||
414 | return err; | ||
415 | } | ||
416 | |||
417 | int ext4_update_incompat_feature(handle_t *handle, | ||
418 | struct super_block *sb, __u32 incompat) | ||
419 | { | ||
420 | int err = 0; | ||
421 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) { | ||
422 | err = ext4_journal_get_write_access(handle, | ||
423 | EXT4_SB(sb)->s_sbh); | ||
424 | if (err) | ||
425 | return err; | ||
426 | EXT4_SET_INCOMPAT_FEATURE(sb, incompat); | ||
427 | sb->s_dirt = 1; | ||
428 | handle->h_sync = 1; | ||
429 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
430 | "call ext4_journal_dirty_met adata"); | ||
431 | err = ext4_journal_dirty_metadata(handle, | ||
432 | EXT4_SB(sb)->s_sbh); | ||
433 | } | ||
434 | return err; | ||
435 | } | ||
436 | |||
437 | /* | 377 | /* |
438 | * Open the external journal device | 378 | * Open the external journal device |
439 | */ | 379 | */ |
@@ -904,7 +844,7 @@ static const struct export_operations ext4_export_ops = { | |||
904 | enum { | 844 | enum { |
905 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 845 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
906 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 846 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, |
907 | Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, | 847 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, |
908 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 848 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
909 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 849 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
910 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 850 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
@@ -915,7 +855,7 @@ enum { | |||
915 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 855 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
916 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 856 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
917 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 857 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
918 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 858 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
919 | Opt_inode_readahead_blks | 859 | Opt_inode_readahead_blks |
920 | }; | 860 | }; |
921 | 861 | ||
@@ -933,8 +873,6 @@ static const match_table_t tokens = { | |||
933 | {Opt_err_panic, "errors=panic"}, | 873 | {Opt_err_panic, "errors=panic"}, |
934 | {Opt_err_ro, "errors=remount-ro"}, | 874 | {Opt_err_ro, "errors=remount-ro"}, |
935 | {Opt_nouid32, "nouid32"}, | 875 | {Opt_nouid32, "nouid32"}, |
936 | {Opt_nocheck, "nocheck"}, | ||
937 | {Opt_nocheck, "check=none"}, | ||
938 | {Opt_debug, "debug"}, | 876 | {Opt_debug, "debug"}, |
939 | {Opt_oldalloc, "oldalloc"}, | 877 | {Opt_oldalloc, "oldalloc"}, |
940 | {Opt_orlov, "orlov"}, | 878 | {Opt_orlov, "orlov"}, |
@@ -973,8 +911,6 @@ static const match_table_t tokens = { | |||
973 | {Opt_extents, "extents"}, | 911 | {Opt_extents, "extents"}, |
974 | {Opt_noextents, "noextents"}, | 912 | {Opt_noextents, "noextents"}, |
975 | {Opt_i_version, "i_version"}, | 913 | {Opt_i_version, "i_version"}, |
976 | {Opt_mballoc, "mballoc"}, | ||
977 | {Opt_nomballoc, "nomballoc"}, | ||
978 | {Opt_stripe, "stripe=%u"}, | 914 | {Opt_stripe, "stripe=%u"}, |
979 | {Opt_resize, "resize"}, | 915 | {Opt_resize, "resize"}, |
980 | {Opt_delalloc, "delalloc"}, | 916 | {Opt_delalloc, "delalloc"}, |
@@ -1073,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb, | |||
1073 | case Opt_nouid32: | 1009 | case Opt_nouid32: |
1074 | set_opt(sbi->s_mount_opt, NO_UID32); | 1010 | set_opt(sbi->s_mount_opt, NO_UID32); |
1075 | break; | 1011 | break; |
1076 | case Opt_nocheck: | ||
1077 | clear_opt(sbi->s_mount_opt, CHECK); | ||
1078 | break; | ||
1079 | case Opt_debug: | 1012 | case Opt_debug: |
1080 | set_opt(sbi->s_mount_opt, DEBUG); | 1013 | set_opt(sbi->s_mount_opt, DEBUG); |
1081 | break; | 1014 | break; |
@@ -1618,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1618 | if (block_bitmap < first_block || block_bitmap > last_block) { | 1551 | if (block_bitmap < first_block || block_bitmap > last_block) { |
1619 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1552 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1620 | "Block bitmap for group %lu not in group " | 1553 | "Block bitmap for group %lu not in group " |
1621 | "(block %llu)!", i, block_bitmap); | 1554 | "(block %llu)!\n", i, block_bitmap); |
1622 | return 0; | 1555 | return 0; |
1623 | } | 1556 | } |
1624 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 1557 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
1625 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 1558 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
1626 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1559 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1627 | "Inode bitmap for group %lu not in group " | 1560 | "Inode bitmap for group %lu not in group " |
1628 | "(block %llu)!", i, inode_bitmap); | 1561 | "(block %llu)!\n", i, inode_bitmap); |
1629 | return 0; | 1562 | return 0; |
1630 | } | 1563 | } |
1631 | inode_table = ext4_inode_table(sb, gdp); | 1564 | inode_table = ext4_inode_table(sb, gdp); |
@@ -1633,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
1633 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 1566 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
1634 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1567 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
1635 | "Inode table for group %lu not in group " | 1568 | "Inode table for group %lu not in group " |
1636 | "(block %llu)!", i, inode_table); | 1569 | "(block %llu)!\n", i, inode_table); |
1637 | return 0; | 1570 | return 0; |
1638 | } | 1571 | } |
1639 | spin_lock(sb_bgl_lock(sbi, i)); | 1572 | spin_lock(sb_bgl_lock(sbi, i)); |
@@ -1778,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
1778 | * | 1711 | * |
1779 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 1712 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
1780 | */ | 1713 | */ |
1781 | static loff_t ext4_max_size(int blkbits) | 1714 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
1782 | { | 1715 | { |
1783 | loff_t res; | 1716 | loff_t res; |
1784 | loff_t upper_limit = MAX_LFS_FILESIZE; | 1717 | loff_t upper_limit = MAX_LFS_FILESIZE; |
1785 | 1718 | ||
1786 | /* small i_blocks in vfs inode? */ | 1719 | /* small i_blocks in vfs inode? */ |
1787 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1720 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1788 | /* | 1721 | /* |
1789 | * CONFIG_LSF is not enabled implies the inode | 1722 | * CONFIG_LSF is not enabled implies the inode |
1790 | * i_block represent total blocks in 512 bytes | 1723 | * i_block represent total blocks in 512 bytes |
@@ -1814,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits) | |||
1814 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. | 1747 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. |
1815 | * We need to be 1 filesystem block less than the 2^48 sector limit. | 1748 | * We need to be 1 filesystem block less than the 2^48 sector limit. |
1816 | */ | 1749 | */ |
1817 | static loff_t ext4_max_bitmap_size(int bits) | 1750 | static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) |
1818 | { | 1751 | { |
1819 | loff_t res = EXT4_NDIR_BLOCKS; | 1752 | loff_t res = EXT4_NDIR_BLOCKS; |
1820 | int meta_blocks; | 1753 | int meta_blocks; |
@@ -1827,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits) | |||
1827 | * total number of 512 bytes blocks of the file | 1760 | * total number of 512 bytes blocks of the file |
1828 | */ | 1761 | */ |
1829 | 1762 | ||
1830 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1763 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
1831 | /* | 1764 | /* |
1832 | * CONFIG_LSF is not enabled implies the inode | 1765 | * !has_huge_files or CONFIG_LSF is not enabled |
1833 | * i_block represent total blocks in 512 bytes | 1766 | * implies the inode i_block represent total blocks in |
1834 | * 32 == size of vfs inode i_blocks * 8 | 1767 | * 512 bytes 32 == size of vfs inode i_blocks * 8 |
1835 | */ | 1768 | */ |
1836 | upper_limit = (1LL << 32) - 1; | 1769 | upper_limit = (1LL << 32) - 1; |
1837 | 1770 | ||
@@ -1940,7 +1873,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
1940 | int blocksize; | 1873 | int blocksize; |
1941 | int db_count; | 1874 | int db_count; |
1942 | int i; | 1875 | int i; |
1943 | int needs_recovery; | 1876 | int needs_recovery, has_huge_files; |
1944 | __le32 features; | 1877 | __le32 features; |
1945 | __u64 blocks_count; | 1878 | __u64 blocks_count; |
1946 | int err; | 1879 | int err; |
@@ -2081,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2081 | sb->s_id, le32_to_cpu(features)); | 2014 | sb->s_id, le32_to_cpu(features)); |
2082 | goto failed_mount; | 2015 | goto failed_mount; |
2083 | } | 2016 | } |
2084 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | 2017 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
2018 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
2019 | if (has_huge_files) { | ||
2085 | /* | 2020 | /* |
2086 | * Large file size enabled file system can only be | 2021 | * Large file size enabled file system can only be |
2087 | * mount if kernel is build with CONFIG_LSF | 2022 | * mount if kernel is build with CONFIG_LSF |
@@ -2131,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2131 | } | 2066 | } |
2132 | } | 2067 | } |
2133 | 2068 | ||
2134 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits); | 2069 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
2135 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); | 2070 | has_huge_files); |
2071 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | ||
2136 | 2072 | ||
2137 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { | 2073 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { |
2138 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; | 2074 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; |
@@ -2456,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2456 | "available.\n"); | 2392 | "available.\n"); |
2457 | } | 2393 | } |
2458 | 2394 | ||
2395 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
2396 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
2397 | "requested data journaling mode\n"); | ||
2398 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
2399 | } else if (test_opt(sb, DELALLOC)) | ||
2400 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
2401 | |||
2402 | ext4_ext_init(sb); | ||
2403 | err = ext4_mb_init(sb, needs_recovery); | ||
2404 | if (err) { | ||
2405 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2406 | err); | ||
2407 | goto failed_mount4; | ||
2408 | } | ||
2409 | |||
2459 | /* | 2410 | /* |
2460 | * akpm: core read_super() calls in here with the superblock locked. | 2411 | * akpm: core read_super() calls in here with the superblock locked. |
2461 | * That deadlocks, because orphan cleanup needs to lock the superblock | 2412 | * That deadlocks, because orphan cleanup needs to lock the superblock |
@@ -2475,21 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2475 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": | 2426 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": |
2476 | "writeback"); | 2427 | "writeback"); |
2477 | 2428 | ||
2478 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
2479 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
2480 | "requested data journaling mode\n"); | ||
2481 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
2482 | } else if (test_opt(sb, DELALLOC)) | ||
2483 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
2484 | |||
2485 | ext4_ext_init(sb); | ||
2486 | err = ext4_mb_init(sb, needs_recovery); | ||
2487 | if (err) { | ||
2488 | printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n", | ||
2489 | err); | ||
2490 | goto failed_mount4; | ||
2491 | } | ||
2492 | |||
2493 | lock_kernel(); | 2429 | lock_kernel(); |
2494 | return 0; | 2430 | return 0; |
2495 | 2431 | ||
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0abe02c4242a..8b119e16aa36 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -995,6 +995,9 @@ restart_loop: | |||
995 | } | 995 | } |
996 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
997 | 997 | ||
998 | if (journal->j_commit_callback) | ||
999 | journal->j_commit_callback(journal, commit_transaction); | ||
1000 | |||
998 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | 1001 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", |
999 | journal->j_devname, commit_transaction->t_tid, | 1002 | journal->j_devname, commit_transaction->t_tid, |
1000 | journal->j_tail_sequence); | 1003 | journal->j_tail_sequence); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5d540588fa9..39b7805a599a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
53 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
54 | INIT_LIST_HEAD(&transaction->t_inode_list); | 54 | INIT_LIST_HEAD(&transaction->t_inode_list); |
55 | INIT_LIST_HEAD(&transaction->t_private_list); | ||
55 | 56 | ||
56 | /* Set up the commit timer for the new transaction. */ | 57 | /* Set up the commit timer for the new transaction. */ |
57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 58 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 463d6f10b64f..c7d106ef22e2 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -641,6 +641,11 @@ struct transaction_s | |||
641 | */ | 641 | */ |
642 | int t_handle_count; | 642 | int t_handle_count; |
643 | 643 | ||
644 | /* | ||
645 | * For use by the filesystem to store fs-specific data | ||
646 | * structures associated with the transaction | ||
647 | */ | ||
648 | struct list_head t_private_list; | ||
644 | }; | 649 | }; |
645 | 650 | ||
646 | struct transaction_run_stats_s { | 651 | struct transaction_run_stats_s { |
@@ -935,6 +940,10 @@ struct journal_s | |||
935 | 940 | ||
936 | pid_t j_last_sync_writer; | 941 | pid_t j_last_sync_writer; |
937 | 942 | ||
943 | /* This function is called when a transaction has finished committing */ | |
944 | void (*j_commit_callback)(journal_t *, | ||
945 | transaction_t *); | ||
946 | |||
938 | /* | 947 | /* |
939 | * Journal statistics | 948 | * Journal statistics |
940 | */ | 949 | */ |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 12b15c561a1f..e585657e9831 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -63,7 +63,15 @@ struct writeback_control { | |||
63 | unsigned for_writepages:1; /* This is a writepages() call */ | 63 | unsigned for_writepages:1; /* This is a writepages() call */ |
64 | unsigned range_cyclic:1; /* range_start is cyclic */ | 64 | unsigned range_cyclic:1; /* range_start is cyclic */ |
65 | unsigned more_io:1; /* more io to be dispatched */ | 65 | unsigned more_io:1; /* more io to be dispatched */ |
66 | unsigned range_cont:1; | 66 | /* |
67 | * write_cache_pages() won't update wbc->nr_to_write and | ||
68 | * mapping->writeback_index if no_nrwrite_index_update | ||
69 | * is set. write_cache_pages() may write more than we | ||
70 | * requested, and we want to make sure nr_to_write and | |
71 | * writeback_index are updated in a consistent manner, | |
72 | * so we use a single control to update them. | |
73 | */ | ||
74 | unsigned no_nrwrite_index_update:1; | ||
67 | }; | 75 | }; |
68 | 76 | ||
69 | /* | 77 | /* |
diff --git a/mm/page-writeback.c b/mm/page-writeback.c index c130a137c129..b40f6d5f8fe9 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c | |||
@@ -876,6 +876,7 @@ int write_cache_pages(struct address_space *mapping, | |||
876 | pgoff_t end; /* Inclusive */ | 876 | pgoff_t end; /* Inclusive */ |
877 | int scanned = 0; | 877 | int scanned = 0; |
878 | int range_whole = 0; | 878 | int range_whole = 0; |
879 | long nr_to_write = wbc->nr_to_write; | ||
879 | 880 | ||
880 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 881 | if (wbc->nonblocking && bdi_write_congested(bdi)) { |
881 | wbc->encountered_congestion = 1; | 882 | wbc->encountered_congestion = 1; |
@@ -939,7 +940,7 @@ retry: | |||
939 | unlock_page(page); | 940 | unlock_page(page); |
940 | ret = 0; | 941 | ret = 0; |
941 | } | 942 | } |
942 | if (ret || (--(wbc->nr_to_write) <= 0)) | 943 | if (ret || (--nr_to_write <= 0)) |
943 | done = 1; | 944 | done = 1; |
944 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 945 | if (wbc->nonblocking && bdi_write_congested(bdi)) { |
945 | wbc->encountered_congestion = 1; | 946 | wbc->encountered_congestion = 1; |
@@ -958,11 +959,12 @@ retry: | |||
958 | index = 0; | 959 | index = 0; |
959 | goto retry; | 960 | goto retry; |
960 | } | 961 | } |
961 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 962 | if (!wbc->no_nrwrite_index_update) { |
962 | mapping->writeback_index = index; | 963 | if (wbc->range_cyclic || (range_whole && nr_to_write > 0)) |
964 | mapping->writeback_index = index; | ||
965 | wbc->nr_to_write = nr_to_write; | ||
966 | } | ||
963 | 967 | ||
964 | if (wbc->range_cont) | ||
965 | wbc->range_start = index << PAGE_CACHE_SHIFT; | ||
966 | return ret; | 968 | return ret; |
967 | } | 969 | } |
968 | EXPORT_SYMBOL(write_cache_pages); | 970 | EXPORT_SYMBOL(write_cache_pages); |