author     Linus Torvalds <torvalds@linux-foundation.org>	2009-07-13 19:39:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>	2009-07-13 19:39:25 -0400
commit     1cf29683f4414296dc772a87caa207cab16c310c (patch)
tree       90d05ba531c5ad48d8d171d5b78038a7df7728e7
parent     4a390e07fc53ce9dd615d7b788e9ecc73f87ad94 (diff)
parent     96577c43827697ca1af5982fa256a34786d0c720 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
jbd2: fix race between write_metadata_buffer and get_write_access
ext4: Fix ext4_mb_initialize_context() to initialize all fields
ext4: fix null handler of ioctls in no journal mode
ext4: Fix buffer head reference leak in no-journal mode
ext4: Move __ext4_journalled_writepage() to avoid forward declaration
  ext4: Fix mmap/truncate race when blocksize < pagesize && !nodelalloc
ext4: Fix mmap/truncate race when blocksize < pagesize && delayed allocation
ext4: Don't look at buffer_heads outside i_size.
ext4: Fix goal inum check in the inode allocator
ext4: fix no journal corruption with locale-gen
ext4: Calculate required journal credits for inserting an extent properly
ext4: Fix truncation of symlinks after failed write
jbd2: Fix a race between checkpointing code and journal_get_write_access()
ext4: Use rcu_barrier() on module unload.
ext4: naturally align struct ext4_allocation_request
ext4: mark several more functions in mballoc.c as noinline
ext4: Fix potential reclaim deadlock when truncating partial block
jbd2: Remove GFP_ATOMIC kmalloc from inside spinlock critical region
ext4: Fix type warning on 64-bit platforms in tracing events header
-rw-r--r--	fs/ext4/ext4.h              |  14
-rw-r--r--	fs/ext4/ext4_jbd2.c         |   4
-rw-r--r--	fs/ext4/ext4_jbd2.h         |   6
-rw-r--r--	fs/ext4/extents.c           |   1
-rw-r--r--	fs/ext4/ialloc.c            |   2
-rw-r--r--	fs/ext4/inode.c             | 384
-rw-r--r--	fs/ext4/ioctl.c             |  20
-rw-r--r--	fs/ext4/mballoc.c           |  50
-rw-r--r--	fs/jbd2/journal.c           |  31
-rw-r--r--	fs/jbd2/transaction.c       |  68
-rw-r--r--	include/trace/events/ext4.h |  48
11 files changed, 235 insertions(+), 393 deletions(-)
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ddf7e55abe1..9714db393efe 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -93,20 +93,20 @@ typedef unsigned int ext4_group_t;
 struct ext4_allocation_request {
 	/* target inode for block we're allocating */
 	struct inode *inode;
+	/* how many blocks we want to allocate */
+	unsigned int len;
 	/* logical block in target inode */
 	ext4_lblk_t logical;
-	/* phys. target (a hint) */
-	ext4_fsblk_t goal;
 	/* the closest logical allocated block to the left */
 	ext4_lblk_t lleft;
-	/* phys. block for ^^^ */
-	ext4_fsblk_t pleft;
 	/* the closest logical allocated block to the right */
 	ext4_lblk_t lright;
-	/* phys. block for ^^^ */
+	/* phys. target (a hint) */
+	ext4_fsblk_t goal;
+	/* phys. block for the closest logical allocated block to the left */
+	ext4_fsblk_t pleft;
+	/* phys. block for the closest logical allocated block to the right */
 	ext4_fsblk_t pright;
-	/* how many blocks we want to allocate */
-	unsigned int len;
 	/* flags. see above EXT4_MB_HINT_* */
 	unsigned int flags;
 };
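The reordering above is the "naturally align struct ext4_allocation_request" patch: moving the 32-bit `len` up next to the other 32-bit fields and grouping the 64-bit `ext4_fsblk_t` fields together removes the compiler padding that interleaved fields force on 64-bit builds. A minimal userspace sketch of the effect (field set reduced; `uint32_t`/`uint64_t` stand in for `ext4_lblk_t`/`ext4_fsblk_t`):

```c
/* Build with: cc -o align align.c && ./align
 * Shows how alternating 32-bit and 64-bit fields forces padding
 * that grouping the fields by size avoids. */
#include <stdio.h>
#include <stdint.h>

struct interleaved {		/* 32/64-bit fields alternating */
	uint32_t logical;
	uint64_t goal;		/* 4 bytes of padding inserted before this */
	uint32_t lleft;
	uint64_t pleft;		/* another 4 bytes of padding */
	uint32_t len;		/* plus 4 bytes of tail padding */
};

struct grouped {		/* 32-bit fields together, then 64-bit */
	uint32_t logical;
	uint32_t lleft;
	uint32_t len;
	uint64_t goal;		/* one 4-byte pad before the 64-bit run */
	uint64_t pleft;
};

int main(void)
{
	printf("interleaved: %zu bytes\n", sizeof(struct interleaved)); /* 40 */
	printf("grouped:     %zu bytes\n", sizeof(struct grouped));     /* 32 */
	return 0;
}
```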
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index ad13a84644e1..eb27fd0f2ee8 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -43,6 +43,8 @@ int __ext4_journal_forget(const char *where, handle_t *handle,
 		ext4_journal_abort_handle(where, __func__, bh,
 					  handle, err);
 	}
+	else
+		brelse(bh);
 	return err;
 }
 
@@ -57,6 +59,8 @@ int __ext4_journal_revoke(const char *where, handle_t *handle,
 		ext4_journal_abort_handle(where, __func__, bh,
 					  handle, err);
 	}
+	else
+		brelse(bh);
 	return err;
 }
 
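These two hunks are the "Fix buffer head reference leak in no-journal mode" patch: callers hand these functions a referenced buffer head, and when the handle is invalid the jbd2 call that would normally drop that reference never runs, so the functions must `brelse()` it themselves. A userspace sketch of the refcount invariant (all names hypothetical, not the kernel API):

```c
/* A caller-supplied reference must be dropped on every path,
 * including the "no journal" one the fix adds. */
#include <assert.h>
#include <stdbool.h>

struct buf { int refcount; };

static void get_buf(struct buf *b) { b->refcount++; }
static void put_buf(struct buf *b) { b->refcount--; }

static int forget_buf(struct buf *b, bool journaled)
{
	if (journaled) {
		/* ... journal bookkeeping would drop the reference ... */
		put_buf(b);
		return 0;
	}
	put_buf(b);	/* the fix: drop the reference in no-journal mode too */
	return 0;
}

int main(void)
{
	struct buf b = { 0 };

	get_buf(&b);
	forget_buf(&b, false);
	assert(b.refcount == 0);	/* would be 1 (leaked) before the fix */
	return 0;
}
```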
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index be2f426f6805..139fb8cb87e4 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -131,9 +131,11 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
 int __ext4_journal_get_write_access(const char *where, handle_t *handle,
 				struct buffer_head *bh);
 
+/* When called with an invalid handle, this will still do a put on the BH */
 int __ext4_journal_forget(const char *where, handle_t *handle,
 				struct buffer_head *bh);
 
+/* When called with an invalid handle, this will still do a put on the BH */
 int __ext4_journal_revoke(const char *where, handle_t *handle,
 				ext4_fsblk_t blocknr, struct buffer_head *bh);
 
@@ -281,10 +283,10 @@ static inline int ext4_should_order_data(struct inode *inode)
 
 static inline int ext4_should_writeback_data(struct inode *inode)
 {
-	if (EXT4_JOURNAL(inode) == NULL)
-		return 0;
 	if (!S_ISREG(inode->i_mode))
 		return 0;
+	if (EXT4_JOURNAL(inode) == NULL)
+		return 1;
 	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
 		return 0;
 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
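The second hunk changes behavior, not just ordering: a regular file on a no-journal filesystem now reports writeback mode (`return 1`) instead of falling out at the first check, which is what lets the no-journal ioctl and writepage paths work. A condensed model of the reordered decision (plain C sketch, not the kernel inline):

```c
#include <assert.h>
#include <stdbool.h>

/* Models the reordered checks: S_ISREG runs first, and a missing
 * journal now selects the writeback data path instead of "none". */
static bool should_writeback(bool is_reg, bool has_journal,
			     bool journal_data_flag, bool mount_writeback)
{
	if (!is_reg)
		return false;
	if (!has_journal)
		return true;		/* no-journal mode: writeback path */
	if (journal_data_flag)
		return false;
	return mount_writeback;
}

int main(void)
{
	/* regular file, no journal: previously false, now true */
	assert(should_writeback(true, false, false, false));
	/* non-regular files never take the writeback data path */
	assert(!should_writeback(false, false, false, false));
	return 0;
}
```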
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 50322a09bd01..73ebfb44ad75 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1977,6 +1977,7 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks,
 			 */
 			/* 1 bitmap, 1 block group descriptor */
 			ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb);
+			return ret;
 		}
 	}
 
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 2f645732e3b7..29e6dc7299b8 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -833,7 +833,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
 	if (!goal)
 		goal = sbi->s_inode_goal;
 
-	if (goal && goal < le32_to_cpu(sbi->s_es->s_inodes_count)) {
+	if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
 		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
 		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
 		ret2 = 0;
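The `<` to `<=` change is the goal-inum off-by-one fix: inode numbers are 1-based, so `goal == s_inodes_count` names the last valid inode and must be accepted; the 0-based group/offset math stays in bounds either way. A quick boundary sketch with made-up geometry:

```c
#include <assert.h>
#include <stdint.h>

/* Inode numbers run from 1 to inodes_count inclusive, so the last
 * valid goal is inodes_count itself -- hence "<=" in the fix. */
static int goal_in_range(uint32_t goal, uint32_t inodes_count)
{
	return goal && goal <= inodes_count;
}

int main(void)
{
	uint32_t per_group = 8192, inodes_count = 8192 * 4;
	uint32_t goal = inodes_count;		/* the very last inode */

	assert(goal_in_range(goal, inodes_count));  /* rejected before the fix */
	/* the group/offset math is 0-based, so it stays in bounds: */
	assert((goal - 1) / per_group == 3);
	assert((goal - 1) % per_group == per_group - 1);
	return 0;
}
```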
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 60a26f3a6f8b..f9c642b22efa 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -78,16 +78,14 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
  * but there may still be a record of it in the journal, and that record
  * still needs to be revoked.
  *
- * If the handle isn't valid we're not journaling so there's nothing to do.
+ * If the handle isn't valid we're not journaling, but we still need to
+ * call into ext4_journal_revoke() to put the buffer head.
  */
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
 		struct buffer_head *bh, ext4_fsblk_t blocknr)
 {
 	int err;
 
-	if (!ext4_handle_valid(handle))
-		return 0;
-
 	might_sleep();
 
 	BUFFER_TRACE(bh, "enter");
@@ -1513,14 +1511,14 @@ retry:
 		 * Add inode to orphan list in case we crash before
 		 * truncate finishes
 		 */
-		if (pos + len > inode->i_size)
+		if (pos + len > inode->i_size && ext4_can_truncate(inode))
 			ext4_orphan_add(handle, inode);
 
 		ext4_journal_stop(handle);
 		if (pos + len > inode->i_size) {
-			vmtruncate(inode, inode->i_size);
+			ext4_truncate(inode);
 			/*
-			 * If vmtruncate failed early the inode might
+			 * If truncate failed early the inode might
 			 * still be on the orphan list; we need to
 			 * make sure the inode is removed from the
 			 * orphan list in that case.
@@ -1614,7 +1612,7 @@ static int ext4_ordered_write_end(struct file *file,
 	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext4_can_truncate(inode))
 		/* if we have allocated more blocks and copied
 		 * less. We will have blocks allocated outside
 		 * inode->i_size. So truncate them
@@ -1628,9 +1626,9 @@ static int ext4_ordered_write_end(struct file *file,
 		ret = ret2;
 
 	if (pos + len > inode->i_size) {
-		vmtruncate(inode, inode->i_size);
+		ext4_truncate(inode);
 		/*
-		 * If vmtruncate failed early the inode might still be
+		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
 		 * is removed from the orphan list in that case.
 		 */
@@ -1655,7 +1653,7 @@ static int ext4_writeback_write_end(struct file *file,
 	ret2 = ext4_generic_write_end(file, mapping, pos, len, copied,
 							page, fsdata);
 	copied = ret2;
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext4_can_truncate(inode))
 		/* if we have allocated more blocks and copied
 		 * less. We will have blocks allocated outside
 		 * inode->i_size. So truncate them
@@ -1670,9 +1668,9 @@ static int ext4_writeback_write_end(struct file *file,
 		ret = ret2;
 
 	if (pos + len > inode->i_size) {
-		vmtruncate(inode, inode->i_size);
+		ext4_truncate(inode);
 		/*
-		 * If vmtruncate failed early the inode might still be
+		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
 		 * is removed from the orphan list in that case.
 		 */
@@ -1722,7 +1720,7 @@ static int ext4_journalled_write_end(struct file *file,
 
 	unlock_page(page);
 	page_cache_release(page);
-	if (pos + len > inode->i_size)
+	if (pos + len > inode->i_size && ext4_can_truncate(inode))
 		/* if we have allocated more blocks and copied
 		 * less. We will have blocks allocated outside
 		 * inode->i_size. So truncate them
@@ -1733,9 +1731,9 @@ static int ext4_journalled_write_end(struct file *file,
 	if (!ret)
 		ret = ret2;
 	if (pos + len > inode->i_size) {
-		vmtruncate(inode, inode->i_size);
+		ext4_truncate(inode);
 		/*
-		 * If vmtruncate failed early the inode might still be
+		 * If truncate failed early the inode might still be
 		 * on the orphan list; we need to make sure the inode
 		 * is removed from the orphan list in that case.
 		 */
@@ -2305,15 +2303,9 @@ flush_it:
 	return;
 }
 
-static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 {
-	/*
-	 * unmapped buffer is possible for holes.
-	 * delay buffer is possible with delayed allocation.
-	 * We also need to consider unwritten buffer as unmapped.
-	 */
-	return (!buffer_mapped(bh) || buffer_delay(bh) ||
-		buffer_unwritten(bh)) && buffer_dirty(bh);
+	return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh);
 }
 
 /*
@@ -2398,9 +2390,9 @@ static int __mpage_da_writepage(struct page *page,
 			 * We need to try to allocate
 			 * unmapped blocks in the same page.
 			 * Otherwise we won't make progress
-			 * with the page in ext4_da_writepage
+			 * with the page in ext4_writepage
 			 */
-			if (ext4_bh_unmapped_or_delay(NULL, bh)) {
+			if (ext4_bh_delay_or_unwritten(NULL, bh)) {
 				mpage_add_bh_to_extent(mpd, logical,
 						       bh->b_size,
 						       bh->b_state);
@@ -2517,7 +2509,6 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 	 * so call get_block_wrap with create = 0
 	 */
 	ret = ext4_get_blocks(NULL, inode, iblock, max_blocks, bh_result, 0);
-	BUG_ON(create && ret == 0);
 	if (ret > 0) {
 		bh_result->b_size = (ret << inode->i_blkbits);
 		ret = 0;
@@ -2525,15 +2516,102 @@ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
 	return ret;
 }
 
+static int bget_one(handle_t *handle, struct buffer_head *bh)
+{
+	get_bh(bh);
+	return 0;
+}
+
+static int bput_one(handle_t *handle, struct buffer_head *bh)
+{
+	put_bh(bh);
+	return 0;
+}
+
+static int __ext4_journalled_writepage(struct page *page,
+				       struct writeback_control *wbc,
+				       unsigned int len)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
+	struct buffer_head *page_bufs;
+	handle_t *handle = NULL;
+	int ret = 0;
+	int err;
+
+	page_bufs = page_buffers(page);
+	BUG_ON(!page_bufs);
+	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
+	/* As soon as we unlock the page, it can go away, but we have
+	 * references to buffers so we are safe */
+	unlock_page(page);
+
+	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
+
+	ret = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+				do_journal_get_write_access);
+
+	err = walk_page_buffers(handle, page_bufs, 0, len, NULL,
+				write_end_fn);
+	if (ret == 0)
+		ret = err;
+	err = ext4_journal_stop(handle);
+	if (!ret)
+		ret = err;
+
+	walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one);
+	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
+out:
+	return ret;
+}
+
 /*
+ * Note that we don't need to start a transaction unless we're journaling data
+ * because we should have holes filled from ext4_page_mkwrite(). We even don't
+ * need to file the inode to the transaction's list in ordered mode because if
+ * we are writing back data added by write(), the inode is already there and if
+ * we are writing back data modified via mmap(), noone guarantees in which
+ * transaction the data will hit the disk. In case we are journaling data, we
+ * cannot start transaction directly because transaction start ranks above page
+ * lock so we have to do some magic.
+ *
  * This function can get called via...
  *   - ext4_da_writepages after taking page lock (have journal handle)
  *   - journal_submit_inode_data_buffers (no journal handle)
  *   - shrink_page_list via pdflush (no journal handle)
  *   - grab_page_cache when doing write_begin (have journal handle)
+ *
+ * We don't do any block allocation in this function. If we have page with
+ * multiple blocks we need to write those buffer_heads that are mapped. This
+ * is important for mmaped based write. So if we do with blocksize 1K
+ * truncate(f, 1024);
+ * a = mmap(f, 0, 4096);
+ * a[0] = 'a';
+ * truncate(f, 4096);
+ * we have in the page first buffer_head mapped via page_mkwrite call back
+ * but other bufer_heads would be unmapped but dirty(dirty done via the
+ * do_wp_page). So writepage should write the first block. If we modify
+ * the mmap area beyond 1024 we will again get a page_fault and the
+ * page_mkwrite callback will do the block allocation and mark the
+ * buffer_heads mapped.
+ *
+ * We redirty the page if we have any buffer_heads that is either delay or
+ * unwritten in the page.
+ *
+ * We can get recursively called as show below.
+ *
+ *	ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
+ *		ext4_writepage()
+ *
+ * But since we don't do any block allocation we should not deadlock.
+ * Page also have the dirty flag cleared so we don't get recurive page_lock.
  */
-static int ext4_da_writepage(struct page *page,
-				struct writeback_control *wbc)
+static int ext4_writepage(struct page *page,
+			  struct writeback_control *wbc)
 {
 	int ret = 0;
 	loff_t size;
@@ -2541,7 +2619,7 @@ static int ext4_da_writepage(struct page *page,
 	struct buffer_head *page_bufs;
 	struct inode *inode = page->mapping->host;
 
-	trace_ext4_da_writepage(inode, page);
+	trace_ext4_writepage(inode, page);
 	size = i_size_read(inode);
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
@@ -2551,7 +2629,7 @@ static int ext4_da_writepage(struct page *page,
 	if (page_has_buffers(page)) {
 		page_bufs = page_buffers(page);
 		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-					ext4_bh_unmapped_or_delay)) {
+					ext4_bh_delay_or_unwritten)) {
 			/*
 			 * We don't want to do block allocation
 			 * So redirty the page and return
@@ -2578,13 +2656,13 @@ static int ext4_da_writepage(struct page *page,
 		 * all are mapped and non delay. We don't want to
 		 * do block allocation here.
 		 */
-		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
+		ret = block_prepare_write(page, 0, len,
 					  noalloc_get_block_write);
 		if (!ret) {
 			page_bufs = page_buffers(page);
 			/* check whether all are mapped and non delay */
 			if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-						ext4_bh_unmapped_or_delay)) {
+						ext4_bh_delay_or_unwritten)) {
 				redirty_page_for_writepage(wbc, page);
 				unlock_page(page);
 				return 0;
@@ -2600,7 +2678,16 @@ static int ext4_da_writepage(struct page *page,
 			return 0;
 		}
 		/* now mark the buffer_heads as dirty and uptodate */
-		block_commit_write(page, 0, PAGE_CACHE_SIZE);
+		block_commit_write(page, 0, len);
+	}
+
+	if (PageChecked(page) && ext4_should_journal_data(inode)) {
+		/*
+		 * It's mmapped pagecache.  Add buffers and journal it.  There
+		 * doesn't seem much point in redirtying the page here.
+		 */
+		ClearPageChecked(page);
+		return __ext4_journalled_writepage(page, wbc, len);
 	}
 
 	if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2907,7 +2994,7 @@ retry:
 		 * i_size_read because we hold i_mutex.
 		 */
 		if (pos + len > inode->i_size)
-			vmtruncate(inode, inode->i_size);
+			ext4_truncate(inode);
 	}
 
 	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3130,222 +3217,6 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
 	return generic_block_bmap(mapping, block, ext4_get_block);
 }
 
-static int bget_one(handle_t *handle, struct buffer_head *bh)
-{
-	get_bh(bh);
-	return 0;
-}
-
-static int bput_one(handle_t *handle, struct buffer_head *bh)
-{
-	put_bh(bh);
-	return 0;
-}
-
-/*
- * Note that we don't need to start a transaction unless we're journaling data
- * because we should have holes filled from ext4_page_mkwrite(). We even don't
- * need to file the inode to the transaction's list in ordered mode because if
- * we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), noone guarantees in which
- * transaction the data will hit the disk. In case we are journaling data, we
- * cannot start transaction directly because transaction start ranks above page
- * lock so we have to do some magic.
- *
- * In all journaling modes block_write_full_page() will start the I/O.
- *
- * Problem:
- *
- *	ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- *		ext4_writepage()
- *
- * Similar for:
- *
- *	ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ...
- *
- * Same applies to ext4_get_block().  We will deadlock on various things like
- * lock_journal and i_data_sem
- *
- * Setting PF_MEMALLOC here doesn't work - too many internal memory
- * allocations fail.
- *
- * 16May01: If we're reentered then journal_current_handle() will be
- *	    non-zero. We simply *return*.
- *
- * 1 July 2001: @@@ FIXME:
- *   In journalled data mode, a data buffer may be metadata against the
- *   current transaction.  But the same file is part of a shared mapping
- *   and someone does a writepage() on it.
- *
- *   We will move the buffer onto the async_data list, but *after* it has
- *   been dirtied. So there's a small window where we have dirty data on
- *   BJ_Metadata.
- *
- *   Note that this only applies to the last partial page in the file.  The
- *   bit which block_write_full_page() uses prepare/commit for.  (That's
- *   broken code anyway: it's wrong for msync()).
- *
- *   It's a rare case: affects the final partial page, for journalled data
- *   where the file is subject to bith write() and writepage() in the same
- *   transction.  To fix it we'll need a custom block_write_full_page().
- *   We'll probably need that anyway for journalling writepage() output.
- *
- * We don't honour synchronous mounts for writepage().  That would be
- * disastrous.  Any write() or metadata operation will sync the fs for
- * us.
- *
- */
-static int __ext4_normal_writepage(struct page *page,
-				   struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-
-	if (test_opt(inode->i_sb, NOBH))
-		return nobh_writepage(page, noalloc_get_block_write, wbc);
-	else
-		return block_write_full_page(page, noalloc_get_block_write,
-					     wbc);
-}
-
-static int ext4_normal_writepage(struct page *page,
-				 struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	loff_t size = i_size_read(inode);
-	loff_t len;
-
-	trace_ext4_normal_writepage(inode, page);
-	J_ASSERT(PageLocked(page));
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
-	else
-		len = PAGE_CACHE_SIZE;
-
-	if (page_has_buffers(page)) {
-		/* if page has buffers it should all be mapped
-		 * and allocated. If there are not buffers attached
-		 * to the page we know the page is dirty but it lost
-		 * buffers. That means that at some moment in time
-		 * after write_begin() / write_end() has been called
-		 * all buffers have been clean and thus they must have been
-		 * written at least once. So they are all mapped and we can
-		 * happily proceed with mapping them and writing the page.
-		 */
-		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-					ext4_bh_unmapped_or_delay));
-	}
-
-	if (!ext4_journal_current_handle())
-		return __ext4_normal_writepage(page, wbc);
-
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-}
-
-static int __ext4_journalled_writepage(struct page *page,
-				       struct writeback_control *wbc)
-{
-	struct address_space *mapping = page->mapping;
-	struct inode *inode = mapping->host;
-	struct buffer_head *page_bufs;
-	handle_t *handle = NULL;
-	int ret = 0;
-	int err;
-
-	ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-				  noalloc_get_block_write);
-	if (ret != 0)
-		goto out_unlock;
-
-	page_bufs = page_buffers(page);
-	walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
-								bget_one);
-	/* As soon as we unlock the page, it can go away, but we have
-	 * references to buffers so we are safe */
-	unlock_page(page);
-
-	handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = PTR_ERR(handle);
-		goto out;
-	}
-
-	ret = walk_page_buffers(handle, page_bufs, 0,
-			PAGE_CACHE_SIZE, NULL, do_journal_get_write_access);
-
-	err = walk_page_buffers(handle, page_bufs, 0,
-				PAGE_CACHE_SIZE, NULL, write_end_fn);
-	if (ret == 0)
-		ret = err;
-	err = ext4_journal_stop(handle);
-	if (!ret)
-		ret = err;
-
-	walk_page_buffers(handle, page_bufs, 0,
-				PAGE_CACHE_SIZE, NULL, bput_one);
-	EXT4_I(inode)->i_state |= EXT4_STATE_JDATA;
-	goto out;
-
-out_unlock:
-	unlock_page(page);
-out:
-	return ret;
-}
-
-static int ext4_journalled_writepage(struct page *page,
-				     struct writeback_control *wbc)
-{
-	struct inode *inode = page->mapping->host;
-	loff_t size = i_size_read(inode);
-	loff_t len;
-
-	trace_ext4_journalled_writepage(inode, page);
-	J_ASSERT(PageLocked(page));
-	if (page->index == size >> PAGE_CACHE_SHIFT)
-		len = size & ~PAGE_CACHE_MASK;
-	else
-		len = PAGE_CACHE_SIZE;
-
-	if (page_has_buffers(page)) {
-		/* if page has buffers it should all be mapped
-		 * and allocated. If there are not buffers attached
-		 * to the page we know the page is dirty but it lost
-		 * buffers. That means that at some moment in time
-		 * after write_begin() / write_end() has been called
-		 * all buffers have been clean and thus they must have been
-		 * written at least once. So they are all mapped and we can
-		 * happily proceed with mapping them and writing the page.
-		 */
-		BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
-					ext4_bh_unmapped_or_delay));
-	}
-
-	if (ext4_journal_current_handle())
-		goto no_write;
-
-	if (PageChecked(page)) {
-		/*
-		 * It's mmapped pagecache.  Add buffers and journal it.  There
-		 * doesn't seem much point in redirtying the page here.
-		 */
-		ClearPageChecked(page);
-		return __ext4_journalled_writepage(page, wbc);
-	} else {
-		/*
-		 * It may be a page full of checkpoint-mode buffers.  We don't
-		 * really know unless we go poke around in the buffer_heads.
-		 * But block_write_full_page will do the right thing.
-		 */
-		return block_write_full_page(page, noalloc_get_block_write,
-					     wbc);
-	}
-no_write:
-	redirty_page_for_writepage(wbc, page);
-	unlock_page(page);
-	return 0;
-}
-
 static int ext4_readpage(struct file *file, struct page *page)
 {
 	return mpage_readpage(page, ext4_get_block);
@@ -3492,7 +3363,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
 static const struct address_space_operations ext4_ordered_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_normal_writepage,
+	.writepage		= ext4_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_ordered_write_end,
@@ -3507,7 +3378,7 @@ static const struct address_space_operations ext4_ordered_aops = {
 static const struct address_space_operations ext4_writeback_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_normal_writepage,
+	.writepage		= ext4_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_writeback_write_end,
@@ -3522,7 +3393,7 @@ static const struct address_space_operations ext4_writeback_aops = {
 static const struct address_space_operations ext4_journalled_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_journalled_writepage,
+	.writepage		= ext4_writepage,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_write_begin,
 	.write_end		= ext4_journalled_write_end,
@@ -3536,7 +3407,7 @@ static const struct address_space_operations ext4_journalled_aops = {
 static const struct address_space_operations ext4_da_aops = {
 	.readpage		= ext4_readpage,
 	.readpages		= ext4_readpages,
-	.writepage		= ext4_da_writepage,
+	.writepage		= ext4_writepage,
 	.writepages		= ext4_da_writepages,
 	.sync_page		= block_sync_page,
 	.write_begin		= ext4_da_write_begin,
@@ -3583,7 +3454,8 @@ int ext4_block_truncate_page(handle_t *handle,
 	struct page *page;
 	int err = 0;
 
-	page = grab_cache_page(mapping, from >> PAGE_CACHE_SHIFT);
+	page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+				   mapping_gfp_mask(mapping) & ~__GFP_FS);
 	if (!page)
 		return -EINVAL;
 
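The long comment added to `ext4_writepage()` above describes a sequence that can be driven entirely from userspace. Something like the following (illustrative sketch, minimal error handling, file name made up) produces a page whose first buffer head is mapped by `page_mkwrite` while the remaining ones are dirtied only via `do_wp_page`, assuming a filesystem with 1K blocks and 4K pages:

```c
/* Reproduces the access pattern from the ext4_writepage() comment. */
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	int fd = open("f", O_RDWR | O_CREAT | O_TRUNC, 0600);
	char *a;

	if (fd < 0)
		return 1;
	ftruncate(fd, 1024);			/* one 1K block */
	a = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (a == MAP_FAILED)
		return 1;
	a[0] = 'a';		/* faults: page_mkwrite maps block 0 only */
	ftruncate(fd, 4096);	/* extend the file past the mapped block */
	/* writepage must still write the first block even though the
	 * other three buffer heads in the page are unmapped but dirty. */
	munmap(a, 4096);
	close(fd);
	return 0;
}
```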
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 24a6abb2aef5..7050a9cd04a4 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -191,7 +191,7 @@ setversion_out:
 	case EXT4_IOC_GROUP_EXTEND: {
 		ext4_fsblk_t n_blocks_count;
 		struct super_block *sb = inode->i_sb;
-		int err, err2;
+		int err, err2=0;
 
 		if (!capable(CAP_SYS_RESOURCE))
 			return -EPERM;
@@ -204,9 +204,11 @@ setversion_out:
 			return err;
 
 		err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
-		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
-		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		if (EXT4_SB(sb)->s_journal) {
+			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		}
 		if (err == 0)
 			err = err2;
 		mnt_drop_write(filp->f_path.mnt);
@@ -251,7 +253,7 @@ setversion_out:
 	case EXT4_IOC_GROUP_ADD: {
 		struct ext4_new_group_data input;
 		struct super_block *sb = inode->i_sb;
-		int err, err2;
+		int err, err2=0;
 
 		if (!capable(CAP_SYS_RESOURCE))
 			return -EPERM;
@@ -265,9 +267,11 @@ setversion_out:
 			return err;
 
 		err = ext4_group_add(sb, &input);
-		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
-		err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
-		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		if (EXT4_SB(sb)->s_journal) {
+			jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
+			err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
+			jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
+		}
 		if (err == 0)
 			err = err2;
 		mnt_drop_write(filp->f_path.mnt);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 519a0a686d94..cd258463e2a9 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -657,7 +657,8 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
 	}
 }
 
-static void ext4_mb_generate_buddy(struct super_block *sb,
+static noinline_for_stack
+void ext4_mb_generate_buddy(struct super_block *sb,
 				void *buddy, void *bitmap, ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -1480,7 +1481,8 @@ static void ext4_mb_measure_extent(struct ext4_allocation_context *ac,
 	ext4_mb_check_limits(ac, e4b, 0);
 }
 
-static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
+static noinline_for_stack
+int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
 					struct ext4_buddy *e4b)
 {
 	struct ext4_free_extent ex = ac->ac_b_ex;
@@ -1507,7 +1509,8 @@ static int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
 	return 0;
 }
 
-static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
+static noinline_for_stack
+int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
 				struct ext4_buddy *e4b)
 {
 	ext4_group_t group = ac->ac_g_ex.fe_group;
@@ -1566,7 +1569,8 @@ static int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
  * The routine scans buddy structures (not bitmap!) from given order
  * to max order and tries to find big enough chunk to satisfy the req
  */
-static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
 					struct ext4_buddy *e4b)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -1609,7 +1613,8 @@ static void ext4_mb_simple_scan_group(struct ext4_allocation_context *ac,
  * In order to optimize scanning, caller must pass number of
  * free blocks in the group, so the routine can know upper limit.
  */
-static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 					struct ext4_buddy *e4b)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -1668,7 +1673,8 @@ static void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
 * we try to find stripe-aligned chunks for stripe-size requests
 * XXX should do so at least for multiples of stripe size as well
 */
-static void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
+static noinline_for_stack
+void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
 				 struct ext4_buddy *e4b)
 {
 	struct super_block *sb = ac->ac_sb;
@@ -1831,7 +1837,8 @@ void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
 
 }
 
-static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+static noinline_for_stack
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
 {
 
 	int ret;
@@ -2902,7 +2909,11 @@ int __init init_ext4_mballoc(void)
 
 void exit_ext4_mballoc(void)
 {
-	/* XXX: synchronize_rcu(); */
+	/*
+	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
+	 * before destroying the slab cache.
+	 */
+	rcu_barrier();
 	kmem_cache_destroy(ext4_pspace_cachep);
 	kmem_cache_destroy(ext4_ac_cachep);
 	kmem_cache_destroy(ext4_free_ext_cachep);
@@ -3457,7 +3468,8 @@ static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
 * used in in-core bitmap. buddy must be generated from this bitmap
 * Need to be called with ext4 group lock held
 */
-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+static noinline_for_stack
+void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
					ext4_group_t group)
 {
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
@@ -4215,14 +4227,9 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ext4_get_group_no_and_offset(sb, goal, &group, &block);
 
 	/* set up allocation goals */
+	memset(ac, 0, sizeof(struct ext4_allocation_context));
 	ac->ac_b_ex.fe_logical = ar->logical;
-	ac->ac_b_ex.fe_group = 0;
-	ac->ac_b_ex.fe_start = 0;
-	ac->ac_b_ex.fe_len = 0;
 	ac->ac_status = AC_STATUS_CONTINUE;
-	ac->ac_groups_scanned = 0;
-	ac->ac_ex_scanned = 0;
-	ac->ac_found = 0;
 	ac->ac_sb = sb;
 	ac->ac_inode = ar->inode;
 	ac->ac_o_ex.fe_logical = ar->logical;
@@ -4233,15 +4240,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
 	ac->ac_g_ex.fe_group = group;
 	ac->ac_g_ex.fe_start = block;
 	ac->ac_g_ex.fe_len = len;
-	ac->ac_f_ex.fe_len = 0;
 	ac->ac_flags = ar->flags;
-	ac->ac_2order = 0;
-	ac->ac_criteria = 0;
-	ac->ac_pa = NULL;
-	ac->ac_bitmap_page = NULL;
-	ac->ac_buddy_page = NULL;
-	ac->alloc_semp = NULL;
-	ac->ac_lg = NULL;
 
 	/* we have to define context: we'll we work with a file or
 	 * locality group. this is a policy, actually */
@@ -4509,10 +4508,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
 	}
 
 	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-	if (ac) {
-		ac->ac_sb = sb;
-		ac->ac_inode = ar->inode;
-	} else {
+	if (!ac) {
 		ar->len = 0;
 		*errp = -ENOMEM;
 		goto out1;
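The `rcu_barrier()` hunk matters because `synchronize_rcu()` only waits for a grace period to elapse, while `rcu_barrier()` waits until every pending `call_rcu()` callback has actually run; only the latter makes it safe to destroy the slab cache those callbacks free into. A hedged minimal module-exit sketch of the same pattern (demo names, not ext4 code):

```c
/* Minimal sketch: objects freed via call_rcu() out of a slab cache,
 * with rcu_barrier() ensuring the callbacks have finished before the
 * cache is destroyed on module unload. */
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct item {
	int value;
	struct rcu_head rcu;
};

static struct kmem_cache *item_cachep;

static void item_free_rcu(struct rcu_head *head)
{
	kmem_cache_free(item_cachep, container_of(head, struct item, rcu));
}

static int __init demo_init(void)
{
	struct item *it;

	item_cachep = kmem_cache_create("demo_item", sizeof(struct item),
					0, 0, NULL);
	if (!item_cachep)
		return -ENOMEM;
	it = kmem_cache_alloc(item_cachep, GFP_KERNEL);
	if (it)
		call_rcu(&it->rcu, item_free_rcu);	/* frees it "later" */
	return 0;
}

static void __exit demo_exit(void)
{
	/* Without this, item_free_rcu() could run after the cache is gone. */
	rcu_barrier();
	kmem_cache_destroy(item_cachep);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
```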
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 18bfd5dab642..e378cb383979 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -297,6 +297,7 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	unsigned int new_offset;
 	struct buffer_head *bh_in = jh2bh(jh_in);
 	struct jbd2_buffer_trigger_type *triggers;
+	journal_t *journal = transaction->t_journal;
 
 	/*
 	 * The buffer really shouldn't be locked: only the current committing
@@ -310,6 +311,11 @@ int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
 	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
 
 	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+	/* keep subsequent assertions sane */
+	new_bh->b_state = 0;
+	init_buffer(new_bh, NULL, NULL);
+	atomic_set(&new_bh->b_count, 1);
+	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
 
 	/*
 	 * If a new transaction has already done a buffer copy-out, then
@@ -388,14 +394,6 @@ repeat:
 		kunmap_atomic(mapped_data, KM_USER0);
 	}
 
-	/* keep subsequent assertions sane */
-	new_bh->b_state = 0;
-	init_buffer(new_bh, NULL, NULL);
-	atomic_set(&new_bh->b_count, 1);
-	jbd_unlock_bh_state(bh_in);
-
-	new_jh = jbd2_journal_add_journal_head(new_bh);	/* This sleeps */
-
 	set_bh_page(new_bh, new_page, new_offset);
 	new_jh->b_transaction = NULL;
 	new_bh->b_size = jh2bh(jh_in)->b_size;
@@ -412,7 +410,11 @@ repeat:
 	 * copying is moved to the transaction's shadow queue.
 	 */
 	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
-	jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_lock(&journal->j_list_lock);
+	__jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh_in);
+
 	JBUFFER_TRACE(new_jh, "file as BJ_IO");
 	jbd2_journal_file_buffer(new_jh, transaction, BJ_IO);
 
@@ -2410,6 +2412,7 @@ const char *jbd2_dev_to_name(dev_t device)
 	int	i = hash_32(device, CACHE_SIZE_BITS);
 	char	*ret;
 	struct block_device *bd;
+	static struct devname_cache *new_dev;
 
 	rcu_read_lock();
 	if (devcache[i] && devcache[i]->device == device) {
@@ -2419,20 +2422,20 @@ const char *jbd2_dev_to_name(dev_t device)
 	}
 	rcu_read_unlock();
 
+	new_dev = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
+	if (!new_dev)
+		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
 	spin_lock(&devname_cache_lock);
 	if (devcache[i]) {
 		if (devcache[i]->device == device) {
+			kfree(new_dev);
 			ret = devcache[i]->devname;
 			spin_unlock(&devname_cache_lock);
 			return ret;
 		}
 		call_rcu(&devcache[i]->rcu, free_devcache);
 	}
-	devcache[i] = kmalloc(sizeof(struct devname_cache), GFP_KERNEL);
-	if (!devcache[i]) {
-		spin_unlock(&devname_cache_lock);
-		return "NODEV-ALLOCFAILURE"; /* Something non-NULL */
-	}
+	devcache[i] = new_dev;
 	devcache[i]->device = device;
 	bd = bdget(device);
 	if (bd) {
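The `jbd2_dev_to_name()` rework follows the standard recipe for getting a sleeping allocation out of a spinlock critical section: allocate with GFP_KERNEL before taking the lock, publish the object under the lock, and free it if another CPU installed an entry first. A userspace pthread analogue of the same pattern (illustrative only, names invented):

```c
/* Allocate outside the lock, publish under it, free on a lost race. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;
static char *cached_name;	/* the single "cache slot" */

static const char *lookup_name(const char *name)
{
	/* The allocation that may sleep happens before the lock is taken. */
	char *new_entry = strdup(name);

	if (!new_entry)
		return "ALLOC-FAILURE";

	pthread_mutex_lock(&cache_lock);
	if (cached_name) {
		free(new_entry);	/* lost the race: discard ours */
		pthread_mutex_unlock(&cache_lock);
		return cached_name;
	}
	cached_name = new_entry;	/* won: publish under the lock */
	pthread_mutex_unlock(&cache_lock);
	return new_entry;
}

int main(void)
{
	printf("%s\n", lookup_name("sda1"));
	printf("%s\n", lookup_name("sda1"));	/* served from the cache */
	return 0;
}
```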
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 494501edba6b..6213ac728f30 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -499,34 +499,15 @@ void jbd2_journal_unlock_updates (journal_t *journal) | |||
| 499 | wake_up(&journal->j_wait_transaction_locked); | 499 | wake_up(&journal->j_wait_transaction_locked); |
| 500 | } | 500 | } |
| 501 | 501 | ||
| 502 | /* | 502 | static void warn_dirty_buffer(struct buffer_head *bh) |
| 503 | * Report any unexpected dirty buffers which turn up. Normally those | ||
| 504 | * indicate an error, but they can occur if the user is running (say) | ||
| 505 | * tune2fs to modify the live filesystem, so we need the option of | ||
| 506 | * continuing as gracefully as possible. | ||
| 507 | * | ||
| 508 | * The caller should already hold the journal lock and | ||
| 509 | * j_list_lock spinlock: most callers will need those anyway | ||
| 510 | * in order to probe the buffer's journaling state safely. | ||
| 511 | */ | ||
| 512 | static void jbd_unexpected_dirty_buffer(struct journal_head *jh) | ||
| 513 | { | 503 | { |
| 514 | int jlist; | 504 | char b[BDEVNAME_SIZE]; |
| 515 | |||
| 516 | /* If this buffer is one which might reasonably be dirty | ||
| 517 | * --- ie. data, or not part of this journal --- then | ||
| 518 | * we're OK to leave it alone, but otherwise we need to | ||
| 519 | * move the dirty bit to the journal's own internal | ||
| 520 | * JBDDirty bit. */ | ||
| 521 | jlist = jh->b_jlist; | ||
| 522 | 505 | ||
| 523 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || | 506 | printk(KERN_WARNING |
| 524 | jlist == BJ_Shadow || jlist == BJ_Forget) { | 507 | "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). " |
| 525 | struct buffer_head *bh = jh2bh(jh); | 508 | "There's a risk of filesystem corruption in case of system " |
| 526 | 509 | "crash.\n", | |
| 527 | if (test_clear_buffer_dirty(bh)) | 510 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); |
| 528 | set_buffer_jbddirty(bh); | ||
| 529 | } | ||
| 530 | } | 511 | } |
| 531 | 512 | ||
| 532 | /* | 513 | /* |
| @@ -593,14 +574,16 @@ repeat: | |||
| 593 | if (jh->b_next_transaction) | 574 | if (jh->b_next_transaction) |
| 594 | J_ASSERT_JH(jh, jh->b_next_transaction == | 575 | J_ASSERT_JH(jh, jh->b_next_transaction == |
| 595 | transaction); | 576 | transaction); |
| 577 | warn_dirty_buffer(bh); | ||
| 596 | } | 578 | } |
| 597 | /* | 579 | /* |
| 598 | * In any case we need to clean the dirty flag and we must | 580 | * In any case we need to clean the dirty flag and we must |
| 599 | * do it under the buffer lock to be sure we don't race | 581 | * do it under the buffer lock to be sure we don't race |
| 600 | * with running write-out. | 582 | * with running write-out. |
| 601 | */ | 583 | */ |
| 602 | JBUFFER_TRACE(jh, "Unexpected dirty buffer"); | 584 | JBUFFER_TRACE(jh, "Journalling dirty buffer"); |
| 603 | jbd_unexpected_dirty_buffer(jh); | 585 | clear_buffer_dirty(bh); |
| 586 | set_buffer_jbddirty(bh); | ||
| 604 | } | 587 | } |
| 605 | 588 | ||
| 606 | unlock_buffer(bh); | 589 | unlock_buffer(bh); |
| @@ -843,6 +826,15 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
| 843 | J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); | 826 | J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); |
| 844 | 827 | ||
| 845 | if (jh->b_transaction == NULL) { | 828 | if (jh->b_transaction == NULL) { |
| 829 | /* | ||
| 830 | * Previous jbd2_journal_forget() could have left the buffer | ||
| 831 | * with jbddirty bit set because it was being committed. When | ||
| 832 | * the commit finished, we've filed the buffer for | ||
| 833 | * checkpointing and marked it dirty. Now we are reallocating | ||
| 834 | * the buffer so the transaction freeing it must have | ||
| 835 | * committed and so it's safe to clear the dirty bit. | ||
| 836 | */ | ||
| 837 | clear_buffer_dirty(jh2bh(jh)); | ||
| 846 | jh->b_transaction = transaction; | 838 | jh->b_transaction = transaction; |
| 847 | 839 | ||
| 848 | /* first access by this transaction */ | 840 | /* first access by this transaction */ |
| @@ -1644,8 +1636,13 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) | |||
| 1644 | 1636 | ||
| 1645 | if (jh->b_cp_transaction) { | 1637 | if (jh->b_cp_transaction) { |
| 1646 | JBUFFER_TRACE(jh, "on running+cp transaction"); | 1638 | JBUFFER_TRACE(jh, "on running+cp transaction"); |
| 1639 | /* | ||
| 1640 | * We don't want to write the buffer anymore, clear the | ||
| 1641 | * bit so that we don't confuse checks in | ||
| 1642 | * __journal_file_buffer | ||
| 1643 | */ | ||
| 1644 | clear_buffer_dirty(bh); | ||
| 1647 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); | 1645 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |
| 1648 | clear_buffer_jbddirty(bh); | ||
| 1649 | may_free = 0; | 1646 | may_free = 0; |
| 1650 | } else { | 1647 | } else { |
| 1651 | JBUFFER_TRACE(jh, "on running transaction"); | 1648 | JBUFFER_TRACE(jh, "on running transaction"); |
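__dispose_buffer() runs when the data is being invalidated (e.g. on truncate), so the buffer must never reach disk; clearing buffer_dirty before filing it as BJ_Forget both prevents that write and keeps the dirty-buffer warning newly added to __jbd2_journal_file_buffer() from firing spuriously. A sketch of the interaction, with toy types and assumed names:

```c
#include <stdbool.h>
#include <stdio.h>

struct tbuf { bool dirty; bool jbddirty; };

/* Simplified stand-in for __jbd2_journal_file_buffer(..., BJ_Forget). */
static void file_buffer_forget(struct tbuf *b)
{
	if (b->dirty)	/* the check this patch adds */
		fprintf(stderr, "JBD: spotted dirty metadata buffer\n");
	b->dirty = false;
	b->jbddirty = false;
}

static void dispose_buffer(struct tbuf *b)
{
	b->dirty = false;	/* invalidated data must not be written out */
	file_buffer_forget(b);	/* ... and must not trip the warning above */
}

int main(void)
{
	struct tbuf b = { .dirty = true, .jbddirty = true };
	dispose_buffer(&b);	/* silent: no warning is printed */
	return 0;
}
```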
| @@ -1896,12 +1893,17 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 1896 | if (jh->b_transaction && jh->b_jlist == jlist) | 1893 | if (jh->b_transaction && jh->b_jlist == jlist) |
| 1897 | return; | 1894 | return; |
| 1898 | 1895 | ||
| 1899 | /* The following list of buffer states needs to be consistent | ||
| 1900 | * with jbd_unexpected_dirty_buffer()'s handling of dirty | ||
| 1901 | * state. */ | ||
| 1902 | |||
| 1903 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || | 1896 | if (jlist == BJ_Metadata || jlist == BJ_Reserved || |
| 1904 | jlist == BJ_Shadow || jlist == BJ_Forget) { | 1897 | jlist == BJ_Shadow || jlist == BJ_Forget) { |
| 1898 | /* | ||
| 1899 | * For metadata buffers, we track dirty bit in buffer_jbddirty | ||
| 1900 | * instead of buffer_dirty. We should not see a dirty bit set | ||
| 1901 | * here because we clear it in do_get_write_access but e.g. | ||
| 1902 | * tune2fs can modify the sb and set the dirty bit at any time | ||
| 1903 | * so we try to gracefully handle that. | ||
| 1904 | */ | ||
| 1905 | if (buffer_dirty(bh)) | ||
| 1906 | warn_dirty_buffer(bh); | ||
| 1905 | if (test_clear_buffer_dirty(bh) || | 1907 | if (test_clear_buffer_dirty(bh) || |
| 1906 | test_clear_buffer_jbddirty(bh)) | 1908 | test_clear_buffer_jbddirty(bh)) |
| 1907 | was_dirty = 1; | 1909 | was_dirty = 1; |
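The replacement comment spells out the contract: for journaled metadata, dirtiness is tracked in buffer_jbddirty, so a plain dirty bit at this point normally means someone modified the device behind the journal's back (the comment's tune2fs example), which deserves a warning but graceful handling. The surrounding test_clear_buffer_*() calls rely on an atomic read-and-clear; a minimal C11 sketch of that idiom:

```c
#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool dirty;

/*
 * Returns the old value and clears the flag in a single atomic step, so
 * two concurrent callers can never both observe the dirty -> clean
 * transition for the same buffer.
 */
static bool test_clear_dirty(void)
{
	return atomic_exchange(&dirty, false);
}

int main(void)
{
	atomic_store(&dirty, true);
	return test_clear_dirty() ? 0 : 1;
}
```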
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index acf4cc9cd36d..dfbc9b0edc88 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h | |||
| @@ -34,7 +34,8 @@ TRACE_EVENT(ext4_free_inode, | |||
| 34 | 34 | ||
| 35 | TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", | 35 | TP_printk("dev %s ino %lu mode %d uid %u gid %u blocks %llu", |
| 36 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, | 36 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->mode, |
| 37 | __entry->uid, __entry->gid, __entry->blocks) | 37 | __entry->uid, __entry->gid, |
| 38 | (unsigned long long) __entry->blocks) | ||
| 38 | ); | 39 | ); |
| 39 | 40 | ||
| 40 | TRACE_EVENT(ext4_request_inode, | 41 | TRACE_EVENT(ext4_request_inode, |
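This is the "Fix type warning on 64-bit platforms" change: the blocks field is a 64-bit quantity, but on 64-bit kernels u64 is typedef'ed to unsigned long, so handing it straight to a %llu format triggers a printf-format warning. Casting to unsigned long long is the portable fix, since that type is at least 64 bits wide on every ABI. The same issue reproduced in plain C:

```c
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t blocks = 12345;	/* "unsigned long" on most LP64 ABIs */

	/* printf("blocks %llu\n", blocks);     may warn under -Wformat */
	printf("blocks %llu\n", (unsigned long long)blocks);
	return 0;
}
```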
| @@ -189,7 +190,7 @@ TRACE_EVENT(ext4_journalled_write_end, | |||
| 189 | __entry->copied) | 190 | __entry->copied) |
| 190 | ); | 191 | ); |
| 191 | 192 | ||
| 192 | TRACE_EVENT(ext4_da_writepage, | 193 | TRACE_EVENT(ext4_writepage, |
| 193 | TP_PROTO(struct inode *inode, struct page *page), | 194 | TP_PROTO(struct inode *inode, struct page *page), |
| 194 | 195 | ||
| 195 | TP_ARGS(inode, page), | 196 | TP_ARGS(inode, page), |
| @@ -341,49 +342,6 @@ TRACE_EVENT(ext4_da_write_end, | |||
| 341 | __entry->copied) | 342 | __entry->copied) |
| 342 | ); | 343 | ); |
| 343 | 344 | ||
| 344 | TRACE_EVENT(ext4_normal_writepage, | ||
| 345 | TP_PROTO(struct inode *inode, struct page *page), | ||
| 346 | |||
| 347 | TP_ARGS(inode, page), | ||
| 348 | |||
| 349 | TP_STRUCT__entry( | ||
| 350 | __field( dev_t, dev ) | ||
| 351 | __field( ino_t, ino ) | ||
| 352 | __field( pgoff_t, index ) | ||
| 353 | ), | ||
| 354 | |||
| 355 | TP_fast_assign( | ||
| 356 | __entry->dev = inode->i_sb->s_dev; | ||
| 357 | __entry->ino = inode->i_ino; | ||
| 358 | __entry->index = page->index; | ||
| 359 | ), | ||
| 360 | |||
| 361 | TP_printk("dev %s ino %lu page_index %lu", | ||
| 362 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) | ||
| 363 | ); | ||
| 364 | |||
| 365 | TRACE_EVENT(ext4_journalled_writepage, | ||
| 366 | TP_PROTO(struct inode *inode, struct page *page), | ||
| 367 | |||
| 368 | TP_ARGS(inode, page), | ||
| 369 | |||
| 370 | TP_STRUCT__entry( | ||
| 371 | __field( dev_t, dev ) | ||
| 372 | __field( ino_t, ino ) | ||
| 373 | __field( pgoff_t, index ) | ||
| 374 | |||
| 375 | ), | ||
| 376 | |||
| 377 | TP_fast_assign( | ||
| 378 | __entry->dev = inode->i_sb->s_dev; | ||
| 379 | __entry->ino = inode->i_ino; | ||
| 380 | __entry->index = page->index; | ||
| 381 | ), | ||
| 382 | |||
| 383 | TP_printk("dev %s ino %lu page_index %lu", | ||
| 384 | jbd2_dev_to_name(__entry->dev), __entry->ino, __entry->index) | ||
| 385 | ); | ||
| 386 | |||
| 387 | TRACE_EVENT(ext4_discard_blocks, | 345 | TRACE_EVENT(ext4_discard_blocks, |
| 388 | TP_PROTO(struct super_block *sb, unsigned long long blk, | 346 | TP_PROTO(struct super_block *sb, unsigned long long blk, |
| 389 | unsigned long long count), | 347 | unsigned long long count), |
