diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-07 20:59:17 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-07 20:59:17 -0400 |
commit | a7963eb7f4c4b5df84d5dd5083734278ad75bafb (patch) | |
tree | 65ac1402e20651b9fc59207480bac8399a30771e /fs/ext3/inode.c | |
parent | b003d7706abc5d75cb58de0c9de8f1fc77e57008 (diff) | |
parent | 01d8885785a60ae8f4c37b0ed75bdc96d0fc6a44 (diff) |
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull ext3 improvements, cleanups, reiserfs fix from Jan Kara:
"various cleanups for ext2, ext3, udf, isofs, a documentation update
for quota, and a fix of a race in reiserfs readdir implementation"
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
reiserfs: fix race in readdir
ext2: acl: remove unneeded include of linux/capability.h
ext3: explicitly remove inode from orphan list after failed direct io
fs/isofs/inode.c add __init to init_inodecache()
ext3: Speedup WB_SYNC_ALL pass
fs/quota/Kconfig: Update filesystems
ext3: Update outdated comment before ext3_ordered_writepage()
ext3: Update PF_MEMALLOC handling in ext3_write_inode()
ext2/3: use prandom_u32() instead of get_random_bytes()
ext3: remove an unneeded check in ext3_new_blocks()
ext3: remove unneeded check in ext3_ordered_writepage()
fs: Mark function as static in ext3/xattr_security.c
fs: Mark function as static in ext3/dir.c
fs: Mark function as static in ext2/xattr_security.c
ext3: Add __init macro to init_inodecache
ext2: Add __init macro to init_inodecache
udf: Add __init macro to init_inodecache
fs: udf: parse_options: blocksize check
Diffstat (limited to 'fs/ext3/inode.c')
-rw-r--r-- | fs/ext3/inode.c | 86 |
1 file changed, 25 insertions, 61 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index efce2bbfb5e5..f5157d0d1b43 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) | |||
1559 | } | 1559 | } |
1560 | 1560 | ||
1561 | /* | 1561 | /* |
1562 | * Note that we always start a transaction even if we're not journalling | 1562 | * Note that whenever we need to map blocks we start a transaction even if |
1563 | * data. This is to preserve ordering: any hole instantiation within | 1563 | * we're not journalling data. This is to preserve ordering: any hole |
1564 | * __block_write_full_page -> ext3_get_block() should be journalled | 1564 | * instantiation within __block_write_full_page -> ext3_get_block() should be |
1565 | * along with the data so we don't crash and then get metadata which | 1565 | * journalled along with the data so we don't crash and then get metadata which |
1566 | * refers to old data. | 1566 | * refers to old data. |
1567 | * | 1567 | * |
1568 | * In all journalling modes block_write_full_page() will start the I/O. | 1568 | * In all journalling modes block_write_full_page() will start the I/O. |
1569 | * | 1569 | * |
1570 | * Problem: | ||
1571 | * | ||
1572 | * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> | ||
1573 | * ext3_writepage() | ||
1574 | * | ||
1575 | * Similar for: | ||
1576 | * | ||
1577 | * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... | ||
1578 | * | ||
1579 | * Same applies to ext3_get_block(). We will deadlock on various things like | ||
1580 | * lock_journal and i_truncate_mutex. | ||
1581 | * | ||
1582 | * Setting PF_MEMALLOC here doesn't work - too many internal memory | ||
1583 | * allocations fail. | ||
1584 | * | ||
1585 | * 16May01: If we're reentered then journal_current_handle() will be | ||
1586 | * non-zero. We simply *return*. | ||
1587 | * | ||
1588 | * 1 July 2001: @@@ FIXME: | ||
1589 | * In journalled data mode, a data buffer may be metadata against the | ||
1590 | * current transaction. But the same file is part of a shared mapping | ||
1591 | * and someone does a writepage() on it. | ||
1592 | * | ||
1593 | * We will move the buffer onto the async_data list, but *after* it has | ||
1594 | * been dirtied. So there's a small window where we have dirty data on | ||
1595 | * BJ_Metadata. | ||
1596 | * | ||
1597 | * Note that this only applies to the last partial page in the file. The | ||
1598 | * bit which block_write_full_page() uses prepare/commit for. (That's | ||
1599 | * broken code anyway: it's wrong for msync()). | ||
1600 | * | ||
1601 | * It's a rare case: affects the final partial page, for journalled data | ||
1602 | * where the file is subject to bith write() and writepage() in the same | ||
1603 | * transction. To fix it we'll need a custom block_write_full_page(). | ||
1604 | * We'll probably need that anyway for journalling writepage() output. | ||
1605 | * | ||
1606 | * We don't honour synchronous mounts for writepage(). That would be | 1570 | * We don't honour synchronous mounts for writepage(). That would be |
1607 | * disastrous. Any write() or metadata operation will sync the fs for | 1571 | * disastrous. Any write() or metadata operation will sync the fs for |
1608 | * us. | 1572 | * us. |
1609 | * | ||
1610 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, | ||
1611 | * we don't need to open a transaction here. | ||
1612 | */ | 1573 | */ |
1613 | static int ext3_ordered_writepage(struct page *page, | 1574 | static int ext3_ordered_writepage(struct page *page, |
1614 | struct writeback_control *wbc) | 1575 | struct writeback_control *wbc) |
@@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page, | |||
1673 | * block_write_full_page() succeeded. Otherwise they are unmapped, | 1634 | * block_write_full_page() succeeded. Otherwise they are unmapped, |
1674 | * and generally junk. | 1635 | * and generally junk. |
1675 | */ | 1636 | */ |
1676 | if (ret == 0) { | 1637 | if (ret == 0) |
1677 | err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, | 1638 | ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, |
1678 | NULL, journal_dirty_data_fn); | 1639 | NULL, journal_dirty_data_fn); |
1679 | if (!ret) | ||
1680 | ret = err; | ||
1681 | } | ||
1682 | walk_page_buffers(handle, page_bufs, 0, | 1640 | walk_page_buffers(handle, page_bufs, 0, |
1683 | PAGE_CACHE_SIZE, NULL, bput_one); | 1641 | PAGE_CACHE_SIZE, NULL, bput_one); |
1684 | err = ext3_journal_stop(handle); | 1642 | err = ext3_journal_stop(handle); |
@@ -1925,6 +1883,8 @@ retry: | |||
1925 | * and pretend the write failed... */ | 1883 | * and pretend the write failed... */ |
1926 | ext3_truncate_failed_direct_write(inode); | 1884 | ext3_truncate_failed_direct_write(inode); |
1927 | ret = PTR_ERR(handle); | 1885 | ret = PTR_ERR(handle); |
1886 | if (inode->i_nlink) | ||
1887 | ext3_orphan_del(NULL, inode); | ||
1928 | goto out; | 1888 | goto out; |
1929 | } | 1889 | } |
1930 | if (inode->i_nlink) | 1890 | if (inode->i_nlink) |
@@ -3212,21 +3172,20 @@ out_brelse: | |||
3212 | * | 3172 | * |
3213 | * We are called from a few places: | 3173 | * We are called from a few places: |
3214 | * | 3174 | * |
3215 | * - Within generic_file_write() for O_SYNC files. | 3175 | * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. |
3216 | * Here, there will be no transaction running. We wait for any running | 3176 | * Here, there will be no transaction running. We wait for any running |
3217 | * transaction to commit. | 3177 | * transaction to commit. |
3218 | * | 3178 | * |
3219 | * - Within sys_sync(), kupdate and such. | 3179 | * - Within flush work (for sys_sync(), kupdate and such). |
3220 | * We wait on commit, if tol to. | 3180 | * We wait on commit, if told to. |
3221 | * | 3181 | * |
3222 | * - Within prune_icache() (PF_MEMALLOC == true) | 3182 | * - Within iput_final() -> write_inode_now() |
3223 | * Here we simply return. We can't afford to block kswapd on the | 3183 | * We wait on commit, if told to. |
3224 | * journal commit. | ||
3225 | * | 3184 | * |
3226 | * In all cases it is actually safe for us to return without doing anything, | 3185 | * In all cases it is actually safe for us to return without doing anything, |
3227 | * because the inode has been copied into a raw inode buffer in | 3186 | * because the inode has been copied into a raw inode buffer in |
3228 | * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for | 3187 | * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL |
3229 | * knfsd. | 3188 | * writeback. |
3230 | * | 3189 | * |
3231 | * Note that we are absolutely dependent upon all inode dirtiers doing the | 3190 | * Note that we are absolutely dependent upon all inode dirtiers doing the |
3232 | * right thing: they *must* call mark_inode_dirty() after dirtying info in | 3191 | * right thing: they *must* call mark_inode_dirty() after dirtying info in |
@@ -3238,13 +3197,13 @@ out_brelse: | |||
3238 | * stuff(); | 3197 | * stuff(); |
3239 | * inode->i_size = expr; | 3198 | * inode->i_size = expr; |
3240 | * | 3199 | * |
3241 | * is in error because a kswapd-driven write_inode() could occur while | 3200 | * is in error because write_inode() could occur while `stuff()' is running, |
3242 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 3201 | * and the new i_size will be lost. Plus the inode will no longer be on the |
3243 | * will no longer be on the superblock's dirty inode list. | 3202 | * superblock's dirty inode list. |
3244 | */ | 3203 | */ |
3245 | int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) | 3204 | int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) |
3246 | { | 3205 | { |
3247 | if (current->flags & PF_MEMALLOC) | 3206 | if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) |
3248 | return 0; | 3207 | return 0; |
3249 | 3208 | ||
3250 | if (ext3_journal_current_handle()) { | 3209 | if (ext3_journal_current_handle()) { |
@@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
3253 | return -EIO; | 3212 | return -EIO; |
3254 | } | 3213 | } |
3255 | 3214 | ||
3256 | if (wbc->sync_mode != WB_SYNC_ALL) | 3215 | /* |
3216 | * No need to force transaction in WB_SYNC_NONE mode. Also | ||
3217 | * ext3_sync_fs() will force the commit after everything is | ||
3218 | * written. | ||
3219 | */ | ||
3220 | if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) | ||
3257 | return 0; | 3221 | return 0; |
3258 | 3222 | ||
3259 | return ext3_force_commit(inode->i_sb); | 3223 | return ext3_force_commit(inode->i_sb); |