about summary refs log tree commit diff stats
path: root/fs/ext3/inode.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2014-04-07 20:59:17 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2014-04-07 20:59:17 -0400
commit a7963eb7f4c4b5df84d5dd5083734278ad75bafb (patch)
tree 65ac1402e20651b9fc59207480bac8399a30771e /fs/ext3/inode.c
parent b003d7706abc5d75cb58de0c9de8f1fc77e57008 (diff)
parent 01d8885785a60ae8f4c37b0ed75bdc96d0fc6a44 (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull ext3 improvements, cleanups, reiserfs fix from Jan Kara:
 "various cleanups for ext2, ext3, udf, isofs, a documentation update
  for quota, and a fix of a race in reiserfs readdir implementation"

* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs:
  reiserfs: fix race in readdir
  ext2: acl: remove unneeded include of linux/capability.h
  ext3: explicitly remove inode from orphan list after failed direct io
  fs/isofs/inode.c add __init to init_inodecache()
  ext3: Speedup WB_SYNC_ALL pass
  fs/quota/Kconfig: Update filesystems
  ext3: Update outdated comment before ext3_ordered_writepage()
  ext3: Update PF_MEMALLOC handling in ext3_write_inode()
  ext2/3: use prandom_u32() instead of get_random_bytes()
  ext3: remove an unneeded check in ext3_new_blocks()
  ext3: remove unneeded check in ext3_ordered_writepage()
  fs: Mark function as static in ext3/xattr_security.c
  fs: Mark function as static in ext3/dir.c
  fs: Mark function as static in ext2/xattr_security.c
  ext3: Add __init macro to init_inodecache
  ext2: Add __init macro to init_inodecache
  udf: Add __init macro to init_inodecache
  fs: udf: parse_options: blocksize check
Diffstat (limited to 'fs/ext3/inode.c')
-rw-r--r-- fs/ext3/inode.c 86
1 files changed, 25 insertions, 61 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index efce2bbfb5e5..f5157d0d1b43 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
1559} 1559}
1560 1560
1561/* 1561/*
1562 * Note that we always start a transaction even if we're not journalling 1562 * Note that whenever we need to map blocks we start a transaction even if
1563 * data. This is to preserve ordering: any hole instantiation within 1563 * we're not journalling data. This is to preserve ordering: any hole
1564 * __block_write_full_page -> ext3_get_block() should be journalled 1564 * instantiation within __block_write_full_page -> ext3_get_block() should be
1565 * along with the data so we don't crash and then get metadata which 1565 * journalled along with the data so we don't crash and then get metadata which
1566 * refers to old data. 1566 * refers to old data.
1567 * 1567 *
1568 * In all journalling modes block_write_full_page() will start the I/O. 1568 * In all journalling modes block_write_full_page() will start the I/O.
1569 * 1569 *
1570 * Problem:
1571 *
1572 * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
1573 * ext3_writepage()
1574 *
1575 * Similar for:
1576 *
1577 * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
1578 *
1579 * Same applies to ext3_get_block(). We will deadlock on various things like
1580 * lock_journal and i_truncate_mutex.
1581 *
1582 * Setting PF_MEMALLOC here doesn't work - too many internal memory
1583 * allocations fail.
1584 *
1585 * 16May01: If we're reentered then journal_current_handle() will be
1586 * non-zero. We simply *return*.
1587 *
1588 * 1 July 2001: @@@ FIXME:
1589 * In journalled data mode, a data buffer may be metadata against the
1590 * current transaction. But the same file is part of a shared mapping
1591 * and someone does a writepage() on it.
1592 *
1593 * We will move the buffer onto the async_data list, but *after* it has
1594 * been dirtied. So there's a small window where we have dirty data on
1595 * BJ_Metadata.
1596 *
1597 * Note that this only applies to the last partial page in the file. The
1598 * bit which block_write_full_page() uses prepare/commit for. (That's
1599 * broken code anyway: it's wrong for msync()).
1600 *
1601 * It's a rare case: affects the final partial page, for journalled data
1602 * where the file is subject to bith write() and writepage() in the same
1603 * transction. To fix it we'll need a custom block_write_full_page().
1604 * We'll probably need that anyway for journalling writepage() output.
1605 *
1606 * We don't honour synchronous mounts for writepage(). That would be 1570 * We don't honour synchronous mounts for writepage(). That would be
1607 * disastrous. Any write() or metadata operation will sync the fs for 1571 * disastrous. Any write() or metadata operation will sync the fs for
1608 * us. 1572 * us.
1609 *
1610 * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
1611 * we don't need to open a transaction here.
1612 */ 1573 */
1613static int ext3_ordered_writepage(struct page *page, 1574static int ext3_ordered_writepage(struct page *page,
1614 struct writeback_control *wbc) 1575 struct writeback_control *wbc)
@@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page,
1673 * block_write_full_page() succeeded. Otherwise they are unmapped, 1634 * block_write_full_page() succeeded. Otherwise they are unmapped,
1674 * and generally junk. 1635 * and generally junk.
1675 */ 1636 */
1676 if (ret == 0) { 1637 if (ret == 0)
1677 err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, 1638 ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
1678 NULL, journal_dirty_data_fn); 1639 NULL, journal_dirty_data_fn);
1679 if (!ret)
1680 ret = err;
1681 }
1682 walk_page_buffers(handle, page_bufs, 0, 1640 walk_page_buffers(handle, page_bufs, 0,
1683 PAGE_CACHE_SIZE, NULL, bput_one); 1641 PAGE_CACHE_SIZE, NULL, bput_one);
1684 err = ext3_journal_stop(handle); 1642 err = ext3_journal_stop(handle);
@@ -1925,6 +1883,8 @@ retry:
1925 * and pretend the write failed... */ 1883 * and pretend the write failed... */
1926 ext3_truncate_failed_direct_write(inode); 1884 ext3_truncate_failed_direct_write(inode);
1927 ret = PTR_ERR(handle); 1885 ret = PTR_ERR(handle);
1886 if (inode->i_nlink)
1887 ext3_orphan_del(NULL, inode);
1928 goto out; 1888 goto out;
1929 } 1889 }
1930 if (inode->i_nlink) 1890 if (inode->i_nlink)
@@ -3212,21 +3172,20 @@ out_brelse:
3212 * 3172 *
3213 * We are called from a few places: 3173 * We are called from a few places:
3214 * 3174 *
3215 * - Within generic_file_write() for O_SYNC files. 3175 * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
3216 * Here, there will be no transaction running. We wait for any running 3176 * Here, there will be no transaction running. We wait for any running
3217 * transaction to commit. 3177 * transaction to commit.
3218 * 3178 *
3219 * - Within sys_sync(), kupdate and such. 3179 * - Within flush work (for sys_sync(), kupdate and such).
3220 * We wait on commit, if tol to. 3180 * We wait on commit, if told to.
3221 * 3181 *
3222 * - Within prune_icache() (PF_MEMALLOC == true) 3182 * - Within iput_final() -> write_inode_now()
3223 * Here we simply return. We can't afford to block kswapd on the 3183 * We wait on commit, if told to.
3224 * journal commit.
3225 * 3184 *
3226 * In all cases it is actually safe for us to return without doing anything, 3185 * In all cases it is actually safe for us to return without doing anything,
3227 * because the inode has been copied into a raw inode buffer in 3186 * because the inode has been copied into a raw inode buffer in
3228 * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 3187 * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
3229 * knfsd. 3188 * writeback.
3230 * 3189 *
3231 * Note that we are absolutely dependent upon all inode dirtiers doing the 3190 * Note that we are absolutely dependent upon all inode dirtiers doing the
3232 * right thing: they *must* call mark_inode_dirty() after dirtying info in 3191 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -3238,13 +3197,13 @@ out_brelse:
3238 * stuff(); 3197 * stuff();
3239 * inode->i_size = expr; 3198 * inode->i_size = expr;
3240 * 3199 *
3241 * is in error because a kswapd-driven write_inode() could occur while 3200 * is in error because write_inode() could occur while `stuff()' is running,
3242 * `stuff()' is running, and the new i_size will be lost. Plus the inode 3201 * and the new i_size will be lost. Plus the inode will no longer be on the
3243 * will no longer be on the superblock's dirty inode list. 3202 * superblock's dirty inode list.
3244 */ 3203 */
3245int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) 3204int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3246{ 3205{
3247 if (current->flags & PF_MEMALLOC) 3206 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
3248 return 0; 3207 return 0;
3249 3208
3250 if (ext3_journal_current_handle()) { 3209 if (ext3_journal_current_handle()) {
@@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
3253 return -EIO; 3212 return -EIO;
3254 } 3213 }
3255 3214
3256 if (wbc->sync_mode != WB_SYNC_ALL) 3215 /*
3216 * No need to force transaction in WB_SYNC_NONE mode. Also
3217 * ext3_sync_fs() will force the commit after everything is
3218 * written.
3219 */
3220 if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
3257 return 0; 3221 return 0;
3258 3222
3259 return ext3_force_commit(inode->i_sb); 3223 return ext3_force_commit(inode->i_sb);