aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorJan Kara <jack@suse.cz>2008-07-11 19:27:31 -0400
committerTheodore Ts'o <tytso@mit.edu>2008-07-11 19:27:31 -0400
commit678aaf481496b01473b778685eca231d6784098b (patch)
tree298fa039e4910a0ead3cdfb814af167f378391bc /fs
parentc851ed540173736e60d48b53b91a16ea5c903896 (diff)
ext4: Use new framework for data=ordered mode in JBD2
This patch makes ext4 use inode-based implementation of data=ordered mode in JBD2. It allows us to unify some data=ordered and data=writeback paths (especially writepage since we don't have to start a transaction anymore) and remove some buffer walking. Updated fix from Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> to fix file system hang due to corrupt jinode values. Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4_i.h1
-rw-r--r--fs/ext4/ext4_jbd2.h7
-rw-r--r--fs/ext4/inode.c158
-rw-r--r--fs/ext4/super.c5
4 files changed, 59 insertions, 112 deletions
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h
index abf2744164e0..c2903ef72159 100644
--- a/fs/ext4/ext4_i.h
+++ b/fs/ext4/ext4_i.h
@@ -150,6 +150,7 @@ struct ext4_inode_info {
150 */ 150 */
151 struct rw_semaphore i_data_sem; 151 struct rw_semaphore i_data_sem;
152 struct inode vfs_inode; 152 struct inode vfs_inode;
153 struct jbd2_inode jinode;
153 154
154 unsigned long i_ext_generation; 155 unsigned long i_ext_generation;
155 struct ext4_ext_cache i_cached_extent; 156 struct ext4_ext_cache i_cached_extent;
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d0aa9ee20f88..eb8bc3afe6e9 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -154,8 +154,6 @@ int __ext4_journal_dirty_metadata(const char *where,
154#define ext4_journal_forget(handle, bh) \ 154#define ext4_journal_forget(handle, bh) \
155 __ext4_journal_forget(__func__, (handle), (bh)) 155 __ext4_journal_forget(__func__, (handle), (bh))
156 156
157int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
158
159handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); 157handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
160int __ext4_journal_stop(const char *where, handle_t *handle); 158int __ext4_journal_stop(const char *where, handle_t *handle);
161 159
@@ -192,6 +190,11 @@ static inline int ext4_journal_force_commit(journal_t *journal)
192 return jbd2_journal_force_commit(journal); 190 return jbd2_journal_force_commit(journal);
193} 191}
194 192
193static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
194{
195 return jbd2_journal_file_inode(handle, &EXT4_I(inode)->jinode);
196}
197
195/* super.c */ 198/* super.c */
196int ext4_force_commit(struct super_block *sb); 199int ext4_force_commit(struct super_block *sb);
197 200
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 320acb6c35bf..7b9569179fdf 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -39,6 +39,13 @@
39#include "xattr.h" 39#include "xattr.h"
40#include "acl.h" 40#include "acl.h"
41 41
42static inline int ext4_begin_ordered_truncate(struct inode *inode,
43 loff_t new_size)
44{
45 return jbd2_journal_begin_ordered_truncate(&EXT4_I(inode)->jinode,
46 new_size);
47}
48
42/* 49/*
43 * Test whether an inode is a fast symlink. 50 * Test whether an inode is a fast symlink.
44 */ 51 */
@@ -181,6 +188,8 @@ void ext4_delete_inode (struct inode * inode)
181{ 188{
182 handle_t *handle; 189 handle_t *handle;
183 190
191 if (ext4_should_order_data(inode))
192 ext4_begin_ordered_truncate(inode, 0);
184 truncate_inode_pages(&inode->i_data, 0); 193 truncate_inode_pages(&inode->i_data, 0);
185 194
186 if (is_bad_inode(inode)) 195 if (is_bad_inode(inode))
@@ -1273,15 +1282,6 @@ out:
1273 return ret; 1282 return ret;
1274} 1283}
1275 1284
1276int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
1277{
1278 int err = jbd2_journal_dirty_data(handle, bh);
1279 if (err)
1280 ext4_journal_abort_handle(__func__, __func__,
1281 bh, handle, err);
1282 return err;
1283}
1284
1285/* For write_end() in data=journal mode */ 1285/* For write_end() in data=journal mode */
1286static int write_end_fn(handle_t *handle, struct buffer_head *bh) 1286static int write_end_fn(handle_t *handle, struct buffer_head *bh)
1287{ 1287{
@@ -1311,8 +1311,7 @@ static int ext4_ordered_write_end(struct file *file,
1311 from = pos & (PAGE_CACHE_SIZE - 1); 1311 from = pos & (PAGE_CACHE_SIZE - 1);
1312 to = from + len; 1312 to = from + len;
1313 1313
1314 ret = walk_page_buffers(handle, page_buffers(page), 1314 ret = ext4_jbd2_file_inode(handle, inode);
1315 from, to, NULL, ext4_journal_dirty_data);
1316 1315
1317 if (ret == 0) { 1316 if (ret == 0) {
1318 /* 1317 /*
@@ -1472,25 +1471,22 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
1472 return 0; 1471 return 0;
1473} 1472}
1474 1473
1475static int jbd2_journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh)
1476{
1477 if (buffer_mapped(bh))
1478 return ext4_journal_dirty_data(handle, bh);
1479 return 0;
1480}
1481
1482static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh) 1474static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
1483{ 1475{
1484 return !buffer_mapped(bh) || buffer_delay(bh); 1476 return !buffer_mapped(bh) || buffer_delay(bh);
1485} 1477}
1486 1478
1487/* 1479/*
1488 * Note that we don't need to start a transaction unless we're journaling 1480 * Note that we don't need to start a transaction unless we're journaling data
1489 * data because we should have holes filled from ext4_page_mkwrite(). If 1481 * because we should have holes filled from ext4_page_mkwrite(). We don't even
1490 * we are journaling data, we cannot start transaction directly because 1482 * need to file the inode to the transaction's list in ordered mode because if
1491 * transaction start ranks above page lock so we have to do some magic... 1483 * we are writing back data added by write(), the inode is already there and if
1484 * we are writing back data modified via mmap(), no one guarantees in which
1485 * transaction the data will hit the disk. In case we are journaling data, we
1486 * cannot start transaction directly because transaction start ranks above page
1487 * lock so we have to do some magic.
1492 * 1488 *
1493 * In all journalling modes block_write_full_page() will start the I/O. 1489 * In all journaling modes block_write_full_page() will start the I/O.
1494 * 1490 *
1495 * Problem: 1491 * Problem:
1496 * 1492 *
@@ -1533,86 +1529,7 @@ static int ext4_bh_unmapped_or_delay(handle_t *handle, struct buffer_head *bh)
1533 * us. 1529 * us.
1534 * 1530 *
1535 */ 1531 */
1536static int __ext4_ordered_writepage(struct page *page, 1532static int __ext4_normal_writepage(struct page *page,
1537 struct writeback_control *wbc)
1538{
1539 struct inode *inode = page->mapping->host;
1540 struct buffer_head *page_bufs;
1541 handle_t *handle = NULL;
1542 int ret = 0;
1543 int err;
1544
1545 if (!page_has_buffers(page)) {
1546 create_empty_buffers(page, inode->i_sb->s_blocksize,
1547 (1 << BH_Dirty)|(1 << BH_Uptodate));
1548 }
1549 page_bufs = page_buffers(page);
1550 walk_page_buffers(handle, page_bufs, 0,
1551 PAGE_CACHE_SIZE, NULL, bget_one);
1552
1553 ret = block_write_full_page(page, ext4_get_block, wbc);
1554
1555 /*
1556 * The page can become unlocked at any point now, and
1557 * truncate can then come in and change things. So we
1558 * can't touch *page from now on. But *page_bufs is
1559 * safe due to elevated refcount.
1560 */
1561
1562 /*
1563 * And attach them to the current transaction. But only if
1564 * block_write_full_page() succeeded. Otherwise they are unmapped,
1565 * and generally junk.
1566 */
1567 if (ret == 0) {
1568 handle = ext4_journal_start(inode,
1569 ext4_writepage_trans_blocks(inode));
1570 if (IS_ERR(handle)) {
1571 ret = PTR_ERR(handle);
1572 goto out_put;
1573 }
1574
1575 ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
1576 NULL, jbd2_journal_dirty_data_fn);
1577 err = ext4_journal_stop(handle);
1578 if (!ret)
1579 ret = err;
1580 }
1581out_put:
1582 walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL,
1583 bput_one);
1584 return ret;
1585}
1586
1587static int ext4_ordered_writepage(struct page *page,
1588 struct writeback_control *wbc)
1589{
1590 struct inode *inode = page->mapping->host;
1591 loff_t size = i_size_read(inode);
1592 loff_t len;
1593
1594 J_ASSERT(PageLocked(page));
1595 J_ASSERT(page_has_buffers(page));
1596 if (page->index == size >> PAGE_CACHE_SHIFT)
1597 len = size & ~PAGE_CACHE_MASK;
1598 else
1599 len = PAGE_CACHE_SIZE;
1600 BUG_ON(walk_page_buffers(NULL, page_buffers(page), 0, len, NULL,
1601 ext4_bh_unmapped_or_delay));
1602
1603 /*
1604 * We give up here if we're reentered, because it might be for a
1605 * different filesystem.
1606 */
1607 if (!ext4_journal_current_handle())
1608 return __ext4_ordered_writepage(page, wbc);
1609
1610 redirty_page_for_writepage(wbc, page);
1611 unlock_page(page);
1612 return 0;
1613}
1614
1615static int __ext4_writeback_writepage(struct page *page,
1616 struct writeback_control *wbc) 1533 struct writeback_control *wbc)
1617{ 1534{
1618 struct inode *inode = page->mapping->host; 1535 struct inode *inode = page->mapping->host;
@@ -1624,7 +1541,7 @@ static int __ext4_writeback_writepage(struct page *page,
1624} 1541}
1625 1542
1626 1543
1627static int ext4_writeback_writepage(struct page *page, 1544static int ext4_normal_writepage(struct page *page,
1628 struct writeback_control *wbc) 1545 struct writeback_control *wbc)
1629{ 1546{
1630 struct inode *inode = page->mapping->host; 1547 struct inode *inode = page->mapping->host;
@@ -1641,7 +1558,7 @@ static int ext4_writeback_writepage(struct page *page,
1641 ext4_bh_unmapped_or_delay)); 1558 ext4_bh_unmapped_or_delay));
1642 1559
1643 if (!ext4_journal_current_handle()) 1560 if (!ext4_journal_current_handle())
1644 return __ext4_writeback_writepage(page, wbc); 1561 return __ext4_normal_writepage(page, wbc);
1645 1562
1646 redirty_page_for_writepage(wbc, page); 1563 redirty_page_for_writepage(wbc, page);
1647 unlock_page(page); 1564 unlock_page(page);
@@ -1877,7 +1794,7 @@ static int ext4_journalled_set_page_dirty(struct page *page)
1877static const struct address_space_operations ext4_ordered_aops = { 1794static const struct address_space_operations ext4_ordered_aops = {
1878 .readpage = ext4_readpage, 1795 .readpage = ext4_readpage,
1879 .readpages = ext4_readpages, 1796 .readpages = ext4_readpages,
1880 .writepage = ext4_ordered_writepage, 1797 .writepage = ext4_normal_writepage,
1881 .sync_page = block_sync_page, 1798 .sync_page = block_sync_page,
1882 .write_begin = ext4_write_begin, 1799 .write_begin = ext4_write_begin,
1883 .write_end = ext4_ordered_write_end, 1800 .write_end = ext4_ordered_write_end,
@@ -1891,7 +1808,7 @@ static const struct address_space_operations ext4_ordered_aops = {
1891static const struct address_space_operations ext4_writeback_aops = { 1808static const struct address_space_operations ext4_writeback_aops = {
1892 .readpage = ext4_readpage, 1809 .readpage = ext4_readpage,
1893 .readpages = ext4_readpages, 1810 .readpages = ext4_readpages,
1894 .writepage = ext4_writeback_writepage, 1811 .writepage = ext4_normal_writepage,
1895 .sync_page = block_sync_page, 1812 .sync_page = block_sync_page,
1896 .write_begin = ext4_write_begin, 1813 .write_begin = ext4_write_begin,
1897 .write_end = ext4_writeback_write_end, 1814 .write_end = ext4_writeback_write_end,
@@ -2019,7 +1936,7 @@ int ext4_block_truncate_page(handle_t *handle,
2019 err = ext4_journal_dirty_metadata(handle, bh); 1936 err = ext4_journal_dirty_metadata(handle, bh);
2020 } else { 1937 } else {
2021 if (ext4_should_order_data(inode)) 1938 if (ext4_should_order_data(inode))
2022 err = ext4_journal_dirty_data(handle, bh); 1939 err = ext4_jbd2_file_inode(handle, inode);
2023 mark_buffer_dirty(bh); 1940 mark_buffer_dirty(bh);
2024 } 1941 }
2025 1942
@@ -3171,7 +3088,14 @@ int ext4_write_inode(struct inode *inode, int wait)
3171 * be freed, so we have a strong guarantee that no future commit will 3088 * be freed, so we have a strong guarantee that no future commit will
3172 * leave these blocks visible to the user.) 3089 * leave these blocks visible to the user.)
3173 * 3090 *
3174 * Called with inode->sem down. 3091 * Another thing we have to ensure is that if we are in ordered mode
3092 * and the inode is still attached to the committing transaction, we must
3093 * start writeout of all the dirty pages which are being truncated.
3094 * This way we are sure that all the data written in the previous
3095 * transaction are already on disk (truncate waits for pages under
3096 * writeback).
3097 *
3098 * Called with inode->i_mutex down.
3175 */ 3099 */
3176int ext4_setattr(struct dentry *dentry, struct iattr *attr) 3100int ext4_setattr(struct dentry *dentry, struct iattr *attr)
3177{ 3101{
@@ -3237,6 +3161,22 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
3237 if (!error) 3161 if (!error)
3238 error = rc; 3162 error = rc;
3239 ext4_journal_stop(handle); 3163 ext4_journal_stop(handle);
3164
3165 if (ext4_should_order_data(inode)) {
3166 error = ext4_begin_ordered_truncate(inode,
3167 attr->ia_size);
3168 if (error) {
3169 /* Do as much error cleanup as possible */
3170 handle = ext4_journal_start(inode, 3);
3171 if (IS_ERR(handle)) {
3172 ext4_orphan_del(NULL, inode);
3173 goto err_out;
3174 }
3175 ext4_orphan_del(handle, inode);
3176 ext4_journal_stop(handle);
3177 goto err_out;
3178 }
3179 }
3240 } 3180 }
3241 3181
3242 rc = inode_setattr(inode, attr); 3182 rc = inode_setattr(inode, attr);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 1b330cd71ca8..629d0fa27e3a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -573,6 +573,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
573 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); 573 memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
574 INIT_LIST_HEAD(&ei->i_prealloc_list); 574 INIT_LIST_HEAD(&ei->i_prealloc_list);
575 spin_lock_init(&ei->i_prealloc_lock); 575 spin_lock_init(&ei->i_prealloc_lock);
576 jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
576 return &ei->vfs_inode; 577 return &ei->vfs_inode;
577} 578}
578 579
@@ -637,6 +638,8 @@ static void ext4_clear_inode(struct inode *inode)
637 EXT4_I(inode)->i_block_alloc_info = NULL; 638 EXT4_I(inode)->i_block_alloc_info = NULL;
638 if (unlikely(rsv)) 639 if (unlikely(rsv))
639 kfree(rsv); 640 kfree(rsv);
641 jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
642 &EXT4_I(inode)->jinode);
640} 643}
641 644
642static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) 645static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb)
@@ -3378,7 +3381,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
3378 err = ext4_journal_dirty_metadata(handle, bh); 3381 err = ext4_journal_dirty_metadata(handle, bh);
3379 else { 3382 else {
3380 /* Always do at least ordered writes for quotas */ 3383 /* Always do at least ordered writes for quotas */
3381 err = ext4_journal_dirty_data(handle, bh); 3384 err = ext4_jbd2_file_inode(handle, inode);
3382 mark_buffer_dirty(bh); 3385 mark_buffer_dirty(bh);
3383 } 3386 }
3384 brelse(bh); 3387 brelse(bh);