aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/aops.c
diff options
context:
space:
mode:
author Ryan Ding <ryan.ding@oracle.com> 2016-03-25 17:20:55 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2016-03-25 19:37:42 -0400
commit b46637d59f1160dee5d8e03498e667ab36c2be04 (patch)
tree e01bf53fa84428fa3b86e24245e3640be5dce8ee /fs/ocfs2/aops.c
parent c1ad1e3ca3064b1f0ab5a5185b4732ab9ad8fa24 (diff)
ocfs2: use c_new to indicate newly allocated extents
This patch is preparation for supporting direct I/O in ocfs2_write_begin_nolock & ocfs2_write_end_nolock. There is a problem in ocfs2's direct I/O implementation: if the system crashes after extents have been allocated but before the data write returns, we are left with an extent containing dirty data on disk. This violates the journal=order semantics, which require that metadata changes take effect only after the data has been written to disk. To resolve this issue, a direct write can use the UNWRITTEN flag to describe an extent during direct data writeback. The direct write procedure should act in the following order: phase 1: allocate the extent with the UNWRITTEN flag; phase 2: submit the direct data to disk and add zero pages to the page cache; phase 3: clear the UNWRITTEN flag once the data has been written to disk. This patch renames the 'c_unwritten' member of ocfs2_write_cluster_desc to 'c_clear_unwritten', which indicates whether the unwritten flag should be cleared. It does not depend on whether an extent is already allocated or not. 'c_new' is used instead to indicate a newly allocated extent. This way the direct I/O procedure can use c_clear_unwritten to control the UNWRITTEN bit on the extent. Signed-off-by: Ryan Ding <ryan.ding@oracle.com> Reviewed-by: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <joseph.qi@huawei.com> Cc: Mark Fasheh <mfasheh@suse.de> Cc: Joel Becker <jlbec@evilplan.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'fs/ocfs2/aops.c')
-rw-r--r-- fs/ocfs2/aops.c 22
1 files changed, 12 insertions, 10 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 69d7392f56d8..516ea896f13c 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1212,7 +1212,7 @@ struct ocfs2_write_cluster_desc {
1212 * filled. 1212 * filled.
1213 */ 1213 */
1214 unsigned c_new; 1214 unsigned c_new;
1215 unsigned c_unwritten; 1215 unsigned c_clear_unwritten;
1216 unsigned c_needs_zero; 1216 unsigned c_needs_zero;
1217}; 1217};
1218 1218
@@ -1588,19 +1588,19 @@ out:
1588 * Prepare a single cluster for write one cluster into the file. 1588 * Prepare a single cluster for write one cluster into the file.
1589 */ 1589 */
1590static int ocfs2_write_cluster(struct address_space *mapping, 1590static int ocfs2_write_cluster(struct address_space *mapping,
1591 u32 phys, unsigned int unwritten, 1591 u32 phys, unsigned int new,
1592 unsigned int clear_unwritten,
1592 unsigned int should_zero, 1593 unsigned int should_zero,
1593 struct ocfs2_alloc_context *data_ac, 1594 struct ocfs2_alloc_context *data_ac,
1594 struct ocfs2_alloc_context *meta_ac, 1595 struct ocfs2_alloc_context *meta_ac,
1595 struct ocfs2_write_ctxt *wc, u32 cpos, 1596 struct ocfs2_write_ctxt *wc, u32 cpos,
1596 loff_t user_pos, unsigned user_len) 1597 loff_t user_pos, unsigned user_len)
1597{ 1598{
1598 int ret, i, new; 1599 int ret, i;
1599 u64 v_blkno, p_blkno; 1600 u64 v_blkno, p_blkno;
1600 struct inode *inode = mapping->host; 1601 struct inode *inode = mapping->host;
1601 struct ocfs2_extent_tree et; 1602 struct ocfs2_extent_tree et;
1602 1603
1603 new = phys == 0 ? 1 : 0;
1604 if (new) { 1604 if (new) {
1605 u32 tmp_pos; 1605 u32 tmp_pos;
1606 1606
@@ -1610,9 +1610,9 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1610 */ 1610 */
1611 tmp_pos = cpos; 1611 tmp_pos = cpos;
1612 ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, 1612 ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
1613 &tmp_pos, 1, 0, wc->w_di_bh, 1613 &tmp_pos, 1, !clear_unwritten,
1614 wc->w_handle, data_ac, 1614 wc->w_di_bh, wc->w_handle,
1615 meta_ac, NULL); 1615 data_ac, meta_ac, NULL);
1616 /* 1616 /*
1617 * This shouldn't happen because we must have already 1617 * This shouldn't happen because we must have already
1618 * calculated the correct meta data allocation required. The 1618 * calculated the correct meta data allocation required. The
@@ -1629,7 +1629,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1629 mlog_errno(ret); 1629 mlog_errno(ret);
1630 goto out; 1630 goto out;
1631 } 1631 }
1632 } else if (unwritten) { 1632 } else if (clear_unwritten) {
1633 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), 1633 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
1634 wc->w_di_bh); 1634 wc->w_di_bh);
1635 ret = ocfs2_mark_extent_written(inode, &et, 1635 ret = ocfs2_mark_extent_written(inode, &et,
@@ -1712,7 +1712,8 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
1712 local_len = osb->s_clustersize - cluster_off; 1712 local_len = osb->s_clustersize - cluster_off;
1713 1713
1714 ret = ocfs2_write_cluster(mapping, desc->c_phys, 1714 ret = ocfs2_write_cluster(mapping, desc->c_phys,
1715 desc->c_unwritten, 1715 desc->c_new,
1716 desc->c_clear_unwritten,
1716 desc->c_needs_zero, 1717 desc->c_needs_zero,
1717 data_ac, meta_ac, 1718 data_ac, meta_ac,
1718 wc, desc->c_cpos, pos, local_len); 1719 wc, desc->c_cpos, pos, local_len);
@@ -1857,11 +1858,12 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1857 if (phys == 0) { 1858 if (phys == 0) {
1858 desc->c_new = 1; 1859 desc->c_new = 1;
1859 desc->c_needs_zero = 1; 1860 desc->c_needs_zero = 1;
1861 desc->c_clear_unwritten = 1;
1860 *clusters_to_alloc = *clusters_to_alloc + 1; 1862 *clusters_to_alloc = *clusters_to_alloc + 1;
1861 } 1863 }
1862 1864
1863 if (ext_flags & OCFS2_EXT_UNWRITTEN) { 1865 if (ext_flags & OCFS2_EXT_UNWRITTEN) {
1864 desc->c_unwritten = 1; 1866 desc->c_clear_unwritten = 1;
1865 desc->c_needs_zero = 1; 1867 desc->c_needs_zero = 1;
1866 } 1868 }
1867 1869