summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJia Guo <guojia12@huawei.com>2019-10-06 20:57:47 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2019-10-07 18:47:19 -0400
commit7a243c82ea527cd1da47381ad9cd646844f3b693 (patch)
treed7c8e8ce34d95a871bdec2ed80441c89dedfd3df
parent4ea655343ce4180fe9b2c7ec8cb8ef9884a47901 (diff)
ocfs2: clear zero in unaligned direct IO
The unused portion of a partially written fs-block-sized block is not zeroed in an unaligned append direct write. This can lead to serious data inconsistencies. Ocfs2 manages disk space in units of clusters (for example, 1M). A partial write within one cluster changes the cluster state from UN-WRITTEN to WRITTEN, and the VFS (function dio_zero_block) doesn't do the zeroing because the bh's state is not set to NEW in function ocfs2_dio_wr_get_block when we write to a WRITTEN cluster. For example, if the cluster size is 1M, the file size is 8k and we direct write from 14k to 15k, then 12k~14k and 15k~16k will contain dirty data. We have to deal with two cases: 1. The starting position of the direct write is outside the file. 2. The starting position of the direct write is located in the file. We need to set the bh's state to NEW in the first case. In the second case, we need to map twice, because the bh's state for the area outside the file should be set to NEW while the bh's state for the area inside the file should not. [akpm@linux-foundation.org: coding style fixes] Link: http://lkml.kernel.org/r/5292e287-8f1a-fd4a-1a14-661e555e0bed@huawei.com Signed-off-by: Jia Guo <guojia12@huawei.com> Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com> Cc: Mark Fasheh <mark@fasheh.com> Cc: Joel Becker <jlbec@evilplan.org> Cc: Junxiao Bi <junxiao.bi@oracle.com> Cc: Joseph Qi <joseph.qi@huawei.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--fs/ocfs2/aops.c22
1 files changed, 21 insertions, 1 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8de1c9d644f6..50d56d9a0475 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -2146,13 +2146,30 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
2146 struct ocfs2_dio_write_ctxt *dwc = NULL; 2146 struct ocfs2_dio_write_ctxt *dwc = NULL;
2147 struct buffer_head *di_bh = NULL; 2147 struct buffer_head *di_bh = NULL;
2148 u64 p_blkno; 2148 u64 p_blkno;
2149 loff_t pos = iblock << inode->i_sb->s_blocksize_bits; 2149 unsigned int i_blkbits = inode->i_sb->s_blocksize_bits;
2150 loff_t pos = iblock << i_blkbits;
2151 sector_t endblk = (i_size_read(inode) - 1) >> i_blkbits;
2150 unsigned len, total_len = bh_result->b_size; 2152 unsigned len, total_len = bh_result->b_size;
2151 int ret = 0, first_get_block = 0; 2153 int ret = 0, first_get_block = 0;
2152 2154
2153 len = osb->s_clustersize - (pos & (osb->s_clustersize - 1)); 2155 len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
2154 len = min(total_len, len); 2156 len = min(total_len, len);
2155 2157
2158 /*
2159 * bh_result->b_size is computed in get_more_blocks from the write's
2160 * "pos" and "end"; we need to map twice to return different buffer states:
2161 * 1. area within the file size: NEW is not set;
2162 * 2. area beyond the file size: NEW is set.
2163 *
2164 * iblock endblk
2165 * |--------|---------|---------|---------
2166 * |<-------area in file------->|
2167 */
2168
2169 if ((iblock <= endblk) &&
2170 ((iblock + ((len - 1) >> i_blkbits)) > endblk))
2171 len = (endblk - iblock + 1) << i_blkbits;
2172
2156 mlog(0, "get block of %lu at %llu:%u req %u\n", 2173 mlog(0, "get block of %lu at %llu:%u req %u\n",
2157 inode->i_ino, pos, len, total_len); 2174 inode->i_ino, pos, len, total_len);
2158 2175
@@ -2236,6 +2253,9 @@ static int ocfs2_dio_wr_get_block(struct inode *inode, sector_t iblock,
2236 if (desc->c_needs_zero) 2253 if (desc->c_needs_zero)
2237 set_buffer_new(bh_result); 2254 set_buffer_new(bh_result);
2238 2255
2256 if (iblock > endblk)
2257 set_buffer_new(bh_result);
2258
2239 /* May sleep in end_io. It should not happen in a irq context. So defer 2259 /* May sleep in end_io. It should not happen in a irq context. So defer
2240 * it to dio work queue. */ 2260 * it to dio work queue. */
2241 set_buffer_defer_completion(bh_result); 2261 set_buffer_defer_completion(bh_result);