aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/aops.c
diff options
context:
space:
mode:
author: Joel Becker <joel.becker@oracle.com> 2010-07-01 18:13:31 -0400
committer: Joel Becker <joel.becker@oracle.com> 2010-07-08 16:25:35 -0400
commit: 5693486bad2bc2ac585a2c24f7e2f3964b478df9 (patch)
tree: 03d61d72c1b73bbf0b049bf0328f8e0c69f35a43 /fs/ocfs2/aops.c
parent: a4bfb4cf11fd2211b788af59dc8a8b4394bca227 (diff)
ocfs2: Zero the tail cluster when extending past i_size.
ocfs2's allocation unit is the cluster. This can be larger than a block or even a memory page. This means that a file may have many blocks in its last extent that are beyond the block containing i_size. There also may be more unwritten extents after that.

When ocfs2 grows a file, it zeros the entire cluster in order to ensure future i_size growth will see cleared blocks. Unfortunately, block_write_full_page() drops the pages past i_size. This means that ocfs2 is actually leaking garbage data into the tail end of that last cluster. This is a bug.

We adjust ocfs2_write_begin_nolock() and ocfs2_extend_file() to detect when a write or truncate is past i_size. They will use ocfs2_zero_extend() to ensure the data is properly zeroed.

Older versions of ocfs2_zero_extend() simply zeroed every block between i_size and the zeroing position. This presumes three things:

1) There is allocation for all of these blocks.
2) The extents are not unwritten.
3) The extents are not refcounted.

(1) and (2) hold true for non-sparse filesystems, which used to be the only users of ocfs2_zero_extend(). (3) is another bug.

Since we're now using ocfs2_zero_extend() for sparse filesystems as well, we teach ocfs2_zero_extend() to check every extent between i_size and the zeroing position. If the extent is unwritten, it is ignored. If it is refcounted, it is CoWed. Then it is zeroed.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: stable@kernel.org
Diffstat (limited to 'fs/ocfs2/aops.c')
-rw-r--r-- fs/ocfs2/aops.c | 42
1 files changed, 28 insertions, 14 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 9a5c931439bd..742893ea7390 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -196,15 +196,14 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
196 dump_stack(); 196 dump_stack();
197 goto bail; 197 goto bail;
198 } 198 }
199
200 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
201 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
202 (unsigned long long)past_eof);
203
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206 } 199 }
207 200
201 past_eof = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
202 mlog(0, "Inode %lu, past_eof = %llu\n", inode->i_ino,
203 (unsigned long long)past_eof);
204 if (create && (iblock >= past_eof))
205 set_buffer_new(bh_result);
206
208bail: 207bail:
209 if (err < 0) 208 if (err < 0)
210 err = -EIO; 209 err = -EIO;
@@ -1590,21 +1589,20 @@ out:
1590 * write path can treat it as an non-allocating write, which has no 1589 * write path can treat it as an non-allocating write, which has no
1591 * special case code for sparse/nonsparse files. 1590 * special case code for sparse/nonsparse files.
1592 */ 1591 */
1593static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos, 1592static int ocfs2_expand_nonsparse_inode(struct inode *inode,
1594 unsigned len, 1593 struct buffer_head *di_bh,
1594 loff_t pos, unsigned len,
1595 struct ocfs2_write_ctxt *wc) 1595 struct ocfs2_write_ctxt *wc)
1596{ 1596{
1597 int ret; 1597 int ret;
1598 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1599 loff_t newsize = pos + len; 1598 loff_t newsize = pos + len;
1600 1599
1601 if (ocfs2_sparse_alloc(osb)) 1600 BUG_ON(ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1602 return 0;
1603 1601
1604 if (newsize <= i_size_read(inode)) 1602 if (newsize <= i_size_read(inode))
1605 return 0; 1603 return 0;
1606 1604
1607 ret = ocfs2_extend_no_holes(inode, newsize, pos); 1605 ret = ocfs2_extend_no_holes(inode, di_bh, newsize, pos);
1608 if (ret) 1606 if (ret)
1609 mlog_errno(ret); 1607 mlog_errno(ret);
1610 1608
@@ -1614,6 +1612,18 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1614 return ret; 1612 return ret;
1615} 1613}
1616 1614
1615static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh,
1616 loff_t pos)
1617{
1618 int ret = 0;
1619
1620 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)));
1621 if (pos > i_size_read(inode))
1622 ret = ocfs2_zero_extend(inode, di_bh, pos);
1623
1624 return ret;
1625}
1626
1617int ocfs2_write_begin_nolock(struct address_space *mapping, 1627int ocfs2_write_begin_nolock(struct address_space *mapping,
1618 loff_t pos, unsigned len, unsigned flags, 1628 loff_t pos, unsigned len, unsigned flags,
1619 struct page **pagep, void **fsdata, 1629 struct page **pagep, void **fsdata,
@@ -1649,7 +1659,11 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1649 } 1659 }
1650 } 1660 }
1651 1661
1652 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); 1662 if (ocfs2_sparse_alloc(osb))
1663 ret = ocfs2_zero_tail(inode, di_bh, pos);
1664 else
1665 ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
1666 wc);
1653 if (ret) { 1667 if (ret) {
1654 mlog_errno(ret); 1668 mlog_errno(ret);
1655 goto out; 1669 goto out;