aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r--fs/ocfs2/file.c200
1 files changed, 157 insertions, 43 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index aa501d3f93f1..89fc8ee1f5a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -59,6 +59,7 @@
59#include "xattr.h" 59#include "xattr.h"
60#include "acl.h" 60#include "acl.h"
61#include "quota.h" 61#include "quota.h"
62#include "refcounttree.h"
62 63
63#include "buffer_head_io.h" 64#include "buffer_head_io.h"
64 65
@@ -259,7 +260,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
259 goto out; 260 goto out;
260 } 261 }
261 262
262 ret = ocfs2_journal_access_di(handle, inode, bh, 263 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
263 OCFS2_JOURNAL_ACCESS_WRITE); 264 OCFS2_JOURNAL_ACCESS_WRITE);
264 if (ret) { 265 if (ret) {
265 mlog_errno(ret); 266 mlog_errno(ret);
@@ -334,6 +335,39 @@ out:
334 return ret; 335 return ret;
335} 336}
336 337
/*
 * ocfs2_cow_file_pos() - CoW the single cluster that contains @offset.
 *
 * @inode:  inode whose extent map is consulted.
 * @fe_bh:  buffer head passed straight through to ocfs2_refcount_cow().
 * @offset: byte offset; converted to a cluster offset by shifting right
 *          by s_clustersize_bits.
 *
 * Returns 0 when @offset is cluster aligned or when the extent covering
 * it is not flagged OCFS2_EXT_REFCOUNTED.  Otherwise returns the result
 * of ocfs2_refcount_cow(), or the non-zero status from
 * ocfs2_get_clusters() if the lookup fails.
 */
338static int ocfs2_cow_file_pos(struct inode *inode,
339 struct buffer_head *fe_bh,
340 u64 offset)
341{
342 int status;
343 u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
344 unsigned int num_clusters = 0;
345 unsigned int ext_flags = 0;
346
347 /*
348 * If the new offset is aligned to the range of the cluster, there is
349 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
350 * CoW either.
351 */
352 if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
353 return 0;
354
/* Look up the extent covering cpos; only its flags are used below. */
355 status = ocfs2_get_clusters(inode, cpos, &phys,
356 &num_clusters, &ext_flags);
357 if (status) {
358 mlog_errno(status);
359 goto out;
360 }
361
/* Not refcounted: nothing to CoW. status is 0 here, so 0 is returned. */
362 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
363 goto out;
364
/*
 * NOTE(review): the trailing arguments (1, cpos+1) presumably mean
 * "one cluster starting at cpos, limited to cpos+1" - confirm against
 * the ocfs2_refcount_cow() prototype in refcounttree.h.
 */
365 return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
366
367out:
368 return status;
369}
370
337static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, 371static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
338 struct inode *inode, 372 struct inode *inode,
339 struct buffer_head *fe_bh, 373 struct buffer_head *fe_bh,
@@ -346,6 +380,17 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
346 380
347 mlog_entry_void(); 381 mlog_entry_void();
348 382
383 /*
384 * We need to CoW the cluster that contains the offset if it is reflinked
385 * since we will call ocfs2_zero_range_for_truncate later which will
386 * write "0" from offset to the end of the cluster.
387 */
388 status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
389 if (status) {
390 mlog_errno(status);
391 return status;
392 }
393
349 /* TODO: This needs to actually orphan the inode in this 394 /* TODO: This needs to actually orphan the inode in this
350 * transaction. */ 395 * transaction. */
351 396
@@ -356,7 +401,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
356 goto out; 401 goto out;
357 } 402 }
358 403
359 status = ocfs2_journal_access_di(handle, inode, fe_bh, 404 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
360 OCFS2_JOURNAL_ACCESS_WRITE); 405 OCFS2_JOURNAL_ACCESS_WRITE);
361 if (status < 0) { 406 if (status < 0) {
362 mlog_errno(status); 407 mlog_errno(status);
@@ -486,6 +531,8 @@ bail_unlock_sem:
486 up_write(&OCFS2_I(inode)->ip_alloc_sem); 531 up_write(&OCFS2_I(inode)->ip_alloc_sem);
487 532
488bail: 533bail:
534 if (!status && OCFS2_I(inode)->ip_clusters == 0)
535 status = ocfs2_try_remove_refcount_tree(inode, di_bh);
489 536
490 mlog_exit(status); 537 mlog_exit(status);
491 return status; 538 return status;
@@ -515,11 +562,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
515 int ret; 562 int ret;
516 struct ocfs2_extent_tree et; 563 struct ocfs2_extent_tree et;
517 564
518 ocfs2_init_dinode_extent_tree(&et, inode, fe_bh); 565 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
519 ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset, 566 ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
520 clusters_to_add, mark_unwritten, 567 clusters_to_add, mark_unwritten,
521 &et, handle, 568 data_ac, meta_ac, reason_ret);
522 data_ac, meta_ac, reason_ret);
523 569
524 return ret; 570 return ret;
525} 571}
@@ -564,7 +610,7 @@ restart_all:
564 (unsigned long long)OCFS2_I(inode)->ip_blkno, 610 (unsigned long long)OCFS2_I(inode)->ip_blkno,
565 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), 611 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
566 clusters_to_add); 612 clusters_to_add);
567 ocfs2_init_dinode_extent_tree(&et, inode, bh); 613 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
568 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, 614 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
569 &data_ac, &meta_ac); 615 &data_ac, &meta_ac);
570 if (status) { 616 if (status) {
@@ -593,7 +639,7 @@ restarted_transaction:
593 /* reserve a write to the file entry early on - that way if we 639 /* reserve a write to the file entry early on - that way if we
594 * run out of credits in the allocation path, we can still 640 * run out of credits in the allocation path, we can still
595 * update i_size. */ 641 * update i_size. */
596 status = ocfs2_journal_access_di(handle, inode, bh, 642 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
597 OCFS2_JOURNAL_ACCESS_WRITE); 643 OCFS2_JOURNAL_ACCESS_WRITE);
598 if (status < 0) { 644 if (status < 0) {
599 mlog_errno(status); 645 mlog_errno(status);
@@ -1131,7 +1177,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1131 goto out; 1177 goto out;
1132 } 1178 }
1133 1179
1134 ret = ocfs2_journal_access_di(handle, inode, bh, 1180 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1135 OCFS2_JOURNAL_ACCESS_WRITE); 1181 OCFS2_JOURNAL_ACCESS_WRITE);
1136 if (ret < 0) { 1182 if (ret < 0) {
1137 mlog_errno(ret); 1183 mlog_errno(ret);
@@ -1395,7 +1441,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1395 struct address_space *mapping = inode->i_mapping; 1441 struct address_space *mapping = inode->i_mapping;
1396 struct ocfs2_extent_tree et; 1442 struct ocfs2_extent_tree et;
1397 1443
1398 ocfs2_init_dinode_extent_tree(&et, inode, di_bh); 1444 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1399 ocfs2_init_dealloc_ctxt(&dealloc); 1445 ocfs2_init_dealloc_ctxt(&dealloc);
1400 1446
1401 if (byte_len == 0) 1447 if (byte_len == 0)
@@ -1657,6 +1703,70 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
1657 OCFS2_IOC_RESVSP64, &sr, change_size); 1703 OCFS2_IOC_RESVSP64, &sr, change_size);
1658} 1704}
1659 1705
/*
 * ocfs2_check_range_for_refcount() - report whether a byte range touches
 * a refcounted extent.
 *
 * @inode: inode whose extents are scanned.
 * @pos:   starting byte offset of the range.
 * @count: length of the range in bytes.
 *
 * Returns 1 as soon as an extent with a non-zero physical cluster and the
 * OCFS2_EXT_REFCOUNTED flag is found, 0 if none is found (or if the
 * volume/inode refcount checks at the top fail), and a negative value if
 * ocfs2_get_clusters() fails.
 */
1706int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
1707 size_t count)
1708{
1709 int ret = 0;
1710 unsigned int extent_flags;
1711 u32 cpos, clusters, extent_len, phys_cpos;
1712 struct super_block *sb = inode->i_sb;
1713
/*
 * Skip the scan unless both the superblock check and the inode's
 * OCFS2_HAS_REFCOUNT_FL dynamic feature flag pass.
 */
1714 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
1715 !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
1716 return 0;
1717
/* Convert the byte range into a starting cluster and a cluster count. */
1718 cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
1719 clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
1720
/* Walk the range one extent lookup at a time until it is exhausted. */
1721 while (clusters) {
1722 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
1723 &extent_flags);
1724 if (ret < 0) {
1725 mlog_errno(ret);
1726 goto out;
1727 }
1728
/*
 * NOTE(review): phys_cpos == 0 presumably denotes a hole, which is
 * why it is excluded here - confirm against ocfs2_get_clusters().
 */
1729 if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
1730 ret = 1;
1731 break;
1732 }
1733
/* Clamp to the remaining range so the unsigned counter cannot wrap. */
1734 if (extent_len > clusters)
1735 extent_len = clusters;
1736
1737 clusters -= extent_len;
1738 cpos += extent_len;
1739 }
1740out:
1741 return ret;
1742}
1743
/*
 * ocfs2_prepare_inode_for_refcount() - take the inode lock and CoW the
 * clusters covering a byte range.
 *
 * @inode:      inode to lock and operate on.
 * @pos:        starting byte offset of the range.
 * @count:      length of the range in bytes.
 * @meta_level: set to 1 once ocfs2_inode_lock() has succeeded; left
 *              untouched if the lock attempt fails.
 *
 * The inode lock is NOT released here.  The caller,
 * ocfs2_prepare_inode_for_write(), unlocks using the level stored in
 * *meta_level.
 *
 * Returns 0 on success, or the non-zero status from ocfs2_inode_lock()
 * or ocfs2_refcount_cow().
 */
1744static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
1745 loff_t pos, size_t count,
1746 int *meta_level)
1747{
1748 int ret;
1749 struct buffer_head *di_bh = NULL;
1750 u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
1751 u32 clusters =
1752 ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
1753
/* NOTE(review): the final argument 1 presumably requests an exclusive lock - confirm. */
1754 ret = ocfs2_inode_lock(inode, &di_bh, 1);
1755 if (ret) {
1756 mlog_errno(ret);
1757 goto out;
1758 }
1759
/* Record the held lock level for the caller before any later failure. */
1760 *meta_level = 1;
1761
/*
 * NOTE(review): UINT_MAX as the last argument presumably means "no
 * upper cluster limit" - confirm against ocfs2_refcount_cow().
 */
1762 ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
1763 if (ret)
1764 mlog_errno(ret);
1765out:
/* Drop the buffer head reference on every exit path. */
1766 brelse(di_bh);
1767 return ret;
1768}
1769
1660static int ocfs2_prepare_inode_for_write(struct dentry *dentry, 1770static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1661 loff_t *ppos, 1771 loff_t *ppos,
1662 size_t count, 1772 size_t count,
@@ -1713,6 +1823,22 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1713 1823
1714 end = saved_pos + count; 1824 end = saved_pos + count;
1715 1825
1826 ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
1827 if (ret == 1) {
1828 ocfs2_inode_unlock(inode, meta_level);
1829 meta_level = -1;
1830
1831 ret = ocfs2_prepare_inode_for_refcount(inode,
1832 saved_pos,
1833 count,
1834 &meta_level);
1835 }
1836
1837 if (ret < 0) {
1838 mlog_errno(ret);
1839 goto out_unlock;
1840 }
1841
1716 /* 1842 /*
1717 * Skip the O_DIRECT checks if we don't need 1843 * Skip the O_DIRECT checks if we don't need
1718 * them. 1844 * them.
@@ -1759,7 +1885,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1759 *ppos = saved_pos; 1885 *ppos = saved_pos;
1760 1886
1761out_unlock: 1887out_unlock:
1762 ocfs2_inode_unlock(inode, meta_level); 1888 if (meta_level >= 0)
1889 ocfs2_inode_unlock(inode, meta_level);
1763 1890
1764out: 1891out:
1765 return ret; 1892 return ret;
@@ -1871,8 +1998,7 @@ relock:
1871 goto out_dio; 1998 goto out_dio;
1872 } 1999 }
1873 } else { 2000 } else {
1874 written = generic_file_aio_write_nolock(iocb, iov, nr_segs, 2001 written = __generic_file_aio_write(iocb, iov, nr_segs, ppos);
1875 *ppos);
1876 } 2002 }
1877 2003
1878out_dio: 2004out_dio:
@@ -1880,18 +2006,21 @@ out_dio:
1880 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT)); 2006 BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
1881 2007
1882 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) { 2008 if ((file->f_flags & O_SYNC && !direct_io) || IS_SYNC(inode)) {
1883 /* 2009 ret = filemap_fdatawrite_range(file->f_mapping, pos,
1884 * The generic write paths have handled getting data 2010 pos + count - 1);
1885 * to disk, but since we don't make use of the dirty 2011 if (ret < 0)
1886 * inode list, a manual journal commit is necessary 2012 written = ret;
1887 * here. 2013
1888 */ 2014 if (!ret && (old_size != i_size_read(inode) ||
1889 if (old_size != i_size_read(inode) || 2015 old_clusters != OCFS2_I(inode)->ip_clusters)) {
1890 old_clusters != OCFS2_I(inode)->ip_clusters) {
1891 ret = jbd2_journal_force_commit(osb->journal->j_journal); 2016 ret = jbd2_journal_force_commit(osb->journal->j_journal);
1892 if (ret < 0) 2017 if (ret < 0)
1893 written = ret; 2018 written = ret;
1894 } 2019 }
2020
2021 if (!ret)
2022 ret = filemap_fdatawait_range(file->f_mapping, pos,
2023 pos + count - 1);
1895 } 2024 }
1896 2025
1897 /* 2026 /*
@@ -1991,31 +2120,16 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
1991 2120
1992 if (ret > 0) { 2121 if (ret > 0) {
1993 unsigned long nr_pages; 2122 unsigned long nr_pages;
2123 int err;
1994 2124
1995 *ppos += ret;
1996 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 2125 nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
1997 2126
1998 /* 2127 err = generic_write_sync(out, *ppos, ret);
1999 * If file or inode is SYNC and we actually wrote some data, 2128 if (err)
2000 * sync it. 2129 ret = err;
2001 */ 2130 else
2002 if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { 2131 *ppos += ret;
2003 int err;
2004
2005 mutex_lock(&inode->i_mutex);
2006 err = ocfs2_rw_lock(inode, 1);
2007 if (err < 0) {
2008 mlog_errno(err);
2009 } else {
2010 err = generic_osync_inode(inode, mapping,
2011 OSYNC_METADATA|OSYNC_DATA);
2012 ocfs2_rw_unlock(inode, 1);
2013 }
2014 mutex_unlock(&inode->i_mutex);
2015 2132
2016 if (err)
2017 ret = err;
2018 }
2019 balance_dirty_pages_ratelimited_nr(mapping, nr_pages); 2133 balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
2020 } 2134 }
2021 2135