aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2007-09-07 17:46:51 -0400
committerMark Fasheh <mark.fasheh@oracle.com>2007-10-12 14:54:40 -0400
commit1afc32b952335f665327a1a9001ba1b44bb76fd9 (patch)
treec914afd0ef5d32b426c3cf65820de7599e570656 /fs
parent6798d35a31c413bbb3f83bbaa844bd2598168ccc (diff)
ocfs2: Write support for inline data
This fixes up write, truncate, mmap, and RESVSP/UNRESVSP to understand inline inode data. For the most part, the changes to the core write code can be relied on to do the heavy lifting. Any code calling ocfs2_write_begin (including shared writeable mmap) can count on it doing the right thing with respect to growing inline data to an extent tree. Size reducing truncates, including UNRESVSP, can simply zero that portion of the inode block being removed. Size increasing truncates, including RESVSP, have to be a little bit smarter and grow the inode to an extent tree if necessary. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Reviewed-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/ocfs2/alloc.c245
-rw-r--r--fs/ocfs2/alloc.h6
-rw-r--r--fs/ocfs2/aops.c173
-rw-r--r--fs/ocfs2/aops.h4
-rw-r--r--fs/ocfs2/file.c99
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/journal.h3
7 files changed, 526 insertions, 8 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c81bfdfb9929..72cefe25382b 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3726,6 +3726,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3726 struct ocfs2_insert_type insert = {0, }; 3726 struct ocfs2_insert_type insert = {0, };
3727 struct ocfs2_extent_rec rec; 3727 struct ocfs2_extent_rec rec;
3728 3728
3729 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
3730
3729 mlog(0, "add %u clusters at position %u to inode %llu\n", 3731 mlog(0, "add %u clusters at position %u to inode %llu\n",
3730 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 3732 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3731 3733
@@ -5826,6 +5828,174 @@ out:
5826 return ret; 5828 return ret;
5827} 5829}
5828 5830
5831static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
5832{
5833 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
5834
5835 memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2));
5836}
5837
5838void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
5839{
5840 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5841 struct ocfs2_inline_data *idata = &di->id2.i_data;
5842
5843 spin_lock(&oi->ip_lock);
5844 oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
5845 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5846 spin_unlock(&oi->ip_lock);
5847
5848 /*
5849 * We clear the entire i_data structure here so that all
5850 * fields can be properly initialized.
5851 */
5852 ocfs2_zero_dinode_id2(inode, di);
5853
5854 idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
5855}
5856
5857int ocfs2_convert_inline_data_to_extents(struct inode *inode,
5858 struct buffer_head *di_bh)
5859{
5860 int ret, i, has_data, num_pages = 0;
5861 handle_t *handle;
5862 u64 uninitialized_var(block);
5863 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5864 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5865 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
5866 struct ocfs2_extent_list *el = &di->id2.i_list;
5867 struct ocfs2_alloc_context *data_ac = NULL;
5868 struct page **pages = NULL;
5869 loff_t end = osb->s_clustersize;
5870
5871 has_data = i_size_read(inode) ? 1 : 0;
5872
5873 if (has_data) {
5874 pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
5875 sizeof(struct page *), GFP_NOFS);
5876 if (pages == NULL) {
5877 ret = -ENOMEM;
5878 mlog_errno(ret);
5879 goto out;
5880 }
5881
5882 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
5883 if (ret) {
5884 mlog_errno(ret);
5885 goto out;
5886 }
5887 }
5888
5889 handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
5890 if (IS_ERR(handle)) {
5891 ret = PTR_ERR(handle);
5892 mlog_errno(ret);
5893 goto out_unlock;
5894 }
5895
5896 ret = ocfs2_journal_access(handle, inode, di_bh,
5897 OCFS2_JOURNAL_ACCESS_WRITE);
5898 if (ret) {
5899 mlog_errno(ret);
5900 goto out_commit;
5901 }
5902
5903 if (has_data) {
5904 u32 bit_off, num;
5905 unsigned int page_end;
5906 u64 phys;
5907
5908 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
5909 &num);
5910 if (ret) {
5911 mlog_errno(ret);
5912 goto out_commit;
5913 }
5914
5915 /*
5916 * Save two copies, one for insert, and one that can
5917 * be changed by ocfs2_map_and_dirty_page() below.
5918 */
5919 block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
5920
5921 /*
5922 * Non sparse file systems zero on extend, so no need
5923 * to do that now.
5924 */
5925 if (!ocfs2_sparse_alloc(osb) &&
5926 PAGE_CACHE_SIZE < osb->s_clustersize)
5927 end = PAGE_CACHE_SIZE;
5928
5929 ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
5930 if (ret) {
5931 mlog_errno(ret);
5932 goto out_commit;
5933 }
5934
5935 /*
5936 * This should populate the 1st page for us and mark
5937 * it up to date.
5938 */
5939 ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
5940 if (ret) {
5941 mlog_errno(ret);
5942 goto out_commit;
5943 }
5944
5945 page_end = PAGE_CACHE_SIZE;
5946 if (PAGE_CACHE_SIZE > osb->s_clustersize)
5947 page_end = osb->s_clustersize;
5948
5949 for (i = 0; i < num_pages; i++)
5950 ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
5951 pages[i], i > 0, &phys);
5952 }
5953
5954 spin_lock(&oi->ip_lock);
5955 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
5956 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5957 spin_unlock(&oi->ip_lock);
5958
5959 ocfs2_zero_dinode_id2(inode, di);
5960
5961 el->l_tree_depth = 0;
5962 el->l_next_free_rec = 0;
5963 el->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
5964
5965 ocfs2_journal_dirty(handle, di_bh);
5966
5967 if (has_data) {
5968 /*
5969 * An error at this point should be extremely rare. If
5970 * this proves to be false, we could always re-build
5971 * the in-inode data from our pages.
5972 */
5973 ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
5974 0, block, 1, 0, NULL);
5975 if (ret) {
5976 mlog_errno(ret);
5977 goto out_commit;
5978 }
5979
5980 inode->i_blocks = ocfs2_inode_sector_count(inode);
5981 }
5982
5983out_commit:
5984 ocfs2_commit_trans(osb, handle);
5985
5986out_unlock:
5987 if (data_ac)
5988 ocfs2_free_alloc_context(data_ac);
5989
5990out:
5991 if (pages) {
5992 ocfs2_unlock_and_free_pages(pages, num_pages);
5993 kfree(pages);
5994 }
5995
5996 return ret;
5997}
5998
5829/* 5999/*
5830 * It is expected, that by the time you call this function, 6000 * It is expected, that by the time you call this function,
5831 * inode->i_size and fe->i_size have been adjusted. 6001 * inode->i_size and fe->i_size have been adjusted.
@@ -6051,6 +6221,81 @@ bail:
6051 return status; 6221 return status;
6052} 6222}
6053 6223
6224/*
6225 * 'start' is inclusive, 'end' is not.
6226 */
6227int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
6228 unsigned int start, unsigned int end, int trunc)
6229{
6230 int ret;
6231 unsigned int numbytes;
6232 handle_t *handle;
6233 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6234 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
6235 struct ocfs2_inline_data *idata = &di->id2.i_data;
6236
6237 if (end > i_size_read(inode))
6238 end = i_size_read(inode);
6239
6240 BUG_ON(start >= end);
6241
6242 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
6243 !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
6244 !ocfs2_supports_inline_data(osb)) {
6245 ocfs2_error(inode->i_sb,
6246 "Inline data flags for inode %llu don't agree! "
6247 "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
6248 (unsigned long long)OCFS2_I(inode)->ip_blkno,
6249 le16_to_cpu(di->i_dyn_features),
6250 OCFS2_I(inode)->ip_dyn_features,
6251 osb->s_feature_incompat);
6252 ret = -EROFS;
6253 goto out;
6254 }
6255
6256 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6257 if (IS_ERR(handle)) {
6258 ret = PTR_ERR(handle);
6259 mlog_errno(ret);
6260 goto out;
6261 }
6262
6263 ret = ocfs2_journal_access(handle, inode, di_bh,
6264 OCFS2_JOURNAL_ACCESS_WRITE);
6265 if (ret) {
6266 mlog_errno(ret);
6267 goto out_commit;
6268 }
6269
6270 numbytes = end - start;
6271 memset(idata->id_data + start, 0, numbytes);
6272
6273 /*
6274 * No need to worry about the data page here - it's been
6275 * truncated already and inline data doesn't need it for
6276 * pushing zero's to disk, so we'll let readpage pick it up
6277 * later.
6278 */
6279 if (trunc) {
6280 i_size_write(inode, start);
6281 di->i_size = cpu_to_le64(start);
6282 }
6283
6284 inode->i_blocks = ocfs2_inode_sector_count(inode);
6285 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
6286
6287 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
6288 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
6289
6290 ocfs2_journal_dirty(handle, di_bh);
6291
6292out_commit:
6293 ocfs2_commit_trans(osb, handle);
6294
6295out:
6296 return ret;
6297}
6298
6054static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 6299static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
6055{ 6300{
6056 /* 6301 /*
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 990df48ae8d3..826e0a6cf5c7 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -62,6 +62,10 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; 62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
63} 63}
64 64
65void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di);
66int ocfs2_convert_inline_data_to_extents(struct inode *inode,
67 struct buffer_head *di_bh);
68
65int ocfs2_truncate_log_init(struct ocfs2_super *osb); 69int ocfs2_truncate_log_init(struct ocfs2_super *osb);
66void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb); 70void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb);
67void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 71void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
@@ -115,6 +119,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
115 struct inode *inode, 119 struct inode *inode,
116 struct buffer_head *fe_bh, 120 struct buffer_head *fe_bh,
117 struct ocfs2_truncate_context *tc); 121 struct ocfs2_truncate_context *tc);
122int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
123 unsigned int start, unsigned int end, int trunc);
118 124
119int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 125int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
120 u32 cpos, struct buffer_head **leaf_bh); 126 u32 cpos, struct buffer_head **leaf_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index fef0186a91cd..34d10452c56d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -206,8 +206,8 @@ bail:
206 return err; 206 return err;
207} 207}
208 208
209static int ocfs2_read_inline_data(struct inode *inode, struct page *page, 209int ocfs2_read_inline_data(struct inode *inode, struct page *page,
210 struct buffer_head *di_bh) 210 struct buffer_head *di_bh)
211{ 211{
212 void *kaddr; 212 void *kaddr;
213 unsigned int size; 213 unsigned int size;
@@ -1432,6 +1432,130 @@ out:
1432 return ret; 1432 return ret;
1433} 1433}
1434 1434
1435static int ocfs2_write_begin_inline(struct address_space *mapping,
1436 struct inode *inode,
1437 struct ocfs2_write_ctxt *wc)
1438{
1439 int ret;
1440 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1441 struct page *page;
1442 handle_t *handle;
1443 struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1444
1445 page = find_or_create_page(mapping, 0, GFP_NOFS);
1446 if (!page) {
1447 ret = -ENOMEM;
1448 mlog_errno(ret);
1449 goto out;
1450 }
1451 /*
1452 * If we don't set w_num_pages then this page won't get unlocked
1453 * and freed on cleanup of the write context.
1454 */
1455 wc->w_pages[0] = wc->w_target_page = page;
1456 wc->w_num_pages = 1;
1457
1458 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1459 if (IS_ERR(handle)) {
1460 ret = PTR_ERR(handle);
1461 mlog_errno(ret);
1462 goto out;
1463 }
1464
1465 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
1466 OCFS2_JOURNAL_ACCESS_WRITE);
1467 if (ret) {
1468 ocfs2_commit_trans(osb, handle);
1469
1470 mlog_errno(ret);
1471 goto out;
1472 }
1473
1474 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
1475 ocfs2_set_inode_data_inline(inode, di);
1476
1477 if (!PageUptodate(page)) {
1478 ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);
1479 if (ret) {
1480 ocfs2_commit_trans(osb, handle);
1481
1482 goto out;
1483 }
1484 }
1485
1486 wc->w_handle = handle;
1487out:
1488 return ret;
1489}
1490
1491int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
1492{
1493 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1494
1495 if (new_size < le16_to_cpu(di->id2.i_data.id_count))
1496 return 1;
1497 return 0;
1498}
1499
1500static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
1501 struct inode *inode, loff_t pos,
1502 unsigned len, struct page *mmap_page,
1503 struct ocfs2_write_ctxt *wc)
1504{
1505 int ret, written = 0;
1506 loff_t end = pos + len;
1507 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1508
1509 mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
1510 (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
1511 oi->ip_dyn_features);
1512
1513 /*
1514 * Handle inodes which already have inline data 1st.
1515 */
1516 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1517 if (mmap_page == NULL &&
1518 ocfs2_size_fits_inline_data(wc->w_di_bh, end))
1519 goto do_inline_write;
1520
1521 /*
1522 * The write won't fit - we have to give this inode an
1523 * inline extent list now.
1524 */
1525 ret = ocfs2_convert_inline_data_to_extents(inode, wc->w_di_bh);
1526 if (ret)
1527 mlog_errno(ret);
1528 goto out;
1529 }
1530
1531 /*
1532 * Check whether the inode can accept inline data.
1533 */
1534 if (oi->ip_clusters != 0 || i_size_read(inode) != 0)
1535 return 0;
1536
1537 /*
1538 * Check whether the write can fit.
1539 */
1540 if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
1541 return 0;
1542
1543do_inline_write:
1544 ret = ocfs2_write_begin_inline(mapping, inode, wc);
1545 if (ret) {
1546 mlog_errno(ret);
1547 goto out;
1548 }
1549
1550 /*
1551 * This signals to the caller that the data can be written
1552 * inline.
1553 */
1554 written = 1;
1555out:
1556 return written ? written : ret;
1557}
1558
1435/* 1559/*
1436 * This function only does anything for file systems which can't 1560 * This function only does anything for file systems which can't
1437 * handle sparse files. 1561 * handle sparse files.
@@ -1483,6 +1607,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1483 return ret; 1607 return ret;
1484 } 1608 }
1485 1609
1610 if (ocfs2_supports_inline_data(osb)) {
1611 ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,
1612 mmap_page, wc);
1613 if (ret == 1) {
1614 ret = 0;
1615 goto success;
1616 }
1617 if (ret < 0) {
1618 mlog_errno(ret);
1619 goto out;
1620 }
1621 }
1622
1486 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc); 1623 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
1487 if (ret) { 1624 if (ret) {
1488 mlog_errno(ret); 1625 mlog_errno(ret);
@@ -1570,6 +1707,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1570 if (meta_ac) 1707 if (meta_ac)
1571 ocfs2_free_alloc_context(meta_ac); 1708 ocfs2_free_alloc_context(meta_ac);
1572 1709
1710success:
1573 *pagep = wc->w_target_page; 1711 *pagep = wc->w_target_page;
1574 *fsdata = wc; 1712 *fsdata = wc;
1575 return 0; 1713 return 0;
@@ -1637,6 +1775,31 @@ out_fail:
1637 return ret; 1775 return ret;
1638} 1776}
1639 1777
1778static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1779 unsigned len, unsigned *copied,
1780 struct ocfs2_dinode *di,
1781 struct ocfs2_write_ctxt *wc)
1782{
1783 void *kaddr;
1784
1785 if (unlikely(*copied < len)) {
1786 if (!PageUptodate(wc->w_target_page)) {
1787 *copied = 0;
1788 return;
1789 }
1790 }
1791
1792 kaddr = kmap_atomic(wc->w_target_page, KM_USER0);
1793 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1794 kunmap_atomic(kaddr, KM_USER0);
1795
1796 mlog(0, "Data written to inode at offset %llu. "
1797 "id_count = %u, copied = %u, i_dyn_features = 0x%x\n",
1798 (unsigned long long)pos, *copied,
1799 le16_to_cpu(di->id2.i_data.id_count),
1800 le16_to_cpu(di->i_dyn_features));
1801}
1802
1640int ocfs2_write_end_nolock(struct address_space *mapping, 1803int ocfs2_write_end_nolock(struct address_space *mapping,
1641 loff_t pos, unsigned len, unsigned copied, 1804 loff_t pos, unsigned len, unsigned copied,
1642 struct page *page, void *fsdata) 1805 struct page *page, void *fsdata)
@@ -1650,6 +1813,11 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1650 handle_t *handle = wc->w_handle; 1813 handle_t *handle = wc->w_handle;
1651 struct page *tmppage; 1814 struct page *tmppage;
1652 1815
1816 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1817 ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);
1818 goto out_write_size;
1819 }
1820
1653 if (unlikely(copied < len)) { 1821 if (unlikely(copied < len)) {
1654 if (!PageUptodate(wc->w_target_page)) 1822 if (!PageUptodate(wc->w_target_page))
1655 copied = 0; 1823 copied = 0;
@@ -1687,6 +1855,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1687 block_commit_write(tmppage, from, to); 1855 block_commit_write(tmppage, from, to);
1688 } 1856 }
1689 1857
1858out_write_size:
1690 pos += copied; 1859 pos += copied;
1691 if (pos > inode->i_size) { 1860 if (pos > inode->i_size) {
1692 i_size_write(inode, pos); 1861 i_size_write(inode, pos);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index b4fa37d40db4..113560877dbb 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -61,6 +61,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
61 struct page **pagep, void **fsdata, 61 struct page **pagep, void **fsdata,
62 struct buffer_head *di_bh, struct page *mmap_page); 62 struct buffer_head *di_bh, struct page *mmap_page);
63 63
64int ocfs2_read_inline_data(struct inode *inode, struct page *page,
65 struct buffer_head *di_bh);
66int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
67
64/* all ocfs2_dio_end_io()'s fault */ 68/* all ocfs2_dio_end_io()'s fault */
65#define ocfs2_iocb_is_rw_locked(iocb) \ 69#define ocfs2_iocb_is_rw_locked(iocb) \
66 test_bit(0, (unsigned long *)&iocb->private) 70 test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 781ba6c4ef85..a62b14eb4065 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -397,6 +397,15 @@ static int ocfs2_truncate_file(struct inode *inode,
397 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); 397 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
398 truncate_inode_pages(inode->i_mapping, new_i_size); 398 truncate_inode_pages(inode->i_mapping, new_i_size);
399 399
400 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
401 status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
402 i_size_read(inode), 0);
403 if (status)
404 mlog_errno(status);
405
406 goto bail_unlock_data;
407 }
408
400 /* alright, we're going to need to do a full blown alloc size 409 /* alright, we're going to need to do a full blown alloc size
401 * change. Orphan the inode so that recovery can complete the 410 * change. Orphan the inode so that recovery can complete the
402 * truncate if necessary. This does the task of marking 411 * truncate if necessary. This does the task of marking
@@ -908,7 +917,8 @@ static int ocfs2_extend_file(struct inode *inode,
908 struct buffer_head *di_bh, 917 struct buffer_head *di_bh,
909 u64 new_i_size) 918 u64 new_i_size)
910{ 919{
911 int ret = 0; 920 int ret = 0, data_locked = 0;
921 struct ocfs2_inode_info *oi = OCFS2_I(inode);
912 922
913 BUG_ON(!di_bh); 923 BUG_ON(!di_bh);
914 924
@@ -920,7 +930,17 @@ static int ocfs2_extend_file(struct inode *inode,
920 goto out; 930 goto out;
921 BUG_ON(new_i_size < i_size_read(inode)); 931 BUG_ON(new_i_size < i_size_read(inode));
922 932
923 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 933 /*
934 * Fall through for converting inline data, even if the fs
935 * supports sparse files.
936 *
937 * The check for inline data here is legal - nobody can add
938 * the feature since we have i_mutex. We must check it again
939 * after acquiring ip_alloc_sem though, as paths like mmap
940 * might have raced us to converting the inode to extents.
941 */
942 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
943 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
924 goto out_update_size; 944 goto out_update_size;
925 945
926 /* 946 /*
@@ -935,6 +955,7 @@ static int ocfs2_extend_file(struct inode *inode,
935 mlog_errno(ret); 955 mlog_errno(ret);
936 goto out; 956 goto out;
937 } 957 }
958 data_locked = 1;
938 959
939 /* 960 /*
940 * The alloc sem blocks people in read/write from reading our 961 * The alloc sem blocks people in read/write from reading our
@@ -942,9 +963,31 @@ static int ocfs2_extend_file(struct inode *inode,
942 * i_mutex to block other extend/truncate calls while we're 963 * i_mutex to block other extend/truncate calls while we're
943 * here. 964 * here.
944 */ 965 */
945 down_write(&OCFS2_I(inode)->ip_alloc_sem); 966 down_write(&oi->ip_alloc_sem);
946 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); 967
947 up_write(&OCFS2_I(inode)->ip_alloc_sem); 968 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
969 /*
970 * We can optimize small extends by keeping the inodes
971 * inline data.
972 */
973 if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
974 up_write(&oi->ip_alloc_sem);
975 goto out_update_size;
976 }
977
978 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
979 if (ret) {
980 up_write(&oi->ip_alloc_sem);
981
982 mlog_errno(ret);
983 goto out_unlock;
984 }
985 }
986
987 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
988 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
989
990 up_write(&oi->ip_alloc_sem);
948 991
949 if (ret < 0) { 992 if (ret < 0) {
950 mlog_errno(ret); 993 mlog_errno(ret);
@@ -957,7 +1000,7 @@ out_update_size:
957 mlog_errno(ret); 1000 mlog_errno(ret);
958 1001
959out_unlock: 1002out_unlock:
960 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1003 if (data_locked)
961 ocfs2_data_unlock(inode, 1); 1004 ocfs2_data_unlock(inode, 1);
962 1005
963out: 1006out:
@@ -1231,6 +1274,31 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1231{ 1274{
1232 int ret; 1275 int ret;
1233 u32 cpos, phys_cpos, clusters, alloc_size; 1276 u32 cpos, phys_cpos, clusters, alloc_size;
1277 u64 end = start + len;
1278 struct buffer_head *di_bh = NULL;
1279
1280 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1281 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1282 OCFS2_I(inode)->ip_blkno, &di_bh,
1283 OCFS2_BH_CACHED, inode);
1284 if (ret) {
1285 mlog_errno(ret);
1286 goto out;
1287 }
1288
1289 /*
1290 * Nothing to do if the requested reservation range
1291 * fits within the inode.
1292 */
1293 if (ocfs2_size_fits_inline_data(di_bh, end))
1294 goto out;
1295
1296 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
1297 if (ret) {
1298 mlog_errno(ret);
1299 goto out;
1300 }
1301 }
1234 1302
1235 /* 1303 /*
1236 * We consider both start and len to be inclusive. 1304 * We consider both start and len to be inclusive.
@@ -1276,6 +1344,8 @@ next:
1276 1344
1277 ret = 0; 1345 ret = 0;
1278out: 1346out:
1347
1348 brelse(di_bh);
1279 return ret; 1349 return ret;
1280} 1350}
1281 1351
@@ -1457,6 +1527,14 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1457 if (byte_len == 0) 1527 if (byte_len == 0)
1458 return 0; 1528 return 0;
1459 1529
1530 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1531 ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
1532 byte_start + byte_len, 1);
1533 if (ret)
1534 mlog_errno(ret);
1535 return ret;
1536 }
1537
1460 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1538 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1461 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1539 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
1462 if (trunc_len >= trunc_start) 1540 if (trunc_len >= trunc_start)
@@ -1759,6 +1837,15 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1759 break; 1837 break;
1760 1838
1761 /* 1839 /*
1840 * There's no sane way to do direct writes to an inode
1841 * with inline data.
1842 */
1843 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1844 *direct_io = 0;
1845 break;
1846 }
1847
1848 /*
1762 * Allowing concurrent direct writes means 1849 * Allowing concurrent direct writes means
1763 * i_size changes wouldn't be synchronized, so 1850 * i_size changes wouldn't be synchronized, so
1764 * one node could wind up truncating another 1851 * one node could wind up truncating another
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index c8923bab422a..1d5e0cb0fda1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -514,6 +514,10 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
514 514
515 fe = (struct ocfs2_dinode *) fe_bh->b_data; 515 fe = (struct ocfs2_dinode *) fe_bh->b_data;
516 516
517 /*
518 * This check will also skip truncate of inodes with inline
519 * data and fast symlinks.
520 */
517 if (fe->i_clusters) { 521 if (fe->i_clusters) {
518 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 522 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
519 if (IS_ERR(handle)) { 523 if (IS_ERR(handle)) {
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index ce60aab013aa..4b32e0961568 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -282,6 +282,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
282 * prev. group desc. if we relink. */ 282 * prev. group desc. if we relink. */
283#define OCFS2_SUBALLOC_ALLOC (3) 283#define OCFS2_SUBALLOC_ALLOC (3)
284 284
285#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
286 + OCFS2_INODE_UPDATE_CREDITS)
287
285/* dinode + group descriptor update. We don't relink on free yet. */ 288/* dinode + group descriptor update. We don't relink on free yet. */
286#define OCFS2_SUBALLOC_FREE (2) 289#define OCFS2_SUBALLOC_FREE (2)
287 290