aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/alloc.c
diff options
context:
space:
mode:
authorMark Fasheh <mark.fasheh@oracle.com>2007-09-07 17:46:51 -0400
committerMark Fasheh <mark.fasheh@oracle.com>2007-10-12 14:54:40 -0400
commit1afc32b952335f665327a1a9001ba1b44bb76fd9 (patch)
treec914afd0ef5d32b426c3cf65820de7599e570656 /fs/ocfs2/alloc.c
parent6798d35a31c413bbb3f83bbaa844bd2598168ccc (diff)
ocfs2: Write support for inline data
This fixes up write, truncate, mmap, and RESVSP/UNRESVSP to understand inline inode data. For the most part, the changes to the core write code can be relied on to do the heavy lifting. Any code calling ocfs2_write_begin (including shared writeable mmap) can count on it doing the right thing with respect to growing inline data to an extent tree. Size reducing truncates, including UNRESVSP, can simply zero that portion of the inode block being removed. Size increasing truncates, including RESVSP, have to be a little bit smarter and grow the inode to an extent tree if necessary. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Reviewed-by: Joel Becker <joel.becker@oracle.com>
Diffstat (limited to 'fs/ocfs2/alloc.c')
-rw-r--r--fs/ocfs2/alloc.c245
1 files changed, 245 insertions, 0 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index c81bfdfb9929..72cefe25382b 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -3726,6 +3726,8 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3726 struct ocfs2_insert_type insert = {0, }; 3726 struct ocfs2_insert_type insert = {0, };
3727 struct ocfs2_extent_rec rec; 3727 struct ocfs2_extent_rec rec;
3728 3728
3729 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
3730
3729 mlog(0, "add %u clusters at position %u to inode %llu\n", 3731 mlog(0, "add %u clusters at position %u to inode %llu\n",
3730 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 3732 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3731 3733
@@ -5826,6 +5828,174 @@ out:
5826 return ret; 5828 return ret;
5827} 5829}
5828 5830
5831static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
5832{
5833 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
5834
5835 memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2));
5836}
5837
5838void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
5839{
5840 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5841 struct ocfs2_inline_data *idata = &di->id2.i_data;
5842
5843 spin_lock(&oi->ip_lock);
5844 oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
5845 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5846 spin_unlock(&oi->ip_lock);
5847
5848 /*
5849 * We clear the entire i_data structure here so that all
5850 * fields can be properly initialized.
5851 */
5852 ocfs2_zero_dinode_id2(inode, di);
5853
5854 idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
5855}
5856
5857int ocfs2_convert_inline_data_to_extents(struct inode *inode,
5858 struct buffer_head *di_bh)
5859{
5860 int ret, i, has_data, num_pages = 0;
5861 handle_t *handle;
5862 u64 uninitialized_var(block);
5863 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5864 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5865 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
5866 struct ocfs2_extent_list *el = &di->id2.i_list;
5867 struct ocfs2_alloc_context *data_ac = NULL;
5868 struct page **pages = NULL;
5869 loff_t end = osb->s_clustersize;
5870
5871 has_data = i_size_read(inode) ? 1 : 0;
5872
5873 if (has_data) {
5874 pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
5875 sizeof(struct page *), GFP_NOFS);
5876 if (pages == NULL) {
5877 ret = -ENOMEM;
5878 mlog_errno(ret);
5879 goto out;
5880 }
5881
5882 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
5883 if (ret) {
5884 mlog_errno(ret);
5885 goto out;
5886 }
5887 }
5888
5889 handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
5890 if (IS_ERR(handle)) {
5891 ret = PTR_ERR(handle);
5892 mlog_errno(ret);
5893 goto out_unlock;
5894 }
5895
5896 ret = ocfs2_journal_access(handle, inode, di_bh,
5897 OCFS2_JOURNAL_ACCESS_WRITE);
5898 if (ret) {
5899 mlog_errno(ret);
5900 goto out_commit;
5901 }
5902
5903 if (has_data) {
5904 u32 bit_off, num;
5905 unsigned int page_end;
5906 u64 phys;
5907
5908 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
5909 &num);
5910 if (ret) {
5911 mlog_errno(ret);
5912 goto out_commit;
5913 }
5914
5915 /*
5916 * Save two copies, one for insert, and one that can
5917 * be changed by ocfs2_map_and_dirty_page() below.
5918 */
5919 block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
5920
5921 /*
5922 * Non sparse file systems zero on extend, so no need
5923 * to do that now.
5924 */
5925 if (!ocfs2_sparse_alloc(osb) &&
5926 PAGE_CACHE_SIZE < osb->s_clustersize)
5927 end = PAGE_CACHE_SIZE;
5928
5929 ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
5930 if (ret) {
5931 mlog_errno(ret);
5932 goto out_commit;
5933 }
5934
5935 /*
5936 * This should populate the 1st page for us and mark
5937 * it up to date.
5938 */
5939 ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
5940 if (ret) {
5941 mlog_errno(ret);
5942 goto out_commit;
5943 }
5944
5945 page_end = PAGE_CACHE_SIZE;
5946 if (PAGE_CACHE_SIZE > osb->s_clustersize)
5947 page_end = osb->s_clustersize;
5948
5949 for (i = 0; i < num_pages; i++)
5950 ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
5951 pages[i], i > 0, &phys);
5952 }
5953
5954 spin_lock(&oi->ip_lock);
5955 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
5956 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5957 spin_unlock(&oi->ip_lock);
5958
5959 ocfs2_zero_dinode_id2(inode, di);
5960
5961 el->l_tree_depth = 0;
5962 el->l_next_free_rec = 0;
5963 el->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
5964
5965 ocfs2_journal_dirty(handle, di_bh);
5966
5967 if (has_data) {
5968 /*
5969 * An error at this point should be extremely rare. If
5970 * this proves to be false, we could always re-build
5971 * the in-inode data from our pages.
5972 */
5973 ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
5974 0, block, 1, 0, NULL);
5975 if (ret) {
5976 mlog_errno(ret);
5977 goto out_commit;
5978 }
5979
5980 inode->i_blocks = ocfs2_inode_sector_count(inode);
5981 }
5982
5983out_commit:
5984 ocfs2_commit_trans(osb, handle);
5985
5986out_unlock:
5987 if (data_ac)
5988 ocfs2_free_alloc_context(data_ac);
5989
5990out:
5991 if (pages) {
5992 ocfs2_unlock_and_free_pages(pages, num_pages);
5993 kfree(pages);
5994 }
5995
5996 return ret;
5997}
5998
5829/* 5999/*
5830 * It is expected, that by the time you call this function, 6000 * It is expected, that by the time you call this function,
5831 * inode->i_size and fe->i_size have been adjusted. 6001 * inode->i_size and fe->i_size have been adjusted.
@@ -6051,6 +6221,81 @@ bail:
6051 return status; 6221 return status;
6052} 6222}
6053 6223
6224/*
6225 * 'start' is inclusive, 'end' is not.
6226 */
6227int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
6228 unsigned int start, unsigned int end, int trunc)
6229{
6230 int ret;
6231 unsigned int numbytes;
6232 handle_t *handle;
6233 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6234 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
6235 struct ocfs2_inline_data *idata = &di->id2.i_data;
6236
6237 if (end > i_size_read(inode))
6238 end = i_size_read(inode);
6239
6240 BUG_ON(start >= end);
6241
6242 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
6243 !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
6244 !ocfs2_supports_inline_data(osb)) {
6245 ocfs2_error(inode->i_sb,
6246 "Inline data flags for inode %llu don't agree! "
6247 "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
6248 (unsigned long long)OCFS2_I(inode)->ip_blkno,
6249 le16_to_cpu(di->i_dyn_features),
6250 OCFS2_I(inode)->ip_dyn_features,
6251 osb->s_feature_incompat);
6252 ret = -EROFS;
6253 goto out;
6254 }
6255
6256 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6257 if (IS_ERR(handle)) {
6258 ret = PTR_ERR(handle);
6259 mlog_errno(ret);
6260 goto out;
6261 }
6262
6263 ret = ocfs2_journal_access(handle, inode, di_bh,
6264 OCFS2_JOURNAL_ACCESS_WRITE);
6265 if (ret) {
6266 mlog_errno(ret);
6267 goto out_commit;
6268 }
6269
6270 numbytes = end - start;
6271 memset(idata->id_data + start, 0, numbytes);
6272
6273 /*
6274 * No need to worry about the data page here - it's been
6275 * truncated already and inline data doesn't need it for
6276 * pushing zero's to disk, so we'll let readpage pick it up
6277 * later.
6278 */
6279 if (trunc) {
6280 i_size_write(inode, start);
6281 di->i_size = cpu_to_le64(start);
6282 }
6283
6284 inode->i_blocks = ocfs2_inode_sector_count(inode);
6285 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
6286
6287 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
6288 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
6289
6290 ocfs2_journal_dirty(handle, di_bh);
6291
6292out_commit:
6293 ocfs2_commit_trans(osb, handle);
6294
6295out:
6296 return ret;
6297}
6298
6054static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 6299static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
6055{ 6300{
6056 /* 6301 /*