aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-12 18:04:00 -0400
committer Linus Torvalds <torvalds@woody.linux-foundation.org> 2007-10-12 18:04:00 -0400
commit a6e3d7dba92e19acffaa36aad962741a762aa8c5 (patch)
tree 4170e6cfe524b3714f35aba07890073ae8ae75c5
parent 42f04b6d4c8c69ccffc10863418c5b5f100a8554 (diff)
parent e7b34019606ab1dd06196635e931b0c302799228 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mfasheh/ocfs2: (23 commits)
  ocfs2: Optionally return filldir errors
  ocfs2: Write support for directories with inline data
  ocfs2: Read support for directories with inline data
  ocfs2: Write support for inline data
  ocfs2: Read support for inline data
  ocfs2: Structure updates for inline data
  ocfs2: Cleanup dirent size check
  ocfs2: Rename cleanups
  ocfs2: Provide convenience function for ino lookup
  ocfs2: Implement ocfs2_empty_dir() as a caller of ocfs2_dir_foreach()
  ocfs2: Remove open coded readdir()
  ocfs2: Pass raw u64 to filldir
  ocfs2: Abstract out core dir listing functionality
  ocfs2: Move directory manipulation code into dir.c
  ocfs2: Small refactor of truncate zeroing code
  ocfs2: move nonsparse hole-filling into ocfs2_write_begin()
  ocfs2: Sync ocfs2_fs.h with ocfs2-tools
  [PATCH] fs/ocfs2/: removed unneeded initial value and function's return value
  ocfs2: Implement show_options()
  ocfs2: Clear slot map when umounting a local volume
  ...
-rw-r--r-- fs/ocfs2/alloc.c 482
-rw-r--r-- fs/ocfs2/alloc.h 7
-rw-r--r-- fs/ocfs2/aops.c 309
-rw-r--r-- fs/ocfs2/aops.h 6
-rw-r--r-- fs/ocfs2/dir.c 1423
-rw-r--r-- fs/ocfs2/dir.h 48
-rw-r--r-- fs/ocfs2/dlmglue.c 2
-rw-r--r-- fs/ocfs2/dlmglue.h 4
-rw-r--r-- fs/ocfs2/export.c 8
-rw-r--r-- fs/ocfs2/extent_map.c 6
-rw-r--r-- fs/ocfs2/file.c 298
-rw-r--r-- fs/ocfs2/file.h 2
-rw-r--r-- fs/ocfs2/inode.c 7
-rw-r--r-- fs/ocfs2/inode.h 1
-rw-r--r-- fs/ocfs2/journal.c 120
-rw-r--r-- fs/ocfs2/journal.h 3
-rw-r--r-- fs/ocfs2/namei.c 552
-rw-r--r-- fs/ocfs2/namei.h 19
-rw-r--r-- fs/ocfs2/ocfs2.h 7
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 64
-rw-r--r-- fs/ocfs2/super.c 62
-rw-r--r-- fs/ocfs2/sysfile.c 10
22 files changed, 2386 insertions, 1054 deletions
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 778a850b4634..4ba7f0bdc248 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -354,7 +354,6 @@ struct ocfs2_insert_type {
354 enum ocfs2_append_type ins_appending; 354 enum ocfs2_append_type ins_appending;
355 enum ocfs2_contig_type ins_contig; 355 enum ocfs2_contig_type ins_contig;
356 int ins_contig_index; 356 int ins_contig_index;
357 int ins_free_records;
358 int ins_tree_depth; 357 int ins_tree_depth;
359}; 358};
360 359
@@ -362,7 +361,6 @@ struct ocfs2_merge_ctxt {
362 enum ocfs2_contig_type c_contig_type; 361 enum ocfs2_contig_type c_contig_type;
363 int c_has_empty_extent; 362 int c_has_empty_extent;
364 int c_split_covers_rec; 363 int c_split_covers_rec;
365 int c_used_tail_recs;
366}; 364};
367 365
368/* 366/*
@@ -2808,36 +2806,28 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
2808 struct ocfs2_merge_ctxt *ctxt) 2806 struct ocfs2_merge_ctxt *ctxt)
2809 2807
2810{ 2808{
2811 int ret = 0, delete_tail_recs = 0; 2809 int ret = 0;
2812 struct ocfs2_extent_list *el = path_leaf_el(left_path); 2810 struct ocfs2_extent_list *el = path_leaf_el(left_path);
2813 struct ocfs2_extent_rec *rec = &el->l_recs[split_index]; 2811 struct ocfs2_extent_rec *rec = &el->l_recs[split_index];
2814 2812
2815 BUG_ON(ctxt->c_contig_type == CONTIG_NONE); 2813 BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
2816 2814
2817 if (ctxt->c_split_covers_rec) { 2815 if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
2818 delete_tail_recs++; 2816 /*
2819 2817 * The merge code will need to create an empty
2820 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT || 2818 * extent to take the place of the newly
2821 ctxt->c_has_empty_extent) 2819 * emptied slot. Remove any pre-existing empty
2822 delete_tail_recs++; 2820 * extents - having more than one in a leaf is
2823 2821 * illegal.
2824 if (ctxt->c_has_empty_extent) { 2822 */
2825 /* 2823 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2826 * The merge code will need to create an empty 2824 dealloc);
2827 * extent to take the place of the newly 2825 if (ret) {
2828 * emptied slot. Remove any pre-existing empty 2826 mlog_errno(ret);
2829 * extents - having more than one in a leaf is 2827 goto out;
2830 * illegal.
2831 */
2832 ret = ocfs2_rotate_tree_left(inode, handle, left_path,
2833 dealloc);
2834 if (ret) {
2835 mlog_errno(ret);
2836 goto out;
2837 }
2838 split_index--;
2839 rec = &el->l_recs[split_index];
2840 } 2828 }
2829 split_index--;
2830 rec = &el->l_recs[split_index];
2841 } 2831 }
2842 2832
2843 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) { 2833 if (ctxt->c_contig_type == CONTIG_LEFTRIGHT) {
@@ -3593,6 +3583,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3593 struct buffer_head *di_bh, 3583 struct buffer_head *di_bh,
3594 struct buffer_head **last_eb_bh, 3584 struct buffer_head **last_eb_bh,
3595 struct ocfs2_extent_rec *insert_rec, 3585 struct ocfs2_extent_rec *insert_rec,
3586 int *free_records,
3596 struct ocfs2_insert_type *insert) 3587 struct ocfs2_insert_type *insert)
3597{ 3588{
3598 int ret; 3589 int ret;
@@ -3633,7 +3624,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3633 * XXX: This test is simplistic, we can search for empty 3624 * XXX: This test is simplistic, we can search for empty
3634 * extent records too. 3625 * extent records too.
3635 */ 3626 */
3636 insert->ins_free_records = le16_to_cpu(el->l_count) - 3627 *free_records = le16_to_cpu(el->l_count) -
3637 le16_to_cpu(el->l_next_free_rec); 3628 le16_to_cpu(el->l_next_free_rec);
3638 3629
3639 if (!insert->ins_tree_depth) { 3630 if (!insert->ins_tree_depth) {
@@ -3730,10 +3721,13 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3730 struct ocfs2_alloc_context *meta_ac) 3721 struct ocfs2_alloc_context *meta_ac)
3731{ 3722{
3732 int status; 3723 int status;
3724 int uninitialized_var(free_records);
3733 struct buffer_head *last_eb_bh = NULL; 3725 struct buffer_head *last_eb_bh = NULL;
3734 struct ocfs2_insert_type insert = {0, }; 3726 struct ocfs2_insert_type insert = {0, };
3735 struct ocfs2_extent_rec rec; 3727 struct ocfs2_extent_rec rec;
3736 3728
3729 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
3730
3737 mlog(0, "add %u clusters at position %u to inode %llu\n", 3731 mlog(0, "add %u clusters at position %u to inode %llu\n",
3738 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 3732 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3739 3733
@@ -3752,7 +3746,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3752 rec.e_flags = flags; 3746 rec.e_flags = flags;
3753 3747
3754 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 3748 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec,
3755 &insert); 3749 &free_records, &insert);
3756 if (status < 0) { 3750 if (status < 0) {
3757 mlog_errno(status); 3751 mlog_errno(status);
3758 goto bail; 3752 goto bail;
@@ -3762,9 +3756,9 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
3762 "Insert.contig_index: %d, Insert.free_records: %d, " 3756 "Insert.contig_index: %d, Insert.free_records: %d, "
3763 "Insert.tree_depth: %d\n", 3757 "Insert.tree_depth: %d\n",
3764 insert.ins_appending, insert.ins_contig, insert.ins_contig_index, 3758 insert.ins_appending, insert.ins_contig, insert.ins_contig_index,
3765 insert.ins_free_records, insert.ins_tree_depth); 3759 free_records, insert.ins_tree_depth);
3766 3760
3767 if (insert.ins_contig == CONTIG_NONE && insert.ins_free_records == 0) { 3761 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
3768 status = ocfs2_grow_tree(inode, handle, fe_bh, 3762 status = ocfs2_grow_tree(inode, handle, fe_bh,
3769 &insert.ins_tree_depth, &last_eb_bh, 3763 &insert.ins_tree_depth, &last_eb_bh,
3770 meta_ac); 3764 meta_ac);
@@ -3847,26 +3841,17 @@ leftright:
3847 3841
3848 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 3842 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
3849 le16_to_cpu(rightmost_el->l_count)) { 3843 le16_to_cpu(rightmost_el->l_count)) {
3850 int old_depth = depth;
3851
3852 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, 3844 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh,
3853 meta_ac); 3845 meta_ac);
3854 if (ret) { 3846 if (ret) {
3855 mlog_errno(ret); 3847 mlog_errno(ret);
3856 goto out; 3848 goto out;
3857 } 3849 }
3858
3859 if (old_depth != depth) {
3860 eb = (struct ocfs2_extent_block *)(*last_eb_bh)->b_data;
3861 rightmost_el = &eb->h_list;
3862 }
3863 } 3850 }
3864 3851
3865 memset(&insert, 0, sizeof(struct ocfs2_insert_type)); 3852 memset(&insert, 0, sizeof(struct ocfs2_insert_type));
3866 insert.ins_appending = APPEND_NONE; 3853 insert.ins_appending = APPEND_NONE;
3867 insert.ins_contig = CONTIG_NONE; 3854 insert.ins_contig = CONTIG_NONE;
3868 insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
3869 - le16_to_cpu(rightmost_el->l_next_free_rec);
3870 insert.ins_tree_depth = depth; 3855 insert.ins_tree_depth = depth;
3871 3856
3872 insert_range = le32_to_cpu(split_rec.e_cpos) + 3857 insert_range = le32_to_cpu(split_rec.e_cpos) +
@@ -4015,11 +4000,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4015 } else 4000 } else
4016 rightmost_el = path_root_el(path); 4001 rightmost_el = path_root_el(path);
4017 4002
4018 ctxt.c_used_tail_recs = le16_to_cpu(rightmost_el->l_next_free_rec);
4019 if (ctxt.c_used_tail_recs > 0 &&
4020 ocfs2_is_empty_extent(&rightmost_el->l_recs[0]))
4021 ctxt.c_used_tail_recs--;
4022
4023 if (rec->e_cpos == split_rec->e_cpos && 4003 if (rec->e_cpos == split_rec->e_cpos &&
4024 rec->e_leaf_clusters == split_rec->e_leaf_clusters) 4004 rec->e_leaf_clusters == split_rec->e_leaf_clusters)
4025 ctxt.c_split_covers_rec = 1; 4005 ctxt.c_split_covers_rec = 1;
@@ -4028,10 +4008,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4028 4008
4029 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]); 4009 ctxt.c_has_empty_extent = ocfs2_is_empty_extent(&el->l_recs[0]);
4030 4010
4031 mlog(0, "index: %d, contig: %u, used_tail_recs: %u, " 4011 mlog(0, "index: %d, contig: %u, has_empty: %u, split_covers: %u\n",
4032 "has_empty: %u, split_covers: %u\n", split_index, 4012 split_index, ctxt.c_contig_type, ctxt.c_has_empty_extent,
4033 ctxt.c_contig_type, ctxt.c_used_tail_recs, 4013 ctxt.c_split_covers_rec);
4034 ctxt.c_has_empty_extent, ctxt.c_split_covers_rec);
4035 4014
4036 if (ctxt.c_contig_type == CONTIG_NONE) { 4015 if (ctxt.c_contig_type == CONTIG_NONE) {
4037 if (ctxt.c_split_covers_rec) 4016 if (ctxt.c_split_covers_rec)
@@ -4180,27 +4159,18 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4180 4159
4181 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4160 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4182 le16_to_cpu(rightmost_el->l_count)) { 4161 le16_to_cpu(rightmost_el->l_count)) {
4183 int old_depth = depth;
4184
4185 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, 4162 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh,
4186 meta_ac); 4163 meta_ac);
4187 if (ret) { 4164 if (ret) {
4188 mlog_errno(ret); 4165 mlog_errno(ret);
4189 goto out; 4166 goto out;
4190 } 4167 }
4191
4192 if (old_depth != depth) {
4193 eb = (struct ocfs2_extent_block *)last_eb_bh->b_data;
4194 rightmost_el = &eb->h_list;
4195 }
4196 } 4168 }
4197 4169
4198 memset(&insert, 0, sizeof(struct ocfs2_insert_type)); 4170 memset(&insert, 0, sizeof(struct ocfs2_insert_type));
4199 insert.ins_appending = APPEND_NONE; 4171 insert.ins_appending = APPEND_NONE;
4200 insert.ins_contig = CONTIG_NONE; 4172 insert.ins_contig = CONTIG_NONE;
4201 insert.ins_split = SPLIT_RIGHT; 4173 insert.ins_split = SPLIT_RIGHT;
4202 insert.ins_free_records = le16_to_cpu(rightmost_el->l_count)
4203 - le16_to_cpu(rightmost_el->l_next_free_rec);
4204 insert.ins_tree_depth = depth; 4174 insert.ins_tree_depth = depth;
4205 4175
4206 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); 4176 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert);
@@ -5665,12 +5635,50 @@ static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
5665 return ocfs2_journal_dirty_data(handle, bh); 5635 return ocfs2_journal_dirty_data(handle, bh);
5666} 5636}
5667 5637
5638static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
5639 unsigned int from, unsigned int to,
5640 struct page *page, int zero, u64 *phys)
5641{
5642 int ret, partial = 0;
5643
5644 ret = ocfs2_map_page_blocks(page, phys, inode, from, to, 0);
5645 if (ret)
5646 mlog_errno(ret);
5647
5648 if (zero)
5649 zero_user_page(page, from, to - from, KM_USER0);
5650
5651 /*
5652 * Need to set the buffers we zero'd into uptodate
5653 * here if they aren't - ocfs2_map_page_blocks()
5654 * might've skipped some
5655 */
5656 if (ocfs2_should_order_data(inode)) {
5657 ret = walk_page_buffers(handle,
5658 page_buffers(page),
5659 from, to, &partial,
5660 ocfs2_ordered_zero_func);
5661 if (ret < 0)
5662 mlog_errno(ret);
5663 } else {
5664 ret = walk_page_buffers(handle, page_buffers(page),
5665 from, to, &partial,
5666 ocfs2_writeback_zero_func);
5667 if (ret < 0)
5668 mlog_errno(ret);
5669 }
5670
5671 if (!partial)
5672 SetPageUptodate(page);
5673
5674 flush_dcache_page(page);
5675}
5676
5668static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start, 5677static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
5669 loff_t end, struct page **pages, 5678 loff_t end, struct page **pages,
5670 int numpages, u64 phys, handle_t *handle) 5679 int numpages, u64 phys, handle_t *handle)
5671{ 5680{
5672 int i, ret, partial = 0; 5681 int i;
5673 void *kaddr;
5674 struct page *page; 5682 struct page *page;
5675 unsigned int from, to = PAGE_CACHE_SIZE; 5683 unsigned int from, to = PAGE_CACHE_SIZE;
5676 struct super_block *sb = inode->i_sb; 5684 struct super_block *sb = inode->i_sb;
@@ -5691,87 +5699,31 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
5691 BUG_ON(from > PAGE_CACHE_SIZE); 5699 BUG_ON(from > PAGE_CACHE_SIZE);
5692 BUG_ON(to > PAGE_CACHE_SIZE); 5700 BUG_ON(to > PAGE_CACHE_SIZE);
5693 5701
5694 ret = ocfs2_map_page_blocks(page, &phys, inode, from, to, 0); 5702 ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
5695 if (ret) 5703 &phys);
5696 mlog_errno(ret);
5697
5698 kaddr = kmap_atomic(page, KM_USER0);
5699 memset(kaddr + from, 0, to - from);
5700 kunmap_atomic(kaddr, KM_USER0);
5701
5702 /*
5703 * Need to set the buffers we zero'd into uptodate
5704 * here if they aren't - ocfs2_map_page_blocks()
5705 * might've skipped some
5706 */
5707 if (ocfs2_should_order_data(inode)) {
5708 ret = walk_page_buffers(handle,
5709 page_buffers(page),
5710 from, to, &partial,
5711 ocfs2_ordered_zero_func);
5712 if (ret < 0)
5713 mlog_errno(ret);
5714 } else {
5715 ret = walk_page_buffers(handle, page_buffers(page),
5716 from, to, &partial,
5717 ocfs2_writeback_zero_func);
5718 if (ret < 0)
5719 mlog_errno(ret);
5720 }
5721
5722 if (!partial)
5723 SetPageUptodate(page);
5724
5725 flush_dcache_page(page);
5726 5704
5727 start = (page->index + 1) << PAGE_CACHE_SHIFT; 5705 start = (page->index + 1) << PAGE_CACHE_SHIFT;
5728 } 5706 }
5729out: 5707out:
5730 if (pages) { 5708 if (pages)
5731 for (i = 0; i < numpages; i++) { 5709 ocfs2_unlock_and_free_pages(pages, numpages);
5732 page = pages[i];
5733 unlock_page(page);
5734 mark_page_accessed(page);
5735 page_cache_release(page);
5736 }
5737 }
5738} 5710}
5739 5711
5740static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, 5712static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
5741 struct page **pages, int *num, u64 *phys) 5713 struct page **pages, int *num)
5742{ 5714{
5743 int i, numpages = 0, ret = 0; 5715 int numpages, ret = 0;
5744 unsigned int ext_flags;
5745 struct super_block *sb = inode->i_sb; 5716 struct super_block *sb = inode->i_sb;
5746 struct address_space *mapping = inode->i_mapping; 5717 struct address_space *mapping = inode->i_mapping;
5747 unsigned long index; 5718 unsigned long index;
5748 loff_t last_page_bytes; 5719 loff_t last_page_bytes;
5749 5720
5750 BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
5751 BUG_ON(start > end); 5721 BUG_ON(start > end);
5752 5722
5753 if (start == end)
5754 goto out;
5755
5756 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits != 5723 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
5757 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits); 5724 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
5758 5725
5759 ret = ocfs2_extent_map_get_blocks(inode, start >> sb->s_blocksize_bits, 5726 numpages = 0;
5760 phys, NULL, &ext_flags);
5761 if (ret) {
5762 mlog_errno(ret);
5763 goto out;
5764 }
5765
5766 /* Tail is a hole. */
5767 if (*phys == 0)
5768 goto out;
5769
5770 /* Tail is marked as unwritten, we can count on write to zero
5771 * in that case. */
5772 if (ext_flags & OCFS2_EXT_UNWRITTEN)
5773 goto out;
5774
5775 last_page_bytes = PAGE_ALIGN(end); 5727 last_page_bytes = PAGE_ALIGN(end);
5776 index = start >> PAGE_CACHE_SHIFT; 5728 index = start >> PAGE_CACHE_SHIFT;
5777 do { 5729 do {
@@ -5788,14 +5740,8 @@ static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
5788 5740
5789out: 5741out:
5790 if (ret != 0) { 5742 if (ret != 0) {
5791 if (pages) { 5743 if (pages)
5792 for (i = 0; i < numpages; i++) { 5744 ocfs2_unlock_and_free_pages(pages, numpages);
5793 if (pages[i]) {
5794 unlock_page(pages[i]);
5795 page_cache_release(pages[i]);
5796 }
5797 }
5798 }
5799 numpages = 0; 5745 numpages = 0;
5800 } 5746 }
5801 5747
@@ -5816,18 +5762,20 @@ out:
5816int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle, 5762int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
5817 u64 range_start, u64 range_end) 5763 u64 range_start, u64 range_end)
5818{ 5764{
5819 int ret, numpages; 5765 int ret = 0, numpages;
5820 struct page **pages = NULL; 5766 struct page **pages = NULL;
5821 u64 phys; 5767 u64 phys;
5768 unsigned int ext_flags;
5769 struct super_block *sb = inode->i_sb;
5822 5770
5823 /* 5771 /*
5824 * File systems which don't support sparse files zero on every 5772 * File systems which don't support sparse files zero on every
5825 * extend. 5773 * extend.
5826 */ 5774 */
5827 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 5775 if (!ocfs2_sparse_alloc(OCFS2_SB(sb)))
5828 return 0; 5776 return 0;
5829 5777
5830 pages = kcalloc(ocfs2_pages_per_cluster(inode->i_sb), 5778 pages = kcalloc(ocfs2_pages_per_cluster(sb),
5831 sizeof(struct page *), GFP_NOFS); 5779 sizeof(struct page *), GFP_NOFS);
5832 if (pages == NULL) { 5780 if (pages == NULL) {
5833 ret = -ENOMEM; 5781 ret = -ENOMEM;
@@ -5835,16 +5783,31 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
5835 goto out; 5783 goto out;
5836 } 5784 }
5837 5785
5838 ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages, 5786 if (range_start == range_end)
5839 &numpages, &phys); 5787 goto out;
5788
5789 ret = ocfs2_extent_map_get_blocks(inode,
5790 range_start >> sb->s_blocksize_bits,
5791 &phys, NULL, &ext_flags);
5840 if (ret) { 5792 if (ret) {
5841 mlog_errno(ret); 5793 mlog_errno(ret);
5842 goto out; 5794 goto out;
5843 } 5795 }
5844 5796
5845 if (numpages == 0) 5797 /*
5798 * Tail is a hole, or is marked unwritten. In either case, we
5799 * can count on read and write to return/push zero's.
5800 */
5801 if (phys == 0 || ext_flags & OCFS2_EXT_UNWRITTEN)
5846 goto out; 5802 goto out;
5847 5803
5804 ret = ocfs2_grab_eof_pages(inode, range_start, range_end, pages,
5805 &numpages);
5806 if (ret) {
5807 mlog_errno(ret);
5808 goto out;
5809 }
5810
5848 ocfs2_zero_cluster_pages(inode, range_start, range_end, pages, 5811 ocfs2_zero_cluster_pages(inode, range_start, range_end, pages,
5849 numpages, phys, handle); 5812 numpages, phys, handle);
5850 5813
@@ -5865,6 +5828,178 @@ out:
5865 return ret; 5828 return ret;
5866} 5829}
5867 5830
5831static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di)
5832{
5833 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
5834
5835 memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2));
5836}
5837
5838void ocfs2_dinode_new_extent_list(struct inode *inode,
5839 struct ocfs2_dinode *di)
5840{
5841 ocfs2_zero_dinode_id2(inode, di);
5842 di->id2.i_list.l_tree_depth = 0;
5843 di->id2.i_list.l_next_free_rec = 0;
5844 di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb));
5845}
5846
5847void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
5848{
5849 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5850 struct ocfs2_inline_data *idata = &di->id2.i_data;
5851
5852 spin_lock(&oi->ip_lock);
5853 oi->ip_dyn_features |= OCFS2_INLINE_DATA_FL;
5854 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5855 spin_unlock(&oi->ip_lock);
5856
5857 /*
5858 * We clear the entire i_data structure here so that all
5859 * fields can be properly initialized.
5860 */
5861 ocfs2_zero_dinode_id2(inode, di);
5862
5863 idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb));
5864}
5865
5866int ocfs2_convert_inline_data_to_extents(struct inode *inode,
5867 struct buffer_head *di_bh)
5868{
5869 int ret, i, has_data, num_pages = 0;
5870 handle_t *handle;
5871 u64 uninitialized_var(block);
5872 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5873 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5874 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
5875 struct ocfs2_alloc_context *data_ac = NULL;
5876 struct page **pages = NULL;
5877 loff_t end = osb->s_clustersize;
5878
5879 has_data = i_size_read(inode) ? 1 : 0;
5880
5881 if (has_data) {
5882 pages = kcalloc(ocfs2_pages_per_cluster(osb->sb),
5883 sizeof(struct page *), GFP_NOFS);
5884 if (pages == NULL) {
5885 ret = -ENOMEM;
5886 mlog_errno(ret);
5887 goto out;
5888 }
5889
5890 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
5891 if (ret) {
5892 mlog_errno(ret);
5893 goto out;
5894 }
5895 }
5896
5897 handle = ocfs2_start_trans(osb, OCFS2_INLINE_TO_EXTENTS_CREDITS);
5898 if (IS_ERR(handle)) {
5899 ret = PTR_ERR(handle);
5900 mlog_errno(ret);
5901 goto out_unlock;
5902 }
5903
5904 ret = ocfs2_journal_access(handle, inode, di_bh,
5905 OCFS2_JOURNAL_ACCESS_WRITE);
5906 if (ret) {
5907 mlog_errno(ret);
5908 goto out_commit;
5909 }
5910
5911 if (has_data) {
5912 u32 bit_off, num;
5913 unsigned int page_end;
5914 u64 phys;
5915
5916 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
5917 &num);
5918 if (ret) {
5919 mlog_errno(ret);
5920 goto out_commit;
5921 }
5922
5923 /*
5924 * Save two copies, one for insert, and one that can
5925 * be changed by ocfs2_map_and_dirty_page() below.
5926 */
5927 block = phys = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
5928
5929 /*
5930 * Non sparse file systems zero on extend, so no need
5931 * to do that now.
5932 */
5933 if (!ocfs2_sparse_alloc(osb) &&
5934 PAGE_CACHE_SIZE < osb->s_clustersize)
5935 end = PAGE_CACHE_SIZE;
5936
5937 ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
5938 if (ret) {
5939 mlog_errno(ret);
5940 goto out_commit;
5941 }
5942
5943 /*
5944 * This should populate the 1st page for us and mark
5945 * it up to date.
5946 */
5947 ret = ocfs2_read_inline_data(inode, pages[0], di_bh);
5948 if (ret) {
5949 mlog_errno(ret);
5950 goto out_commit;
5951 }
5952
5953 page_end = PAGE_CACHE_SIZE;
5954 if (PAGE_CACHE_SIZE > osb->s_clustersize)
5955 page_end = osb->s_clustersize;
5956
5957 for (i = 0; i < num_pages; i++)
5958 ocfs2_map_and_dirty_page(inode, handle, 0, page_end,
5959 pages[i], i > 0, &phys);
5960 }
5961
5962 spin_lock(&oi->ip_lock);
5963 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
5964 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
5965 spin_unlock(&oi->ip_lock);
5966
5967 ocfs2_dinode_new_extent_list(inode, di);
5968
5969 ocfs2_journal_dirty(handle, di_bh);
5970
5971 if (has_data) {
5972 /*
5973 * An error at this point should be extremely rare. If
5974 * this proves to be false, we could always re-build
5975 * the in-inode data from our pages.
5976 */
5977 ret = ocfs2_insert_extent(osb, handle, inode, di_bh,
5978 0, block, 1, 0, NULL);
5979 if (ret) {
5980 mlog_errno(ret);
5981 goto out_commit;
5982 }
5983
5984 inode->i_blocks = ocfs2_inode_sector_count(inode);
5985 }
5986
5987out_commit:
5988 ocfs2_commit_trans(osb, handle);
5989
5990out_unlock:
5991 if (data_ac)
5992 ocfs2_free_alloc_context(data_ac);
5993
5994out:
5995 if (pages) {
5996 ocfs2_unlock_and_free_pages(pages, num_pages);
5997 kfree(pages);
5998 }
5999
6000 return ret;
6001}
6002
5868/* 6003/*
5869 * It is expected, that by the time you call this function, 6004 * It is expected, that by the time you call this function,
5870 * inode->i_size and fe->i_size have been adjusted. 6005 * inode->i_size and fe->i_size have been adjusted.
@@ -6090,6 +6225,81 @@ bail:
6090 return status; 6225 return status;
6091} 6226}
6092 6227
6228/*
6229 * 'start' is inclusive, 'end' is not.
6230 */
6231int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
6232 unsigned int start, unsigned int end, int trunc)
6233{
6234 int ret;
6235 unsigned int numbytes;
6236 handle_t *handle;
6237 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6238 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
6239 struct ocfs2_inline_data *idata = &di->id2.i_data;
6240
6241 if (end > i_size_read(inode))
6242 end = i_size_read(inode);
6243
6244 BUG_ON(start >= end);
6245
6246 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
6247 !(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL) ||
6248 !ocfs2_supports_inline_data(osb)) {
6249 ocfs2_error(inode->i_sb,
6250 "Inline data flags for inode %llu don't agree! "
6251 "Disk: 0x%x, Memory: 0x%x, Superblock: 0x%x\n",
6252 (unsigned long long)OCFS2_I(inode)->ip_blkno,
6253 le16_to_cpu(di->i_dyn_features),
6254 OCFS2_I(inode)->ip_dyn_features,
6255 osb->s_feature_incompat);
6256 ret = -EROFS;
6257 goto out;
6258 }
6259
6260 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
6261 if (IS_ERR(handle)) {
6262 ret = PTR_ERR(handle);
6263 mlog_errno(ret);
6264 goto out;
6265 }
6266
6267 ret = ocfs2_journal_access(handle, inode, di_bh,
6268 OCFS2_JOURNAL_ACCESS_WRITE);
6269 if (ret) {
6270 mlog_errno(ret);
6271 goto out_commit;
6272 }
6273
6274 numbytes = end - start;
6275 memset(idata->id_data + start, 0, numbytes);
6276
6277 /*
6278 * No need to worry about the data page here - it's been
6279 * truncated already and inline data doesn't need it for
6280 * pushing zero's to disk, so we'll let readpage pick it up
6281 * later.
6282 */
6283 if (trunc) {
6284 i_size_write(inode, start);
6285 di->i_size = cpu_to_le64(start);
6286 }
6287
6288 inode->i_blocks = ocfs2_inode_sector_count(inode);
6289 inode->i_ctime = inode->i_mtime = CURRENT_TIME;
6290
6291 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
6292 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
6293
6294 ocfs2_journal_dirty(handle, di_bh);
6295
6296out_commit:
6297 ocfs2_commit_trans(osb, handle);
6298
6299out:
6300 return ret;
6301}
6302
6093static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) 6303static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
6094{ 6304{
6095 /* 6305 /*
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 990df48ae8d3..42ff94bd8011 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -62,6 +62,11 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; 62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2;
63} 63}
64 64
65void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
66void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di);
67int ocfs2_convert_inline_data_to_extents(struct inode *inode,
68 struct buffer_head *di_bh);
69
65int ocfs2_truncate_log_init(struct ocfs2_super *osb); 70int ocfs2_truncate_log_init(struct ocfs2_super *osb);
66void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb); 71void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb);
67void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb, 72void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
@@ -115,6 +120,8 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
115 struct inode *inode, 120 struct inode *inode,
116 struct buffer_head *fe_bh, 121 struct buffer_head *fe_bh,
117 struct ocfs2_truncate_context *tc); 122 struct ocfs2_truncate_context *tc);
123int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
124 unsigned int start, unsigned int end, int trunc);
118 125
119int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 126int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el,
120 u32 cpos, struct buffer_head **leaf_bh); 127 u32 cpos, struct buffer_head **leaf_bh);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index f37f25c931f5..34d10452c56d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -206,9 +206,70 @@ bail:
206 return err; 206 return err;
207} 207}
208 208
209int ocfs2_read_inline_data(struct inode *inode, struct page *page,
210 struct buffer_head *di_bh)
211{
212 void *kaddr;
213 unsigned int size;
214 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
215
216 if (!(le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_DATA_FL)) {
217 ocfs2_error(inode->i_sb, "Inode %llu lost inline data flag",
218 (unsigned long long)OCFS2_I(inode)->ip_blkno);
219 return -EROFS;
220 }
221
222 size = i_size_read(inode);
223
224 if (size > PAGE_CACHE_SIZE ||
225 size > ocfs2_max_inline_data(inode->i_sb)) {
226 ocfs2_error(inode->i_sb,
227 "Inode %llu has with inline data has bad size: %u",
228 (unsigned long long)OCFS2_I(inode)->ip_blkno, size);
229 return -EROFS;
230 }
231
232 kaddr = kmap_atomic(page, KM_USER0);
233 if (size)
234 memcpy(kaddr, di->id2.i_data.id_data, size);
235 /* Clear the remaining part of the page */
236 memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
237 flush_dcache_page(page);
238 kunmap_atomic(kaddr, KM_USER0);
239
240 SetPageUptodate(page);
241
242 return 0;
243}
244
245static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
246{
247 int ret;
248 struct buffer_head *di_bh = NULL;
249 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
250
251 BUG_ON(!PageLocked(page));
252 BUG_ON(!OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
253
254 ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh,
255 OCFS2_BH_CACHED, inode);
256 if (ret) {
257 mlog_errno(ret);
258 goto out;
259 }
260
261 ret = ocfs2_read_inline_data(inode, page, di_bh);
262out:
263 unlock_page(page);
264
265 brelse(di_bh);
266 return ret;
267}
268
209static int ocfs2_readpage(struct file *file, struct page *page) 269static int ocfs2_readpage(struct file *file, struct page *page)
210{ 270{
211 struct inode *inode = page->mapping->host; 271 struct inode *inode = page->mapping->host;
272 struct ocfs2_inode_info *oi = OCFS2_I(inode);
212 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT; 273 loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
213 int ret, unlock = 1; 274 int ret, unlock = 1;
214 275
@@ -222,7 +283,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
222 goto out; 283 goto out;
223 } 284 }
224 285
225 if (down_read_trylock(&OCFS2_I(inode)->ip_alloc_sem) == 0) { 286 if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
226 ret = AOP_TRUNCATED_PAGE; 287 ret = AOP_TRUNCATED_PAGE;
227 goto out_meta_unlock; 288 goto out_meta_unlock;
228 } 289 }
@@ -252,7 +313,10 @@ static int ocfs2_readpage(struct file *file, struct page *page)
252 goto out_alloc; 313 goto out_alloc;
253 } 314 }
254 315
255 ret = block_read_full_page(page, ocfs2_get_block); 316 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
317 ret = ocfs2_readpage_inline(inode, page);
318 else
319 ret = block_read_full_page(page, ocfs2_get_block);
256 unlock = 0; 320 unlock = 0;
257 321
258 ocfs2_data_unlock(inode, 0); 322 ocfs2_data_unlock(inode, 0);
@@ -301,12 +365,8 @@ int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
301{ 365{
302 int ret; 366 int ret;
303 367
304 down_read(&OCFS2_I(inode)->ip_alloc_sem);
305
306 ret = block_prepare_write(page, from, to, ocfs2_get_block); 368 ret = block_prepare_write(page, from, to, ocfs2_get_block);
307 369
308 up_read(&OCFS2_I(inode)->ip_alloc_sem);
309
310 return ret; 370 return ret;
311} 371}
312 372
@@ -401,7 +461,9 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
401 down_read(&OCFS2_I(inode)->ip_alloc_sem); 461 down_read(&OCFS2_I(inode)->ip_alloc_sem);
402 } 462 }
403 463
404 err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, NULL); 464 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
465 err = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
466 NULL);
405 467
406 if (!INODE_JOURNAL(inode)) { 468 if (!INODE_JOURNAL(inode)) {
407 up_read(&OCFS2_I(inode)->ip_alloc_sem); 469 up_read(&OCFS2_I(inode)->ip_alloc_sem);
@@ -415,7 +477,6 @@ static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
415 goto bail; 477 goto bail;
416 } 478 }
417 479
418
419bail: 480bail:
420 status = err ? 0 : p_blkno; 481 status = err ? 0 : p_blkno;
421 482
@@ -570,6 +631,13 @@ static ssize_t ocfs2_direct_IO(int rw,
570 631
571 mlog_entry_void(); 632 mlog_entry_void();
572 633
634 /*
635 * Fallback to buffered I/O if we see an inode without
636 * extents.
637 */
638 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
639 return 0;
640
573 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { 641 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
574 /* 642 /*
575 * We get PR data locks even for O_DIRECT. This 643 * We get PR data locks even for O_DIRECT. This
@@ -834,18 +902,22 @@ struct ocfs2_write_ctxt {
834 struct ocfs2_cached_dealloc_ctxt w_dealloc; 902 struct ocfs2_cached_dealloc_ctxt w_dealloc;
835}; 903};
836 904
837static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) 905void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
838{ 906{
839 int i; 907 int i;
840 908
841 for(i = 0; i < wc->w_num_pages; i++) { 909 for(i = 0; i < num_pages; i++) {
842 if (wc->w_pages[i] == NULL) 910 if (pages[i]) {
843 continue; 911 unlock_page(pages[i]);
844 912 mark_page_accessed(pages[i]);
845 unlock_page(wc->w_pages[i]); 913 page_cache_release(pages[i]);
846 mark_page_accessed(wc->w_pages[i]); 914 }
847 page_cache_release(wc->w_pages[i]);
848 } 915 }
916}
917
918static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
919{
920 ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
849 921
850 brelse(wc->w_di_bh); 922 brelse(wc->w_di_bh);
851 kfree(wc); 923 kfree(wc);
@@ -1360,6 +1432,160 @@ out:
1360 return ret; 1432 return ret;
1361} 1433}
1362 1434
1435static int ocfs2_write_begin_inline(struct address_space *mapping,
1436 struct inode *inode,
1437 struct ocfs2_write_ctxt *wc)
1438{
1439 int ret;
1440 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1441 struct page *page;
1442 handle_t *handle;
1443 struct ocfs2_dinode *di = (struct ocfs2_dinode *)wc->w_di_bh->b_data;
1444
1445 page = find_or_create_page(mapping, 0, GFP_NOFS);
1446 if (!page) {
1447 ret = -ENOMEM;
1448 mlog_errno(ret);
1449 goto out;
1450 }
1451 /*
1452 * If we don't set w_num_pages then this page won't get unlocked
1453 * and freed on cleanup of the write context.
1454 */
1455 wc->w_pages[0] = wc->w_target_page = page;
1456 wc->w_num_pages = 1;
1457
1458 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
1459 if (IS_ERR(handle)) {
1460 ret = PTR_ERR(handle);
1461 mlog_errno(ret);
1462 goto out;
1463 }
1464
1465 ret = ocfs2_journal_access(handle, inode, wc->w_di_bh,
1466 OCFS2_JOURNAL_ACCESS_WRITE);
1467 if (ret) {
1468 ocfs2_commit_trans(osb, handle);
1469
1470 mlog_errno(ret);
1471 goto out;
1472 }
1473
1474 if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
1475 ocfs2_set_inode_data_inline(inode, di);
1476
1477 if (!PageUptodate(page)) {
1478 ret = ocfs2_read_inline_data(inode, page, wc->w_di_bh);
1479 if (ret) {
1480 ocfs2_commit_trans(osb, handle);
1481
1482 goto out;
1483 }
1484 }
1485
1486 wc->w_handle = handle;
1487out:
1488 return ret;
1489}
1490
1491int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size)
1492{
1493 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1494
1495 if (new_size < le16_to_cpu(di->id2.i_data.id_count))
1496 return 1;
1497 return 0;
1498}
1499
1500static int ocfs2_try_to_write_inline_data(struct address_space *mapping,
1501 struct inode *inode, loff_t pos,
1502 unsigned len, struct page *mmap_page,
1503 struct ocfs2_write_ctxt *wc)
1504{
1505 int ret, written = 0;
1506 loff_t end = pos + len;
1507 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1508
1509 mlog(0, "Inode %llu, write of %u bytes at off %llu. features: 0x%x\n",
1510 (unsigned long long)oi->ip_blkno, len, (unsigned long long)pos,
1511 oi->ip_dyn_features);
1512
1513 /*
1514 * Handle inodes which already have inline data 1st.
1515 */
1516 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1517 if (mmap_page == NULL &&
1518 ocfs2_size_fits_inline_data(wc->w_di_bh, end))
1519 goto do_inline_write;
1520
1521 /*
1522 * The write won't fit - we have to give this inode an
1523 * inline extent list now.
1524 */
1525 ret = ocfs2_convert_inline_data_to_extents(inode, wc->w_di_bh);
1526 if (ret)
1527 mlog_errno(ret);
1528 goto out;
1529 }
1530
1531 /*
1532 * Check whether the inode can accept inline data.
1533 */
1534 if (oi->ip_clusters != 0 || i_size_read(inode) != 0)
1535 return 0;
1536
1537 /*
1538 * Check whether the write can fit.
1539 */
1540 if (mmap_page || end > ocfs2_max_inline_data(inode->i_sb))
1541 return 0;
1542
1543do_inline_write:
1544 ret = ocfs2_write_begin_inline(mapping, inode, wc);
1545 if (ret) {
1546 mlog_errno(ret);
1547 goto out;
1548 }
1549
1550 /*
1551 * This signals to the caller that the data can be written
1552 * inline.
1553 */
1554 written = 1;
1555out:
1556 return written ? written : ret;
1557}
1558
1559/*
1560 * This function only does anything for file systems which can't
1561 * handle sparse files.
1562 *
1563 * What we want to do here is fill in any hole between the current end
1564 * of allocation and the end of our write. That way the rest of the
1565 * write path can treat it as an non-allocating write, which has no
1566 * special case code for sparse/nonsparse files.
1567 */
1568static int ocfs2_expand_nonsparse_inode(struct inode *inode, loff_t pos,
1569 unsigned len,
1570 struct ocfs2_write_ctxt *wc)
1571{
1572 int ret;
1573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1574 loff_t newsize = pos + len;
1575
1576 if (ocfs2_sparse_alloc(osb))
1577 return 0;
1578
1579 if (newsize <= i_size_read(inode))
1580 return 0;
1581
1582 ret = ocfs2_extend_no_holes(inode, newsize, newsize - len);
1583 if (ret)
1584 mlog_errno(ret);
1585
1586 return ret;
1587}
1588
1363int ocfs2_write_begin_nolock(struct address_space *mapping, 1589int ocfs2_write_begin_nolock(struct address_space *mapping,
1364 loff_t pos, unsigned len, unsigned flags, 1590 loff_t pos, unsigned len, unsigned flags,
1365 struct page **pagep, void **fsdata, 1591 struct page **pagep, void **fsdata,
@@ -1381,6 +1607,25 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1381 return ret; 1607 return ret;
1382 } 1608 }
1383 1609
1610 if (ocfs2_supports_inline_data(osb)) {
1611 ret = ocfs2_try_to_write_inline_data(mapping, inode, pos, len,
1612 mmap_page, wc);
1613 if (ret == 1) {
1614 ret = 0;
1615 goto success;
1616 }
1617 if (ret < 0) {
1618 mlog_errno(ret);
1619 goto out;
1620 }
1621 }
1622
1623 ret = ocfs2_expand_nonsparse_inode(inode, pos, len, wc);
1624 if (ret) {
1625 mlog_errno(ret);
1626 goto out;
1627 }
1628
1384 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, 1629 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
1385 &extents_to_split); 1630 &extents_to_split);
1386 if (ret) { 1631 if (ret) {
@@ -1462,6 +1707,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1462 if (meta_ac) 1707 if (meta_ac)
1463 ocfs2_free_alloc_context(meta_ac); 1708 ocfs2_free_alloc_context(meta_ac);
1464 1709
1710success:
1465 *pagep = wc->w_target_page; 1711 *pagep = wc->w_target_page;
1466 *fsdata = wc; 1712 *fsdata = wc;
1467 return 0; 1713 return 0;
@@ -1529,6 +1775,31 @@ out_fail:
1529 return ret; 1775 return ret;
1530} 1776}
1531 1777
1778static void ocfs2_write_end_inline(struct inode *inode, loff_t pos,
1779 unsigned len, unsigned *copied,
1780 struct ocfs2_dinode *di,
1781 struct ocfs2_write_ctxt *wc)
1782{
1783 void *kaddr;
1784
1785 if (unlikely(*copied < len)) {
1786 if (!PageUptodate(wc->w_target_page)) {
1787 *copied = 0;
1788 return;
1789 }
1790 }
1791
1792 kaddr = kmap_atomic(wc->w_target_page, KM_USER0);
1793 memcpy(di->id2.i_data.id_data + pos, kaddr + pos, *copied);
1794 kunmap_atomic(kaddr, KM_USER0);
1795
1796 mlog(0, "Data written to inode at offset %llu. "
1797 "id_count = %u, copied = %u, i_dyn_features = 0x%x\n",
1798 (unsigned long long)pos, *copied,
1799 le16_to_cpu(di->id2.i_data.id_count),
1800 le16_to_cpu(di->i_dyn_features));
1801}
1802
1532int ocfs2_write_end_nolock(struct address_space *mapping, 1803int ocfs2_write_end_nolock(struct address_space *mapping,
1533 loff_t pos, unsigned len, unsigned copied, 1804 loff_t pos, unsigned len, unsigned copied,
1534 struct page *page, void *fsdata) 1805 struct page *page, void *fsdata)
@@ -1542,6 +1813,11 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1542 handle_t *handle = wc->w_handle; 1813 handle_t *handle = wc->w_handle;
1543 struct page *tmppage; 1814 struct page *tmppage;
1544 1815
1816 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1817 ocfs2_write_end_inline(inode, pos, len, &copied, di, wc);
1818 goto out_write_size;
1819 }
1820
1545 if (unlikely(copied < len)) { 1821 if (unlikely(copied < len)) {
1546 if (!PageUptodate(wc->w_target_page)) 1822 if (!PageUptodate(wc->w_target_page))
1547 copied = 0; 1823 copied = 0;
@@ -1579,6 +1855,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1579 block_commit_write(tmppage, from, to); 1855 block_commit_write(tmppage, from, to);
1580 } 1856 }
1581 1857
1858out_write_size:
1582 pos += copied; 1859 pos += copied;
1583 if (pos > inode->i_size) { 1860 if (pos > inode->i_size) {
1584 i_size_write(inode, pos); 1861 i_size_write(inode, pos);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 389579bd64e3..113560877dbb 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -34,6 +34,8 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
34 struct inode *inode, unsigned int from, 34 struct inode *inode, unsigned int from,
35 unsigned int to, int new); 35 unsigned int to, int new);
36 36
37void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages);
38
37int walk_page_buffers( handle_t *handle, 39int walk_page_buffers( handle_t *handle,
38 struct buffer_head *head, 40 struct buffer_head *head,
39 unsigned from, 41 unsigned from,
@@ -59,6 +61,10 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
59 struct page **pagep, void **fsdata, 61 struct page **pagep, void **fsdata,
60 struct buffer_head *di_bh, struct page *mmap_page); 62 struct buffer_head *di_bh, struct page *mmap_page);
61 63
64int ocfs2_read_inline_data(struct inode *inode, struct page *page,
65 struct buffer_head *di_bh);
66int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
67
62/* all ocfs2_dio_end_io()'s fault */ 68/* all ocfs2_dio_end_io()'s fault */
63#define ocfs2_iocb_is_rw_locked(iocb) \ 69#define ocfs2_iocb_is_rw_locked(iocb) \
64 test_bit(0, (unsigned long *)&iocb->private) 70 test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 0d5fdde959c8..7453b70c1a19 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -55,10 +55,16 @@
55#include "journal.h" 55#include "journal.h"
56#include "namei.h" 56#include "namei.h"
57#include "suballoc.h" 57#include "suballoc.h"
58#include "super.h"
58#include "uptodate.h" 59#include "uptodate.h"
59 60
60#include "buffer_head_io.h" 61#include "buffer_head_io.h"
61 62
63#define NAMEI_RA_CHUNKS 2
64#define NAMEI_RA_BLOCKS 4
65#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
66#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
67
62static unsigned char ocfs2_filetype_table[] = { 68static unsigned char ocfs2_filetype_table[] = {
63 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK 69 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
64}; 70};
@@ -66,12 +72,614 @@ static unsigned char ocfs2_filetype_table[] = {
66static int ocfs2_extend_dir(struct ocfs2_super *osb, 72static int ocfs2_extend_dir(struct ocfs2_super *osb,
67 struct inode *dir, 73 struct inode *dir,
68 struct buffer_head *parent_fe_bh, 74 struct buffer_head *parent_fe_bh,
75 unsigned int blocks_wanted,
69 struct buffer_head **new_de_bh); 76 struct buffer_head **new_de_bh);
77static int ocfs2_do_extend_dir(struct super_block *sb,
78 handle_t *handle,
79 struct inode *dir,
80 struct buffer_head *parent_fe_bh,
81 struct ocfs2_alloc_context *data_ac,
82 struct ocfs2_alloc_context *meta_ac,
83 struct buffer_head **new_bh);
84
70/* 85/*
71 * ocfs2_readdir() 86 * bh passed here can be an inode block or a dir data block, depending
87 * on the inode inline data flag.
88 */
89static int ocfs2_check_dir_entry(struct inode * dir,
90 struct ocfs2_dir_entry * de,
91 struct buffer_head * bh,
92 unsigned long offset)
93{
94 const char *error_msg = NULL;
95 const int rlen = le16_to_cpu(de->rec_len);
96
97 if (rlen < OCFS2_DIR_REC_LEN(1))
98 error_msg = "rec_len is smaller than minimal";
99 else if (rlen % 4 != 0)
100 error_msg = "rec_len % 4 != 0";
101 else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
102 error_msg = "rec_len is too small for name_len";
103 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
104 error_msg = "directory entry across blocks";
105
106 if (error_msg != NULL)
107 mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
108 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
109 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
110 offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
111 de->name_len);
112 return error_msg == NULL ? 1 : 0;
113}
114
115static inline int ocfs2_match(int len,
116 const char * const name,
117 struct ocfs2_dir_entry *de)
118{
119 if (len != de->name_len)
120 return 0;
121 if (!de->inode)
122 return 0;
123 return !memcmp(name, de->name, len);
124}
125
126/*
127 * Returns 0 if not found, -1 on failure, and 1 on success
128 */
129static int inline ocfs2_search_dirblock(struct buffer_head *bh,
130 struct inode *dir,
131 const char *name, int namelen,
132 unsigned long offset,
133 char *first_de,
134 unsigned int bytes,
135 struct ocfs2_dir_entry **res_dir)
136{
137 struct ocfs2_dir_entry *de;
138 char *dlimit, *de_buf;
139 int de_len;
140 int ret = 0;
141
142 mlog_entry_void();
143
144 de_buf = first_de;
145 dlimit = de_buf + bytes;
146
147 while (de_buf < dlimit) {
148 /* this code is executed quadratically often */
149 /* do minimal checking `by hand' */
150
151 de = (struct ocfs2_dir_entry *) de_buf;
152
153 if (de_buf + namelen <= dlimit &&
154 ocfs2_match(namelen, name, de)) {
155 /* found a match - just to be sure, do a full check */
156 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
157 ret = -1;
158 goto bail;
159 }
160 *res_dir = de;
161 ret = 1;
162 goto bail;
163 }
164
165 /* prevent looping on a bad block */
166 de_len = le16_to_cpu(de->rec_len);
167 if (de_len <= 0) {
168 ret = -1;
169 goto bail;
170 }
171
172 de_buf += de_len;
173 offset += de_len;
174 }
175
176bail:
177 mlog_exit(ret);
178 return ret;
179}
180
181static struct buffer_head *ocfs2_find_entry_id(const char *name,
182 int namelen,
183 struct inode *dir,
184 struct ocfs2_dir_entry **res_dir)
185{
186 int ret, found;
187 struct buffer_head *di_bh = NULL;
188 struct ocfs2_dinode *di;
189 struct ocfs2_inline_data *data;
190
191 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
192 &di_bh, OCFS2_BH_CACHED, dir);
193 if (ret) {
194 mlog_errno(ret);
195 goto out;
196 }
197
198 di = (struct ocfs2_dinode *)di_bh->b_data;
199 data = &di->id2.i_data;
200
201 found = ocfs2_search_dirblock(di_bh, dir, name, namelen, 0,
202 data->id_data, i_size_read(dir), res_dir);
203 if (found == 1)
204 return di_bh;
205
206 brelse(di_bh);
207out:
208 return NULL;
209}
210
/*
 * Look up 'name' in an extent-based directory by scanning its blocks.
 *
 * The scan starts at the block remembered in ip_dir_start_lookup,
 * wraps around at the end of the directory and stops once it is back
 * at the starting block. On a match the buffer_head containing the
 * entry is returned, *res_dir has been set by ocfs2_search_dirblock()
 * and the block number is stored as the next starting point.
 *
 * Returns NULL if the name was not found or a block contained a
 * corrupt entry.
 */
211struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
212 struct inode *dir,
213 struct ocfs2_dir_entry **res_dir)
214{
215 struct super_block *sb;
216 struct buffer_head *bh_use[NAMEI_RA_SIZE];
217 struct buffer_head *bh, *ret = NULL;
218 unsigned long start, block, b;
219 int ra_max = 0; /* Number of bh's in the readahead
220 buffer, bh_use[] */
221 int ra_ptr = 0; /* Current index into readahead
222 buffer */
223 int num = 0;
224 int nblocks, i, err;
225
226 mlog_entry_void();
227
228 sb = dir->i_sb;
229
230 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
231 start = OCFS2_I(dir)->ip_dir_start_lookup;
/* A stale start hint beyond the current size restarts at block 0. */
232 if (start >= nblocks)
233 start = 0;
234 block = start;
235
236restart:
237 do {
238 /*
239 * We deal with the read-ahead logic here.
240 */
241 if (ra_ptr >= ra_max) {
242 /* Refill the readahead buffer */
243 ra_ptr = 0;
244 b = block;
245 for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
246 /*
247 * Terminate if we reach the end of the
248 * directory and must wrap, or if our
249 * search has finished at this block.
250 */
251 if (b >= nblocks || (num && block == start)) {
252 bh_use[ra_max] = NULL;
253 break;
254 }
255 num++;
256
/* NOTE(review): the final argument presumably requests a readahead-style read - confirm against ocfs2_bread(). */
257 bh = ocfs2_bread(dir, b++, &err, 1);
258 bh_use[ra_max] = bh;
259 }
260 }
/* A NULL slot means there is no buffer for this block; move on. */
261 if ((bh = bh_use[ra_ptr++]) == NULL)
262 goto next;
263 wait_on_buffer(bh);
264 if (!buffer_uptodate(bh)) {
265 /* read error, skip block & hope for the best */
266 ocfs2_error(dir->i_sb, "reading directory %llu, "
267 "offset %lu\n",
268 (unsigned long long)OCFS2_I(dir)->ip_blkno,
269 block);
270 brelse(bh);
271 goto next;
272 }
273 i = ocfs2_search_dirblock(bh, dir, name, namelen,
274 block << sb->s_blocksize_bits,
275 bh->b_data, sb->s_blocksize,
276 res_dir);
277 if (i == 1) {
/* Found: remember this block as the next lookup's starting point. */
278 OCFS2_I(dir)->ip_dir_start_lookup = block;
279 ret = bh;
280 goto cleanup_and_exit;
281 } else {
282 brelse(bh);
/* Negative result: the block held a corrupt entry, so give up. */
283 if (i < 0)
284 goto cleanup_and_exit;
285 }
286 next:
287 if (++block >= nblocks)
288 block = 0;
289 } while (block != start);
290
291 /*
292 * If the directory has grown while we were searching, then
293 * search the last part of the directory before giving up.
294 */
295 block = nblocks;
296 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
297 if (block < nblocks) {
298 start = 0;
299 goto restart;
300 }
301
302cleanup_and_exit:
303 /* Clean up the read-ahead blocks */
304 for (; ra_ptr < ra_max; ra_ptr++)
305 brelse(bh_use[ra_ptr]);
306
307 mlog_exit_ptr(ret);
308 return ret;
309}
310
311/*
312 * Try to find an entry of the provided name within 'dir'.
72 * 313 *
314 * If nothing was found, NULL is returned. Otherwise, a buffer_head
315 * and pointer to the dir entry are passed back.
316 *
317 * Caller can NOT assume anything about the contents of the
318 * buffer_head - it is passed back only so that it can be passed into
319 * any one of the manipulation functions (add entry, delete entry,
320 * etc). As an example, bh in the extent directory case is a data
321 * block, in the inline-data case it actually points to an inode.
73 */ 322 */
74int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) 323struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
324 struct inode *dir,
325 struct ocfs2_dir_entry **res_dir)
326{
327 *res_dir = NULL;
328
329 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
330 return ocfs2_find_entry_id(name, namelen, dir, res_dir);
331
332 return ocfs2_find_entry_el(name, namelen, dir, res_dir);
333}
334
335/*
336 * Update inode number and type of a previously found directory entry.
337 */
338int ocfs2_update_entry(struct inode *dir, handle_t *handle,
339 struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
340 struct inode *new_entry_inode)
341{
342 int ret;
343
344 /*
345 * The same code works fine for both inline-data and extent
346 * based directories, so no need to split this up.
347 */
348
349 ret = ocfs2_journal_access(handle, dir, de_bh,
350 OCFS2_JOURNAL_ACCESS_WRITE);
351 if (ret) {
352 mlog_errno(ret);
353 goto out;
354 }
355
356 de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
357 ocfs2_set_de_type(de, new_entry_inode->i_mode);
358
359 ocfs2_journal_dirty(handle, de_bh);
360
361out:
362 return ret;
363}
364
365static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
366 struct ocfs2_dir_entry *de_del,
367 struct buffer_head *bh, char *first_de,
368 unsigned int bytes)
369{
370 struct ocfs2_dir_entry *de, *pde;
371 int i, status = -ENOENT;
372
373 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
374
375 i = 0;
376 pde = NULL;
377 de = (struct ocfs2_dir_entry *) first_de;
378 while (i < bytes) {
379 if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
380 status = -EIO;
381 mlog_errno(status);
382 goto bail;
383 }
384 if (de == de_del) {
385 status = ocfs2_journal_access(handle, dir, bh,
386 OCFS2_JOURNAL_ACCESS_WRITE);
387 if (status < 0) {
388 status = -EIO;
389 mlog_errno(status);
390 goto bail;
391 }
392 if (pde)
393 pde->rec_len =
394 cpu_to_le16(le16_to_cpu(pde->rec_len) +
395 le16_to_cpu(de->rec_len));
396 else
397 de->inode = 0;
398 dir->i_version++;
399 status = ocfs2_journal_dirty(handle, bh);
400 goto bail;
401 }
402 i += le16_to_cpu(de->rec_len);
403 pde = de;
404 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
405 }
406bail:
407 mlog_exit(status);
408 return status;
409}
410
411static inline int ocfs2_delete_entry_id(handle_t *handle,
412 struct inode *dir,
413 struct ocfs2_dir_entry *de_del,
414 struct buffer_head *bh)
415{
416 int ret;
417 struct buffer_head *di_bh = NULL;
418 struct ocfs2_dinode *di;
419 struct ocfs2_inline_data *data;
420
421 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno,
422 &di_bh, OCFS2_BH_CACHED, dir);
423 if (ret) {
424 mlog_errno(ret);
425 goto out;
426 }
427
428 di = (struct ocfs2_dinode *)di_bh->b_data;
429 data = &di->id2.i_data;
430
431 ret = __ocfs2_delete_entry(handle, dir, de_del, bh, data->id_data,
432 i_size_read(dir));
433
434 brelse(di_bh);
435out:
436 return ret;
437}
438
439static inline int ocfs2_delete_entry_el(handle_t *handle,
440 struct inode *dir,
441 struct ocfs2_dir_entry *de_del,
442 struct buffer_head *bh)
443{
444 return __ocfs2_delete_entry(handle, dir, de_del, bh, bh->b_data,
445 bh->b_size);
446}
447
448/*
449 * ocfs2_delete_entry deletes a directory entry by merging it with the
450 * previous entry
451 */
452int ocfs2_delete_entry(handle_t *handle,
453 struct inode *dir,
454 struct ocfs2_dir_entry *de_del,
455 struct buffer_head *bh)
456{
457 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
458 return ocfs2_delete_entry_id(handle, dir, de_del, bh);
459
460 return ocfs2_delete_entry_el(handle, dir, de_del, bh);
461}
462
463/*
464 * Check whether 'de' has enough room to hold an entry of
465 * 'new_rec_len' bytes.
466 */
467static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
468 unsigned int new_rec_len)
469{
470 unsigned int de_really_used;
471
472 /* Check whether this is an empty record with enough space */
473 if (le64_to_cpu(de->inode) == 0 &&
474 le16_to_cpu(de->rec_len) >= new_rec_len)
475 return 1;
476
477 /*
478 * Record might have free space at the end which we can
479 * use.
480 */
481 de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
482 if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
483 return 1;
484
485 return 0;
486}
487
488/* we don't always have a dentry for what we want to add, so people
489 * like orphan dir can call this instead.
490 *
491 * If you pass me insert_bh, I'll skip the search of the other dir
492 * blocks and put the record in there.
493 */
494int __ocfs2_add_entry(handle_t *handle,
495 struct inode *dir,
496 const char *name, int namelen,
497 struct inode *inode, u64 blkno,
498 struct buffer_head *parent_fe_bh,
499 struct buffer_head *insert_bh)
500{
501 unsigned long offset;
502 unsigned short rec_len;
503 struct ocfs2_dir_entry *de, *de1;
504 struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
505 struct super_block *sb = dir->i_sb;
506 int retval, status;
507 unsigned int size = sb->s_blocksize;
508 char *data_start = insert_bh->b_data;
509
510 mlog_entry_void();
511
512 if (!namelen)
513 return -EINVAL;
514
515 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
516 data_start = di->id2.i_data.id_data;
517 size = i_size_read(dir);
518
519 BUG_ON(insert_bh != parent_fe_bh);
520 }
521
522 rec_len = OCFS2_DIR_REC_LEN(namelen);
523 offset = 0;
524 de = (struct ocfs2_dir_entry *) data_start;
525 while (1) {
526 BUG_ON((char *)de >= (size + data_start));
527
528 /* These checks should've already been passed by the
529 * prepare function, but I guess we can leave them
530 * here anyway. */
531 if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
532 retval = -ENOENT;
533 goto bail;
534 }
535 if (ocfs2_match(namelen, name, de)) {
536 retval = -EEXIST;
537 goto bail;
538 }
539
540 if (ocfs2_dirent_would_fit(de, rec_len)) {
541 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
542 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
543 if (retval < 0) {
544 mlog_errno(retval);
545 goto bail;
546 }
547
548 status = ocfs2_journal_access(handle, dir, insert_bh,
549 OCFS2_JOURNAL_ACCESS_WRITE);
550 /* By now the buffer is marked for journaling */
551 offset += le16_to_cpu(de->rec_len);
552 if (le64_to_cpu(de->inode)) {
553 de1 = (struct ocfs2_dir_entry *)((char *) de +
554 OCFS2_DIR_REC_LEN(de->name_len));
555 de1->rec_len =
556 cpu_to_le16(le16_to_cpu(de->rec_len) -
557 OCFS2_DIR_REC_LEN(de->name_len));
558 de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
559 de = de1;
560 }
561 de->file_type = OCFS2_FT_UNKNOWN;
562 if (blkno) {
563 de->inode = cpu_to_le64(blkno);
564 ocfs2_set_de_type(de, inode->i_mode);
565 } else
566 de->inode = 0;
567 de->name_len = namelen;
568 memcpy(de->name, name, namelen);
569
570 dir->i_version++;
571 status = ocfs2_journal_dirty(handle, insert_bh);
572 retval = 0;
573 goto bail;
574 }
575 offset += le16_to_cpu(de->rec_len);
576 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
577 }
578
579 /* when you think about it, the assert above should prevent us
580 * from ever getting here. */
581 retval = -ENOSPC;
582bail:
583
584 mlog_exit(retval);
585 return retval;
586}
587
/*
 * Walk the entries of an inline-data directory, starting at *f_pos,
 * and pass every in-use entry to 'filldir'.
 *
 * *f_pos is advanced past each entry that has been handled. If
 * *f_version no longer matches inode->i_version, the position is first
 * re-derived by walking record lengths from the start of the inline
 * data. A non-zero filldir result stops the walk and, when
 * 'filldir_err' is non-NULL, is stored there. A corrupt entry moves
 * *f_pos to the end of the directory.
 *
 * Always returns 0; a failure to read the inode block is only logged.
 */
588static int ocfs2_dir_foreach_blk_id(struct inode *inode,
589 unsigned long *f_version,
590 loff_t *f_pos, void *priv,
591 filldir_t filldir, int *filldir_err)
592{
593 int ret, i, filldir_ret;
594 unsigned long offset = *f_pos;
595 struct buffer_head *di_bh = NULL;
596 struct ocfs2_dinode *di;
597 struct ocfs2_inline_data *data;
598 struct ocfs2_dir_entry *de;
599
600 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
601 &di_bh, OCFS2_BH_CACHED, inode);
602 if (ret) {
603 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
604 (unsigned long long)OCFS2_I(inode)->ip_blkno);
605 goto out;
606 }
607
608 di = (struct ocfs2_dinode *)di_bh->b_data;
609 data = &di->id2.i_data;
610
611 while (*f_pos < i_size_read(inode)) {
612revalidate:
613 /* If the dir block has changed since the last call to
614 * readdir(2), then we might be pointing to an invalid
615 * dirent right now. Scan from the start of the block
616 * to make sure. */
617 if (*f_version != inode->i_version) {
618 for (i = 0; i < i_size_read(inode) && i < offset; ) {
619 de = (struct ocfs2_dir_entry *)
620 (data->id_data + i);
621 /* It's too expensive to do a full
622 * dirent test each time round this
623 * loop, but we do have to test at
624 * least that it is non-zero. A
625 * failure will be detected in the
626 * dirent test below. */
627 if (le16_to_cpu(de->rec_len) <
628 OCFS2_DIR_REC_LEN(1))
629 break;
630 i += le16_to_cpu(de->rec_len);
631 }
/* Resume from the entry boundary the rescan stopped at. */
632 *f_pos = offset = i;
633 *f_version = inode->i_version;
634 }
635
636 de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos);
637 if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) {
638 /* On error, skip the f_pos to the end. */
639 *f_pos = i_size_read(inode);
640 goto out;
641 }
642 offset += le16_to_cpu(de->rec_len);
/* Entries with inode number 0 are unused and are not reported. */
643 if (le64_to_cpu(de->inode)) {
644 /* We might block in the next section
645 * if the data destination is
646 * currently swapped out. So, use a
647 * version stamp to detect whether or
648 * not the directory has been modified
649 * during the copy operation.
650 */
651 unsigned long version = *f_version;
652 unsigned char d_type = DT_UNKNOWN;
653
654 if (de->file_type < OCFS2_FT_MAX)
655 d_type = ocfs2_filetype_table[de->file_type];
656
657 filldir_ret = filldir(priv, de->name,
658 de->name_len,
659 *f_pos,
660 le64_to_cpu(de->inode),
661 d_type);
662 if (filldir_ret) {
663 if (filldir_err)
664 *filldir_err = filldir_ret;
665 break;
666 }
667 if (version != *f_version)
668 goto revalidate;
669 }
670 *f_pos += le16_to_cpu(de->rec_len);
671 }
672
673out:
674 brelse(di_bh);
675
676 return 0;
677}
678
679static int ocfs2_dir_foreach_blk_el(struct inode *inode,
680 unsigned long *f_version,
681 loff_t *f_pos, void *priv,
682 filldir_t filldir, int *filldir_err)
75{ 683{
76 int error = 0; 684 int error = 0;
77 unsigned long offset, blk, last_ra_blk = 0; 685 unsigned long offset, blk, last_ra_blk = 0;
@@ -79,45 +687,23 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
79 struct buffer_head * bh, * tmp; 687 struct buffer_head * bh, * tmp;
80 struct ocfs2_dir_entry * de; 688 struct ocfs2_dir_entry * de;
81 int err; 689 int err;
82 struct inode *inode = filp->f_path.dentry->d_inode;
83 struct super_block * sb = inode->i_sb; 690 struct super_block * sb = inode->i_sb;
84 unsigned int ra_sectors = 16; 691 unsigned int ra_sectors = 16;
85 int lock_level = 0;
86
87 mlog_entry("dirino=%llu\n",
88 (unsigned long long)OCFS2_I(inode)->ip_blkno);
89 692
90 stored = 0; 693 stored = 0;
91 bh = NULL; 694 bh = NULL;
92 695
93 error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level); 696 offset = (*f_pos) & (sb->s_blocksize - 1);
94 if (lock_level && error >= 0) {
95 /* We release EX lock which used to update atime
96 * and get PR lock again to reduce contention
97 * on commonly accessed directories. */
98 ocfs2_meta_unlock(inode, 1);
99 lock_level = 0;
100 error = ocfs2_meta_lock(inode, NULL, 0);
101 }
102 if (error < 0) {
103 if (error != -ENOENT)
104 mlog_errno(error);
105 /* we haven't got any yet, so propagate the error. */
106 stored = error;
107 goto bail_nolock;
108 }
109 697
110 offset = filp->f_pos & (sb->s_blocksize - 1); 698 while (!error && !stored && *f_pos < i_size_read(inode)) {
111 699 blk = (*f_pos) >> sb->s_blocksize_bits;
112 while (!error && !stored && filp->f_pos < i_size_read(inode)) {
113 blk = (filp->f_pos) >> sb->s_blocksize_bits;
114 bh = ocfs2_bread(inode, blk, &err, 0); 700 bh = ocfs2_bread(inode, blk, &err, 0);
115 if (!bh) { 701 if (!bh) {
116 mlog(ML_ERROR, 702 mlog(ML_ERROR,
117 "directory #%llu contains a hole at offset %lld\n", 703 "directory #%llu contains a hole at offset %lld\n",
118 (unsigned long long)OCFS2_I(inode)->ip_blkno, 704 (unsigned long long)OCFS2_I(inode)->ip_blkno,
119 filp->f_pos); 705 *f_pos);
120 filp->f_pos += sb->s_blocksize - offset; 706 *f_pos += sb->s_blocksize - offset;
121 continue; 707 continue;
122 } 708 }
123 709
@@ -143,7 +729,7 @@ revalidate:
143 * readdir(2), then we might be pointing to an invalid 729 * readdir(2), then we might be pointing to an invalid
144 * dirent right now. Scan from the start of the block 730 * dirent right now. Scan from the start of the block
145 * to make sure. */ 731 * to make sure. */
146 if (filp->f_version != inode->i_version) { 732 if (*f_version != inode->i_version) {
147 for (i = 0; i < sb->s_blocksize && i < offset; ) { 733 for (i = 0; i < sb->s_blocksize && i < offset; ) {
148 de = (struct ocfs2_dir_entry *) (bh->b_data + i); 734 de = (struct ocfs2_dir_entry *) (bh->b_data + i);
149 /* It's too expensive to do a full 735 /* It's too expensive to do a full
@@ -158,21 +744,20 @@ revalidate:
158 i += le16_to_cpu(de->rec_len); 744 i += le16_to_cpu(de->rec_len);
159 } 745 }
160 offset = i; 746 offset = i;
161 filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) 747 *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1))
162 | offset; 748 | offset;
163 filp->f_version = inode->i_version; 749 *f_version = inode->i_version;
164 } 750 }
165 751
166 while (!error && filp->f_pos < i_size_read(inode) 752 while (!error && *f_pos < i_size_read(inode)
167 && offset < sb->s_blocksize) { 753 && offset < sb->s_blocksize) {
168 de = (struct ocfs2_dir_entry *) (bh->b_data + offset); 754 de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
169 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { 755 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
170 /* On error, skip the f_pos to the 756 /* On error, skip the f_pos to the
171 next block. */ 757 next block. */
172 filp->f_pos = (filp->f_pos | 758 *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1;
173 (sb->s_blocksize - 1)) + 1;
174 brelse(bh); 759 brelse(bh);
175 goto bail; 760 goto out;
176 } 761 }
177 offset += le16_to_cpu(de->rec_len); 762 offset += le16_to_cpu(de->rec_len);
178 if (le64_to_cpu(de->inode)) { 763 if (le64_to_cpu(de->inode)) {
@@ -183,36 +768,109 @@ revalidate:
183 * not the directory has been modified 768 * not the directory has been modified
184 * during the copy operation. 769 * during the copy operation.
185 */ 770 */
186 unsigned long version = filp->f_version; 771 unsigned long version = *f_version;
187 unsigned char d_type = DT_UNKNOWN; 772 unsigned char d_type = DT_UNKNOWN;
188 773
189 if (de->file_type < OCFS2_FT_MAX) 774 if (de->file_type < OCFS2_FT_MAX)
190 d_type = ocfs2_filetype_table[de->file_type]; 775 d_type = ocfs2_filetype_table[de->file_type];
191 error = filldir(dirent, de->name, 776 error = filldir(priv, de->name,
192 de->name_len, 777 de->name_len,
193 filp->f_pos, 778 *f_pos,
194 ino_from_blkno(sb, le64_to_cpu(de->inode)), 779 le64_to_cpu(de->inode),
195 d_type); 780 d_type);
196 if (error) 781 if (error) {
782 if (filldir_err)
783 *filldir_err = error;
197 break; 784 break;
198 if (version != filp->f_version) 785 }
786 if (version != *f_version)
199 goto revalidate; 787 goto revalidate;
200 stored ++; 788 stored ++;
201 } 789 }
202 filp->f_pos += le16_to_cpu(de->rec_len); 790 *f_pos += le16_to_cpu(de->rec_len);
203 } 791 }
204 offset = 0; 792 offset = 0;
205 brelse(bh); 793 brelse(bh);
206 } 794 }
207 795
208 stored = 0; 796 stored = 0;
209bail: 797out:
798 return stored;
799}
800
801static int ocfs2_dir_foreach_blk(struct inode *inode, unsigned long *f_version,
802 loff_t *f_pos, void *priv, filldir_t filldir,
803 int *filldir_err)
804{
805 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
806 return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv,
807 filldir, filldir_err);
808
809 return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir,
810 filldir_err);
811}
812
813/*
814 * This is intended to be called from inside other kernel functions,
815 * so we fake some arguments.
816 */
817int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
818 filldir_t filldir)
819{
820 int ret = 0, filldir_err = 0;
821 unsigned long version = inode->i_version;
822
823 while (*f_pos < i_size_read(inode)) {
824 ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv,
825 filldir, &filldir_err);
826 if (ret || filldir_err)
827 break;
828 }
829
830 if (ret > 0)
831 ret = -EIO;
832
833 return 0;
834}
835
836/*
837 * ocfs2_readdir()
838 *
839 */
840int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir)
841{
842 int error = 0;
843 struct inode *inode = filp->f_path.dentry->d_inode;
844 int lock_level = 0;
845
846 mlog_entry("dirino=%llu\n",
847 (unsigned long long)OCFS2_I(inode)->ip_blkno);
848
849 error = ocfs2_meta_lock_atime(inode, filp->f_vfsmnt, &lock_level);
850 if (lock_level && error >= 0) {
851 /* We release EX lock which used to update atime
852 * and get PR lock again to reduce contention
853 * on commonly accessed directories. */
854 ocfs2_meta_unlock(inode, 1);
855 lock_level = 0;
856 error = ocfs2_meta_lock(inode, NULL, 0);
857 }
858 if (error < 0) {
859 if (error != -ENOENT)
860 mlog_errno(error);
861 /* we haven't got any yet, so propagate the error. */
862 goto bail_nolock;
863 }
864
865 error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos,
866 dirent, filldir, NULL);
867
210 ocfs2_meta_unlock(inode, lock_level); 868 ocfs2_meta_unlock(inode, lock_level);
211 869
212bail_nolock: 870bail_nolock:
213 mlog_exit(stored); 871 mlog_exit(error);
214 872
215 return stored; 873 return error;
216} 874}
217 875
218/* 876/*
@@ -252,6 +910,23 @@ leave:
252 return status; 910 return status;
253} 911}
254 912
913/*
914 * Convenience function for callers which just want the block number
915 * mapped to a name and don't require the full dirent info, etc.
916 */
917int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
918 int namelen, u64 *blkno)
919{
920 int ret;
921 struct buffer_head *bh = NULL;
922 struct ocfs2_dir_entry *dirent = NULL;
923
924 ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &bh, &dirent);
925 brelse(bh);
926
927 return ret;
928}
929
255/* Check for a name within a directory. 930/* Check for a name within a directory.
256 * 931 *
257 * Return 0 if the name does not exist 932 * Return 0 if the name does not exist
@@ -284,77 +959,414 @@ bail:
284 return ret; 959 return ret;
285} 960}
286 961
962struct ocfs2_empty_dir_priv {
963 unsigned seen_dot;
964 unsigned seen_dot_dot;
965 unsigned seen_other;
966};
967static int ocfs2_empty_dir_filldir(void *priv, const char *name, int name_len,
968 loff_t pos, u64 ino, unsigned type)
969{
970 struct ocfs2_empty_dir_priv *p = priv;
971
972 /*
973 * Check the positions of "." and ".." records to be sure
974 * they're in the correct place.
975 */
976 if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
977 p->seen_dot = 1;
978 return 0;
979 }
980
981 if (name_len == 2 && !strncmp("..", name, 2) &&
982 pos == OCFS2_DIR_REC_LEN(1)) {
983 p->seen_dot_dot = 1;
984 return 0;
985 }
986
987 p->seen_other = 1;
988 return 1;
989}
287/* 990/*
288 * routine to check that the specified directory is empty (for rmdir) 991 * routine to check that the specified directory is empty (for rmdir)
992 *
993 * Returns 1 if dir is empty, zero otherwise.
289 */ 994 */
290int ocfs2_empty_dir(struct inode *inode) 995int ocfs2_empty_dir(struct inode *inode)
291{ 996{
292 unsigned long offset; 997 int ret;
293 struct buffer_head * bh; 998 loff_t start = 0;
294 struct ocfs2_dir_entry * de, * de1; 999 struct ocfs2_empty_dir_priv priv;
295 struct super_block * sb; 1000
296 int err; 1001 memset(&priv, 0, sizeof(priv));
1002
1003 ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir);
1004 if (ret)
1005 mlog_errno(ret);
297 1006
298 sb = inode->i_sb; 1007 if (!priv.seen_dot || !priv.seen_dot_dot) {
299 if ((i_size_read(inode) < 1008 mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
300 (OCFS2_DIR_REC_LEN(1) + OCFS2_DIR_REC_LEN(2))) ||
301 !(bh = ocfs2_bread(inode, 0, &err, 0))) {
302 mlog(ML_ERROR, "bad directory (dir #%llu) - no data block\n",
303 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1009 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1010 /*
1011 * XXX: Is it really safe to allow an unlink to continue?
1012 */
304 return 1; 1013 return 1;
305 } 1014 }
306 1015
307 de = (struct ocfs2_dir_entry *) bh->b_data; 1016 return !priv.seen_other;
308 de1 = (struct ocfs2_dir_entry *) 1017}
309 ((char *)de + le16_to_cpu(de->rec_len)); 1018
310 if ((le64_to_cpu(de->inode) != OCFS2_I(inode)->ip_blkno) || 1019static void ocfs2_fill_initial_dirents(struct inode *inode,
311 !le64_to_cpu(de1->inode) || 1020 struct inode *parent,
312 strcmp(".", de->name) || 1021 char *start, unsigned int size)
313 strcmp("..", de1->name)) { 1022{
314 mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n", 1023 struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
315 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1024
316 brelse(bh); 1025 de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
317 return 1; 1026 de->name_len = 1;
1027 de->rec_len =
1028 cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1029 strcpy(de->name, ".");
1030 ocfs2_set_de_type(de, S_IFDIR);
1031
1032 de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
1033 de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
1034 de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
1035 de->name_len = 2;
1036 strcpy(de->name, "..");
1037 ocfs2_set_de_type(de, S_IFDIR);
1038}
1039
1040/*
1041 * This works together with code in ocfs2_mknod_locked() which sets
1042 * the inline-data flag and initializes the inline-data section.
1043 */
1044static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
1045 handle_t *handle,
1046 struct inode *parent,
1047 struct inode *inode,
1048 struct buffer_head *di_bh)
1049{
1050 int ret;
1051 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1052 struct ocfs2_inline_data *data = &di->id2.i_data;
1053 unsigned int size = le16_to_cpu(data->id_count);
1054
1055 ret = ocfs2_journal_access(handle, inode, di_bh,
1056 OCFS2_JOURNAL_ACCESS_WRITE);
1057 if (ret) {
1058 mlog_errno(ret);
1059 goto out;
318 } 1060 }
319 offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); 1061
320 de = (struct ocfs2_dir_entry *)((char *)de1 + le16_to_cpu(de1->rec_len)); 1062 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
321 while (offset < i_size_read(inode) ) { 1063
322 if (!bh || (void *)de >= (void *)(bh->b_data + sb->s_blocksize)) { 1064 ocfs2_journal_dirty(handle, di_bh);
323 brelse(bh); 1065 if (ret) {
324 bh = ocfs2_bread(inode, 1066 mlog_errno(ret);
325 offset >> sb->s_blocksize_bits, &err, 0); 1067 goto out;
326 if (!bh) { 1068 }
327 mlog(ML_ERROR, "dir %llu has a hole at %lu\n", 1069
328 (unsigned long long)OCFS2_I(inode)->ip_blkno, offset); 1070 i_size_write(inode, size);
329 offset += sb->s_blocksize; 1071 inode->i_nlink = 2;
330 continue; 1072 inode->i_blocks = ocfs2_inode_sector_count(inode);
331 } 1073
332 de = (struct ocfs2_dir_entry *) bh->b_data; 1074 ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
333 } 1075 if (ret < 0)
334 if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { 1076 mlog_errno(ret);
335 brelse(bh); 1077
336 return 1; 1078out:
1079 return ret;
1080}
1081
1082static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1083 handle_t *handle,
1084 struct inode *parent,
1085 struct inode *inode,
1086 struct buffer_head *fe_bh,
1087 struct ocfs2_alloc_context *data_ac)
1088{
1089 int status;
1090 struct buffer_head *new_bh = NULL;
1091
1092 mlog_entry_void();
1093
1094 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
1095 data_ac, NULL, &new_bh);
1096 if (status < 0) {
1097 mlog_errno(status);
1098 goto bail;
1099 }
1100
1101 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1102
1103 status = ocfs2_journal_access(handle, inode, new_bh,
1104 OCFS2_JOURNAL_ACCESS_CREATE);
1105 if (status < 0) {
1106 mlog_errno(status);
1107 goto bail;
1108 }
1109 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
1110
1111 ocfs2_fill_initial_dirents(inode, parent, new_bh->b_data,
1112 osb->sb->s_blocksize);
1113
1114 status = ocfs2_journal_dirty(handle, new_bh);
1115 if (status < 0) {
1116 mlog_errno(status);
1117 goto bail;
1118 }
1119
1120 i_size_write(inode, inode->i_sb->s_blocksize);
1121 inode->i_nlink = 2;
1122 inode->i_blocks = ocfs2_inode_sector_count(inode);
1123 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
1124 if (status < 0) {
1125 mlog_errno(status);
1126 goto bail;
1127 }
1128
1129 status = 0;
1130bail:
1131 if (new_bh)
1132 brelse(new_bh);
1133
1134 mlog_exit(status);
1135 return status;
1136}
1137
1138int ocfs2_fill_new_dir(struct ocfs2_super *osb,
1139 handle_t *handle,
1140 struct inode *parent,
1141 struct inode *inode,
1142 struct buffer_head *fe_bh,
1143 struct ocfs2_alloc_context *data_ac)
1144{
1145 BUG_ON(!ocfs2_supports_inline_data(osb) && data_ac == NULL);
1146
1147 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1148 return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);
1149
1150 return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
1151 data_ac);
1152}
1153
1154static void ocfs2_expand_last_dirent(char *start, unsigned int old_size,
1155 unsigned int new_size)
1156{
1157 struct ocfs2_dir_entry *de;
1158 struct ocfs2_dir_entry *prev_de;
1159 char *de_buf, *limit;
1160 unsigned int bytes = new_size - old_size;
1161
1162 limit = start + old_size;
1163 de_buf = start;
1164 de = (struct ocfs2_dir_entry *)de_buf;
1165 do {
1166 prev_de = de;
1167 de_buf += le16_to_cpu(de->rec_len);
1168 de = (struct ocfs2_dir_entry *)de_buf;
1169 } while (de_buf < limit);
1170
1171 le16_add_cpu(&prev_de->rec_len, bytes);
1172}
1173
1174/*
1175 * We allocate enough clusters to fulfill "blocks_wanted", but set
1176 * i_size to exactly one block. Ocfs2_extend_dir() will handle the
1177 * rest automatically for us.
1178 *
1179 * *first_block_bh is a pointer to the 1st data block allocated to the
1180 * directory.
1181 */
1182static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1183 unsigned int blocks_wanted,
1184 struct buffer_head **first_block_bh)
1185{
1186 int ret, credits = OCFS2_INLINE_TO_EXTENTS_CREDITS;
1187 u32 alloc, bit_off, len;
1188 struct super_block *sb = dir->i_sb;
1189 u64 blkno, bytes = blocks_wanted << sb->s_blocksize_bits;
1190 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
1191 struct ocfs2_inode_info *oi = OCFS2_I(dir);
1192 struct ocfs2_alloc_context *data_ac;
1193 struct buffer_head *dirdata_bh = NULL;
1194 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1195 handle_t *handle;
1196
1197 alloc = ocfs2_clusters_for_bytes(sb, bytes);
1198
1199 /*
1200 * We should never need more than 2 clusters for this -
1201 * maximum dirent size is far less than one block. In fact,
1202 * the only time we'd need more than one cluster is if
1203 * blocksize == clustersize and the dirent won't fit in the
1204 * extra space that the expansion to a single block gives. As
1205 * of today, that only happens on 4k/4k file systems.
1206 */
1207 BUG_ON(alloc > 2);
1208
1209 ret = ocfs2_reserve_clusters(osb, alloc, &data_ac);
1210 if (ret) {
1211 mlog_errno(ret);
1212 goto out;
1213 }
1214
1215 down_write(&oi->ip_alloc_sem);
1216
1217 /*
1218 * Prepare for worst case allocation scenario of two seperate
1219 * extents.
1220 */
1221 if (alloc == 2)
1222 credits += OCFS2_SUBALLOC_ALLOC;
1223
1224 handle = ocfs2_start_trans(osb, credits);
1225 if (IS_ERR(handle)) {
1226 ret = PTR_ERR(handle);
1227 mlog_errno(ret);
1228 goto out_sem;
1229 }
1230
1231 /*
1232 * Try to claim as many clusters as the bitmap can give though
1233 * if we only get one now, that's enough to continue. The rest
1234 * will be claimed after the conversion to extents.
1235 */
1236 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
1237 if (ret) {
1238 mlog_errno(ret);
1239 goto out_commit;
1240 }
1241
1242 /*
1243 * Operations are carefully ordered so that we set up the new
1244 * data block first. The conversion from inline data to
1245 * extents follows.
1246 */
1247 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
1248 dirdata_bh = sb_getblk(sb, blkno);
1249 if (!dirdata_bh) {
1250 ret = -EIO;
1251 mlog_errno(ret);
1252 goto out_commit;
1253 }
1254
1255 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh);
1256
1257 ret = ocfs2_journal_access(handle, dir, dirdata_bh,
1258 OCFS2_JOURNAL_ACCESS_CREATE);
1259 if (ret) {
1260 mlog_errno(ret);
1261 goto out_commit;
1262 }
1263
1264 memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
1265 memset(dirdata_bh->b_data + i_size_read(dir), 0,
1266 sb->s_blocksize - i_size_read(dir));
1267 ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir),
1268 sb->s_blocksize);
1269
1270 ret = ocfs2_journal_dirty(handle, dirdata_bh);
1271 if (ret) {
1272 mlog_errno(ret);
1273 goto out_commit;
1274 }
1275
1276 /*
1277 * Set extent, i_size, etc on the directory. After this, the
1278 * inode should contain the same exact dirents as before and
1279 * be fully accessible from system calls.
1280 *
1281 * We let the later dirent insert modify c/mtime - to the user
1282 * the data hasn't changed.
1283 */
1284 ret = ocfs2_journal_access(handle, dir, di_bh,
1285 OCFS2_JOURNAL_ACCESS_CREATE);
1286 if (ret) {
1287 mlog_errno(ret);
1288 goto out_commit;
1289 }
1290
1291 spin_lock(&oi->ip_lock);
1292 oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
1293 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1294 spin_unlock(&oi->ip_lock);
1295
1296 ocfs2_dinode_new_extent_list(dir, di);
1297
1298 i_size_write(dir, sb->s_blocksize);
1299 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1300
1301 di->i_size = cpu_to_le64(sb->s_blocksize);
1302 di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec);
1303 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec);
1304 dir->i_blocks = ocfs2_inode_sector_count(dir);
1305
1306 /*
1307 * This should never fail as our extent list is empty and all
1308 * related blocks have been journaled already.
1309 */
1310 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0,
1311 NULL);
1312 if (ret) {
1313 mlog_errno(ret);
1314 goto out;
1315 }
1316
1317 ret = ocfs2_journal_dirty(handle, di_bh);
1318 if (ret) {
1319 mlog_errno(ret);
1320 goto out_commit;
1321 }
1322
1323 /*
1324 * We asked for two clusters, but only got one in the 1st
1325 * pass. Claim the 2nd cluster as a separate extent.
1326 */
1327 if (alloc > len) {
1328 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off,
1329 &len);
1330 if (ret) {
1331 mlog_errno(ret);
1332 goto out_commit;
337 } 1333 }
338 if (le64_to_cpu(de->inode)) { 1334 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
339 brelse(bh); 1335
340 return 0; 1336 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno,
1337 len, 0, NULL);
1338 if (ret) {
1339 mlog_errno(ret);
1340 goto out;
341 } 1341 }
342 offset += le16_to_cpu(de->rec_len);
343 de = (struct ocfs2_dir_entry *)
344 ((char *)de + le16_to_cpu(de->rec_len));
345 } 1342 }
346 brelse(bh); 1343
347 return 1; 1344 *first_block_bh = dirdata_bh;
1345 dirdata_bh = NULL;
1346
1347out_commit:
1348 ocfs2_commit_trans(osb, handle);
1349
1350out_sem:
1351 up_write(&oi->ip_alloc_sem);
1352
1353out:
1354 if (data_ac)
1355 ocfs2_free_alloc_context(data_ac);
1356
1357 brelse(dirdata_bh);
1358
1359 return ret;
348} 1360}
349 1361
350/* returns a bh of the 1st new block in the allocation. */ 1362/* returns a bh of the 1st new block in the allocation. */
351int ocfs2_do_extend_dir(struct super_block *sb, 1363static int ocfs2_do_extend_dir(struct super_block *sb,
352 handle_t *handle, 1364 handle_t *handle,
353 struct inode *dir, 1365 struct inode *dir,
354 struct buffer_head *parent_fe_bh, 1366 struct buffer_head *parent_fe_bh,
355 struct ocfs2_alloc_context *data_ac, 1367 struct ocfs2_alloc_context *data_ac,
356 struct ocfs2_alloc_context *meta_ac, 1368 struct ocfs2_alloc_context *meta_ac,
357 struct buffer_head **new_bh) 1369 struct buffer_head **new_bh)
358{ 1370{
359 int status; 1371 int status;
360 int extend; 1372 int extend;
@@ -396,10 +1408,18 @@ bail:
396 return status; 1408 return status;
397} 1409}
398 1410
399/* assumes you already have a cluster lock on the directory. */ 1411/*
1412 * Assumes you already have a cluster lock on the directory.
1413 *
1414 * 'blocks_wanted' is only used if we have an inline directory which
1415 * is to be turned into an extent based one. The size of the dirent to
1416 * insert might be larger than the space gained by growing to just one
1417 * block, so we may have to grow the inode by two blocks in that case.
1418 */
400static int ocfs2_extend_dir(struct ocfs2_super *osb, 1419static int ocfs2_extend_dir(struct ocfs2_super *osb,
401 struct inode *dir, 1420 struct inode *dir,
402 struct buffer_head *parent_fe_bh, 1421 struct buffer_head *parent_fe_bh,
1422 unsigned int blocks_wanted,
403 struct buffer_head **new_de_bh) 1423 struct buffer_head **new_de_bh)
404{ 1424{
405 int status = 0; 1425 int status = 0;
@@ -415,6 +1435,38 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
415 1435
416 mlog_entry_void(); 1436 mlog_entry_void();
417 1437
1438 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1439 status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
1440 blocks_wanted, &new_bh);
1441 if (status) {
1442 mlog_errno(status);
1443 goto bail;
1444 }
1445
1446 if (blocks_wanted == 1) {
1447 /*
1448 * If the new dirent will fit inside the space
1449 * created by pushing out to one block, then
1450 * we can complete the operation
1451 * here. Otherwise we have to expand i_size
1452 * and format the 2nd block below.
1453 */
1454 BUG_ON(new_bh == NULL);
1455 goto bail_bh;
1456 }
1457
1458 /*
1459 * Get rid of 'new_bh' - we want to format the 2nd
1460 * data block and return that instead.
1461 */
1462 brelse(new_bh);
1463 new_bh = NULL;
1464
1465 dir_i_size = i_size_read(dir);
1466 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
1467 goto do_extend;
1468 }
1469
418 dir_i_size = i_size_read(dir); 1470 dir_i_size = i_size_read(dir);
419 mlog(0, "extending dir %llu (i_size = %lld)\n", 1471 mlog(0, "extending dir %llu (i_size = %lld)\n",
420 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size); 1472 (unsigned long long)OCFS2_I(dir)->ip_blkno, dir_i_size);
@@ -452,6 +1504,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
452 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 1504 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
453 } 1505 }
454 1506
1507do_extend:
455 down_write(&OCFS2_I(dir)->ip_alloc_sem); 1508 down_write(&OCFS2_I(dir)->ip_alloc_sem);
456 drop_alloc_sem = 1; 1509 drop_alloc_sem = 1;
457 1510
@@ -497,6 +1550,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
497 goto bail; 1550 goto bail;
498 } 1551 }
499 1552
1553bail_bh:
500 *new_de_bh = new_bh; 1554 *new_de_bh = new_bh;
501 get_bh(*new_de_bh); 1555 get_bh(*new_de_bh);
502bail: 1556bail:
@@ -517,41 +1571,71 @@ bail:
517 return status; 1571 return status;
518} 1572}
519 1573
520/* 1574static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
521 * Search the dir for a good spot, extending it if necessary. The 1575 const char *name, int namelen,
522 * block containing an appropriate record is returned in ret_de_bh. 1576 struct buffer_head **ret_de_bh,
523 */ 1577 unsigned int *blocks_wanted)
524int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
525 struct inode *dir,
526 struct buffer_head *parent_fe_bh,
527 const char *name,
528 int namelen,
529 struct buffer_head **ret_de_bh)
530{ 1578{
531 unsigned long offset; 1579 int ret;
532 struct buffer_head * bh = NULL; 1580 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
533 unsigned short rec_len; 1581 struct ocfs2_dir_entry *de, *last_de = NULL;
534 struct ocfs2_dinode *fe; 1582 char *de_buf, *limit;
535 struct ocfs2_dir_entry *de; 1583 unsigned long offset = 0;
536 struct super_block *sb; 1584 unsigned int rec_len, new_rec_len;
537 int status; 1585
1586 de_buf = di->id2.i_data.id_data;
1587 limit = de_buf + i_size_read(dir);
1588 rec_len = OCFS2_DIR_REC_LEN(namelen);
538 1589
539 mlog_entry_void(); 1590 while (de_buf < limit) {
1591 de = (struct ocfs2_dir_entry *)de_buf;
540 1592
541 mlog(0, "getting ready to insert namelen %d into dir %llu\n", 1593 if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
542 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno); 1594 ret = -ENOENT;
1595 goto out;
1596 }
1597 if (ocfs2_match(namelen, name, de)) {
1598 ret = -EEXIST;
1599 goto out;
1600 }
1601 if (ocfs2_dirent_would_fit(de, rec_len)) {
1602 /* Ok, we found a spot. Return this bh and let
1603 * the caller actually fill it in. */
1604 *ret_de_bh = di_bh;
1605 get_bh(*ret_de_bh);
1606 ret = 0;
1607 goto out;
1608 }
543 1609
544 BUG_ON(!S_ISDIR(dir->i_mode)); 1610 last_de = de;
545 fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 1611 de_buf += le16_to_cpu(de->rec_len);
546 BUG_ON(le64_to_cpu(fe->i_size) != i_size_read(dir)); 1612 offset += le16_to_cpu(de->rec_len);
1613 }
547 1614
548 sb = dir->i_sb; 1615 /*
1616 * We're going to require expansion of the directory - figure
1617 * out how many blocks we'll need so that a place for the
1618 * dirent can be found.
1619 */
1620 *blocks_wanted = 1;
1621 new_rec_len = le16_to_cpu(last_de->rec_len) + (dir->i_sb->s_blocksize - i_size_read(dir));
1622 if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
1623 *blocks_wanted = 2;
1624
1625 ret = -ENOSPC;
1626out:
1627 return ret;
1628}
549 1629
550 if (!namelen) { 1630static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
551 status = -EINVAL; 1631 int namelen, struct buffer_head **ret_de_bh)
552 mlog_errno(status); 1632{
553 goto bail; 1633 unsigned long offset;
554 } 1634 struct buffer_head *bh = NULL;
1635 unsigned short rec_len;
1636 struct ocfs2_dir_entry *de;
1637 struct super_block *sb = dir->i_sb;
1638 int status;
555 1639
556 bh = ocfs2_bread(dir, 0, &status, 0); 1640 bh = ocfs2_bread(dir, 0, &status, 0);
557 if (!bh) { 1641 if (!bh) {
@@ -568,17 +1652,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
568 bh = NULL; 1652 bh = NULL;
569 1653
570 if (i_size_read(dir) <= offset) { 1654 if (i_size_read(dir) <= offset) {
571 status = ocfs2_extend_dir(osb, 1655 /*
572 dir, 1656 * Caller will have to expand this
573 parent_fe_bh, 1657 * directory.
574 &bh); 1658 */
575 if (status < 0) { 1659 status = -ENOSPC;
576 mlog_errno(status);
577 goto bail;
578 }
579 BUG_ON(!bh);
580 *ret_de_bh = bh;
581 get_bh(*ret_de_bh);
582 goto bail; 1660 goto bail;
583 } 1661 }
584 bh = ocfs2_bread(dir, 1662 bh = ocfs2_bread(dir,
@@ -600,10 +1678,7 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
600 status = -EEXIST; 1678 status = -EEXIST;
601 goto bail; 1679 goto bail;
602 } 1680 }
603 if (((le64_to_cpu(de->inode) == 0) && 1681 if (ocfs2_dirent_would_fit(de, rec_len)) {
604 (le16_to_cpu(de->rec_len) >= rec_len)) ||
605 (le16_to_cpu(de->rec_len) >=
606 (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
607 /* Ok, we found a spot. Return this bh and let 1682 /* Ok, we found a spot. Return this bh and let
608 * the caller actually fill it in. */ 1683 * the caller actually fill it in. */
609 *ret_de_bh = bh; 1684 *ret_de_bh = bh;
@@ -623,3 +1698,61 @@ bail:
623 mlog_exit(status); 1698 mlog_exit(status);
624 return status; 1699 return status;
625} 1700}
1701
1702int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
1703 struct inode *dir,
1704 struct buffer_head *parent_fe_bh,
1705 const char *name,
1706 int namelen,
1707 struct buffer_head **ret_de_bh)
1708{
1709 int ret;
1710 unsigned int blocks_wanted = 1;
1711 struct buffer_head *bh = NULL;
1712
1713 mlog(0, "getting ready to insert namelen %d into dir %llu\n",
1714 namelen, (unsigned long long)OCFS2_I(dir)->ip_blkno);
1715
1716 *ret_de_bh = NULL;
1717
1718 if (!namelen) {
1719 ret = -EINVAL;
1720 mlog_errno(ret);
1721 goto out;
1722 }
1723
1724 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1725 ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
1726 namelen, &bh, &blocks_wanted);
1727 } else
1728 ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh);
1729
1730 if (ret && ret != -ENOSPC) {
1731 mlog_errno(ret);
1732 goto out;
1733 }
1734
1735 if (ret == -ENOSPC) {
1736 /*
1737 * We have to expand the directory to add this name.
1738 */
1739 BUG_ON(bh);
1740
1741 ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
1742 &bh);
1743 if (ret) {
1744 if (ret != -ENOSPC)
1745 mlog_errno(ret);
1746 goto out;
1747 }
1748
1749 BUG_ON(!bh);
1750 }
1751
1752 *ret_de_bh = bh;
1753 bh = NULL;
1754out:
1755 if (bh)
1756 brelse(bh);
1757 return ret;
1758}
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h
index 3f67e146864a..ce48b9080d87 100644
--- a/fs/ocfs2/dir.h
+++ b/fs/ocfs2/dir.h
@@ -26,17 +26,49 @@
26#ifndef OCFS2_DIR_H 26#ifndef OCFS2_DIR_H
27#define OCFS2_DIR_H 27#define OCFS2_DIR_H
28 28
29struct buffer_head *ocfs2_find_entry(const char *name,
30 int namelen,
31 struct inode *dir,
32 struct ocfs2_dir_entry **res_dir);
33int ocfs2_delete_entry(handle_t *handle,
34 struct inode *dir,
35 struct ocfs2_dir_entry *de_del,
36 struct buffer_head *bh);
37int __ocfs2_add_entry(handle_t *handle,
38 struct inode *dir,
39 const char *name, int namelen,
40 struct inode *inode, u64 blkno,
41 struct buffer_head *parent_fe_bh,
42 struct buffer_head *insert_bh);
43static inline int ocfs2_add_entry(handle_t *handle,
44 struct dentry *dentry,
45 struct inode *inode, u64 blkno,
46 struct buffer_head *parent_fe_bh,
47 struct buffer_head *insert_bh)
48{
49 return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
50 dentry->d_name.name, dentry->d_name.len,
51 inode, blkno, parent_fe_bh, insert_bh);
52}
53int ocfs2_update_entry(struct inode *dir, handle_t *handle,
54 struct buffer_head *de_bh, struct ocfs2_dir_entry *de,
55 struct inode *new_entry_inode);
56
29int ocfs2_check_dir_for_entry(struct inode *dir, 57int ocfs2_check_dir_for_entry(struct inode *dir,
30 const char *name, 58 const char *name,
31 int namelen); 59 int namelen);
32int ocfs2_empty_dir(struct inode *inode); /* FIXME: to namei.c */ 60int ocfs2_empty_dir(struct inode *inode);
33int ocfs2_find_files_on_disk(const char *name, 61int ocfs2_find_files_on_disk(const char *name,
34 int namelen, 62 int namelen,
35 u64 *blkno, 63 u64 *blkno,
36 struct inode *inode, 64 struct inode *inode,
37 struct buffer_head **dirent_bh, 65 struct buffer_head **dirent_bh,
38 struct ocfs2_dir_entry **dirent); 66 struct ocfs2_dir_entry **dirent);
67int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
68 int namelen, u64 *blkno);
39int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); 69int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir);
70int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv,
71 filldir_t filldir);
40int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, 72int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
41 struct inode *dir, 73 struct inode *dir,
42 struct buffer_head *parent_fe_bh, 74 struct buffer_head *parent_fe_bh,
@@ -44,11 +76,11 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
44 int namelen, 76 int namelen,
45 struct buffer_head **ret_de_bh); 77 struct buffer_head **ret_de_bh);
46struct ocfs2_alloc_context; 78struct ocfs2_alloc_context;
47int ocfs2_do_extend_dir(struct super_block *sb, 79int ocfs2_fill_new_dir(struct ocfs2_super *osb,
48 handle_t *handle, 80 handle_t *handle,
49 struct inode *dir, 81 struct inode *parent,
50 struct buffer_head *parent_fe_bh, 82 struct inode *inode,
51 struct ocfs2_alloc_context *data_ac, 83 struct buffer_head *fe_bh,
52 struct ocfs2_alloc_context *meta_ac, 84 struct ocfs2_alloc_context *data_ac);
53 struct buffer_head **new_bh); 85
54#endif /* OCFS2_DIR_H */ 86#endif /* OCFS2_DIR_H */
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index f71250ed166f..41c76ff2fcfb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -1482,6 +1482,7 @@ static void __ocfs2_stuff_meta_lvb(struct inode *inode)
1482 lvb->lvb_imtime_packed = 1482 lvb->lvb_imtime_packed =
1483 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime)); 1483 cpu_to_be64(ocfs2_pack_timespec(&inode->i_mtime));
1484 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr); 1484 lvb->lvb_iattr = cpu_to_be32(oi->ip_attr);
1485 lvb->lvb_idynfeatures = cpu_to_be16(oi->ip_dyn_features);
1485 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation); 1486 lvb->lvb_igeneration = cpu_to_be32(inode->i_generation);
1486 1487
1487out: 1488out:
@@ -1515,6 +1516,7 @@ static void ocfs2_refresh_inode_from_lvb(struct inode *inode)
1515 i_size_write(inode, be64_to_cpu(lvb->lvb_isize)); 1516 i_size_write(inode, be64_to_cpu(lvb->lvb_isize));
1516 1517
1517 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr); 1518 oi->ip_attr = be32_to_cpu(lvb->lvb_iattr);
1519 oi->ip_dyn_features = be16_to_cpu(lvb->lvb_idynfeatures);
1518 ocfs2_set_inode_flags(inode); 1520 ocfs2_set_inode_flags(inode);
1519 1521
1520 /* fast-symlinks are a special case */ 1522 /* fast-symlinks are a special case */
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 492bad32a8c0..87a785e41205 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -29,12 +29,12 @@
29 29
30#include "dcache.h" 30#include "dcache.h"
31 31
32#define OCFS2_LVB_VERSION 4 32#define OCFS2_LVB_VERSION 5
33 33
34struct ocfs2_meta_lvb { 34struct ocfs2_meta_lvb {
35 __u8 lvb_version; 35 __u8 lvb_version;
36 __u8 lvb_reserved0; 36 __u8 lvb_reserved0;
37 __be16 lvb_reserved1; 37 __be16 lvb_idynfeatures;
38 __be32 lvb_iclusters; 38 __be32 lvb_iclusters;
39 __be32 lvb_iuid; 39 __be32 lvb_iuid;
40 __be32 lvb_igid; 40 __be32 lvb_igid;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index bc48177bd183..c3bbc198f9ce 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -88,8 +88,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
88 struct dentry *parent; 88 struct dentry *parent;
89 struct inode *inode; 89 struct inode *inode;
90 struct inode *dir = child->d_inode; 90 struct inode *dir = child->d_inode;
91 struct buffer_head *dirent_bh = NULL;
92 struct ocfs2_dir_entry *dirent;
93 91
94 mlog_entry("(0x%p, '%.*s')\n", child, 92 mlog_entry("(0x%p, '%.*s')\n", child,
95 child->d_name.len, child->d_name.name); 93 child->d_name.len, child->d_name.name);
@@ -105,8 +103,7 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
105 goto bail; 103 goto bail;
106 } 104 }
107 105
108 status = ocfs2_find_files_on_disk("..", 2, &blkno, dir, &dirent_bh, 106 status = ocfs2_lookup_ino_from_name(dir, "..", 2, &blkno);
109 &dirent);
110 if (status < 0) { 107 if (status < 0) {
111 parent = ERR_PTR(-ENOENT); 108 parent = ERR_PTR(-ENOENT);
112 goto bail_unlock; 109 goto bail_unlock;
@@ -131,9 +128,6 @@ static struct dentry *ocfs2_get_parent(struct dentry *child)
131bail_unlock: 128bail_unlock:
132 ocfs2_meta_unlock(dir, 0); 129 ocfs2_meta_unlock(dir, 0);
133 130
134 if (dirent_bh)
135 brelse(dirent_bh);
136
137bail: 131bail:
138 mlog_exit_ptr(parent); 132 mlog_exit_ptr(parent);
139 133
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 03c1d365c78b..c58668a326fe 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -387,6 +387,12 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
387 struct ocfs2_extent_rec *rec; 387 struct ocfs2_extent_rec *rec;
388 u32 coff; 388 u32 coff;
389 389
390 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
391 ret = -ERANGE;
392 mlog_errno(ret);
393 goto out;
394 }
395
390 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, 396 ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster,
391 num_clusters, extent_flags); 397 num_clusters, extent_flags);
392 if (ret == 0) 398 if (ret == 0)
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f3bc3658e7a5..a62b14eb4065 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -397,6 +397,15 @@ static int ocfs2_truncate_file(struct inode *inode,
397 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1); 397 unmap_mapping_range(inode->i_mapping, new_i_size + PAGE_SIZE - 1, 0, 1);
398 truncate_inode_pages(inode->i_mapping, new_i_size); 398 truncate_inode_pages(inode->i_mapping, new_i_size);
399 399
400 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
401 status = ocfs2_truncate_inline(inode, di_bh, new_i_size,
402 i_size_read(inode), 0);
403 if (status)
404 mlog_errno(status);
405
406 goto bail_unlock_data;
407 }
408
400 /* alright, we're going to need to do a full blown alloc size 409 /* alright, we're going to need to do a full blown alloc size
401 * change. Orphan the inode so that recovery can complete the 410 * change. Orphan the inode so that recovery can complete the
402 * truncate if necessary. This does the task of marking 411 * truncate if necessary. This does the task of marking
@@ -779,25 +788,6 @@ leave:
779 return status; 788 return status;
780} 789}
781 790
782static int ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
783 u32 clusters_to_add, int mark_unwritten)
784{
785 int ret;
786
787 /*
788 * The alloc sem blocks peope in read/write from reading our
789 * allocation until we're done changing it. We depend on
790 * i_mutex to block other extend/truncate calls while we're
791 * here.
792 */
793 down_write(&OCFS2_I(inode)->ip_alloc_sem);
794 ret = __ocfs2_extend_allocation(inode, logical_start, clusters_to_add,
795 mark_unwritten);
796 up_write(&OCFS2_I(inode)->ip_alloc_sem);
797
798 return ret;
799}
800
801/* Some parts of this taken from generic_cont_expand, which turned out 791/* Some parts of this taken from generic_cont_expand, which turned out
802 * to be too fragile to do exactly what we need without us having to 792 * to be too fragile to do exactly what we need without us having to
803 * worry about recursive locking in ->prepare_write() and 793 * worry about recursive locking in ->prepare_write() and
@@ -889,25 +879,48 @@ out:
889 return ret; 879 return ret;
890} 880}
891 881
892/* 882int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
893 * A tail_to_skip value > 0 indicates that we're being called from 883{
894 * ocfs2_file_aio_write(). This has the following implications: 884 int ret;
895 * 885 u32 clusters_to_add;
896 * - we don't want to update i_size 886 struct ocfs2_inode_info *oi = OCFS2_I(inode);
897 * - di_bh will be NULL, which is fine because it's only used in the 887
898 * case where we want to update i_size. 888 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
899 * - ocfs2_zero_extend() will then only be filling the hole created 889 if (clusters_to_add < oi->ip_clusters)
900 * between i_size and the start of the write. 890 clusters_to_add = 0;
901 */ 891 else
892 clusters_to_add -= oi->ip_clusters;
893
894 if (clusters_to_add) {
895 ret = __ocfs2_extend_allocation(inode, oi->ip_clusters,
896 clusters_to_add, 0);
897 if (ret) {
898 mlog_errno(ret);
899 goto out;
900 }
901 }
902
903 /*
904 * Call this even if we don't add any clusters to the tree. We
905 * still need to zero the area between the old i_size and the
906 * new i_size.
907 */
908 ret = ocfs2_zero_extend(inode, zero_to);
909 if (ret < 0)
910 mlog_errno(ret);
911
912out:
913 return ret;
914}
915
902static int ocfs2_extend_file(struct inode *inode, 916static int ocfs2_extend_file(struct inode *inode,
903 struct buffer_head *di_bh, 917 struct buffer_head *di_bh,
904 u64 new_i_size, 918 u64 new_i_size)
905 size_t tail_to_skip)
906{ 919{
907 int ret = 0; 920 int ret = 0, data_locked = 0;
908 u32 clusters_to_add = 0; 921 struct ocfs2_inode_info *oi = OCFS2_I(inode);
909 922
910 BUG_ON(!tail_to_skip && !di_bh); 923 BUG_ON(!di_bh);
911 924
912 /* setattr sometimes calls us like this. */ 925 /* setattr sometimes calls us like this. */
913 if (new_i_size == 0) 926 if (new_i_size == 0)
@@ -917,13 +930,18 @@ static int ocfs2_extend_file(struct inode *inode,
917 goto out; 930 goto out;
918 BUG_ON(new_i_size < i_size_read(inode)); 931 BUG_ON(new_i_size < i_size_read(inode));
919 932
920 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { 933 /*
921 BUG_ON(tail_to_skip != 0); 934 * Fall through for converting inline data, even if the fs
935 * supports sparse files.
936 *
937 * The check for inline data here is legal - nobody can add
938 * the feature since we have i_mutex. We must check it again
939 * after acquiring ip_alloc_sem though, as paths like mmap
940 * might have raced us to converting the inode to extents.
941 */
942 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
943 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
922 goto out_update_size; 944 goto out_update_size;
923 }
924
925 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size) -
926 OCFS2_I(inode)->ip_clusters;
927 945
928 /* 946 /*
929 * protect the pages that ocfs2_zero_extend is going to be 947 * protect the pages that ocfs2_zero_extend is going to be
@@ -937,39 +955,52 @@ static int ocfs2_extend_file(struct inode *inode,
937 mlog_errno(ret); 955 mlog_errno(ret);
938 goto out; 956 goto out;
939 } 957 }
958 data_locked = 1;
959
960 /*
961 * The alloc sem blocks people in read/write from reading our
962 * allocation until we're done changing it. We depend on
963 * i_mutex to block other extend/truncate calls while we're
964 * here.
965 */
966 down_write(&oi->ip_alloc_sem);
967
968 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
969 /*
970 * We can optimize small extends by keeping the inodes
971 * inline data.
972 */
973 if (ocfs2_size_fits_inline_data(di_bh, new_i_size)) {
974 up_write(&oi->ip_alloc_sem);
975 goto out_update_size;
976 }
977
978 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
979 if (ret) {
980 up_write(&oi->ip_alloc_sem);
940 981
941 if (clusters_to_add) {
942 ret = ocfs2_extend_allocation(inode,
943 OCFS2_I(inode)->ip_clusters,
944 clusters_to_add, 0);
945 if (ret < 0) {
946 mlog_errno(ret); 982 mlog_errno(ret);
947 goto out_unlock; 983 goto out_unlock;
948 } 984 }
949 } 985 }
950 986
951 /* 987 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
952 * Call this even if we don't add any clusters to the tree. We 988 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
953 * still need to zero the area between the old i_size and the 989
954 * new i_size. 990 up_write(&oi->ip_alloc_sem);
955 */ 991
956 ret = ocfs2_zero_extend(inode, (u64)new_i_size - tail_to_skip);
957 if (ret < 0) { 992 if (ret < 0) {
958 mlog_errno(ret); 993 mlog_errno(ret);
959 goto out_unlock; 994 goto out_unlock;
960 } 995 }
961 996
962out_update_size: 997out_update_size:
963 if (!tail_to_skip) { 998 ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
964 /* We're being called from ocfs2_setattr() which wants 999 if (ret < 0)
965 * us to update i_size */ 1000 mlog_errno(ret);
966 ret = ocfs2_simple_size_update(inode, di_bh, new_i_size);
967 if (ret < 0)
968 mlog_errno(ret);
969 }
970 1001
971out_unlock: 1002out_unlock:
972 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1003 if (data_locked)
973 ocfs2_data_unlock(inode, 1); 1004 ocfs2_data_unlock(inode, 1);
974 1005
975out: 1006out:
@@ -1035,7 +1066,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1035 if (i_size_read(inode) > attr->ia_size) 1066 if (i_size_read(inode) > attr->ia_size)
1036 status = ocfs2_truncate_file(inode, bh, attr->ia_size); 1067 status = ocfs2_truncate_file(inode, bh, attr->ia_size);
1037 else 1068 else
1038 status = ocfs2_extend_file(inode, bh, attr->ia_size, 0); 1069 status = ocfs2_extend_file(inode, bh, attr->ia_size);
1039 if (status < 0) { 1070 if (status < 0) {
1040 if (status != -ENOSPC) 1071 if (status != -ENOSPC)
1041 mlog_errno(status); 1072 mlog_errno(status);
@@ -1243,6 +1274,31 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1243{ 1274{
1244 int ret; 1275 int ret;
1245 u32 cpos, phys_cpos, clusters, alloc_size; 1276 u32 cpos, phys_cpos, clusters, alloc_size;
1277 u64 end = start + len;
1278 struct buffer_head *di_bh = NULL;
1279
1280 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1281 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb),
1282 OCFS2_I(inode)->ip_blkno, &di_bh,
1283 OCFS2_BH_CACHED, inode);
1284 if (ret) {
1285 mlog_errno(ret);
1286 goto out;
1287 }
1288
1289 /*
1290 * Nothing to do if the requested reservation range
1291 * fits within the inode.
1292 */
1293 if (ocfs2_size_fits_inline_data(di_bh, end))
1294 goto out;
1295
1296 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
1297 if (ret) {
1298 mlog_errno(ret);
1299 goto out;
1300 }
1301 }
1246 1302
1247 /* 1303 /*
1248 * We consider both start and len to be inclusive. 1304 * We consider both start and len to be inclusive.
@@ -1288,6 +1344,8 @@ next:
1288 1344
1289 ret = 0; 1345 ret = 0;
1290out: 1346out:
1347
1348 brelse(di_bh);
1291 return ret; 1349 return ret;
1292} 1350}
1293 1351
@@ -1469,6 +1527,14 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1469 if (byte_len == 0) 1527 if (byte_len == 0)
1470 return 0; 1528 return 0;
1471 1529
1530 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1531 ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
1532 byte_start + byte_len, 1);
1533 if (ret)
1534 mlog_errno(ret);
1535 return ret;
1536 }
1537
1472 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1538 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1473 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1539 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits;
1474 if (trunc_len >= trunc_start) 1540 if (trunc_len >= trunc_start)
@@ -1713,15 +1779,13 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1713 int appending, 1779 int appending,
1714 int *direct_io) 1780 int *direct_io)
1715{ 1781{
1716 int ret = 0, meta_level = appending; 1782 int ret = 0, meta_level = 0;
1717 struct inode *inode = dentry->d_inode; 1783 struct inode *inode = dentry->d_inode;
1718 u32 clusters; 1784 loff_t saved_pos, end;
1719 loff_t newsize, saved_pos;
1720 1785
1721 /* 1786 /*
1722 * We sample i_size under a read level meta lock to see if our write 1787 * We start with a read level meta lock and only jump to an ex
1723 * is extending the file, if it is we back off and get a write level 1788 * if we need to make modifications here.
1724 * meta lock.
1725 */ 1789 */
1726 for(;;) { 1790 for(;;) {
1727 ret = ocfs2_meta_lock(inode, NULL, meta_level); 1791 ret = ocfs2_meta_lock(inode, NULL, meta_level);
@@ -1763,87 +1827,47 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1763 saved_pos = *ppos; 1827 saved_pos = *ppos;
1764 } 1828 }
1765 1829
1766 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) { 1830 end = saved_pos + count;
1767 loff_t end = saved_pos + count;
1768 1831
1769 /* 1832 /*
1770 * Skip the O_DIRECT checks if we don't need 1833 * Skip the O_DIRECT checks if we don't need
1771 * them. 1834 * them.
1772 */ 1835 */
1773 if (!direct_io || !(*direct_io)) 1836 if (!direct_io || !(*direct_io))
1774 break;
1775
1776 /*
1777 * Allowing concurrent direct writes means
1778 * i_size changes wouldn't be synchronized, so
1779 * one node could wind up truncating another
1780 * nodes writes.
1781 */
1782 if (end > i_size_read(inode)) {
1783 *direct_io = 0;
1784 break;
1785 }
1786
1787 /*
1788 * We don't fill holes during direct io, so
1789 * check for them here. If any are found, the
1790 * caller will have to retake some cluster
1791 * locks and initiate the io as buffered.
1792 */
1793 ret = ocfs2_check_range_for_holes(inode, saved_pos,
1794 count);
1795 if (ret == 1) {
1796 *direct_io = 0;
1797 ret = 0;
1798 } else if (ret < 0)
1799 mlog_errno(ret);
1800 break; 1837 break;
1801 }
1802 1838
1803 /* 1839 /*
1804 * The rest of this loop is concerned with legacy file 1840 * There's no sane way to do direct writes to an inode
1805 * systems which don't support sparse files. 1841 * with inline data.
1806 */ 1842 */
1807 1843 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1808 newsize = count + saved_pos; 1844 *direct_io = 0;
1809
1810 mlog(0, "pos=%lld newsize=%lld cursize=%lld\n",
1811 (long long) saved_pos, (long long) newsize,
1812 (long long) i_size_read(inode));
1813
1814 /* No need for a higher level metadata lock if we're
1815 * never going past i_size. */
1816 if (newsize <= i_size_read(inode))
1817 break; 1845 break;
1818
1819 if (meta_level == 0) {
1820 ocfs2_meta_unlock(inode, meta_level);
1821 meta_level = 1;
1822 continue;
1823 } 1846 }
1824 1847
1825 spin_lock(&OCFS2_I(inode)->ip_lock); 1848 /*
1826 clusters = ocfs2_clusters_for_bytes(inode->i_sb, newsize) - 1849 * Allowing concurrent direct writes means
1827 OCFS2_I(inode)->ip_clusters; 1850 * i_size changes wouldn't be synchronized, so
1828 spin_unlock(&OCFS2_I(inode)->ip_lock); 1851 * one node could wind up truncating another
1829 1852 * nodes writes.
1830 mlog(0, "Writing at EOF, may need more allocation: " 1853 */
1831 "i_size = %lld, newsize = %lld, need %u clusters\n", 1854 if (end > i_size_read(inode)) {
1832 (long long) i_size_read(inode), (long long) newsize, 1855 *direct_io = 0;
1833 clusters);
1834
1835 /* We only want to continue the rest of this loop if
1836 * our extend will actually require more
1837 * allocation. */
1838 if (!clusters)
1839 break; 1856 break;
1840
1841 ret = ocfs2_extend_file(inode, NULL, newsize, count);
1842 if (ret < 0) {
1843 if (ret != -ENOSPC)
1844 mlog_errno(ret);
1845 goto out_unlock;
1846 } 1857 }
1858
1859 /*
1860 * We don't fill holes during direct io, so
1861 * check for them here. If any are found, the
1862 * caller will have to retake some cluster
1863 * locks and initiate the io as buffered.
1864 */
1865 ret = ocfs2_check_range_for_holes(inode, saved_pos, count);
1866 if (ret == 1) {
1867 *direct_io = 0;
1868 ret = 0;
1869 } else if (ret < 0)
1870 mlog_errno(ret);
1847 break; 1871 break;
1848 } 1872 }
1849 1873
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 36fe27f268ee..066f14add3a8 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -47,6 +47,8 @@ int ocfs2_do_extend_allocation(struct ocfs2_super *osb,
47 struct ocfs2_alloc_context *data_ac, 47 struct ocfs2_alloc_context *data_ac,
48 struct ocfs2_alloc_context *meta_ac, 48 struct ocfs2_alloc_context *meta_ac,
49 enum ocfs2_alloc_restarted *reason_ret); 49 enum ocfs2_alloc_restarted *reason_ret);
50int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
51 u64 zero_to);
50int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, 52int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
51 u32 clusters_to_add, u32 extents_to_split, 53 u32 clusters_to_add, u32 extents_to_split,
52 struct ocfs2_alloc_context **data_ac, 54 struct ocfs2_alloc_context **data_ac,
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index c53a6763bbbe..1d5e0cb0fda1 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -241,6 +241,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
241 241
242 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 242 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
243 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 243 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
244 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
244 245
245 inode->i_version = 1; 246 inode->i_version = 1;
246 inode->i_generation = le32_to_cpu(fe->i_generation); 247 inode->i_generation = le32_to_cpu(fe->i_generation);
@@ -513,6 +514,10 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
513 514
514 fe = (struct ocfs2_dinode *) fe_bh->b_data; 515 fe = (struct ocfs2_dinode *) fe_bh->b_data;
515 516
517 /*
518 * This check will also skip truncate of inodes with inline
519 * data and fast symlinks.
520 */
516 if (fe->i_clusters) { 521 if (fe->i_clusters) {
517 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 522 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
518 if (IS_ERR(handle)) { 523 if (IS_ERR(handle)) {
@@ -1220,6 +1225,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1220 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters); 1225 fe->i_clusters = cpu_to_le32(OCFS2_I(inode)->ip_clusters);
1221 ocfs2_get_inode_flags(OCFS2_I(inode)); 1226 ocfs2_get_inode_flags(OCFS2_I(inode));
1222 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr); 1227 fe->i_attr = cpu_to_le32(OCFS2_I(inode)->ip_attr);
1228 fe->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
1223 spin_unlock(&OCFS2_I(inode)->ip_lock); 1229 spin_unlock(&OCFS2_I(inode)->ip_lock);
1224 1230
1225 fe->i_size = cpu_to_le64(i_size_read(inode)); 1231 fe->i_size = cpu_to_le64(i_size_read(inode));
@@ -1257,6 +1263,7 @@ void ocfs2_refresh_inode(struct inode *inode,
1257 1263
1258 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 1264 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
1259 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr); 1265 OCFS2_I(inode)->ip_attr = le32_to_cpu(fe->i_attr);
1266 OCFS2_I(inode)->ip_dyn_features = le16_to_cpu(fe->i_dyn_features);
1260 ocfs2_set_inode_flags(inode); 1267 ocfs2_set_inode_flags(inode);
1261 i_size_write(inode, le64_to_cpu(fe->i_size)); 1268 i_size_write(inode, le64_to_cpu(fe->i_size));
1262 inode->i_nlink = le16_to_cpu(fe->i_links_count); 1269 inode->i_nlink = le16_to_cpu(fe->i_links_count);
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index a41d0817121b..70e881c55536 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -51,6 +51,7 @@ struct ocfs2_inode_info
51 51
52 u32 ip_flags; /* see below */ 52 u32 ip_flags; /* see below */
53 u32 ip_attr; /* inode attributes */ 53 u32 ip_attr; /* inode attributes */
54 u16 ip_dyn_features;
54 55
55 /* protected by recovery_lock. */ 56 /* protected by recovery_lock. */
56 struct inode *ip_next_orphan; 57 struct inode *ip_next_orphan;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index dbfb20bb27ea..f9d01e25298d 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -35,13 +35,13 @@
35#include "ocfs2.h" 35#include "ocfs2.h"
36 36
37#include "alloc.h" 37#include "alloc.h"
38#include "dir.h"
38#include "dlmglue.h" 39#include "dlmglue.h"
39#include "extent_map.h" 40#include "extent_map.h"
40#include "heartbeat.h" 41#include "heartbeat.h"
41#include "inode.h" 42#include "inode.h"
42#include "journal.h" 43#include "journal.h"
43#include "localalloc.h" 44#include "localalloc.h"
44#include "namei.h"
45#include "slot_map.h" 45#include "slot_map.h"
46#include "super.h" 46#include "super.h"
47#include "vote.h" 47#include "vote.h"
@@ -1213,17 +1213,49 @@ bail:
1213 return status; 1213 return status;
1214} 1214}
1215 1215
1216struct ocfs2_orphan_filldir_priv {
1217 struct inode *head;
1218 struct ocfs2_super *osb;
1219};
1220
1221static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len,
1222 loff_t pos, u64 ino, unsigned type)
1223{
1224 struct ocfs2_orphan_filldir_priv *p = priv;
1225 struct inode *iter;
1226
1227 if (name_len == 1 && !strncmp(".", name, 1))
1228 return 0;
1229 if (name_len == 2 && !strncmp("..", name, 2))
1230 return 0;
1231
1232 /* Skip bad inodes so that recovery can continue */
1233 iter = ocfs2_iget(p->osb, ino,
1234 OCFS2_FI_FLAG_ORPHAN_RECOVERY);
1235 if (IS_ERR(iter))
1236 return 0;
1237
1238 mlog(0, "queue orphan %llu\n",
1239 (unsigned long long)OCFS2_I(iter)->ip_blkno);
1240 /* No locking is required for the next_orphan queue as there
1241 * is only ever a single process doing orphan recovery. */
1242 OCFS2_I(iter)->ip_next_orphan = p->head;
1243 p->head = iter;
1244
1245 return 0;
1246}
1247
1216static int ocfs2_queue_orphans(struct ocfs2_super *osb, 1248static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1217 int slot, 1249 int slot,
1218 struct inode **head) 1250 struct inode **head)
1219{ 1251{
1220 int status; 1252 int status;
1221 struct inode *orphan_dir_inode = NULL; 1253 struct inode *orphan_dir_inode = NULL;
1222 struct inode *iter; 1254 struct ocfs2_orphan_filldir_priv priv;
1223 unsigned long offset, blk, local; 1255 loff_t pos = 0;
1224 struct buffer_head *bh = NULL; 1256
1225 struct ocfs2_dir_entry *de; 1257 priv.osb = osb;
1226 struct super_block *sb = osb->sb; 1258 priv.head = *head;
1227 1259
1228 orphan_dir_inode = ocfs2_get_system_file_inode(osb, 1260 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
1229 ORPHAN_DIR_SYSTEM_INODE, 1261 ORPHAN_DIR_SYSTEM_INODE,
@@ -1241,77 +1273,15 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
1241 goto out; 1273 goto out;
1242 } 1274 }
1243 1275
1244 offset = 0; 1276 status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv,
1245 iter = NULL; 1277 ocfs2_orphan_filldir);
1246 while(offset < i_size_read(orphan_dir_inode)) { 1278 if (status) {
1247 blk = offset >> sb->s_blocksize_bits; 1279 mlog_errno(status);
1248 1280 goto out;
1249 bh = ocfs2_bread(orphan_dir_inode, blk, &status, 0);
1250 if (!bh)
1251 status = -EINVAL;
1252 if (status < 0) {
1253 if (bh)
1254 brelse(bh);
1255 mlog_errno(status);
1256 goto out_unlock;
1257 }
1258
1259 local = 0;
1260 while(offset < i_size_read(orphan_dir_inode)
1261 && local < sb->s_blocksize) {
1262 de = (struct ocfs2_dir_entry *) (bh->b_data + local);
1263
1264 if (!ocfs2_check_dir_entry(orphan_dir_inode,
1265 de, bh, local)) {
1266 status = -EINVAL;
1267 mlog_errno(status);
1268 brelse(bh);
1269 goto out_unlock;
1270 }
1271
1272 local += le16_to_cpu(de->rec_len);
1273 offset += le16_to_cpu(de->rec_len);
1274
1275 /* I guess we silently fail on no inode? */
1276 if (!le64_to_cpu(de->inode))
1277 continue;
1278 if (de->file_type > OCFS2_FT_MAX) {
1279 mlog(ML_ERROR,
1280 "block %llu contains invalid de: "
1281 "inode = %llu, rec_len = %u, "
1282 "name_len = %u, file_type = %u, "
1283 "name='%.*s'\n",
1284 (unsigned long long)bh->b_blocknr,
1285 (unsigned long long)le64_to_cpu(de->inode),
1286 le16_to_cpu(de->rec_len),
1287 de->name_len,
1288 de->file_type,
1289 de->name_len,
1290 de->name);
1291 continue;
1292 }
1293 if (de->name_len == 1 && !strncmp(".", de->name, 1))
1294 continue;
1295 if (de->name_len == 2 && !strncmp("..", de->name, 2))
1296 continue;
1297
1298 iter = ocfs2_iget(osb, le64_to_cpu(de->inode),
1299 OCFS2_FI_FLAG_ORPHAN_RECOVERY);
1300 if (IS_ERR(iter))
1301 continue;
1302
1303 mlog(0, "queue orphan %llu\n",
1304 (unsigned long long)OCFS2_I(iter)->ip_blkno);
1305 /* No locking is required for the next_orphan
1306 * queue as there is only ever a single
1307 * process doing orphan recovery. */
1308 OCFS2_I(iter)->ip_next_orphan = *head;
1309 *head = iter;
1310 }
1311 brelse(bh);
1312 } 1281 }
1313 1282
1314out_unlock: 1283 *head = priv.head;
1284
1315 ocfs2_meta_unlock(orphan_dir_inode, 0); 1285 ocfs2_meta_unlock(orphan_dir_inode, 0);
1316out: 1286out:
1317 mutex_unlock(&orphan_dir_inode->i_mutex); 1287 mutex_unlock(&orphan_dir_inode->i_mutex);
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index ce60aab013aa..4b32e0961568 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -282,6 +282,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
282 * prev. group desc. if we relink. */ 282 * prev. group desc. if we relink. */
283#define OCFS2_SUBALLOC_ALLOC (3) 283#define OCFS2_SUBALLOC_ALLOC (3)
284 284
285#define OCFS2_INLINE_TO_EXTENTS_CREDITS (OCFS2_SUBALLOC_ALLOC \
286 + OCFS2_INODE_UPDATE_CREDITS)
287
285/* dinode + group descriptor update. We don't relink on free yet. */ 288/* dinode + group descriptor update. We don't relink on free yet. */
286#define OCFS2_SUBALLOC_FREE (2) 289#define OCFS2_SUBALLOC_FREE (2)
287 290
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 701e6d04ed5d..729259016c18 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -64,29 +64,6 @@
64 64
65#include "buffer_head_io.h" 65#include "buffer_head_io.h"
66 66
67#define NAMEI_RA_CHUNKS 2
68#define NAMEI_RA_BLOCKS 4
69#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
70#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
71
72static int inline ocfs2_search_dirblock(struct buffer_head *bh,
73 struct inode *dir,
74 const char *name, int namelen,
75 unsigned long offset,
76 struct ocfs2_dir_entry **res_dir);
77
78static int ocfs2_delete_entry(handle_t *handle,
79 struct inode *dir,
80 struct ocfs2_dir_entry *de_del,
81 struct buffer_head *bh);
82
83static int __ocfs2_add_entry(handle_t *handle,
84 struct inode *dir,
85 const char *name, int namelen,
86 struct inode *inode, u64 blkno,
87 struct buffer_head *parent_fe_bh,
88 struct buffer_head *insert_bh);
89
90static int ocfs2_mknod_locked(struct ocfs2_super *osb, 67static int ocfs2_mknod_locked(struct ocfs2_super *osb,
91 struct inode *dir, 68 struct inode *dir,
92 struct dentry *dentry, int mode, 69 struct dentry *dentry, int mode,
@@ -97,13 +74,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
97 struct inode **ret_inode, 74 struct inode **ret_inode,
98 struct ocfs2_alloc_context *inode_ac); 75 struct ocfs2_alloc_context *inode_ac);
99 76
100static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
101 handle_t *handle,
102 struct inode *parent,
103 struct inode *inode,
104 struct buffer_head *fe_bh,
105 struct ocfs2_alloc_context *data_ac);
106
107static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 77static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
108 struct inode **ret_orphan_dir, 78 struct inode **ret_orphan_dir,
109 struct inode *inode, 79 struct inode *inode,
@@ -123,17 +93,6 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
123 struct inode *inode, 93 struct inode *inode,
124 const char *symname); 94 const char *symname);
125 95
126static inline int ocfs2_add_entry(handle_t *handle,
127 struct dentry *dentry,
128 struct inode *inode, u64 blkno,
129 struct buffer_head *parent_fe_bh,
130 struct buffer_head *insert_bh)
131{
132 return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
133 dentry->d_name.name, dentry->d_name.len,
134 inode, blkno, parent_fe_bh, insert_bh);
135}
136
137/* An orphan dir name is an 8 byte value, printed as a hex string */ 96/* An orphan dir name is an 8 byte value, printed as a hex string */
138#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64))) 97#define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
139 98
@@ -142,10 +101,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
142{ 101{
143 int status; 102 int status;
144 u64 blkno; 103 u64 blkno;
145 struct buffer_head *dirent_bh = NULL;
146 struct inode *inode = NULL; 104 struct inode *inode = NULL;
147 struct dentry *ret; 105 struct dentry *ret;
148 struct ocfs2_dir_entry *dirent;
149 struct ocfs2_inode_info *oi; 106 struct ocfs2_inode_info *oi;
150 107
151 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry, 108 mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
@@ -167,9 +124,8 @@ static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
167 goto bail; 124 goto bail;
168 } 125 }
169 126
170 status = ocfs2_find_files_on_disk(dentry->d_name.name, 127 status = ocfs2_lookup_ino_from_name(dir, dentry->d_name.name,
171 dentry->d_name.len, &blkno, 128 dentry->d_name.len, &blkno);
172 dir, &dirent_bh, &dirent);
173 if (status < 0) 129 if (status < 0)
174 goto bail_add; 130 goto bail_add;
175 131
@@ -224,83 +180,12 @@ bail_unlock:
224 ocfs2_meta_unlock(dir, 0); 180 ocfs2_meta_unlock(dir, 0);
225 181
226bail: 182bail:
227 if (dirent_bh)
228 brelse(dirent_bh);
229 183
230 mlog_exit_ptr(ret); 184 mlog_exit_ptr(ret);
231 185
232 return ret; 186 return ret;
233} 187}
234 188
235static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
236 handle_t *handle,
237 struct inode *parent,
238 struct inode *inode,
239 struct buffer_head *fe_bh,
240 struct ocfs2_alloc_context *data_ac)
241{
242 int status;
243 struct buffer_head *new_bh = NULL;
244 struct ocfs2_dir_entry *de = NULL;
245
246 mlog_entry_void();
247
248 status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
249 data_ac, NULL, &new_bh);
250 if (status < 0) {
251 mlog_errno(status);
252 goto bail;
253 }
254
255 ocfs2_set_new_buffer_uptodate(inode, new_bh);
256
257 status = ocfs2_journal_access(handle, inode, new_bh,
258 OCFS2_JOURNAL_ACCESS_CREATE);
259 if (status < 0) {
260 mlog_errno(status);
261 goto bail;
262 }
263 memset(new_bh->b_data, 0, osb->sb->s_blocksize);
264
265 de = (struct ocfs2_dir_entry *) new_bh->b_data;
266 de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
267 de->name_len = 1;
268 de->rec_len =
269 cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
270 strcpy(de->name, ".");
271 ocfs2_set_de_type(de, S_IFDIR);
272 de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
273 de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
274 de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
275 OCFS2_DIR_REC_LEN(1));
276 de->name_len = 2;
277 strcpy(de->name, "..");
278 ocfs2_set_de_type(de, S_IFDIR);
279
280 status = ocfs2_journal_dirty(handle, new_bh);
281 if (status < 0) {
282 mlog_errno(status);
283 goto bail;
284 }
285
286 i_size_write(inode, inode->i_sb->s_blocksize);
287 inode->i_nlink = 2;
288 inode->i_blocks = ocfs2_inode_sector_count(inode);
289 status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
290 if (status < 0) {
291 mlog_errno(status);
292 goto bail;
293 }
294
295 status = 0;
296bail:
297 if (new_bh)
298 brelse(new_bh);
299
300 mlog_exit(status);
301 return status;
302}
303
304static int ocfs2_mknod(struct inode *dir, 189static int ocfs2_mknod(struct inode *dir,
305 struct dentry *dentry, 190 struct dentry *dentry,
306 int mode, 191 int mode,
@@ -365,9 +250,8 @@ static int ocfs2_mknod(struct inode *dir,
365 goto leave; 250 goto leave;
366 } 251 }
367 252
368 /* are we making a directory? If so, reserve a cluster for his 253 /* Reserve a cluster if creating an extent based directory. */
369 * 1st extent. */ 254 if (S_ISDIR(mode) && !ocfs2_supports_inline_data(osb)) {
370 if (S_ISDIR(mode)) {
371 status = ocfs2_reserve_clusters(osb, 1, &data_ac); 255 status = ocfs2_reserve_clusters(osb, 1, &data_ac);
372 if (status < 0) { 256 if (status < 0) {
373 if (status != -ENOSPC) 257 if (status != -ENOSPC)
@@ -564,10 +448,21 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
564 cpu_to_le32(CURRENT_TIME.tv_nsec); 448 cpu_to_le32(CURRENT_TIME.tv_nsec);
565 fe->i_dtime = 0; 449 fe->i_dtime = 0;
566 450
567 fel = &fe->id2.i_list; 451 /*
568 fel->l_tree_depth = 0; 452 * If supported, directories start with inline data.
569 fel->l_next_free_rec = 0; 453 */
570 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); 454 if (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) {
455 u16 feat = le16_to_cpu(fe->i_dyn_features);
456
457 fe->i_dyn_features = cpu_to_le16(feat | OCFS2_INLINE_DATA_FL);
458
459 fe->id2.i_data.id_count = cpu_to_le16(ocfs2_max_inline_data(osb->sb));
460 } else {
461 fel = &fe->id2.i_list;
462 fel->l_tree_depth = 0;
463 fel->l_next_free_rec = 0;
464 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
465 }
571 466
572 status = ocfs2_journal_dirty(handle, *new_fe_bh); 467 status = ocfs2_journal_dirty(handle, *new_fe_bh);
573 if (status < 0) { 468 if (status < 0) {
@@ -1048,11 +943,6 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
1048 ocfs2_meta_unlock(inode2, 1); 943 ocfs2_meta_unlock(inode2, 1);
1049} 944}
1050 945
1051#define PARENT_INO(buffer) \
1052 ((struct ocfs2_dir_entry *) \
1053 ((char *)buffer + \
1054 le16_to_cpu(((struct ocfs2_dir_entry *)buffer)->rec_len)))->inode
1055
1056static int ocfs2_rename(struct inode *old_dir, 946static int ocfs2_rename(struct inode *old_dir,
1057 struct dentry *old_dentry, 947 struct dentry *old_dentry,
1058 struct inode *new_dir, 948 struct inode *new_dir,
@@ -1070,12 +960,12 @@ static int ocfs2_rename(struct inode *old_dir,
1070 struct buffer_head *old_inode_bh = NULL; 960 struct buffer_head *old_inode_bh = NULL;
1071 struct buffer_head *insert_entry_bh = NULL; 961 struct buffer_head *insert_entry_bh = NULL;
1072 struct ocfs2_super *osb = NULL; 962 struct ocfs2_super *osb = NULL;
1073 u64 newfe_blkno; 963 u64 newfe_blkno, old_de_ino;
1074 handle_t *handle = NULL; 964 handle_t *handle = NULL;
1075 struct buffer_head *old_dir_bh = NULL; 965 struct buffer_head *old_dir_bh = NULL;
1076 struct buffer_head *new_dir_bh = NULL; 966 struct buffer_head *new_dir_bh = NULL;
1077 struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry 967 struct ocfs2_dir_entry *old_inode_dot_dot_de = NULL, *old_de = NULL,
1078 // and new_dentry 968 *new_de = NULL;
1079 struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above 969 struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
1080 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir, 970 struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1081 // this is the 1st dirent bh 971 // this is the 1st dirent bh
@@ -1159,27 +1049,35 @@ static int ocfs2_rename(struct inode *old_dir,
1159 } 1049 }
1160 1050
1161 if (S_ISDIR(old_inode->i_mode)) { 1051 if (S_ISDIR(old_inode->i_mode)) {
1162 status = -EIO; 1052 u64 old_inode_parent;
1163 old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0); 1053
1164 if (!old_inode_de_bh) 1054 status = ocfs2_find_files_on_disk("..", 2, &old_inode_parent,
1055 old_inode, &old_inode_de_bh,
1056 &old_inode_dot_dot_de);
1057 if (status) {
1058 status = -EIO;
1165 goto bail; 1059 goto bail;
1060 }
1166 1061
1167 status = -EIO; 1062 if (old_inode_parent != OCFS2_I(old_dir)->ip_blkno) {
1168 if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) != 1063 status = -EIO;
1169 OCFS2_I(old_dir)->ip_blkno)
1170 goto bail; 1064 goto bail;
1171 status = -EMLINK; 1065 }
1172 if (!new_inode && new_dir!=old_dir && 1066
1173 new_dir->i_nlink >= OCFS2_LINK_MAX) 1067 if (!new_inode && new_dir != old_dir &&
1068 new_dir->i_nlink >= OCFS2_LINK_MAX) {
1069 status = -EMLINK;
1174 goto bail; 1070 goto bail;
1071 }
1175 } 1072 }
1176 1073
1177 status = -ENOENT; 1074 status = ocfs2_lookup_ino_from_name(old_dir, old_dentry->d_name.name,
1178 old_de_bh = ocfs2_find_entry(old_dentry->d_name.name, 1075 old_dentry->d_name.len,
1179 old_dentry->d_name.len, 1076 &old_de_ino);
1180 old_dir, &old_de); 1077 if (status) {
1181 if (!old_de_bh) 1078 status = -ENOENT;
1182 goto bail; 1079 goto bail;
1080 }
1183 1081
1184 /* 1082 /*
1185 * Check for inode number is _not_ due to possible IO errors. 1083 * Check for inode number is _not_ due to possible IO errors.
@@ -1187,8 +1085,10 @@ static int ocfs2_rename(struct inode *old_dir,
1187 * and merrily kill the link to whatever was created under the 1085 * and merrily kill the link to whatever was created under the
1188 * same name. Goodbye sticky bit ;-< 1086 * same name. Goodbye sticky bit ;-<
1189 */ 1087 */
1190 if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno) 1088 if (old_de_ino != OCFS2_I(old_inode)->ip_blkno) {
1089 status = -ENOENT;
1191 goto bail; 1090 goto bail;
1091 }
1192 1092
1193 /* check if the target already exists (in which case we need 1093 /* check if the target already exists (in which case we need
1194 * to delete it */ 1094 * to delete it */
@@ -1321,20 +1221,13 @@ static int ocfs2_rename(struct inode *old_dir,
1321 } 1221 }
1322 1222
1323 /* change the dirent to point to the correct inode */ 1223 /* change the dirent to point to the correct inode */
1324 status = ocfs2_journal_access(handle, new_dir, new_de_bh, 1224 status = ocfs2_update_entry(new_dir, handle, new_de_bh,
1325 OCFS2_JOURNAL_ACCESS_WRITE); 1225 new_de, old_inode);
1326 if (status < 0) { 1226 if (status < 0) {
1327 mlog_errno(status); 1227 mlog_errno(status);
1328 goto bail; 1228 goto bail;
1329 } 1229 }
1330 new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
1331 new_de->file_type = old_de->file_type;
1332 new_dir->i_version++; 1230 new_dir->i_version++;
1333 status = ocfs2_journal_dirty(handle, new_de_bh);
1334 if (status < 0) {
1335 mlog_errno(status);
1336 goto bail;
1337 }
1338 1231
1339 if (S_ISDIR(new_inode->i_mode)) 1232 if (S_ISDIR(new_inode->i_mode))
1340 newfe->i_links_count = 0; 1233 newfe->i_links_count = 0;
@@ -1370,7 +1263,21 @@ static int ocfs2_rename(struct inode *old_dir,
1370 } else 1263 } else
1371 mlog_errno(status); 1264 mlog_errno(status);
1372 1265
1373 /* now that the name has been added to new_dir, remove the old name */ 1266 /*
1267 * Now that the name has been added to new_dir, remove the old name.
1268 *
1269 * We don't keep any directory entry context around until now
1270 * because the insert might have changed the type of directory
1271 * we're dealing with.
1272 */
1273 old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
1274 old_dentry->d_name.len,
1275 old_dir, &old_de);
1276 if (!old_de_bh) {
1277 status = -EIO;
1278 goto bail;
1279 }
1280
1374 status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh); 1281 status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
1375 if (status < 0) { 1282 if (status < 0) {
1376 mlog_errno(status); 1283 mlog_errno(status);
@@ -1383,12 +1290,8 @@ static int ocfs2_rename(struct inode *old_dir,
1383 } 1290 }
1384 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME; 1291 old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1385 if (old_inode_de_bh) { 1292 if (old_inode_de_bh) {
1386 status = ocfs2_journal_access(handle, old_inode, 1293 status = ocfs2_update_entry(old_inode, handle, old_inode_de_bh,
1387 old_inode_de_bh, 1294 old_inode_dot_dot_de, new_dir);
1388 OCFS2_JOURNAL_ACCESS_WRITE);
1389 PARENT_INO(old_inode_de_bh->b_data) =
1390 cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
1391 status = ocfs2_journal_dirty(handle, old_inode_de_bh);
1392 old_dir->i_nlink--; 1295 old_dir->i_nlink--;
1393 if (new_inode) { 1296 if (new_inode) {
1394 new_inode->i_nlink--; 1297 new_inode->i_nlink--;
@@ -1767,329 +1670,6 @@ bail:
1767 return status; 1670 return status;
1768} 1671}
1769 1672
1770int ocfs2_check_dir_entry(struct inode * dir,
1771 struct ocfs2_dir_entry * de,
1772 struct buffer_head * bh,
1773 unsigned long offset)
1774{
1775 const char *error_msg = NULL;
1776 const int rlen = le16_to_cpu(de->rec_len);
1777
1778 if (rlen < OCFS2_DIR_REC_LEN(1))
1779 error_msg = "rec_len is smaller than minimal";
1780 else if (rlen % 4 != 0)
1781 error_msg = "rec_len % 4 != 0";
1782 else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
1783 error_msg = "rec_len is too small for name_len";
1784 else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
1785 error_msg = "directory entry across blocks";
1786
1787 if (error_msg != NULL)
1788 mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
1789 "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
1790 (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
1791 offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
1792 de->name_len);
1793 return error_msg == NULL ? 1 : 0;
1794}
1795
1796/* we don't always have a dentry for what we want to add, so people
1797 * like orphan dir can call this instead.
1798 *
1799 * If you pass me insert_bh, I'll skip the search of the other dir
1800 * blocks and put the record in there.
1801 */
1802static int __ocfs2_add_entry(handle_t *handle,
1803 struct inode *dir,
1804 const char *name, int namelen,
1805 struct inode *inode, u64 blkno,
1806 struct buffer_head *parent_fe_bh,
1807 struct buffer_head *insert_bh)
1808{
1809 unsigned long offset;
1810 unsigned short rec_len;
1811 struct ocfs2_dir_entry *de, *de1;
1812 struct super_block *sb;
1813 int retval, status;
1814
1815 mlog_entry_void();
1816
1817 sb = dir->i_sb;
1818
1819 if (!namelen)
1820 return -EINVAL;
1821
1822 rec_len = OCFS2_DIR_REC_LEN(namelen);
1823 offset = 0;
1824 de = (struct ocfs2_dir_entry *) insert_bh->b_data;
1825 while (1) {
1826 BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
1827 /* These checks should've already been passed by the
1828 * prepare function, but I guess we can leave them
1829 * here anyway. */
1830 if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
1831 retval = -ENOENT;
1832 goto bail;
1833 }
1834 if (ocfs2_match(namelen, name, de)) {
1835 retval = -EEXIST;
1836 goto bail;
1837 }
1838 if (((le64_to_cpu(de->inode) == 0) &&
1839 (le16_to_cpu(de->rec_len) >= rec_len)) ||
1840 (le16_to_cpu(de->rec_len) >=
1841 (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
1842 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1843 retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
1844 if (retval < 0) {
1845 mlog_errno(retval);
1846 goto bail;
1847 }
1848
1849 status = ocfs2_journal_access(handle, dir, insert_bh,
1850 OCFS2_JOURNAL_ACCESS_WRITE);
1851 /* By now the buffer is marked for journaling */
1852 offset += le16_to_cpu(de->rec_len);
1853 if (le64_to_cpu(de->inode)) {
1854 de1 = (struct ocfs2_dir_entry *)((char *) de +
1855 OCFS2_DIR_REC_LEN(de->name_len));
1856 de1->rec_len =
1857 cpu_to_le16(le16_to_cpu(de->rec_len) -
1858 OCFS2_DIR_REC_LEN(de->name_len));
1859 de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1860 de = de1;
1861 }
1862 de->file_type = OCFS2_FT_UNKNOWN;
1863 if (blkno) {
1864 de->inode = cpu_to_le64(blkno);
1865 ocfs2_set_de_type(de, inode->i_mode);
1866 } else
1867 de->inode = 0;
1868 de->name_len = namelen;
1869 memcpy(de->name, name, namelen);
1870
1871 dir->i_version++;
1872 status = ocfs2_journal_dirty(handle, insert_bh);
1873 retval = 0;
1874 goto bail;
1875 }
1876 offset += le16_to_cpu(de->rec_len);
1877 de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1878 }
1879
1880 /* when you think about it, the assert above should prevent us
1881 * from ever getting here. */
1882 retval = -ENOSPC;
1883bail:
1884
1885 mlog_exit(retval);
1886 return retval;
1887}
1888
1889
1890/*
1891 * ocfs2_delete_entry deletes a directory entry by merging it with the
1892 * previous entry
1893 */
1894static int ocfs2_delete_entry(handle_t *handle,
1895 struct inode *dir,
1896 struct ocfs2_dir_entry *de_del,
1897 struct buffer_head *bh)
1898{
1899 struct ocfs2_dir_entry *de, *pde;
1900 int i, status = -ENOENT;
1901
1902 mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1903
1904 i = 0;
1905 pde = NULL;
1906 de = (struct ocfs2_dir_entry *) bh->b_data;
1907 while (i < bh->b_size) {
1908 if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
1909 status = -EIO;
1910 mlog_errno(status);
1911 goto bail;
1912 }
1913 if (de == de_del) {
1914 status = ocfs2_journal_access(handle, dir, bh,
1915 OCFS2_JOURNAL_ACCESS_WRITE);
1916 if (status < 0) {
1917 status = -EIO;
1918 mlog_errno(status);
1919 goto bail;
1920 }
1921 if (pde)
1922 pde->rec_len =
1923 cpu_to_le16(le16_to_cpu(pde->rec_len) +
1924 le16_to_cpu(de->rec_len));
1925 else
1926 de->inode = 0;
1927 dir->i_version++;
1928 status = ocfs2_journal_dirty(handle, bh);
1929 goto bail;
1930 }
1931 i += le16_to_cpu(de->rec_len);
1932 pde = de;
1933 de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1934 }
1935bail:
1936 mlog_exit(status);
1937 return status;
1938}
1939
1940/*
1941 * Returns 0 if not found, -1 on failure, and 1 on success
1942 */
1943static int inline ocfs2_search_dirblock(struct buffer_head *bh,
1944 struct inode *dir,
1945 const char *name, int namelen,
1946 unsigned long offset,
1947 struct ocfs2_dir_entry **res_dir)
1948{
1949 struct ocfs2_dir_entry *de;
1950 char *dlimit, *de_buf;
1951 int de_len;
1952 int ret = 0;
1953
1954 mlog_entry_void();
1955
1956 de_buf = bh->b_data;
1957 dlimit = de_buf + dir->i_sb->s_blocksize;
1958
1959 while (de_buf < dlimit) {
1960 /* this code is executed quadratically often */
1961 /* do minimal checking `by hand' */
1962
1963 de = (struct ocfs2_dir_entry *) de_buf;
1964
1965 if (de_buf + namelen <= dlimit &&
1966 ocfs2_match(namelen, name, de)) {
1967 /* found a match - just to be sure, do a full check */
1968 if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
1969 ret = -1;
1970 goto bail;
1971 }
1972 *res_dir = de;
1973 ret = 1;
1974 goto bail;
1975 }
1976
1977 /* prevent looping on a bad block */
1978 de_len = le16_to_cpu(de->rec_len);
1979 if (de_len <= 0) {
1980 ret = -1;
1981 goto bail;
1982 }
1983
1984 de_buf += de_len;
1985 offset += de_len;
1986 }
1987
1988bail:
1989 mlog_exit(ret);
1990 return ret;
1991}
1992
1993struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
1994 struct inode *dir,
1995 struct ocfs2_dir_entry **res_dir)
1996{
1997 struct super_block *sb;
1998 struct buffer_head *bh_use[NAMEI_RA_SIZE];
1999 struct buffer_head *bh, *ret = NULL;
2000 unsigned long start, block, b;
2001 int ra_max = 0; /* Number of bh's in the readahead
2002 buffer, bh_use[] */
2003 int ra_ptr = 0; /* Current index into readahead
2004 buffer */
2005 int num = 0;
2006 int nblocks, i, err;
2007
2008 mlog_entry_void();
2009
2010 *res_dir = NULL;
2011 sb = dir->i_sb;
2012
2013 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
2014 start = OCFS2_I(dir)->ip_dir_start_lookup;
2015 if (start >= nblocks)
2016 start = 0;
2017 block = start;
2018
2019restart:
2020 do {
2021 /*
2022 * We deal with the read-ahead logic here.
2023 */
2024 if (ra_ptr >= ra_max) {
2025 /* Refill the readahead buffer */
2026 ra_ptr = 0;
2027 b = block;
2028 for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
2029 /*
2030 * Terminate if we reach the end of the
2031 * directory and must wrap, or if our
2032 * search has finished at this block.
2033 */
2034 if (b >= nblocks || (num && block == start)) {
2035 bh_use[ra_max] = NULL;
2036 break;
2037 }
2038 num++;
2039
2040 bh = ocfs2_bread(dir, b++, &err, 1);
2041 bh_use[ra_max] = bh;
2042 }
2043 }
2044 if ((bh = bh_use[ra_ptr++]) == NULL)
2045 goto next;
2046 wait_on_buffer(bh);
2047 if (!buffer_uptodate(bh)) {
2048 /* read error, skip block & hope for the best */
2049 ocfs2_error(dir->i_sb, "reading directory %llu, "
2050 "offset %lu\n",
2051 (unsigned long long)OCFS2_I(dir)->ip_blkno,
2052 block);
2053 brelse(bh);
2054 goto next;
2055 }
2056 i = ocfs2_search_dirblock(bh, dir, name, namelen,
2057 block << sb->s_blocksize_bits,
2058 res_dir);
2059 if (i == 1) {
2060 OCFS2_I(dir)->ip_dir_start_lookup = block;
2061 ret = bh;
2062 goto cleanup_and_exit;
2063 } else {
2064 brelse(bh);
2065 if (i < 0)
2066 goto cleanup_and_exit;
2067 }
2068 next:
2069 if (++block >= nblocks)
2070 block = 0;
2071 } while (block != start);
2072
2073 /*
2074 * If the directory has grown while we were searching, then
2075 * search the last part of the directory before giving up.
2076 */
2077 block = nblocks;
2078 nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
2079 if (block < nblocks) {
2080 start = 0;
2081 goto restart;
2082 }
2083
2084cleanup_and_exit:
2085 /* Clean up the read-ahead blocks */
2086 for (; ra_ptr < ra_max; ra_ptr++)
2087 brelse(bh_use[ra_ptr]);
2088
2089 mlog_exit_ptr(ret);
2090 return ret;
2091}
2092
2093static int ocfs2_blkno_stringify(u64 blkno, char *name) 1673static int ocfs2_blkno_stringify(u64 blkno, char *name)
2094{ 1674{
2095 int status, namelen; 1675 int status, namelen;
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 0975c7b7212b..688aef64c879 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -30,29 +30,10 @@ extern const struct inode_operations ocfs2_dir_iops;
30 30
31struct dentry *ocfs2_get_parent(struct dentry *child); 31struct dentry *ocfs2_get_parent(struct dentry *child);
32 32
33int ocfs2_check_dir_entry (struct inode *dir,
34 struct ocfs2_dir_entry *de,
35 struct buffer_head *bh,
36 unsigned long offset);
37struct buffer_head *ocfs2_find_entry(const char *name,
38 int namelen,
39 struct inode *dir,
40 struct ocfs2_dir_entry **res_dir);
41int ocfs2_orphan_del(struct ocfs2_super *osb, 33int ocfs2_orphan_del(struct ocfs2_super *osb,
42 handle_t *handle, 34 handle_t *handle,
43 struct inode *orphan_dir_inode, 35 struct inode *orphan_dir_inode,
44 struct inode *inode, 36 struct inode *inode,
45 struct buffer_head *orphan_dir_bh); 37 struct buffer_head *orphan_dir_bh);
46 38
47static inline int ocfs2_match(int len,
48 const char * const name,
49 struct ocfs2_dir_entry *de)
50{
51 if (len != de->name_len)
52 return 0;
53 if (!de->inode)
54 return 0;
55 return !memcmp(name, de->name, len);
56}
57
58#endif /* OCFS2_NAMEI_H */ 39#endif /* OCFS2_NAMEI_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 58307853fb4a..60a23e1906b0 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -319,6 +319,13 @@ static inline int ocfs2_writes_unwritten_extents(struct ocfs2_super *osb)
319 return 0; 319 return 0;
320} 320}
321 321
322static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
323{
324 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
325 return 1;
326 return 0;
327}
328
322/* set / clear functions because cluster events can make these happen 329/* set / clear functions because cluster events can make these happen
323 * in parallel so we want the transitions to be atomic. this also 330 * in parallel so we want the transitions to be atomic. this also
324 * means that any future flags osb_flags must be protected by spinlock 331 * means that any future flags osb_flags must be protected by spinlock
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 82f8a75b207e..6ef876759a73 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -87,7 +87,8 @@
87 87
88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB 88#define OCFS2_FEATURE_COMPAT_SUPP OCFS2_FEATURE_COMPAT_BACKUP_SB
89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \ 89#define OCFS2_FEATURE_INCOMPAT_SUPP (OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT \
90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC) 90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
91 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA)
91#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 92#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
92 93
93/* 94/*
@@ -111,6 +112,20 @@
111#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010 112#define OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC 0x0010
112 113
113/* 114/*
115 * Tunefs sets this incompat flag before starting an operation which
116 * would require cleanup on abort. This is done to protect users from
117 * inadvertently mounting the fs after an aborted run without
118 * fsck-ing.
119 *
120 * s_tunefs_flags on the super block describes precisely which
121 * operations were in progress.
122 */
123#define OCFS2_FEATURE_INCOMPAT_TUNEFS_INPROG 0x0020
124
125/* Support for data packed into inode blocks */
126#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
127
128/*
114 * backup superblock flag is used to indicate that this volume 129 * backup superblock flag is used to indicate that this volume
115 * has backup superblocks. 130 * has backup superblocks.
116 */ 131 */
@@ -130,6 +145,11 @@
130#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6 145#define OCFS2_MAX_BACKUP_SUPERBLOCKS 6
131 146
132/* 147/*
148 * Flags on ocfs2_super_block.s_tunefs_flags
149 */
150#define OCFS2_TUNEFS_INPROG_REMOVE_SLOT 0x0001 /* Removing slots */
151
152/*
133 * Flags on ocfs2_dinode.i_flags 153 * Flags on ocfs2_dinode.i_flags
134 */ 154 */
135#define OCFS2_VALID_FL (0x00000001) /* Inode is valid */ 155#define OCFS2_VALID_FL (0x00000001) /* Inode is valid */
@@ -146,6 +166,17 @@
146#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */ 166#define OCFS2_CHAIN_FL (0x00000400) /* Chain allocator */
147#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */ 167#define OCFS2_DEALLOC_FL (0x00000800) /* Truncate log */
148 168
169/*
170 * Flags on ocfs2_dinode.i_dyn_features
171 *
172 * These can change much more often than i_flags. When adding flags,
173 * keep in mind that i_dyn_features is only 16 bits wide.
174 */
175#define OCFS2_INLINE_DATA_FL (0x0001) /* Data stored in inode block */
176#define OCFS2_HAS_XATTR_FL (0x0002)
177#define OCFS2_INLINE_XATTR_FL (0x0004)
178#define OCFS2_INDEXED_DIR_FL (0x0008)
179
149/* Inode attributes, keep in sync with EXT2 */ 180/* Inode attributes, keep in sync with EXT2 */
150#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ 181#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
151#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ 182#define OCFS2_UNRM_FL (0x00000002) /* Undelete */
@@ -447,8 +478,8 @@ struct ocfs2_super_block {
447 __le32 s_clustersize_bits; /* Clustersize for this fs */ 478 __le32 s_clustersize_bits; /* Clustersize for this fs */
448/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts 479/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
449 before tunefs required */ 480 before tunefs required */
450 __le16 s_reserved1; 481 __le16 s_tunefs_flag;
451 __le32 s_reserved2; 482 __le32 s_reserved1;
452 __le64 s_first_cluster_group; /* Block offset of 1st cluster 483 __le64 s_first_cluster_group; /* Block offset of 1st cluster
453 * group header */ 484 * group header */
454/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ 485/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
@@ -471,6 +502,19 @@ struct ocfs2_local_alloc
471}; 502};
472 503
473/* 504/*
505 * Data-in-inode header. This is only used if i_dyn_features has
506 * OCFS2_INLINE_DATA_FL set.
507 */
508struct ocfs2_inline_data
509{
510/*00*/ __le16 id_count; /* Number of bytes that can be used
511 * for data, starting at id_data */
512 __le16 id_reserved0;
513 __le32 id_reserved1;
514 __u8 id_data[0]; /* Start of user data */
515};
516
517/*
474 * On disk inode for OCFS2 518 * On disk inode for OCFS2
475 */ 519 */
476struct ocfs2_dinode { 520struct ocfs2_dinode {
@@ -502,7 +546,7 @@ struct ocfs2_dinode {
502 __le32 i_attr; 546 __le32 i_attr;
503 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL 547 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
504 was set in i_flags */ 548 was set in i_flags */
505 __le16 i_reserved1; 549 __le16 i_dyn_features;
506/*70*/ __le64 i_reserved2[8]; 550/*70*/ __le64 i_reserved2[8];
507/*B8*/ union { 551/*B8*/ union {
508 __le64 i_pad1; /* Generic way to refer to this 552 __le64 i_pad1; /* Generic way to refer to this
@@ -528,6 +572,7 @@ struct ocfs2_dinode {
528 struct ocfs2_chain_list i_chain; 572 struct ocfs2_chain_list i_chain;
529 struct ocfs2_extent_list i_list; 573 struct ocfs2_extent_list i_list;
530 struct ocfs2_truncate_log i_dealloc; 574 struct ocfs2_truncate_log i_dealloc;
575 struct ocfs2_inline_data i_data;
531 __u8 i_symlink[0]; 576 __u8 i_symlink[0];
532 } id2; 577 } id2;
533/* Actual on-disk size is one block */ 578/* Actual on-disk size is one block */
@@ -577,6 +622,12 @@ static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
577 offsetof(struct ocfs2_dinode, id2.i_symlink); 622 offsetof(struct ocfs2_dinode, id2.i_symlink);
578} 623}
579 624
625static inline int ocfs2_max_inline_data(struct super_block *sb)
626{
627 return sb->s_blocksize -
628 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
629}
630
580static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) 631static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
581{ 632{
582 int size; 633 int size;
@@ -656,6 +707,11 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
656 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink); 707 return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
657} 708}
658 709
710static inline int ocfs2_max_inline_data(int blocksize)
711{
712 return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data);
713}
714
659static inline int ocfs2_extent_recs_per_inode(int blocksize) 715static inline int ocfs2_extent_recs_per_inode(int blocksize)
660{ 716{
661 int size; 717 int size;
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index c034b5129c1e..0e2a1b45bf92 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -39,6 +39,7 @@
39#include <linux/parser.h> 39#include <linux/parser.h>
40#include <linux/crc32.h> 40#include <linux/crc32.h>
41#include <linux/debugfs.h> 41#include <linux/debugfs.h>
42#include <linux/mount.h>
42 43
43#include <cluster/nodemanager.h> 44#include <cluster/nodemanager.h>
44 45
@@ -91,6 +92,7 @@ struct mount_options
91static int ocfs2_parse_options(struct super_block *sb, char *options, 92static int ocfs2_parse_options(struct super_block *sb, char *options,
92 struct mount_options *mopt, 93 struct mount_options *mopt,
93 int is_remount); 94 int is_remount);
95static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
94static void ocfs2_put_super(struct super_block *sb); 96static void ocfs2_put_super(struct super_block *sb);
95static int ocfs2_mount_volume(struct super_block *sb); 97static int ocfs2_mount_volume(struct super_block *sb);
96static int ocfs2_remount(struct super_block *sb, int *flags, char *data); 98static int ocfs2_remount(struct super_block *sb, int *flags, char *data);
@@ -105,7 +107,7 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait);
105 107
106static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb); 108static int ocfs2_init_global_system_inodes(struct ocfs2_super *osb);
107static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb); 109static int ocfs2_init_local_system_inodes(struct ocfs2_super *osb);
108static int ocfs2_release_system_inodes(struct ocfs2_super *osb); 110static void ocfs2_release_system_inodes(struct ocfs2_super *osb);
109static int ocfs2_fill_local_node_info(struct ocfs2_super *osb); 111static int ocfs2_fill_local_node_info(struct ocfs2_super *osb);
110static int ocfs2_check_volume(struct ocfs2_super *osb); 112static int ocfs2_check_volume(struct ocfs2_super *osb);
111static int ocfs2_verify_volume(struct ocfs2_dinode *di, 113static int ocfs2_verify_volume(struct ocfs2_dinode *di,
@@ -133,6 +135,7 @@ static const struct super_operations ocfs2_sops = {
133 .write_super = ocfs2_write_super, 135 .write_super = ocfs2_write_super,
134 .put_super = ocfs2_put_super, 136 .put_super = ocfs2_put_super,
135 .remount_fs = ocfs2_remount, 137 .remount_fs = ocfs2_remount,
138 .show_options = ocfs2_show_options,
136}; 139};
137 140
138enum { 141enum {
@@ -177,7 +180,7 @@ static void ocfs2_write_super(struct super_block *sb)
177 180
178static int ocfs2_sync_fs(struct super_block *sb, int wait) 181static int ocfs2_sync_fs(struct super_block *sb, int wait)
179{ 182{
180 int status = 0; 183 int status;
181 tid_t target; 184 tid_t target;
182 struct ocfs2_super *osb = OCFS2_SB(sb); 185 struct ocfs2_super *osb = OCFS2_SB(sb);
183 186
@@ -275,9 +278,9 @@ bail:
275 return status; 278 return status;
276} 279}
277 280
278static int ocfs2_release_system_inodes(struct ocfs2_super *osb) 281static void ocfs2_release_system_inodes(struct ocfs2_super *osb)
279{ 282{
280 int status = 0, i; 283 int i;
281 struct inode *inode; 284 struct inode *inode;
282 285
283 mlog_entry_void(); 286 mlog_entry_void();
@@ -302,8 +305,7 @@ static int ocfs2_release_system_inodes(struct ocfs2_super *osb)
302 osb->root_inode = NULL; 305 osb->root_inode = NULL;
303 } 306 }
304 307
305 mlog_exit(status); 308 mlog_exit(0);
306 return status;
307} 309}
308 310
309/* We're allocating fs objects, use GFP_NOFS */ 311/* We're allocating fs objects, use GFP_NOFS */
@@ -453,7 +455,7 @@ static int ocfs2_sb_probe(struct super_block *sb,
453 struct buffer_head **bh, 455 struct buffer_head **bh,
454 int *sector_size) 456 int *sector_size)
455{ 457{
456 int status = 0, tmpstat; 458 int status, tmpstat;
457 struct ocfs1_vol_disk_hdr *hdr; 459 struct ocfs1_vol_disk_hdr *hdr;
458 struct ocfs2_dinode *di; 460 struct ocfs2_dinode *di;
459 int blksize; 461 int blksize;
@@ -830,6 +832,41 @@ bail:
830 return status; 832 return status;
831} 833}
832 834
835static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
836{
837 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
838 unsigned long opts = osb->s_mount_opt;
839
840 if (opts & OCFS2_MOUNT_HB_LOCAL)
841 seq_printf(s, ",_netdev,heartbeat=local");
842 else
843 seq_printf(s, ",heartbeat=none");
844
845 if (opts & OCFS2_MOUNT_NOINTR)
846 seq_printf(s, ",nointr");
847
848 if (opts & OCFS2_MOUNT_DATA_WRITEBACK)
849 seq_printf(s, ",data=writeback");
850 else
851 seq_printf(s, ",data=ordered");
852
853 if (opts & OCFS2_MOUNT_BARRIER)
854 seq_printf(s, ",barrier=1");
855
856 if (opts & OCFS2_MOUNT_ERRORS_PANIC)
857 seq_printf(s, ",errors=panic");
858 else
859 seq_printf(s, ",errors=remount-ro");
860
861 if (osb->preferred_slot != OCFS2_INVALID_SLOT)
862 seq_printf(s, ",preferred_slot=%d", osb->preferred_slot);
863
864 if (osb->s_atime_quantum != OCFS2_DEFAULT_ATIME_QUANTUM)
865 seq_printf(s, ",atime_quantum=%u", osb->s_atime_quantum);
866
867 return 0;
868}
869
833static int __init ocfs2_init(void) 870static int __init ocfs2_init(void)
834{ 871{
835 int status; 872 int status;
@@ -1209,12 +1246,13 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1209 tmp = ocfs2_request_umount_vote(osb); 1246 tmp = ocfs2_request_umount_vote(osb);
1210 if (tmp < 0) 1247 if (tmp < 0)
1211 mlog_errno(tmp); 1248 mlog_errno(tmp);
1249 }
1212 1250
1213 if (osb->slot_num != OCFS2_INVALID_SLOT) 1251 if (osb->slot_num != OCFS2_INVALID_SLOT)
1214 ocfs2_put_slot(osb); 1252 ocfs2_put_slot(osb);
1215 1253
1254 if (osb->dlm)
1216 ocfs2_super_unlock(osb, 1); 1255 ocfs2_super_unlock(osb, 1);
1217 }
1218 1256
1219 ocfs2_release_system_inodes(osb); 1257 ocfs2_release_system_inodes(osb);
1220 1258
@@ -1275,7 +1313,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1275 struct buffer_head *bh, 1313 struct buffer_head *bh,
1276 int sector_size) 1314 int sector_size)
1277{ 1315{
1278 int status = 0; 1316 int status;
1279 int i, cbits, bbits; 1317 int i, cbits, bbits;
1280 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; 1318 struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1281 struct inode *inode = NULL; 1319 struct inode *inode = NULL;
@@ -1596,7 +1634,7 @@ static int ocfs2_verify_volume(struct ocfs2_dinode *di,
1596 1634
1597static int ocfs2_check_volume(struct ocfs2_super *osb) 1635static int ocfs2_check_volume(struct ocfs2_super *osb)
1598{ 1636{
1599 int status = 0; 1637 int status;
1600 int dirty; 1638 int dirty;
1601 int local; 1639 int local;
1602 struct ocfs2_dinode *local_alloc = NULL; /* only used if we 1640 struct ocfs2_dinode *local_alloc = NULL; /* only used if we
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c
index 5df6e35d09b1..fd2e846e3e6f 100644
--- a/fs/ocfs2/sysfile.c
+++ b/fs/ocfs2/sysfile.c
@@ -100,17 +100,14 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
100 char namebuf[40]; 100 char namebuf[40];
101 struct inode *inode = NULL; 101 struct inode *inode = NULL;
102 u64 blkno; 102 u64 blkno;
103 struct buffer_head *dirent_bh = NULL;
104 struct ocfs2_dir_entry *de = NULL;
105 int status = 0; 103 int status = 0;
106 104
107 ocfs2_sprintf_system_inode_name(namebuf, 105 ocfs2_sprintf_system_inode_name(namebuf,
108 sizeof(namebuf), 106 sizeof(namebuf),
109 type, slot); 107 type, slot);
110 108
111 status = ocfs2_find_files_on_disk(namebuf, strlen(namebuf), 109 status = ocfs2_lookup_ino_from_name(osb->sys_root_inode, namebuf,
112 &blkno, osb->sys_root_inode, 110 strlen(namebuf), &blkno);
113 &dirent_bh, &de);
114 if (status < 0) { 111 if (status < 0) {
115 goto bail; 112 goto bail;
116 } 113 }
@@ -122,8 +119,7 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb,
122 goto bail; 119 goto bail;
123 } 120 }
124bail: 121bail:
125 if (dirent_bh) 122
126 brelse(dirent_bh);
127 return inode; 123 return inode;
128} 124}
129 125