aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-05-21 10:20:17 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-05-21 10:20:17 -0400
commit 03e62303cf56e87337115f14842321043df2b4bb (patch)
tree 3024495955beccddbae347d99613bcdd33801ee4 /fs
parent 33cf23b0a535475aead57707cb9f4fe135a93544 (diff)
parent 18d3a98f3c1b0e27ce026afa4d1ef042f2903726 (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (47 commits)
  ocfs2: Silence a gcc warning.
  ocfs2: Don't retry xattr set in case value extension fails.
  ocfs2:dlm: avoid dlm->ast_lock lockres->spinlock dependency break
  ocfs2: Reset xattr value size after xa_cleanup_value_truncate().
  fs/ocfs2/dlm: Use kstrdup
  fs/ocfs2/dlm: Drop memory allocation cast
  Ocfs2: Optimize punching-hole code.
  Ocfs2: Make ocfs2_find_cpos_for_left_leaf() public.
  Ocfs2: Fix hole punching to correctly do CoW during cluster zeroing.
  Ocfs2: Optimize ocfs2 truncate to use ocfs2_remove_btree_range() instead.
  ocfs2: Block signals for mkdir/link/symlink/O_CREAT.
  ocfs2: Wrap signal blocking in void functions.
  ocfs2/dlm: Increase o2dlm lockres hash size
  ocfs2: Make ocfs2_extend_trans() really extend.
  ocfs2/trivial: Code cleanup for allocation reservation.
  ocfs2: make ocfs2_adjust_resv_from_alloc simple.
  ocfs2: Make nointr a default mount option
  ocfs2/dlm: Make o2dlm domain join/leave messages KERN_NOTICE
  o2net: log socket state changes
  ocfs2: print node # when tcp fails
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/Makefile 1
-rw-r--r-- fs/ocfs2/alloc.c 908
-rw-r--r-- fs/ocfs2/alloc.h 12
-rw-r--r-- fs/ocfs2/aops.c 3
-rw-r--r-- fs/ocfs2/cluster/masklog.c 1
-rw-r--r-- fs/ocfs2/cluster/masklog.h 1
-rw-r--r-- fs/ocfs2/cluster/tcp.c 3
-rw-r--r-- fs/ocfs2/dir.c 75
-rw-r--r-- fs/ocfs2/dlm/dlmast.c 8
-rw-r--r-- fs/ocfs2/dlm/dlmcommon.h 4
-rw-r--r-- fs/ocfs2/dlm/dlmconvert.c 4
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c 28
-rw-r--r-- fs/ocfs2/dlm/dlmlock.c 6
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c 30
-rw-r--r-- fs/ocfs2/dlm/dlmrecovery.c 27
-rw-r--r-- fs/ocfs2/dlm/dlmthread.c 16
-rw-r--r-- fs/ocfs2/dlm/dlmunlock.c 3
-rw-r--r-- fs/ocfs2/file.c 215
-rw-r--r-- fs/ocfs2/inode.c 45
-rw-r--r-- fs/ocfs2/inode.h 2
-rw-r--r-- fs/ocfs2/journal.c 26
-rw-r--r-- fs/ocfs2/journal.h 15
-rw-r--r-- fs/ocfs2/localalloc.c 275
-rw-r--r-- fs/ocfs2/localalloc.h 3
-rw-r--r-- fs/ocfs2/mmap.c 48
-rw-r--r-- fs/ocfs2/namei.c 91
-rw-r--r-- fs/ocfs2/ocfs2.h 22
-rw-r--r-- fs/ocfs2/ocfs2_fs.h 144
-rw-r--r-- fs/ocfs2/quota_global.c 4
-rw-r--r-- fs/ocfs2/quota_local.c 50
-rw-r--r-- fs/ocfs2/refcounttree.c 74
-rw-r--r-- fs/ocfs2/refcounttree.h 4
-rw-r--r-- fs/ocfs2/reservations.c 847
-rw-r--r-- fs/ocfs2/reservations.h 159
-rw-r--r-- fs/ocfs2/resize.c 19
-rw-r--r-- fs/ocfs2/suballoc.c 688
-rw-r--r-- fs/ocfs2/suballoc.h 21
-rw-r--r-- fs/ocfs2/super.c 88
-rw-r--r-- fs/ocfs2/super.h 7
-rw-r--r-- fs/ocfs2/xattr.c 103
40 files changed, 2589 insertions, 1491 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 791c0886c060..07d9fd854350 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -29,6 +29,7 @@ ocfs2-objs := \
29 mmap.o \ 29 mmap.o \
30 namei.o \ 30 namei.o \
31 refcounttree.o \ 31 refcounttree.o \
32 reservations.o \
32 resize.o \ 33 resize.o \
33 slot_map.o \ 34 slot_map.o \
34 suballoc.o \ 35 suballoc.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 9f8bd913c51e..215e12ce1d85 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1006,7 +1006,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1006 int count, status, i; 1006 int count, status, i;
1007 u16 suballoc_bit_start; 1007 u16 suballoc_bit_start;
1008 u32 num_got; 1008 u32 num_got;
1009 u64 first_blkno; 1009 u64 suballoc_loc, first_blkno;
1010 struct ocfs2_super *osb = 1010 struct ocfs2_super *osb =
1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci)); 1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
1012 struct ocfs2_extent_block *eb; 1012 struct ocfs2_extent_block *eb;
@@ -1015,10 +1015,10 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1015 1015
1016 count = 0; 1016 count = 0;
1017 while (count < wanted) { 1017 while (count < wanted) {
1018 status = ocfs2_claim_metadata(osb, 1018 status = ocfs2_claim_metadata(handle,
1019 handle,
1020 meta_ac, 1019 meta_ac,
1021 wanted - count, 1020 wanted - count,
1021 &suballoc_loc,
1022 &suballoc_bit_start, 1022 &suballoc_bit_start,
1023 &num_got, 1023 &num_got,
1024 &first_blkno); 1024 &first_blkno);
@@ -1052,6 +1052,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1052 eb->h_fs_generation = cpu_to_le32(osb->fs_generation); 1052 eb->h_fs_generation = cpu_to_le32(osb->fs_generation);
1053 eb->h_suballoc_slot = 1053 eb->h_suballoc_slot =
1054 cpu_to_le16(meta_ac->ac_alloc_slot); 1054 cpu_to_le16(meta_ac->ac_alloc_slot);
1055 eb->h_suballoc_loc = cpu_to_le64(suballoc_loc);
1055 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1056 eb->h_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1056 eb->h_list.l_count = 1057 eb->h_list.l_count =
1057 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb)); 1058 cpu_to_le16(ocfs2_extent_recs_per_eb(osb->sb));
@@ -1061,11 +1062,7 @@ static int ocfs2_create_new_meta_bhs(handle_t *handle,
1061 1062
1062 /* We'll also be dirtied by the caller, so 1063 /* We'll also be dirtied by the caller, so
1063 * this isn't absolutely necessary. */ 1064 * this isn't absolutely necessary. */
1064 status = ocfs2_journal_dirty(handle, bhs[i]); 1065 ocfs2_journal_dirty(handle, bhs[i]);
1065 if (status < 0) {
1066 mlog_errno(status);
1067 goto bail;
1068 }
1069 } 1066 }
1070 1067
1071 count += num_got; 1068 count += num_got;
@@ -1129,8 +1126,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1129 goto out; 1126 goto out;
1130 } 1127 }
1131 1128
1132 status = ocfs2_extend_trans(handle, path_num_items(path) + 1129 status = ocfs2_extend_trans(handle, path_num_items(path));
1133 handle->h_buffer_credits);
1134 if (status < 0) { 1130 if (status < 0) {
1135 mlog_errno(status); 1131 mlog_errno(status);
1136 goto out; 1132 goto out;
@@ -1270,12 +1266,7 @@ static int ocfs2_add_branch(handle_t *handle,
1270 if (!eb_el->l_tree_depth) 1266 if (!eb_el->l_tree_depth)
1271 new_last_eb_blk = le64_to_cpu(eb->h_blkno); 1267 new_last_eb_blk = le64_to_cpu(eb->h_blkno);
1272 1268
1273 status = ocfs2_journal_dirty(handle, bh); 1269 ocfs2_journal_dirty(handle, bh);
1274 if (status < 0) {
1275 mlog_errno(status);
1276 goto bail;
1277 }
1278
1279 next_blkno = le64_to_cpu(eb->h_blkno); 1270 next_blkno = le64_to_cpu(eb->h_blkno);
1280 } 1271 }
1281 1272
@@ -1321,17 +1312,10 @@ static int ocfs2_add_branch(handle_t *handle,
1321 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; 1312 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
1322 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 1313 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
1323 1314
1324 status = ocfs2_journal_dirty(handle, *last_eb_bh); 1315 ocfs2_journal_dirty(handle, *last_eb_bh);
1325 if (status < 0) 1316 ocfs2_journal_dirty(handle, et->et_root_bh);
1326 mlog_errno(status); 1317 if (eb_bh)
1327 status = ocfs2_journal_dirty(handle, et->et_root_bh); 1318 ocfs2_journal_dirty(handle, eb_bh);
1328 if (status < 0)
1329 mlog_errno(status);
1330 if (eb_bh) {
1331 status = ocfs2_journal_dirty(handle, eb_bh);
1332 if (status < 0)
1333 mlog_errno(status);
1334 }
1335 1319
1336 /* 1320 /*
1337 * Some callers want to track the rightmost leaf so pass it 1321 * Some callers want to track the rightmost leaf so pass it
@@ -1399,11 +1383,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1399 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) 1383 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
1400 eb_el->l_recs[i] = root_el->l_recs[i]; 1384 eb_el->l_recs[i] = root_el->l_recs[i];
1401 1385
1402 status = ocfs2_journal_dirty(handle, new_eb_bh); 1386 ocfs2_journal_dirty(handle, new_eb_bh);
1403 if (status < 0) {
1404 mlog_errno(status);
1405 goto bail;
1406 }
1407 1387
1408 status = ocfs2_et_root_journal_access(handle, et, 1388 status = ocfs2_et_root_journal_access(handle, et,
1409 OCFS2_JOURNAL_ACCESS_WRITE); 1389 OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1428,11 +1408,7 @@ static int ocfs2_shift_tree_depth(handle_t *handle,
1428 if (root_el->l_tree_depth == cpu_to_le16(1)) 1408 if (root_el->l_tree_depth == cpu_to_le16(1))
1429 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); 1409 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
1430 1410
1431 status = ocfs2_journal_dirty(handle, et->et_root_bh); 1411 ocfs2_journal_dirty(handle, et->et_root_bh);
1432 if (status < 0) {
1433 mlog_errno(status);
1434 goto bail;
1435 }
1436 1412
1437 *ret_new_eb_bh = new_eb_bh; 1413 *ret_new_eb_bh = new_eb_bh;
1438 new_eb_bh = NULL; 1414 new_eb_bh = NULL;
@@ -2064,7 +2040,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2064 struct ocfs2_path *right_path, 2040 struct ocfs2_path *right_path,
2065 int subtree_index) 2041 int subtree_index)
2066{ 2042{
2067 int ret, i, idx; 2043 int i, idx;
2068 struct ocfs2_extent_list *el, *left_el, *right_el; 2044 struct ocfs2_extent_list *el, *left_el, *right_el;
2069 struct ocfs2_extent_rec *left_rec, *right_rec; 2045 struct ocfs2_extent_rec *left_rec, *right_rec;
2070 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh; 2046 struct buffer_head *root_bh = left_path->p_node[subtree_index].bh;
@@ -2102,13 +2078,8 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2102 ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec, 2078 ocfs2_adjust_adjacent_records(left_rec, left_el, right_rec,
2103 right_el); 2079 right_el);
2104 2080
2105 ret = ocfs2_journal_dirty(handle, left_path->p_node[i].bh); 2081 ocfs2_journal_dirty(handle, left_path->p_node[i].bh);
2106 if (ret) 2082 ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
2107 mlog_errno(ret);
2108
2109 ret = ocfs2_journal_dirty(handle, right_path->p_node[i].bh);
2110 if (ret)
2111 mlog_errno(ret);
2112 2083
2113 /* 2084 /*
2114 * Setup our list pointers now so that the current 2085 * Setup our list pointers now so that the current
@@ -2132,9 +2103,7 @@ static void ocfs2_complete_edge_insert(handle_t *handle,
2132 2103
2133 root_bh = left_path->p_node[subtree_index].bh; 2104 root_bh = left_path->p_node[subtree_index].bh;
2134 2105
2135 ret = ocfs2_journal_dirty(handle, root_bh); 2106 ocfs2_journal_dirty(handle, root_bh);
2136 if (ret)
2137 mlog_errno(ret);
2138} 2107}
2139 2108
2140static int ocfs2_rotate_subtree_right(handle_t *handle, 2109static int ocfs2_rotate_subtree_right(handle_t *handle,
@@ -2207,11 +2176,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle,
2207 2176
2208 ocfs2_create_empty_extent(right_el); 2177 ocfs2_create_empty_extent(right_el);
2209 2178
2210 ret = ocfs2_journal_dirty(handle, right_leaf_bh); 2179 ocfs2_journal_dirty(handle, right_leaf_bh);
2211 if (ret) {
2212 mlog_errno(ret);
2213 goto out;
2214 }
2215 2180
2216 /* Do the copy now. */ 2181 /* Do the copy now. */
2217 i = le16_to_cpu(left_el->l_next_free_rec) - 1; 2182 i = le16_to_cpu(left_el->l_next_free_rec) - 1;
@@ -2230,11 +2195,7 @@ static int ocfs2_rotate_subtree_right(handle_t *handle,
2230 memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2195 memset(&left_el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2231 le16_add_cpu(&left_el->l_next_free_rec, 1); 2196 le16_add_cpu(&left_el->l_next_free_rec, 1);
2232 2197
2233 ret = ocfs2_journal_dirty(handle, left_leaf_bh); 2198 ocfs2_journal_dirty(handle, left_leaf_bh);
2234 if (ret) {
2235 mlog_errno(ret);
2236 goto out;
2237 }
2238 2199
2239 ocfs2_complete_edge_insert(handle, left_path, right_path, 2200 ocfs2_complete_edge_insert(handle, left_path, right_path,
2240 subtree_index); 2201 subtree_index);
@@ -2249,8 +2210,8 @@ out:
2249 * 2210 *
2250 * Will return zero if the path passed in is already the leftmost path. 2211 * Will return zero if the path passed in is already the leftmost path.
2251 */ 2212 */
2252static int ocfs2_find_cpos_for_left_leaf(struct super_block *sb, 2213int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
2253 struct ocfs2_path *path, u32 *cpos) 2214 struct ocfs2_path *path, u32 *cpos)
2254{ 2215{
2255 int i, j, ret = 0; 2216 int i, j, ret = 0;
2256 u64 blkno; 2217 u64 blkno;
@@ -2327,20 +2288,14 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
2327 int op_credits, 2288 int op_credits,
2328 struct ocfs2_path *path) 2289 struct ocfs2_path *path)
2329{ 2290{
2330 int ret; 2291 int ret = 0;
2331 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; 2292 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
2332 2293
2333 if (handle->h_buffer_credits < credits) { 2294 if (handle->h_buffer_credits < credits)
2334 ret = ocfs2_extend_trans(handle, 2295 ret = ocfs2_extend_trans(handle,
2335 credits - handle->h_buffer_credits); 2296 credits - handle->h_buffer_credits);
2336 if (ret)
2337 return ret;
2338 2297
2339 if (unlikely(handle->h_buffer_credits < credits)) 2298 return ret;
2340 return ocfs2_extend_trans(handle, credits);
2341 }
2342
2343 return 0;
2344} 2299}
2345 2300
2346/* 2301/*
@@ -2584,8 +2539,7 @@ static int ocfs2_update_edge_lengths(handle_t *handle,
2584 * records for all the bh in the path. 2539 * records for all the bh in the path.
2585 * So we have to allocate extra credits and access them. 2540 * So we have to allocate extra credits and access them.
2586 */ 2541 */
2587 ret = ocfs2_extend_trans(handle, 2542 ret = ocfs2_extend_trans(handle, subtree_index);
2588 handle->h_buffer_credits + subtree_index);
2589 if (ret) { 2543 if (ret) {
2590 mlog_errno(ret); 2544 mlog_errno(ret);
2591 goto out; 2545 goto out;
@@ -2823,12 +2777,8 @@ static int ocfs2_rotate_subtree_left(handle_t *handle,
2823 ocfs2_remove_empty_extent(right_leaf_el); 2777 ocfs2_remove_empty_extent(right_leaf_el);
2824 } 2778 }
2825 2779
2826 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 2780 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
2827 if (ret) 2781 ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2828 mlog_errno(ret);
2829 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
2830 if (ret)
2831 mlog_errno(ret);
2832 2782
2833 if (del_right_subtree) { 2783 if (del_right_subtree) {
2834 ocfs2_unlink_subtree(handle, et, left_path, right_path, 2784 ocfs2_unlink_subtree(handle, et, left_path, right_path,
@@ -2851,9 +2801,7 @@ static int ocfs2_rotate_subtree_left(handle_t *handle,
2851 if (right_has_empty) 2801 if (right_has_empty)
2852 ocfs2_remove_empty_extent(left_leaf_el); 2802 ocfs2_remove_empty_extent(left_leaf_el);
2853 2803
2854 ret = ocfs2_journal_dirty(handle, et_root_bh); 2804 ocfs2_journal_dirty(handle, et_root_bh);
2855 if (ret)
2856 mlog_errno(ret);
2857 2805
2858 *deleted = 1; 2806 *deleted = 1;
2859 } else 2807 } else
@@ -2962,10 +2910,7 @@ static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
2962 } 2910 }
2963 2911
2964 ocfs2_remove_empty_extent(el); 2912 ocfs2_remove_empty_extent(el);
2965 2913 ocfs2_journal_dirty(handle, bh);
2966 ret = ocfs2_journal_dirty(handle, bh);
2967 if (ret)
2968 mlog_errno(ret);
2969 2914
2970out: 2915out:
2971 return ret; 2916 return ret;
@@ -3506,15 +3451,9 @@ static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3506 3451
3507 ocfs2_cleanup_merge(el, index); 3452 ocfs2_cleanup_merge(el, index);
3508 3453
3509 ret = ocfs2_journal_dirty(handle, bh); 3454 ocfs2_journal_dirty(handle, bh);
3510 if (ret)
3511 mlog_errno(ret);
3512
3513 if (right_path) { 3455 if (right_path) {
3514 ret = ocfs2_journal_dirty(handle, path_leaf_bh(right_path)); 3456 ocfs2_journal_dirty(handle, path_leaf_bh(right_path));
3515 if (ret)
3516 mlog_errno(ret);
3517
3518 ocfs2_complete_edge_insert(handle, left_path, right_path, 3457 ocfs2_complete_edge_insert(handle, left_path, right_path,
3519 subtree_index); 3458 subtree_index);
3520 } 3459 }
@@ -3683,14 +3622,9 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3683 3622
3684 ocfs2_cleanup_merge(el, index); 3623 ocfs2_cleanup_merge(el, index);
3685 3624
3686 ret = ocfs2_journal_dirty(handle, bh); 3625 ocfs2_journal_dirty(handle, bh);
3687 if (ret)
3688 mlog_errno(ret);
3689
3690 if (left_path) { 3626 if (left_path) {
3691 ret = ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 3627 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
3692 if (ret)
3693 mlog_errno(ret);
3694 3628
3695 /* 3629 /*
3696 * In the situation that the right_rec is empty and the extent 3630 * In the situation that the right_rec is empty and the extent
@@ -4016,10 +3950,7 @@ static void ocfs2_adjust_rightmost_records(handle_t *handle,
4016 le32_add_cpu(&rec->e_int_clusters, 3950 le32_add_cpu(&rec->e_int_clusters,
4017 -le32_to_cpu(rec->e_cpos)); 3951 -le32_to_cpu(rec->e_cpos));
4018 3952
4019 ret = ocfs2_journal_dirty(handle, bh); 3953 ocfs2_journal_dirty(handle, bh);
4020 if (ret)
4021 mlog_errno(ret);
4022
4023 } 3954 }
4024} 3955}
4025 3956
@@ -4203,17 +4134,13 @@ static int ocfs2_insert_path(handle_t *handle,
4203 struct buffer_head *leaf_bh = path_leaf_bh(right_path); 4134 struct buffer_head *leaf_bh = path_leaf_bh(right_path);
4204 4135
4205 if (left_path) { 4136 if (left_path) {
4206 int credits = handle->h_buffer_credits;
4207
4208 /* 4137 /*
4209 * There's a chance that left_path got passed back to 4138 * There's a chance that left_path got passed back to
4210 * us without being accounted for in the 4139 * us without being accounted for in the
4211 * journal. Extend our transaction here to be sure we 4140 * journal. Extend our transaction here to be sure we
4212 * can change those blocks. 4141 * can change those blocks.
4213 */ 4142 */
4214 credits += left_path->p_tree_depth; 4143 ret = ocfs2_extend_trans(handle, left_path->p_tree_depth);
4215
4216 ret = ocfs2_extend_trans(handle, credits);
4217 if (ret < 0) { 4144 if (ret < 0) {
4218 mlog_errno(ret); 4145 mlog_errno(ret);
4219 goto out; 4146 goto out;
@@ -4251,17 +4178,13 @@ static int ocfs2_insert_path(handle_t *handle,
4251 * dirty this for us. 4178 * dirty this for us.
4252 */ 4179 */
4253 if (left_path) 4180 if (left_path)
4254 ret = ocfs2_journal_dirty(handle, 4181 ocfs2_journal_dirty(handle,
4255 path_leaf_bh(left_path)); 4182 path_leaf_bh(left_path));
4256 if (ret)
4257 mlog_errno(ret);
4258 } else 4183 } else
4259 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path), 4184 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
4260 insert); 4185 insert);
4261 4186
4262 ret = ocfs2_journal_dirty(handle, leaf_bh); 4187 ocfs2_journal_dirty(handle, leaf_bh);
4263 if (ret)
4264 mlog_errno(ret);
4265 4188
4266 if (left_path) { 4189 if (left_path) {
4267 /* 4190 /*
@@ -4384,9 +4307,7 @@ out_update_clusters:
4384 ocfs2_et_update_clusters(et, 4307 ocfs2_et_update_clusters(et,
4385 le16_to_cpu(insert_rec->e_leaf_clusters)); 4308 le16_to_cpu(insert_rec->e_leaf_clusters));
4386 4309
4387 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 4310 ocfs2_journal_dirty(handle, et->et_root_bh);
4388 if (ret)
4389 mlog_errno(ret);
4390 4311
4391out: 4312out:
4392 ocfs2_free_path(left_path); 4313 ocfs2_free_path(left_path);
@@ -4866,7 +4787,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4866 goto leave; 4787 goto leave;
4867 } 4788 }
4868 4789
4869 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 4790 status = __ocfs2_claim_clusters(handle, data_ac, 1,
4870 clusters_to_add, &bit_off, &num_bits); 4791 clusters_to_add, &bit_off, &num_bits);
4871 if (status < 0) { 4792 if (status < 0) {
4872 if (status != -ENOSPC) 4793 if (status != -ENOSPC)
@@ -4895,11 +4816,7 @@ int ocfs2_add_clusters_in_btree(handle_t *handle,
4895 goto leave; 4816 goto leave;
4896 } 4817 }
4897 4818
4898 status = ocfs2_journal_dirty(handle, et->et_root_bh); 4819 ocfs2_journal_dirty(handle, et->et_root_bh);
4899 if (status < 0) {
4900 mlog_errno(status);
4901 goto leave;
4902 }
4903 4820
4904 clusters_to_add -= num_bits; 4821 clusters_to_add -= num_bits;
4905 *logical_offset += num_bits; 4822 *logical_offset += num_bits;
@@ -5309,7 +5226,7 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5309 int index, u32 new_range, 5226 int index, u32 new_range,
5310 struct ocfs2_alloc_context *meta_ac) 5227 struct ocfs2_alloc_context *meta_ac)
5311{ 5228{
5312 int ret, depth, credits = handle->h_buffer_credits; 5229 int ret, depth, credits;
5313 struct buffer_head *last_eb_bh = NULL; 5230 struct buffer_head *last_eb_bh = NULL;
5314 struct ocfs2_extent_block *eb; 5231 struct ocfs2_extent_block *eb;
5315 struct ocfs2_extent_list *rightmost_el, *el; 5232 struct ocfs2_extent_list *rightmost_el, *el;
@@ -5340,8 +5257,8 @@ static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5340 } else 5257 } else
5341 rightmost_el = path_leaf_el(path); 5258 rightmost_el = path_leaf_el(path);
5342 5259
5343 credits += path->p_tree_depth + 5260 credits = path->p_tree_depth +
5344 ocfs2_extend_meta_needed(et->et_root_el); 5261 ocfs2_extend_meta_needed(et->et_root_el);
5345 ret = ocfs2_extend_trans(handle, credits); 5262 ret = ocfs2_extend_trans(handle, credits);
5346 if (ret) { 5263 if (ret) {
5347 mlog_errno(ret); 5264 mlog_errno(ret);
@@ -5671,19 +5588,97 @@ out:
5671 return ret; 5588 return ret;
5672} 5589}
5673 5590
5591/*
5592 * ocfs2_reserve_blocks_for_rec_trunc() would look basically the
5593 * same as ocfs2_lock_alloctors(), except for it accepts a blocks
5594 * number to reserve some extra blocks, and it only handles meta
5595 * data allocations.
5596 *
5597 * Currently, only ocfs2_remove_btree_range() uses it for truncating
5598 * and punching holes.
5599 */
5600static int ocfs2_reserve_blocks_for_rec_trunc(struct inode *inode,
5601 struct ocfs2_extent_tree *et,
5602 u32 extents_to_split,
5603 struct ocfs2_alloc_context **ac,
5604 int extra_blocks)
5605{
5606 int ret = 0, num_free_extents;
5607 unsigned int max_recs_needed = 2 * extents_to_split;
5608 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5609
5610 *ac = NULL;
5611
5612 num_free_extents = ocfs2_num_free_extents(osb, et);
5613 if (num_free_extents < 0) {
5614 ret = num_free_extents;
5615 mlog_errno(ret);
5616 goto out;
5617 }
5618
5619 if (!num_free_extents ||
5620 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed))
5621 extra_blocks += ocfs2_extend_meta_needed(et->et_root_el);
5622
5623 if (extra_blocks) {
5624 ret = ocfs2_reserve_new_metadata_blocks(osb, extra_blocks, ac);
5625 if (ret < 0) {
5626 if (ret != -ENOSPC)
5627 mlog_errno(ret);
5628 goto out;
5629 }
5630 }
5631
5632out:
5633 if (ret) {
5634 if (*ac) {
5635 ocfs2_free_alloc_context(*ac);
5636 *ac = NULL;
5637 }
5638 }
5639
5640 return ret;
5641}
5642
5674int ocfs2_remove_btree_range(struct inode *inode, 5643int ocfs2_remove_btree_range(struct inode *inode,
5675 struct ocfs2_extent_tree *et, 5644 struct ocfs2_extent_tree *et,
5676 u32 cpos, u32 phys_cpos, u32 len, 5645 u32 cpos, u32 phys_cpos, u32 len, int flags,
5677 struct ocfs2_cached_dealloc_ctxt *dealloc) 5646 struct ocfs2_cached_dealloc_ctxt *dealloc,
5647 u64 refcount_loc)
5678{ 5648{
5679 int ret; 5649 int ret, credits = 0, extra_blocks = 0;
5680 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 5650 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
5681 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5651 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5682 struct inode *tl_inode = osb->osb_tl_inode; 5652 struct inode *tl_inode = osb->osb_tl_inode;
5683 handle_t *handle; 5653 handle_t *handle;
5684 struct ocfs2_alloc_context *meta_ac = NULL; 5654 struct ocfs2_alloc_context *meta_ac = NULL;
5655 struct ocfs2_refcount_tree *ref_tree = NULL;
5656
5657 if ((flags & OCFS2_EXT_REFCOUNTED) && len) {
5658 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
5659 OCFS2_HAS_REFCOUNT_FL));
5660
5661 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
5662 &ref_tree, NULL);
5663 if (ret) {
5664 mlog_errno(ret);
5665 goto out;
5666 }
5685 5667
5686 ret = ocfs2_lock_allocators(inode, et, 0, 1, NULL, &meta_ac); 5668 ret = ocfs2_prepare_refcount_change_for_del(inode,
5669 refcount_loc,
5670 phys_blkno,
5671 len,
5672 &credits,
5673 &extra_blocks);
5674 if (ret < 0) {
5675 mlog_errno(ret);
5676 goto out;
5677 }
5678 }
5679
5680 ret = ocfs2_reserve_blocks_for_rec_trunc(inode, et, 1, &meta_ac,
5681 extra_blocks);
5687 if (ret) { 5682 if (ret) {
5688 mlog_errno(ret); 5683 mlog_errno(ret);
5689 return ret; 5684 return ret;
@@ -5699,7 +5694,8 @@ int ocfs2_remove_btree_range(struct inode *inode,
5699 } 5694 }
5700 } 5695 }
5701 5696
5702 handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb)); 5697 handle = ocfs2_start_trans(osb,
5698 ocfs2_remove_extent_credits(osb->sb) + credits);
5703 if (IS_ERR(handle)) { 5699 if (IS_ERR(handle)) {
5704 ret = PTR_ERR(handle); 5700 ret = PTR_ERR(handle);
5705 mlog_errno(ret); 5701 mlog_errno(ret);
@@ -5724,15 +5720,22 @@ int ocfs2_remove_btree_range(struct inode *inode,
5724 5720
5725 ocfs2_et_update_clusters(et, -len); 5721 ocfs2_et_update_clusters(et, -len);
5726 5722
5727 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 5723 ocfs2_journal_dirty(handle, et->et_root_bh);
5728 if (ret) {
5729 mlog_errno(ret);
5730 goto out_commit;
5731 }
5732 5724
5733 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); 5725 if (phys_blkno) {
5734 if (ret) 5726 if (flags & OCFS2_EXT_REFCOUNTED)
5735 mlog_errno(ret); 5727 ret = ocfs2_decrease_refcount(inode, handle,
5728 ocfs2_blocks_to_clusters(osb->sb,
5729 phys_blkno),
5730 len, meta_ac,
5731 dealloc, 1);
5732 else
5733 ret = ocfs2_truncate_log_append(osb, handle,
5734 phys_blkno, len);
5735 if (ret)
5736 mlog_errno(ret);
5737
5738 }
5736 5739
5737out_commit: 5740out_commit:
5738 ocfs2_commit_trans(osb, handle); 5741 ocfs2_commit_trans(osb, handle);
@@ -5742,6 +5745,9 @@ out:
5742 if (meta_ac) 5745 if (meta_ac)
5743 ocfs2_free_alloc_context(meta_ac); 5746 ocfs2_free_alloc_context(meta_ac);
5744 5747
5748 if (ref_tree)
5749 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
5750
5745 return ret; 5751 return ret;
5746} 5752}
5747 5753
@@ -5850,11 +5856,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5850 } 5856 }
5851 tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters); 5857 tl->tl_recs[index].t_clusters = cpu_to_le32(num_clusters);
5852 5858
5853 status = ocfs2_journal_dirty(handle, tl_bh); 5859 ocfs2_journal_dirty(handle, tl_bh);
5854 if (status < 0) {
5855 mlog_errno(status);
5856 goto bail;
5857 }
5858 5860
5859bail: 5861bail:
5860 mlog_exit(status); 5862 mlog_exit(status);
@@ -5893,11 +5895,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5893 5895
5894 tl->tl_used = cpu_to_le16(i); 5896 tl->tl_used = cpu_to_le16(i);
5895 5897
5896 status = ocfs2_journal_dirty(handle, tl_bh); 5898 ocfs2_journal_dirty(handle, tl_bh);
5897 if (status < 0) {
5898 mlog_errno(status);
5899 goto bail;
5900 }
5901 5899
5902 /* TODO: Perhaps we can calculate the bulk of the 5900 /* TODO: Perhaps we can calculate the bulk of the
5903 * credits up front rather than extending like 5901 * credits up front rather than extending like
@@ -6298,6 +6296,7 @@ int ocfs2_truncate_log_init(struct ocfs2_super *osb)
6298 */ 6296 */
6299struct ocfs2_cached_block_free { 6297struct ocfs2_cached_block_free {
6300 struct ocfs2_cached_block_free *free_next; 6298 struct ocfs2_cached_block_free *free_next;
6299 u64 free_bg;
6301 u64 free_blk; 6300 u64 free_blk;
6302 unsigned int free_bit; 6301 unsigned int free_bit;
6303}; 6302};
@@ -6344,8 +6343,11 @@ static int ocfs2_free_cached_blocks(struct ocfs2_super *osb,
6344 } 6343 }
6345 6344
6346 while (head) { 6345 while (head) {
6347 bg_blkno = ocfs2_which_suballoc_group(head->free_blk, 6346 if (head->free_bg)
6348 head->free_bit); 6347 bg_blkno = head->free_bg;
6348 else
6349 bg_blkno = ocfs2_which_suballoc_group(head->free_blk,
6350 head->free_bit);
6349 mlog(0, "Free bit: (bit %u, blkno %llu)\n", 6351 mlog(0, "Free bit: (bit %u, blkno %llu)\n",
6350 head->free_bit, (unsigned long long)head->free_blk); 6352 head->free_bit, (unsigned long long)head->free_blk);
6351 6353
@@ -6393,7 +6395,7 @@ int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6393 int ret = 0; 6395 int ret = 0;
6394 struct ocfs2_cached_block_free *item; 6396 struct ocfs2_cached_block_free *item;
6395 6397
6396 item = kmalloc(sizeof(*item), GFP_NOFS); 6398 item = kzalloc(sizeof(*item), GFP_NOFS);
6397 if (item == NULL) { 6399 if (item == NULL) {
6398 ret = -ENOMEM; 6400 ret = -ENOMEM;
6399 mlog_errno(ret); 6401 mlog_errno(ret);
@@ -6533,8 +6535,8 @@ ocfs2_find_per_slot_free_list(int type,
6533} 6535}
6534 6536
6535int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 6537int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6536 int type, int slot, u64 blkno, 6538 int type, int slot, u64 suballoc,
6537 unsigned int bit) 6539 u64 blkno, unsigned int bit)
6538{ 6540{
6539 int ret; 6541 int ret;
6540 struct ocfs2_per_slot_free_list *fl; 6542 struct ocfs2_per_slot_free_list *fl;
@@ -6547,7 +6549,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6547 goto out; 6549 goto out;
6548 } 6550 }
6549 6551
6550 item = kmalloc(sizeof(*item), GFP_NOFS); 6552 item = kzalloc(sizeof(*item), GFP_NOFS);
6551 if (item == NULL) { 6553 if (item == NULL) {
6552 ret = -ENOMEM; 6554 ret = -ENOMEM;
6553 mlog_errno(ret); 6555 mlog_errno(ret);
@@ -6557,6 +6559,7 @@ int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6557 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n", 6559 mlog(0, "Insert: (type %d, slot %u, bit %u, blk %llu)\n",
6558 type, slot, bit, (unsigned long long)blkno); 6560 type, slot, bit, (unsigned long long)blkno);
6559 6561
6562 item->free_bg = suballoc;
6560 item->free_blk = blkno; 6563 item->free_blk = blkno;
6561 item->free_bit = bit; 6564 item->free_bit = bit;
6562 item->free_next = fl->f_first; 6565 item->free_next = fl->f_first;
@@ -6573,433 +6576,11 @@ static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
6573{ 6576{
6574 return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE, 6577 return ocfs2_cache_block_dealloc(ctxt, EXTENT_ALLOC_SYSTEM_INODE,
6575 le16_to_cpu(eb->h_suballoc_slot), 6578 le16_to_cpu(eb->h_suballoc_slot),
6579 le64_to_cpu(eb->h_suballoc_loc),
6576 le64_to_cpu(eb->h_blkno), 6580 le64_to_cpu(eb->h_blkno),
6577 le16_to_cpu(eb->h_suballoc_bit)); 6581 le16_to_cpu(eb->h_suballoc_bit));
6578} 6582}
6579 6583
6580/* This function will figure out whether the currently last extent
6581 * block will be deleted, and if it will, what the new last extent
6582 * block will be so we can update his h_next_leaf_blk field, as well
6583 * as the dinodes i_last_eb_blk */
6584static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6585 unsigned int clusters_to_del,
6586 struct ocfs2_path *path,
6587 struct buffer_head **new_last_eb)
6588{
6589 int next_free, ret = 0;
6590 u32 cpos;
6591 struct ocfs2_extent_rec *rec;
6592 struct ocfs2_extent_block *eb;
6593 struct ocfs2_extent_list *el;
6594 struct buffer_head *bh = NULL;
6595
6596 *new_last_eb = NULL;
6597
6598 /* we have no tree, so of course, no last_eb. */
6599 if (!path->p_tree_depth)
6600 goto out;
6601
6602 /* trunc to zero special case - this makes tree_depth = 0
6603 * regardless of what it is. */
6604 if (OCFS2_I(inode)->ip_clusters == clusters_to_del)
6605 goto out;
6606
6607 el = path_leaf_el(path);
6608 BUG_ON(!el->l_next_free_rec);
6609
6610 /*
6611 * Make sure that this extent list will actually be empty
6612 * after we clear away the data. We can shortcut out if
6613 * there's more than one non-empty extent in the
6614 * list. Otherwise, a check of the remaining extent is
6615 * necessary.
6616 */
6617 next_free = le16_to_cpu(el->l_next_free_rec);
6618 rec = NULL;
6619 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6620 if (next_free > 2)
6621 goto out;
6622
6623 /* We may have a valid extent in index 1, check it. */
6624 if (next_free == 2)
6625 rec = &el->l_recs[1];
6626
6627 /*
6628 * Fall through - no more nonempty extents, so we want
6629 * to delete this leaf.
6630 */
6631 } else {
6632 if (next_free > 1)
6633 goto out;
6634
6635 rec = &el->l_recs[0];
6636 }
6637
6638 if (rec) {
6639 /*
6640 * Check it we'll only be trimming off the end of this
6641 * cluster.
6642 */
6643 if (le16_to_cpu(rec->e_leaf_clusters) > clusters_to_del)
6644 goto out;
6645 }
6646
6647 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos);
6648 if (ret) {
6649 mlog_errno(ret);
6650 goto out;
6651 }
6652
6653 ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
6654 if (ret) {
6655 mlog_errno(ret);
6656 goto out;
6657 }
6658
6659 eb = (struct ocfs2_extent_block *) bh->b_data;
6660 el = &eb->h_list;
6661
6662 /* ocfs2_find_leaf() gets the eb from ocfs2_read_extent_block().
6663 * Any corruption is a code bug. */
6664 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
6665
6666 *new_last_eb = bh;
6667 get_bh(*new_last_eb);
6668 mlog(0, "returning block %llu, (cpos: %u)\n",
6669 (unsigned long long)le64_to_cpu(eb->h_blkno), cpos);
6670out:
6671 brelse(bh);
6672
6673 return ret;
6674}
6675
6676/*
6677 * Trim some clusters off the rightmost edge of a tree. Only called
6678 * during truncate.
6679 *
6680 * The caller needs to:
6681 * - start journaling of each path component.
6682 * - compute and fully set up any new last ext block
6683 */
6684static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6685 handle_t *handle, struct ocfs2_truncate_context *tc,
6686 u32 clusters_to_del, u64 *delete_start, u8 *flags)
6687{
6688 int ret, i, index = path->p_tree_depth;
6689 u32 new_edge = 0;
6690 u64 deleted_eb = 0;
6691 struct buffer_head *bh;
6692 struct ocfs2_extent_list *el;
6693 struct ocfs2_extent_rec *rec;
6694
6695 *delete_start = 0;
6696 *flags = 0;
6697
6698 while (index >= 0) {
6699 bh = path->p_node[index].bh;
6700 el = path->p_node[index].el;
6701
6702 mlog(0, "traveling tree (index = %d, block = %llu)\n",
6703 index, (unsigned long long)bh->b_blocknr);
6704
6705 BUG_ON(le16_to_cpu(el->l_next_free_rec) == 0);
6706
6707 if (index !=
6708 (path->p_tree_depth - le16_to_cpu(el->l_tree_depth))) {
6709 ocfs2_error(inode->i_sb,
6710 "Inode %lu has invalid ext. block %llu",
6711 inode->i_ino,
6712 (unsigned long long)bh->b_blocknr);
6713 ret = -EROFS;
6714 goto out;
6715 }
6716
6717find_tail_record:
6718 i = le16_to_cpu(el->l_next_free_rec) - 1;
6719 rec = &el->l_recs[i];
6720
6721 mlog(0, "Extent list before: record %d: (%u, %u, %llu), "
6722 "next = %u\n", i, le32_to_cpu(rec->e_cpos),
6723 ocfs2_rec_clusters(el, rec),
6724 (unsigned long long)le64_to_cpu(rec->e_blkno),
6725 le16_to_cpu(el->l_next_free_rec));
6726
6727 BUG_ON(ocfs2_rec_clusters(el, rec) < clusters_to_del);
6728
6729 if (le16_to_cpu(el->l_tree_depth) == 0) {
6730 /*
6731 * If the leaf block contains a single empty
6732 * extent and no records, we can just remove
6733 * the block.
6734 */
6735 if (i == 0 && ocfs2_is_empty_extent(rec)) {
6736 memset(rec, 0,
6737 sizeof(struct ocfs2_extent_rec));
6738 el->l_next_free_rec = cpu_to_le16(0);
6739
6740 goto delete;
6741 }
6742
6743 /*
6744 * Remove any empty extents by shifting things
6745 * left. That should make life much easier on
6746 * the code below. This condition is rare
6747 * enough that we shouldn't see a performance
6748 * hit.
6749 */
6750 if (ocfs2_is_empty_extent(&el->l_recs[0])) {
6751 le16_add_cpu(&el->l_next_free_rec, -1);
6752
6753 for(i = 0;
6754 i < le16_to_cpu(el->l_next_free_rec); i++)
6755 el->l_recs[i] = el->l_recs[i + 1];
6756
6757 memset(&el->l_recs[i], 0,
6758 sizeof(struct ocfs2_extent_rec));
6759
6760 /*
6761 * We've modified our extent list. The
6762 * simplest way to handle this change
6763 * is to being the search from the
6764 * start again.
6765 */
6766 goto find_tail_record;
6767 }
6768
6769 le16_add_cpu(&rec->e_leaf_clusters, -clusters_to_del);
6770
6771 /*
6772 * We'll use "new_edge" on our way back up the
6773 * tree to know what our rightmost cpos is.
6774 */
6775 new_edge = le16_to_cpu(rec->e_leaf_clusters);
6776 new_edge += le32_to_cpu(rec->e_cpos);
6777
6778 /*
6779 * The caller will use this to delete data blocks.
6780 */
6781 *delete_start = le64_to_cpu(rec->e_blkno)
6782 + ocfs2_clusters_to_blocks(inode->i_sb,
6783 le16_to_cpu(rec->e_leaf_clusters));
6784 *flags = rec->e_flags;
6785
6786 /*
6787 * If it's now empty, remove this record.
6788 */
6789 if (le16_to_cpu(rec->e_leaf_clusters) == 0) {
6790 memset(rec, 0,
6791 sizeof(struct ocfs2_extent_rec));
6792 le16_add_cpu(&el->l_next_free_rec, -1);
6793 }
6794 } else {
6795 if (le64_to_cpu(rec->e_blkno) == deleted_eb) {
6796 memset(rec, 0,
6797 sizeof(struct ocfs2_extent_rec));
6798 le16_add_cpu(&el->l_next_free_rec, -1);
6799
6800 goto delete;
6801 }
6802
6803 /* Can this actually happen? */
6804 if (le16_to_cpu(el->l_next_free_rec) == 0)
6805 goto delete;
6806
6807 /*
6808 * We never actually deleted any clusters
6809 * because our leaf was empty. There's no
6810 * reason to adjust the rightmost edge then.
6811 */
6812 if (new_edge == 0)
6813 goto delete;
6814
6815 rec->e_int_clusters = cpu_to_le32(new_edge);
6816 le32_add_cpu(&rec->e_int_clusters,
6817 -le32_to_cpu(rec->e_cpos));
6818
6819 /*
6820 * A deleted child record should have been
6821 * caught above.
6822 */
6823 BUG_ON(le32_to_cpu(rec->e_int_clusters) == 0);
6824 }
6825
6826delete:
6827 ret = ocfs2_journal_dirty(handle, bh);
6828 if (ret) {
6829 mlog_errno(ret);
6830 goto out;
6831 }
6832
6833 mlog(0, "extent list container %llu, after: record %d: "
6834 "(%u, %u, %llu), next = %u.\n",
6835 (unsigned long long)bh->b_blocknr, i,
6836 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec),
6837 (unsigned long long)le64_to_cpu(rec->e_blkno),
6838 le16_to_cpu(el->l_next_free_rec));
6839
6840 /*
6841 * We must be careful to only attempt delete of an
6842 * extent block (and not the root inode block).
6843 */
6844 if (index > 0 && le16_to_cpu(el->l_next_free_rec) == 0) {
6845 struct ocfs2_extent_block *eb =
6846 (struct ocfs2_extent_block *)bh->b_data;
6847
6848 /*
6849 * Save this for use when processing the
6850 * parent block.
6851 */
6852 deleted_eb = le64_to_cpu(eb->h_blkno);
6853
6854 mlog(0, "deleting this extent block.\n");
6855
6856 ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
6857
6858 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
6859 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
6860 BUG_ON(le64_to_cpu(el->l_recs[0].e_blkno));
6861
6862 ret = ocfs2_cache_extent_block_free(&tc->tc_dealloc, eb);
6863 /* An error here is not fatal. */
6864 if (ret < 0)
6865 mlog_errno(ret);
6866 } else {
6867 deleted_eb = 0;
6868 }
6869
6870 index--;
6871 }
6872
6873 ret = 0;
6874out:
6875 return ret;
6876}
6877
6878static int ocfs2_do_truncate(struct ocfs2_super *osb,
6879 unsigned int clusters_to_del,
6880 struct inode *inode,
6881 struct buffer_head *fe_bh,
6882 handle_t *handle,
6883 struct ocfs2_truncate_context *tc,
6884 struct ocfs2_path *path,
6885 struct ocfs2_alloc_context *meta_ac)
6886{
6887 int status;
6888 struct ocfs2_dinode *fe;
6889 struct ocfs2_extent_block *last_eb = NULL;
6890 struct ocfs2_extent_list *el;
6891 struct buffer_head *last_eb_bh = NULL;
6892 u64 delete_blk = 0;
6893 u8 rec_flags;
6894
6895 fe = (struct ocfs2_dinode *) fe_bh->b_data;
6896
6897 status = ocfs2_find_new_last_ext_blk(inode, clusters_to_del,
6898 path, &last_eb_bh);
6899 if (status < 0) {
6900 mlog_errno(status);
6901 goto bail;
6902 }
6903
6904 /*
6905 * Each component will be touched, so we might as well journal
6906 * here to avoid having to handle errors later.
6907 */
6908 status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
6909 if (status < 0) {
6910 mlog_errno(status);
6911 goto bail;
6912 }
6913
6914 if (last_eb_bh) {
6915 status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
6916 OCFS2_JOURNAL_ACCESS_WRITE);
6917 if (status < 0) {
6918 mlog_errno(status);
6919 goto bail;
6920 }
6921
6922 last_eb = (struct ocfs2_extent_block *) last_eb_bh->b_data;
6923 }
6924
6925 el = &(fe->id2.i_list);
6926
6927 /*
6928 * Lower levels depend on this never happening, but it's best
6929 * to check it up here before changing the tree.
6930 */
6931 if (el->l_tree_depth && el->l_recs[0].e_int_clusters == 0) {
6932 ocfs2_error(inode->i_sb,
6933 "Inode %lu has an empty extent record, depth %u\n",
6934 inode->i_ino, le16_to_cpu(el->l_tree_depth));
6935 status = -EROFS;
6936 goto bail;
6937 }
6938
6939 dquot_free_space_nodirty(inode,
6940 ocfs2_clusters_to_bytes(osb->sb, clusters_to_del));
6941 spin_lock(&OCFS2_I(inode)->ip_lock);
6942 OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters) -
6943 clusters_to_del;
6944 spin_unlock(&OCFS2_I(inode)->ip_lock);
6945 le32_add_cpu(&fe->i_clusters, -clusters_to_del);
6946 inode->i_blocks = ocfs2_inode_sector_count(inode);
6947
6948 status = ocfs2_trim_tree(inode, path, handle, tc,
6949 clusters_to_del, &delete_blk, &rec_flags);
6950 if (status) {
6951 mlog_errno(status);
6952 goto bail;
6953 }
6954
6955 if (le32_to_cpu(fe->i_clusters) == 0) {
6956 /* trunc to zero is a special case. */
6957 el->l_tree_depth = 0;
6958 fe->i_last_eb_blk = 0;
6959 } else if (last_eb)
6960 fe->i_last_eb_blk = last_eb->h_blkno;
6961
6962 status = ocfs2_journal_dirty(handle, fe_bh);
6963 if (status < 0) {
6964 mlog_errno(status);
6965 goto bail;
6966 }
6967
6968 if (last_eb) {
6969 /* If there will be a new last extent block, then by
6970 * definition, there cannot be any leaves to the right of
6971 * him. */
6972 last_eb->h_next_leaf_blk = 0;
6973 status = ocfs2_journal_dirty(handle, last_eb_bh);
6974 if (status < 0) {
6975 mlog_errno(status);
6976 goto bail;
6977 }
6978 }
6979
6980 if (delete_blk) {
6981 if (rec_flags & OCFS2_EXT_REFCOUNTED)
6982 status = ocfs2_decrease_refcount(inode, handle,
6983 ocfs2_blocks_to_clusters(osb->sb,
6984 delete_blk),
6985 clusters_to_del, meta_ac,
6986 &tc->tc_dealloc, 1);
6987 else
6988 status = ocfs2_truncate_log_append(osb, handle,
6989 delete_blk,
6990 clusters_to_del);
6991 if (status < 0) {
6992 mlog_errno(status);
6993 goto bail;
6994 }
6995 }
6996 status = 0;
6997bail:
6998 brelse(last_eb_bh);
6999 mlog_exit(status);
7000 return status;
7001}
7002
7003static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) 6584static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
7004{ 6585{
7005 set_buffer_uptodate(bh); 6586 set_buffer_uptodate(bh);
@@ -7307,7 +6888,9 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7307 goto out_commit; 6888 goto out_commit;
7308 did_quota = 1; 6889 did_quota = 1;
7309 6890
7310 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 6891 data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
6892
6893 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
7311 &num); 6894 &num);
7312 if (ret) { 6895 if (ret) {
7313 mlog_errno(ret); 6896 mlog_errno(ret);
@@ -7406,26 +6989,29 @@ out:
7406 */ 6989 */
7407int ocfs2_commit_truncate(struct ocfs2_super *osb, 6990int ocfs2_commit_truncate(struct ocfs2_super *osb,
7408 struct inode *inode, 6991 struct inode *inode,
7409 struct buffer_head *fe_bh, 6992 struct buffer_head *di_bh)
7410 struct ocfs2_truncate_context *tc)
7411{ 6993{
7412 int status, i, credits, tl_sem = 0; 6994 int status = 0, i, flags = 0;
7413 u32 clusters_to_del, new_highest_cpos, range; 6995 u32 new_highest_cpos, range, trunc_cpos, trunc_len, phys_cpos, coff;
7414 u64 blkno = 0; 6996 u64 blkno = 0;
7415 struct ocfs2_extent_list *el; 6997 struct ocfs2_extent_list *el;
7416 handle_t *handle = NULL; 6998 struct ocfs2_extent_rec *rec;
7417 struct inode *tl_inode = osb->osb_tl_inode;
7418 struct ocfs2_path *path = NULL; 6999 struct ocfs2_path *path = NULL;
7419 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 7000 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
7420 struct ocfs2_alloc_context *meta_ac = NULL; 7001 struct ocfs2_extent_list *root_el = &(di->id2.i_list);
7421 struct ocfs2_refcount_tree *ref_tree = NULL; 7002 u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
7003 struct ocfs2_extent_tree et;
7004 struct ocfs2_cached_dealloc_ctxt dealloc;
7422 7005
7423 mlog_entry_void(); 7006 mlog_entry_void();
7424 7007
7008 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7009 ocfs2_init_dealloc_ctxt(&dealloc);
7010
7425 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 7011 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
7426 i_size_read(inode)); 7012 i_size_read(inode));
7427 7013
7428 path = ocfs2_new_path(fe_bh, &di->id2.i_list, 7014 path = ocfs2_new_path(di_bh, &di->id2.i_list,
7429 ocfs2_journal_access_di); 7015 ocfs2_journal_access_di);
7430 if (!path) { 7016 if (!path) {
7431 status = -ENOMEM; 7017 status = -ENOMEM;
@@ -7444,8 +7030,6 @@ start:
7444 goto bail; 7030 goto bail;
7445 } 7031 }
7446 7032
7447 credits = 0;
7448
7449 /* 7033 /*
7450 * Truncate always works against the rightmost tree branch. 7034 * Truncate always works against the rightmost tree branch.
7451 */ 7035 */
@@ -7480,101 +7064,62 @@ start:
7480 } 7064 }
7481 7065
7482 i = le16_to_cpu(el->l_next_free_rec) - 1; 7066 i = le16_to_cpu(el->l_next_free_rec) - 1;
7483 range = le32_to_cpu(el->l_recs[i].e_cpos) + 7067 rec = &el->l_recs[i];
7484 ocfs2_rec_clusters(el, &el->l_recs[i]); 7068 flags = rec->e_flags;
7485 if (i == 0 && ocfs2_is_empty_extent(&el->l_recs[i])) { 7069 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
7486 clusters_to_del = 0; 7070
7487 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 7071 if (i == 0 && ocfs2_is_empty_extent(rec)) {
7488 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 7072 /*
7489 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 7073 * Lower levels depend on this never happening, but it's best
7074 * to check it up here before changing the tree.
7075 */
7076 if (root_el->l_tree_depth && rec->e_int_clusters == 0) {
7077 ocfs2_error(inode->i_sb, "Inode %lu has an empty "
7078 "extent record, depth %u\n", inode->i_ino,
7079 le16_to_cpu(root_el->l_tree_depth));
7080 status = -EROFS;
7081 goto bail;
7082 }
7083 trunc_cpos = le32_to_cpu(rec->e_cpos);
7084 trunc_len = 0;
7085 blkno = 0;
7086 } else if (le32_to_cpu(rec->e_cpos) >= new_highest_cpos) {
7087 /*
7088 * Truncate entire record.
7089 */
7090 trunc_cpos = le32_to_cpu(rec->e_cpos);
7091 trunc_len = ocfs2_rec_clusters(el, rec);
7092 blkno = le64_to_cpu(rec->e_blkno);
7490 } else if (range > new_highest_cpos) { 7093 } else if (range > new_highest_cpos) {
7491 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 7094 /*
7492 le32_to_cpu(el->l_recs[i].e_cpos)) - 7095 * Partial truncate. it also should be
7493 new_highest_cpos; 7096 * the last truncate we're doing.
7494 blkno = le64_to_cpu(el->l_recs[i].e_blkno) + 7097 */
7495 ocfs2_clusters_to_blocks(inode->i_sb, 7098 trunc_cpos = new_highest_cpos;
7496 ocfs2_rec_clusters(el, &el->l_recs[i]) - 7099 trunc_len = range - new_highest_cpos;
7497 clusters_to_del); 7100 coff = new_highest_cpos - le32_to_cpu(rec->e_cpos);
7101 blkno = le64_to_cpu(rec->e_blkno) +
7102 ocfs2_clusters_to_blocks(inode->i_sb, coff);
7498 } else { 7103 } else {
7104 /*
7105 * Truncate completed, leave happily.
7106 */
7499 status = 0; 7107 status = 0;
7500 goto bail; 7108 goto bail;
7501 } 7109 }
7502 7110
7503 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 7111 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
7504 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
7505
7506 if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7507 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7508 OCFS2_HAS_REFCOUNT_FL));
7509
7510 status = ocfs2_lock_refcount_tree(osb,
7511 le64_to_cpu(di->i_refcount_loc),
7512 1, &ref_tree, NULL);
7513 if (status) {
7514 mlog_errno(status);
7515 goto bail;
7516 }
7517
7518 status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7519 blkno,
7520 clusters_to_del,
7521 &credits,
7522 &meta_ac);
7523 if (status < 0) {
7524 mlog_errno(status);
7525 goto bail;
7526 }
7527 }
7528
7529 mutex_lock(&tl_inode->i_mutex);
7530 tl_sem = 1;
7531 /* ocfs2_truncate_log_needs_flush guarantees us at least one
7532 * record is free for use. If there isn't any, we flush to get
7533 * an empty truncate log. */
7534 if (ocfs2_truncate_log_needs_flush(osb)) {
7535 status = __ocfs2_flush_truncate_log(osb);
7536 if (status < 0) {
7537 mlog_errno(status);
7538 goto bail;
7539 }
7540 }
7541 7112
7542 credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 7113 status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
7543 (struct ocfs2_dinode *)fe_bh->b_data, 7114 phys_cpos, trunc_len, flags, &dealloc,
7544 el); 7115 refcount_loc);
7545 handle = ocfs2_start_trans(osb, credits);
7546 if (IS_ERR(handle)) {
7547 status = PTR_ERR(handle);
7548 handle = NULL;
7549 mlog_errno(status);
7550 goto bail;
7551 }
7552
7553 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
7554 tc, path, meta_ac);
7555 if (status < 0) { 7116 if (status < 0) {
7556 mlog_errno(status); 7117 mlog_errno(status);
7557 goto bail; 7118 goto bail;
7558 } 7119 }
7559 7120
7560 mutex_unlock(&tl_inode->i_mutex);
7561 tl_sem = 0;
7562
7563 ocfs2_commit_trans(osb, handle);
7564 handle = NULL;
7565
7566 ocfs2_reinit_path(path, 1); 7121 ocfs2_reinit_path(path, 1);
7567 7122
7568 if (meta_ac) {
7569 ocfs2_free_alloc_context(meta_ac);
7570 meta_ac = NULL;
7571 }
7572
7573 if (ref_tree) {
7574 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7575 ref_tree = NULL;
7576 }
7577
7578 /* 7123 /*
7579 * The check above will catch the case where we've truncated 7124 * The check above will catch the case where we've truncated
7580 * away all allocation. 7125 * away all allocation.
@@ -7585,25 +7130,10 @@ bail:
7585 7130
7586 ocfs2_schedule_truncate_log_flush(osb, 1); 7131 ocfs2_schedule_truncate_log_flush(osb, 1);
7587 7132
7588 if (tl_sem) 7133 ocfs2_run_deallocs(osb, &dealloc);
7589 mutex_unlock(&tl_inode->i_mutex);
7590
7591 if (handle)
7592 ocfs2_commit_trans(osb, handle);
7593
7594 if (meta_ac)
7595 ocfs2_free_alloc_context(meta_ac);
7596
7597 if (ref_tree)
7598 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7599
7600 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7601 7134
7602 ocfs2_free_path(path); 7135 ocfs2_free_path(path);
7603 7136
7604 /* This will drop the ext_alloc cluster lock for us */
7605 ocfs2_free_truncate_context(tc);
7606
7607 mlog_exit(status); 7137 mlog_exit(status);
7608 return status; 7138 return status;
7609} 7139}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 1db4359ccb90..55762b554b99 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -140,8 +140,9 @@ int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
140 struct ocfs2_cached_dealloc_ctxt *dealloc); 140 struct ocfs2_cached_dealloc_ctxt *dealloc);
141int ocfs2_remove_btree_range(struct inode *inode, 141int ocfs2_remove_btree_range(struct inode *inode,
142 struct ocfs2_extent_tree *et, 142 struct ocfs2_extent_tree *et,
143 u32 cpos, u32 phys_cpos, u32 len, 143 u32 cpos, u32 phys_cpos, u32 len, int flags,
144 struct ocfs2_cached_dealloc_ctxt *dealloc); 144 struct ocfs2_cached_dealloc_ctxt *dealloc,
145 u64 refcount_loc);
145 146
146int ocfs2_num_free_extents(struct ocfs2_super *osb, 147int ocfs2_num_free_extents(struct ocfs2_super *osb,
147 struct ocfs2_extent_tree *et); 148 struct ocfs2_extent_tree *et);
@@ -209,7 +210,7 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
209int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 210int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
210 u64 blkno, unsigned int bit); 211 u64 blkno, unsigned int bit);
211int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 212int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
212 int type, int slot, u64 blkno, 213 int type, int slot, u64 suballoc, u64 blkno,
213 unsigned int bit); 214 unsigned int bit);
214static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) 215static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
215{ 216{
@@ -233,8 +234,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
233 struct ocfs2_truncate_context **tc); 234 struct ocfs2_truncate_context **tc);
234int ocfs2_commit_truncate(struct ocfs2_super *osb, 235int ocfs2_commit_truncate(struct ocfs2_super *osb,
235 struct inode *inode, 236 struct inode *inode,
236 struct buffer_head *fe_bh, 237 struct buffer_head *di_bh);
237 struct ocfs2_truncate_context *tc);
238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
239 unsigned int start, unsigned int end, int trunc); 239 unsigned int start, unsigned int end, int trunc);
240 240
@@ -319,6 +319,8 @@ int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
319 struct ocfs2_path *path); 319 struct ocfs2_path *path);
320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb, 320int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
321 struct ocfs2_path *path, u32 *cpos); 321 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_cpos_for_left_leaf(struct super_block *sb,
323 struct ocfs2_path *path, u32 *cpos);
322int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et, 324int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
323 struct ocfs2_path *left, 325 struct ocfs2_path *left,
324 struct ocfs2_path *right); 326 struct ocfs2_path *right);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 21441ddb5506..3623ca20cc18 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1735,6 +1735,9 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1735 goto out; 1735 goto out;
1736 } 1736 }
1737 1737
1738 if (data_ac)
1739 data_ac->ac_resv = &OCFS2_I(inode)->ip_la_data_resv;
1740
1738 credits = ocfs2_calc_extend_credits(inode->i_sb, 1741 credits = ocfs2_calc_extend_credits(inode->i_sb,
1739 &di->id2.i_list, 1742 &di->id2.i_list,
1740 clusters_to_alloc); 1743 clusters_to_alloc);
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 3bb928a2bf7d..c7fba396392d 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -116,6 +116,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
116 define_mask(ERROR), 116 define_mask(ERROR),
117 define_mask(NOTICE), 117 define_mask(NOTICE),
118 define_mask(KTHREAD), 118 define_mask(KTHREAD),
119 define_mask(RESERVATIONS),
119}; 120};
120 121
121static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, }; 122static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 3dfddbec32f2..fd96e2a2fa56 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -119,6 +119,7 @@
119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 119#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 120#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ 121#define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */
122#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */
122 123
123#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) 124#define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
124#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) 125#define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 73e743eea2c8..aa75ca3f78da 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -583,6 +583,9 @@ static void o2net_state_change(struct sock *sk)
583 o2net_sc_queue_work(sc, &sc->sc_connect_work); 583 o2net_sc_queue_work(sc, &sc->sc_connect_work);
584 break; 584 break;
585 default: 585 default:
586 printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT
587 " shutdown, state %d\n",
588 SC_NODEF_ARGS(sc), sk->sk_state);
586 o2net_sc_queue_work(sc, &sc->sc_shutdown_work); 589 o2net_sc_queue_work(sc, &sc->sc_shutdown_work);
587 break; 590 break;
588 } 591 }
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index efd77d071c80..f04ebcfffc4a 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1194,7 +1194,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1194 else 1194 else
1195 de->inode = 0; 1195 de->inode = 0;
1196 dir->i_version++; 1196 dir->i_version++;
1197 status = ocfs2_journal_dirty(handle, bh); 1197 ocfs2_journal_dirty(handle, bh);
1198 goto bail; 1198 goto bail;
1199 } 1199 }
1200 i += le16_to_cpu(de->rec_len); 1200 i += le16_to_cpu(de->rec_len);
@@ -1752,7 +1752,7 @@ int __ocfs2_add_entry(handle_t *handle,
1752 ocfs2_recalc_free_list(dir, handle, lookup); 1752 ocfs2_recalc_free_list(dir, handle, lookup);
1753 1753
1754 dir->i_version++; 1754 dir->i_version++;
1755 status = ocfs2_journal_dirty(handle, insert_bh); 1755 ocfs2_journal_dirty(handle, insert_bh);
1756 retval = 0; 1756 retval = 0;
1757 goto bail; 1757 goto bail;
1758 } 1758 }
@@ -2297,12 +2297,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2297 } 2297 }
2298 2298
2299 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size); 2299 ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
2300
2301 ocfs2_journal_dirty(handle, di_bh); 2300 ocfs2_journal_dirty(handle, di_bh);
2302 if (ret) {
2303 mlog_errno(ret);
2304 goto out;
2305 }
2306 2301
2307 i_size_write(inode, size); 2302 i_size_write(inode, size);
2308 inode->i_nlink = 2; 2303 inode->i_nlink = 2;
@@ -2366,11 +2361,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2366 ocfs2_init_dir_trailer(inode, new_bh, size); 2361 ocfs2_init_dir_trailer(inode, new_bh, size);
2367 } 2362 }
2368 2363
2369 status = ocfs2_journal_dirty(handle, new_bh); 2364 ocfs2_journal_dirty(handle, new_bh);
2370 if (status < 0) {
2371 mlog_errno(status);
2372 goto bail;
2373 }
2374 2365
2375 i_size_write(inode, inode->i_sb->s_blocksize); 2366 i_size_write(inode, inode->i_sb->s_blocksize);
2376 inode->i_nlink = 2; 2367 inode->i_nlink = 2;
@@ -2404,15 +2395,15 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2404 int ret; 2395 int ret;
2405 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 2396 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
2406 u16 dr_suballoc_bit; 2397 u16 dr_suballoc_bit;
2407 u64 dr_blkno; 2398 u64 suballoc_loc, dr_blkno;
2408 unsigned int num_bits; 2399 unsigned int num_bits;
2409 struct buffer_head *dx_root_bh = NULL; 2400 struct buffer_head *dx_root_bh = NULL;
2410 struct ocfs2_dx_root_block *dx_root; 2401 struct ocfs2_dx_root_block *dx_root;
2411 struct ocfs2_dir_block_trailer *trailer = 2402 struct ocfs2_dir_block_trailer *trailer =
2412 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb); 2403 ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
2413 2404
2414 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, &dr_suballoc_bit, 2405 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
2415 &num_bits, &dr_blkno); 2406 &dr_suballoc_bit, &num_bits, &dr_blkno);
2416 if (ret) { 2407 if (ret) {
2417 mlog_errno(ret); 2408 mlog_errno(ret);
2418 goto out; 2409 goto out;
@@ -2440,6 +2431,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2440 memset(dx_root, 0, osb->sb->s_blocksize); 2431 memset(dx_root, 0, osb->sb->s_blocksize);
2441 strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE); 2432 strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
2442 dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 2433 dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2434 dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
2443 dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit); 2435 dx_root->dr_suballoc_bit = cpu_to_le16(dr_suballoc_bit);
2444 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation); 2436 dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
2445 dx_root->dr_blkno = cpu_to_le64(dr_blkno); 2437 dx_root->dr_blkno = cpu_to_le64(dr_blkno);
@@ -2458,10 +2450,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2458 dx_root->dr_list.l_count = 2450 dx_root->dr_list.l_count =
2459 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb)); 2451 cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
2460 } 2452 }
2461 2453 ocfs2_journal_dirty(handle, dx_root_bh);
2462 ret = ocfs2_journal_dirty(handle, dx_root_bh);
2463 if (ret)
2464 mlog_errno(ret);
2465 2454
2466 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh, 2455 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
2467 OCFS2_JOURNAL_ACCESS_CREATE); 2456 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -2475,9 +2464,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2475 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL; 2464 OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
2476 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); 2465 di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
2477 2466
2478 ret = ocfs2_journal_dirty(handle, di_bh); 2467 ocfs2_journal_dirty(handle, di_bh);
2479 if (ret)
2480 mlog_errno(ret);
2481 2468
2482 *ret_dx_root_bh = dx_root_bh; 2469 *ret_dx_root_bh = dx_root_bh;
2483 dx_root_bh = NULL; 2470 dx_root_bh = NULL;
@@ -2558,7 +2545,7 @@ static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
2558 * chance of contiguousness as the directory grows in number 2545 * chance of contiguousness as the directory grows in number
2559 * of entries. 2546 * of entries.
2560 */ 2547 */
2561 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, 1, &phys, &num); 2548 ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
2562 if (ret) { 2549 if (ret) {
2563 mlog_errno(ret); 2550 mlog_errno(ret);
2564 goto out; 2551 goto out;
@@ -2991,7 +2978,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2991 * if we only get one now, that's enough to continue. The rest 2978 * if we only get one now, that's enough to continue. The rest
2992 * will be claimed after the conversion to extents. 2979 * will be claimed after the conversion to extents.
2993 */ 2980 */
2994 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); 2981 if (ocfs2_dir_resv_allowed(osb))
2982 data_ac->ac_resv = &oi->ip_la_data_resv;
2983 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
2995 if (ret) { 2984 if (ret) {
2996 mlog_errno(ret); 2985 mlog_errno(ret);
2997 goto out_commit; 2986 goto out_commit;
@@ -3034,11 +3023,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3034 ocfs2_init_dir_trailer(dir, dirdata_bh, i); 3023 ocfs2_init_dir_trailer(dir, dirdata_bh, i);
3035 } 3024 }
3036 3025
3037 ret = ocfs2_journal_dirty(handle, dirdata_bh); 3026 ocfs2_journal_dirty(handle, dirdata_bh);
3038 if (ret) {
3039 mlog_errno(ret);
3040 goto out_commit;
3041 }
3042 3027
3043 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { 3028 if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
3044 /* 3029 /*
@@ -3104,11 +3089,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3104 */ 3089 */
3105 dir->i_blocks = ocfs2_inode_sector_count(dir); 3090 dir->i_blocks = ocfs2_inode_sector_count(dir);
3106 3091
3107 ret = ocfs2_journal_dirty(handle, di_bh); 3092 ocfs2_journal_dirty(handle, di_bh);
3108 if (ret) {
3109 mlog_errno(ret);
3110 goto out_commit;
3111 }
3112 3093
3113 if (ocfs2_supports_indexed_dirs(osb)) { 3094 if (ocfs2_supports_indexed_dirs(osb)) {
3114 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh, 3095 ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
@@ -3138,7 +3119,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3138 * pass. Claim the 2nd cluster as a separate extent. 3119 * pass. Claim the 2nd cluster as a separate extent.
3139 */ 3120 */
3140 if (alloc > len) { 3121 if (alloc > len) {
3141 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, 3122 ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
3142 &len); 3123 &len);
3143 if (ret) { 3124 if (ret) {
3144 mlog_errno(ret); 3125 mlog_errno(ret);
@@ -3369,6 +3350,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3369 goto bail; 3350 goto bail;
3370 } 3351 }
3371 3352
3353 if (ocfs2_dir_resv_allowed(osb))
3354 data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
3355
3372 credits = ocfs2_calc_extend_credits(sb, el, 1); 3356 credits = ocfs2_calc_extend_credits(sb, el, 1);
3373 } else { 3357 } else {
3374 spin_unlock(&OCFS2_I(dir)->ip_lock); 3358 spin_unlock(&OCFS2_I(dir)->ip_lock);
@@ -3423,11 +3407,7 @@ do_extend:
3423 } else { 3407 } else {
3424 de->rec_len = cpu_to_le16(sb->s_blocksize); 3408 de->rec_len = cpu_to_le16(sb->s_blocksize);
3425 } 3409 }
3426 status = ocfs2_journal_dirty(handle, new_bh); 3410 ocfs2_journal_dirty(handle, new_bh);
3427 if (status < 0) {
3428 mlog_errno(status);
3429 goto bail;
3430 }
3431 3411
3432 dir_i_size += dir->i_sb->s_blocksize; 3412 dir_i_size += dir->i_sb->s_blocksize;
3433 i_size_write(dir, dir_i_size); 3413 i_size_write(dir, dir_i_size);
@@ -3906,11 +3886,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3906 sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp, 3886 sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
3907 dx_leaf_sort_swap); 3887 dx_leaf_sort_swap);
3908 3888
3909 ret = ocfs2_journal_dirty(handle, dx_leaf_bh); 3889 ocfs2_journal_dirty(handle, dx_leaf_bh);
3910 if (ret) {
3911 mlog_errno(ret);
3912 goto out_commit;
3913 }
3914 3890
3915 ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash, 3891 ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
3916 &split_hash); 3892 &split_hash);
@@ -4490,7 +4466,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4490 4466
4491 blk = le64_to_cpu(dx_root->dr_blkno); 4467 blk = le64_to_cpu(dx_root->dr_blkno);
4492 bit = le16_to_cpu(dx_root->dr_suballoc_bit); 4468 bit = le16_to_cpu(dx_root->dr_suballoc_bit);
4493 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 4469 if (dx_root->dr_suballoc_loc)
4470 bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
4471 else
4472 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
4494 ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh, 4473 ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
4495 bit, bg_blkno, 1); 4474 bit, bg_blkno, 1);
4496 if (ret) 4475 if (ret)
@@ -4551,8 +4530,8 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4551 4530
4552 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); 4531 p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
4553 4532
4554 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 4533 ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4555 &dealloc); 4534 &dealloc, 0);
4556 if (ret) { 4535 if (ret) {
4557 mlog_errno(ret); 4536 mlog_errno(ret);
4558 goto out; 4537 goto out;
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 12d5eb78a11a..f44999156839 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -88,7 +88,7 @@ static int dlm_should_cancel_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
88 return 0; 88 return 0;
89} 89}
90 90
91static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 91void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
92{ 92{
93 mlog_entry_void(); 93 mlog_entry_void();
94 94
@@ -145,7 +145,7 @@ void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
145} 145}
146 146
147 147
148static void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock) 148void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
149{ 149{
150 mlog_entry_void(); 150 mlog_entry_void();
151 151
@@ -451,7 +451,9 @@ int dlm_send_proxy_ast_msg(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
451 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen, 451 ret = o2net_send_message_vec(DLM_PROXY_AST_MSG, dlm->key, vec, veclen,
452 lock->ml.node, &status); 452 lock->ml.node, &status);
453 if (ret < 0) 453 if (ret < 0)
454 mlog_errno(ret); 454 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
455 "node %u\n", ret, DLM_PROXY_AST_MSG, dlm->key,
456 lock->ml.node);
455 else { 457 else {
456 if (status == DLM_RECOVERING) { 458 if (status == DLM_RECOVERING) {
457 mlog(ML_ERROR, "sent AST to node %u, it thinks this " 459 mlog(ML_ERROR, "sent AST to node %u, it thinks this "
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 0102be35980c..4b6ae2c13b47 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -37,7 +37,7 @@
37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes 37#define DLM_THREAD_SHUFFLE_INTERVAL 5 // flush everything every 5 passes
38#define DLM_THREAD_MS 200 // flush at least every 200 ms 38#define DLM_THREAD_MS 200 // flush at least every 200 ms
39 39
40#define DLM_HASH_SIZE_DEFAULT (1 << 14) 40#define DLM_HASH_SIZE_DEFAULT (1 << 17)
41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE 41#if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
42# define DLM_HASH_PAGES 1 42# define DLM_HASH_PAGES 1
43#else 43#else
@@ -904,6 +904,8 @@ void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
904 904
905void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 905void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
906void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock); 906void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
907void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
908void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
907void dlm_do_local_ast(struct dlm_ctxt *dlm, 909void dlm_do_local_ast(struct dlm_ctxt *dlm,
908 struct dlm_lock_resource *res, 910 struct dlm_lock_resource *res,
909 struct dlm_lock *lock); 911 struct dlm_lock *lock);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index 90803b47cd8c..9f30491e5e88 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -390,7 +390,9 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
390 } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED) 390 } else if (ret != DLM_NORMAL && ret != DLM_NOTQUEUED)
391 dlm_error(ret); 391 dlm_error(ret);
392 } else { 392 } else {
393 mlog_errno(tmpret); 393 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
394 "node %u\n", tmpret, DLM_CONVERT_LOCK_MSG, dlm->key,
395 res->owner);
394 if (dlm_is_host_down(tmpret)) { 396 if (dlm_is_host_down(tmpret)) {
395 /* instead of logging the same network error over 397 /* instead of logging the same network error over
396 * and over, sleep here and wait for the heartbeat 398 * and over, sleep here and wait for the heartbeat
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 988c9055fd4e..6b5a492e1749 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -511,7 +511,7 @@ static void __dlm_print_nodes(struct dlm_ctxt *dlm)
511 511
512 assert_spin_locked(&dlm->spinlock); 512 assert_spin_locked(&dlm->spinlock);
513 513
514 printk(KERN_INFO "ocfs2_dlm: Nodes in domain (\"%s\"): ", dlm->name); 514 printk(KERN_NOTICE "o2dlm: Nodes in domain %s: ", dlm->name);
515 515
516 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 516 while ((node = find_next_bit(dlm->domain_map, O2NM_MAX_NODES,
517 node + 1)) < O2NM_MAX_NODES) { 517 node + 1)) < O2NM_MAX_NODES) {
@@ -534,7 +534,7 @@ static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data,
534 534
535 node = exit_msg->node_idx; 535 node = exit_msg->node_idx;
536 536
537 printk(KERN_INFO "ocfs2_dlm: Node %u leaves domain %s\n", node, dlm->name); 537 printk(KERN_NOTICE "o2dlm: Node %u leaves domain %s\n", node, dlm->name);
538 538
539 spin_lock(&dlm->spinlock); 539 spin_lock(&dlm->spinlock);
540 clear_bit(node, dlm->domain_map); 540 clear_bit(node, dlm->domain_map);
@@ -565,7 +565,9 @@ static int dlm_send_one_domain_exit(struct dlm_ctxt *dlm,
565 status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key, 565 status = o2net_send_message(DLM_EXIT_DOMAIN_MSG, dlm->key,
566 &leave_msg, sizeof(leave_msg), node, 566 &leave_msg, sizeof(leave_msg), node,
567 NULL); 567 NULL);
568 568 if (status < 0)
569 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
570 "node %u\n", status, DLM_EXIT_DOMAIN_MSG, dlm->key, node);
569 mlog(0, "status return %d from o2net_send_message\n", status); 571 mlog(0, "status return %d from o2net_send_message\n", status);
570 572
571 return status; 573 return status;
@@ -904,7 +906,7 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data,
904 set_bit(assert->node_idx, dlm->domain_map); 906 set_bit(assert->node_idx, dlm->domain_map);
905 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); 907 __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN);
906 908
907 printk(KERN_INFO "ocfs2_dlm: Node %u joins domain %s\n", 909 printk(KERN_NOTICE "o2dlm: Node %u joins domain %s\n",
908 assert->node_idx, dlm->name); 910 assert->node_idx, dlm->name);
909 __dlm_print_nodes(dlm); 911 __dlm_print_nodes(dlm);
910 912
@@ -962,7 +964,9 @@ static int dlm_send_one_join_cancel(struct dlm_ctxt *dlm,
962 &cancel_msg, sizeof(cancel_msg), node, 964 &cancel_msg, sizeof(cancel_msg), node,
963 NULL); 965 NULL);
964 if (status < 0) { 966 if (status < 0) {
965 mlog_errno(status); 967 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
968 "node %u\n", status, DLM_CANCEL_JOIN_MSG, DLM_MOD_KEY,
969 node);
966 goto bail; 970 goto bail;
967 } 971 }
968 972
@@ -1029,10 +1033,11 @@ static int dlm_request_join(struct dlm_ctxt *dlm,
1029 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES); 1033 byte_copymap(join_msg.node_map, dlm->live_nodes_map, O2NM_MAX_NODES);
1030 1034
1031 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg, 1035 status = o2net_send_message(DLM_QUERY_JOIN_MSG, DLM_MOD_KEY, &join_msg,
1032 sizeof(join_msg), node, 1036 sizeof(join_msg), node, &join_resp);
1033 &join_resp);
1034 if (status < 0 && status != -ENOPROTOOPT) { 1037 if (status < 0 && status != -ENOPROTOOPT) {
1035 mlog_errno(status); 1038 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
1039 "node %u\n", status, DLM_QUERY_JOIN_MSG, DLM_MOD_KEY,
1040 node);
1036 goto bail; 1041 goto bail;
1037 } 1042 }
1038 dlm_query_join_wire_to_packet(join_resp, &packet); 1043 dlm_query_join_wire_to_packet(join_resp, &packet);
@@ -1103,7 +1108,9 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm,
1103 &assert_msg, sizeof(assert_msg), node, 1108 &assert_msg, sizeof(assert_msg), node,
1104 NULL); 1109 NULL);
1105 if (status < 0) 1110 if (status < 0)
1106 mlog_errno(status); 1111 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
1112 "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY,
1113 node);
1107 1114
1108 return status; 1115 return status;
1109} 1116}
@@ -1516,7 +1523,7 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1516 goto leave; 1523 goto leave;
1517 } 1524 }
1518 1525
1519 dlm->name = kmalloc(strlen(domain) + 1, GFP_KERNEL); 1526 dlm->name = kstrdup(domain, GFP_KERNEL);
1520 if (dlm->name == NULL) { 1527 if (dlm->name == NULL) {
1521 mlog_errno(-ENOMEM); 1528 mlog_errno(-ENOMEM);
1522 kfree(dlm); 1529 kfree(dlm);
@@ -1550,7 +1557,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain,
1550 for (i = 0; i < DLM_HASH_BUCKETS; i++) 1557 for (i = 0; i < DLM_HASH_BUCKETS; i++)
1551 INIT_HLIST_HEAD(dlm_master_hash(dlm, i)); 1558 INIT_HLIST_HEAD(dlm_master_hash(dlm, i));
1552 1559
1553 strcpy(dlm->name, domain);
1554 dlm->key = key; 1560 dlm->key = key;
1555 dlm->node_num = o2nm_this_node(); 1561 dlm->node_num = o2nm_this_node();
1556 1562
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 733337772671..69cf369961c4 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -329,7 +329,9 @@ static enum dlm_status dlm_send_remote_lock_request(struct dlm_ctxt *dlm,
329 BUG(); 329 BUG();
330 } 330 }
331 } else { 331 } else {
332 mlog_errno(tmpret); 332 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
333 "node %u\n", tmpret, DLM_CREATE_LOCK_MSG, dlm->key,
334 res->owner);
333 if (dlm_is_host_down(tmpret)) { 335 if (dlm_is_host_down(tmpret)) {
334 ret = DLM_RECOVERING; 336 ret = DLM_RECOVERING;
335 mlog(0, "node %u died so returning DLM_RECOVERING " 337 mlog(0, "node %u died so returning DLM_RECOVERING "
@@ -429,7 +431,7 @@ struct dlm_lock * dlm_new_lock(int type, u8 node, u64 cookie,
429 struct dlm_lock *lock; 431 struct dlm_lock *lock;
430 int kernel_allocated = 0; 432 int kernel_allocated = 0;
431 433
432 lock = (struct dlm_lock *) kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS); 434 lock = kmem_cache_zalloc(dlm_lock_cache, GFP_NOFS);
433 if (!lock) 435 if (!lock)
434 return NULL; 436 return NULL;
435 437
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9289b4357d27..4a7506a4e314 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -617,13 +617,11 @@ struct dlm_lock_resource *dlm_new_lockres(struct dlm_ctxt *dlm,
617{ 617{
618 struct dlm_lock_resource *res = NULL; 618 struct dlm_lock_resource *res = NULL;
619 619
620 res = (struct dlm_lock_resource *) 620 res = kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
621 kmem_cache_zalloc(dlm_lockres_cache, GFP_NOFS);
622 if (!res) 621 if (!res)
623 goto error; 622 goto error;
624 623
625 res->lockname.name = (char *) 624 res->lockname.name = kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
626 kmem_cache_zalloc(dlm_lockname_cache, GFP_NOFS);
627 if (!res->lockname.name) 625 if (!res->lockname.name)
628 goto error; 626 goto error;
629 627
@@ -757,8 +755,7 @@ lookup:
757 spin_unlock(&dlm->spinlock); 755 spin_unlock(&dlm->spinlock);
758 mlog(0, "allocating a new resource\n"); 756 mlog(0, "allocating a new resource\n");
759 /* nothing found and we need to allocate one. */ 757 /* nothing found and we need to allocate one. */
760 alloc_mle = (struct dlm_master_list_entry *) 758 alloc_mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
761 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
762 if (!alloc_mle) 759 if (!alloc_mle)
763 goto leave; 760 goto leave;
764 res = dlm_new_lockres(dlm, lockid, namelen); 761 res = dlm_new_lockres(dlm, lockid, namelen);
@@ -1542,8 +1539,7 @@ way_up_top:
1542 spin_unlock(&dlm->master_lock); 1539 spin_unlock(&dlm->master_lock);
1543 spin_unlock(&dlm->spinlock); 1540 spin_unlock(&dlm->spinlock);
1544 1541
1545 mle = (struct dlm_master_list_entry *) 1542 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1546 kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
1547 if (!mle) { 1543 if (!mle) {
1548 response = DLM_MASTER_RESP_ERROR; 1544 response = DLM_MASTER_RESP_ERROR;
1549 mlog_errno(-ENOMEM); 1545 mlog_errno(-ENOMEM);
@@ -1666,7 +1662,9 @@ again:
1666 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key, 1662 tmpret = o2net_send_message(DLM_ASSERT_MASTER_MSG, dlm->key,
1667 &assert, sizeof(assert), to, &r); 1663 &assert, sizeof(assert), to, &r);
1668 if (tmpret < 0) { 1664 if (tmpret < 0) {
1669 mlog(0, "assert_master returned %d!\n", tmpret); 1665 mlog(ML_ERROR, "Error %d when sending message %u (key "
1666 "0x%x) to node %u\n", tmpret,
1667 DLM_ASSERT_MASTER_MSG, dlm->key, to);
1670 if (!dlm_is_host_down(tmpret)) { 1668 if (!dlm_is_host_down(tmpret)) {
1671 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret); 1669 mlog(ML_ERROR, "unhandled error=%d!\n", tmpret);
1672 BUG(); 1670 BUG();
@@ -2205,7 +2203,9 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2205 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key, 2203 ret = o2net_send_message(DLM_DEREF_LOCKRES_MSG, dlm->key,
2206 &deref, sizeof(deref), res->owner, &r); 2204 &deref, sizeof(deref), res->owner, &r);
2207 if (ret < 0) 2205 if (ret < 0)
2208 mlog_errno(ret); 2206 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
2207 "node %u\n", ret, DLM_DEREF_LOCKRES_MSG, dlm->key,
2208 res->owner);
2209 else if (r < 0) { 2209 else if (r < 0) {
2210 /* BAD. other node says I did not have a ref. */ 2210 /* BAD. other node says I did not have a ref. */
2211 mlog(ML_ERROR,"while dropping ref on %s:%.*s " 2211 mlog(ML_ERROR,"while dropping ref on %s:%.*s "
@@ -2452,8 +2452,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2452 goto leave; 2452 goto leave;
2453 } 2453 }
2454 2454
2455 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 2455 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
2456 GFP_NOFS);
2457 if (!mle) { 2456 if (!mle) {
2458 mlog_errno(ret); 2457 mlog_errno(ret);
2459 goto leave; 2458 goto leave;
@@ -2975,7 +2974,9 @@ static int dlm_do_migrate_request(struct dlm_ctxt *dlm,
2975 &migrate, sizeof(migrate), nodenum, 2974 &migrate, sizeof(migrate), nodenum,
2976 &status); 2975 &status);
2977 if (ret < 0) { 2976 if (ret < 0) {
2978 mlog(0, "migrate_request returned %d!\n", ret); 2977 mlog(ML_ERROR, "Error %d when sending message %u (key "
2978 "0x%x) to node %u\n", ret, DLM_MIGRATE_REQUEST_MSG,
2979 dlm->key, nodenum);
2979 if (!dlm_is_host_down(ret)) { 2980 if (!dlm_is_host_down(ret)) {
2980 mlog(ML_ERROR, "unhandled error=%d!\n", ret); 2981 mlog(ML_ERROR, "unhandled error=%d!\n", ret);
2981 BUG(); 2982 BUG();
@@ -3033,8 +3034,7 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
3033 hash = dlm_lockid_hash(name, namelen); 3034 hash = dlm_lockid_hash(name, namelen);
3034 3035
3035 /* preallocate.. if this fails, abort */ 3036 /* preallocate.. if this fails, abort */
3036 mle = (struct dlm_master_list_entry *) kmem_cache_alloc(dlm_mle_cache, 3037 mle = kmem_cache_alloc(dlm_mle_cache, GFP_NOFS);
3037 GFP_NOFS);
3038 3038
3039 if (!mle) { 3039 if (!mle) {
3040 ret = -ENOMEM; 3040 ret = -ENOMEM;
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index b4f99de2caf3..f8b75ce4be70 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -803,7 +803,9 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
803 803
804 /* negative status is handled by caller */ 804 /* negative status is handled by caller */
805 if (ret < 0) 805 if (ret < 0)
806 mlog_errno(ret); 806 mlog(ML_ERROR, "Error %d when sending message %u (key "
807 "0x%x) to node %u\n", ret, DLM_LOCK_REQUEST_MSG,
808 dlm->key, request_from);
807 809
808 // return from here, then 810 // return from here, then
809 // sleep until all received or error 811 // sleep until all received or error
@@ -955,10 +957,10 @@ static int dlm_send_all_done_msg(struct dlm_ctxt *dlm, u8 dead_node, u8 send_to)
955 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg, 957 ret = o2net_send_message(DLM_RECO_DATA_DONE_MSG, dlm->key, &done_msg,
956 sizeof(done_msg), send_to, &tmpret); 958 sizeof(done_msg), send_to, &tmpret);
957 if (ret < 0) { 959 if (ret < 0) {
960 mlog(ML_ERROR, "Error %d when sending message %u (key "
961 "0x%x) to node %u\n", ret, DLM_RECO_DATA_DONE_MSG,
962 dlm->key, send_to);
958 if (!dlm_is_host_down(ret)) { 963 if (!dlm_is_host_down(ret)) {
959 mlog_errno(ret);
960 mlog(ML_ERROR, "%s: unknown error sending data-done "
961 "to %u\n", dlm->name, send_to);
962 BUG(); 964 BUG();
963 } 965 }
964 } else 966 } else
@@ -1126,7 +1128,9 @@ static int dlm_send_mig_lockres_msg(struct dlm_ctxt *dlm,
1126 if (ret < 0) { 1128 if (ret < 0) {
1127 /* XXX: negative status is not handled. 1129 /* XXX: negative status is not handled.
1128 * this will end up killing this node. */ 1130 * this will end up killing this node. */
1129 mlog_errno(ret); 1131 mlog(ML_ERROR, "Error %d when sending message %u (key "
1132 "0x%x) to node %u\n", ret, DLM_MIG_LOCKRES_MSG,
1133 dlm->key, send_to);
1130 } else { 1134 } else {
1131 /* might get an -ENOMEM back here */ 1135 /* might get an -ENOMEM back here */
1132 ret = status; 1136 ret = status;
@@ -1642,7 +1646,9 @@ int dlm_do_master_requery(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
1642 &req, sizeof(req), nodenum, &status); 1646 &req, sizeof(req), nodenum, &status);
1643 /* XXX: negative status not handled properly here. */ 1647 /* XXX: negative status not handled properly here. */
1644 if (ret < 0) 1648 if (ret < 0)
1645 mlog_errno(ret); 1649 mlog(ML_ERROR, "Error %d when sending message %u (key "
1650 "0x%x) to node %u\n", ret, DLM_MASTER_REQUERY_MSG,
1651 dlm->key, nodenum);
1646 else { 1652 else {
1647 BUG_ON(status < 0); 1653 BUG_ON(status < 0);
1648 BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN); 1654 BUG_ON(status > DLM_LOCK_RES_OWNER_UNKNOWN);
@@ -2640,7 +2646,7 @@ retry:
2640 if (dlm_is_host_down(ret)) { 2646 if (dlm_is_host_down(ret)) {
2641 /* node is down. not involved in recovery 2647 /* node is down. not involved in recovery
2642 * so just keep going */ 2648 * so just keep going */
2643 mlog(0, "%s: node %u was down when sending " 2649 mlog(ML_NOTICE, "%s: node %u was down when sending "
2644 "begin reco msg (%d)\n", dlm->name, nodenum, ret); 2650 "begin reco msg (%d)\n", dlm->name, nodenum, ret);
2645 ret = 0; 2651 ret = 0;
2646 } 2652 }
@@ -2660,11 +2666,12 @@ retry:
2660 } 2666 }
2661 if (ret < 0) { 2667 if (ret < 0) {
2662 struct dlm_lock_resource *res; 2668 struct dlm_lock_resource *res;
2669
2663 /* this is now a serious problem, possibly ENOMEM 2670 /* this is now a serious problem, possibly ENOMEM
2664 * in the network stack. must retry */ 2671 * in the network stack. must retry */
2665 mlog_errno(ret); 2672 mlog_errno(ret);
2666 mlog(ML_ERROR, "begin reco of dlm %s to node %u " 2673 mlog(ML_ERROR, "begin reco of dlm %s to node %u "
2667 " returned %d\n", dlm->name, nodenum, ret); 2674 "returned %d\n", dlm->name, nodenum, ret);
2668 res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME, 2675 res = dlm_lookup_lockres(dlm, DLM_RECOVERY_LOCK_NAME,
2669 DLM_RECOVERY_LOCK_NAME_LEN); 2676 DLM_RECOVERY_LOCK_NAME_LEN);
2670 if (res) { 2677 if (res) {
@@ -2789,7 +2796,9 @@ stage2:
2789 if (ret >= 0) 2796 if (ret >= 0)
2790 ret = status; 2797 ret = status;
2791 if (ret < 0) { 2798 if (ret < 0) {
2792 mlog_errno(ret); 2799 mlog(ML_ERROR, "Error %d when sending message %u (key "
2800 "0x%x) to node %u\n", ret, DLM_FINALIZE_RECO_MSG,
2801 dlm->key, nodenum);
2793 if (dlm_is_host_down(ret)) { 2802 if (dlm_is_host_down(ret)) {
2794 /* this has no effect on this recovery 2803 /* this has no effect on this recovery
2795 * session, so set the status to zero to 2804 * session, so set the status to zero to
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index 11a6d1fd1d35..d4f73ca68fe5 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -309,6 +309,7 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
309 * spinlock, and because we know that it is not migrating/ 309 * spinlock, and because we know that it is not migrating/
310 * recovering/in-progress, it is fine to reserve asts and 310 * recovering/in-progress, it is fine to reserve asts and
311 * basts right before queueing them all throughout */ 311 * basts right before queueing them all throughout */
312 assert_spin_locked(&dlm->ast_lock);
312 assert_spin_locked(&res->spinlock); 313 assert_spin_locked(&res->spinlock);
313 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING| 314 BUG_ON((res->state & (DLM_LOCK_RES_MIGRATING|
314 DLM_LOCK_RES_RECOVERING| 315 DLM_LOCK_RES_RECOVERING|
@@ -337,7 +338,7 @@ converting:
337 /* queue the BAST if not already */ 338 /* queue the BAST if not already */
338 if (lock->ml.highest_blocked == LKM_IVMODE) { 339 if (lock->ml.highest_blocked == LKM_IVMODE) {
339 __dlm_lockres_reserve_ast(res); 340 __dlm_lockres_reserve_ast(res);
340 dlm_queue_bast(dlm, lock); 341 __dlm_queue_bast(dlm, lock);
341 } 342 }
342 /* update the highest_blocked if needed */ 343 /* update the highest_blocked if needed */
343 if (lock->ml.highest_blocked < target->ml.convert_type) 344 if (lock->ml.highest_blocked < target->ml.convert_type)
@@ -355,7 +356,7 @@ converting:
355 can_grant = 0; 356 can_grant = 0;
356 if (lock->ml.highest_blocked == LKM_IVMODE) { 357 if (lock->ml.highest_blocked == LKM_IVMODE) {
357 __dlm_lockres_reserve_ast(res); 358 __dlm_lockres_reserve_ast(res);
358 dlm_queue_bast(dlm, lock); 359 __dlm_queue_bast(dlm, lock);
359 } 360 }
360 if (lock->ml.highest_blocked < target->ml.convert_type) 361 if (lock->ml.highest_blocked < target->ml.convert_type)
361 lock->ml.highest_blocked = 362 lock->ml.highest_blocked =
@@ -383,7 +384,7 @@ converting:
383 spin_unlock(&target->spinlock); 384 spin_unlock(&target->spinlock);
384 385
385 __dlm_lockres_reserve_ast(res); 386 __dlm_lockres_reserve_ast(res);
386 dlm_queue_ast(dlm, target); 387 __dlm_queue_ast(dlm, target);
387 /* go back and check for more */ 388 /* go back and check for more */
388 goto converting; 389 goto converting;
389 } 390 }
@@ -402,7 +403,7 @@ blocked:
402 can_grant = 0; 403 can_grant = 0;
403 if (lock->ml.highest_blocked == LKM_IVMODE) { 404 if (lock->ml.highest_blocked == LKM_IVMODE) {
404 __dlm_lockres_reserve_ast(res); 405 __dlm_lockres_reserve_ast(res);
405 dlm_queue_bast(dlm, lock); 406 __dlm_queue_bast(dlm, lock);
406 } 407 }
407 if (lock->ml.highest_blocked < target->ml.type) 408 if (lock->ml.highest_blocked < target->ml.type)
408 lock->ml.highest_blocked = target->ml.type; 409 lock->ml.highest_blocked = target->ml.type;
@@ -418,7 +419,7 @@ blocked:
418 can_grant = 0; 419 can_grant = 0;
419 if (lock->ml.highest_blocked == LKM_IVMODE) { 420 if (lock->ml.highest_blocked == LKM_IVMODE) {
420 __dlm_lockres_reserve_ast(res); 421 __dlm_lockres_reserve_ast(res);
421 dlm_queue_bast(dlm, lock); 422 __dlm_queue_bast(dlm, lock);
422 } 423 }
423 if (lock->ml.highest_blocked < target->ml.type) 424 if (lock->ml.highest_blocked < target->ml.type)
424 lock->ml.highest_blocked = target->ml.type; 425 lock->ml.highest_blocked = target->ml.type;
@@ -444,7 +445,7 @@ blocked:
444 spin_unlock(&target->spinlock); 445 spin_unlock(&target->spinlock);
445 446
446 __dlm_lockres_reserve_ast(res); 447 __dlm_lockres_reserve_ast(res);
447 dlm_queue_ast(dlm, target); 448 __dlm_queue_ast(dlm, target);
448 /* go back and check for more */ 449 /* go back and check for more */
449 goto converting; 450 goto converting;
450 } 451 }
@@ -674,6 +675,7 @@ static int dlm_thread(void *data)
674 /* lockres can be re-dirtied/re-added to the 675 /* lockres can be re-dirtied/re-added to the
675 * dirty_list in this gap, but that is ok */ 676 * dirty_list in this gap, but that is ok */
676 677
678 spin_lock(&dlm->ast_lock);
677 spin_lock(&res->spinlock); 679 spin_lock(&res->spinlock);
678 if (res->owner != dlm->node_num) { 680 if (res->owner != dlm->node_num) {
679 __dlm_print_one_lock_resource(res); 681 __dlm_print_one_lock_resource(res);
@@ -694,6 +696,7 @@ static int dlm_thread(void *data)
694 /* move it to the tail and keep going */ 696 /* move it to the tail and keep going */
695 res->state &= ~DLM_LOCK_RES_DIRTY; 697 res->state &= ~DLM_LOCK_RES_DIRTY;
696 spin_unlock(&res->spinlock); 698 spin_unlock(&res->spinlock);
699 spin_unlock(&dlm->ast_lock);
697 mlog(0, "delaying list shuffling for in-" 700 mlog(0, "delaying list shuffling for in-"
698 "progress lockres %.*s, state=%d\n", 701 "progress lockres %.*s, state=%d\n",
699 res->lockname.len, res->lockname.name, 702 res->lockname.len, res->lockname.name,
@@ -715,6 +718,7 @@ static int dlm_thread(void *data)
715 dlm_shuffle_lists(dlm, res); 718 dlm_shuffle_lists(dlm, res);
716 res->state &= ~DLM_LOCK_RES_DIRTY; 719 res->state &= ~DLM_LOCK_RES_DIRTY;
717 spin_unlock(&res->spinlock); 720 spin_unlock(&res->spinlock);
721 spin_unlock(&dlm->ast_lock);
718 722
719 dlm_lockres_calc_usage(dlm, res); 723 dlm_lockres_calc_usage(dlm, res);
720 724
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index b47c1b92b82b..817287c6a6db 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -354,7 +354,8 @@ static enum dlm_status dlm_send_remote_unlock_request(struct dlm_ctxt *dlm,
354 mlog(0, "master was in-progress. retry\n"); 354 mlog(0, "master was in-progress. retry\n");
355 ret = status; 355 ret = status;
356 } else { 356 } else {
357 mlog_errno(tmpret); 357 mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to "
358 "node %u\n", tmpret, DLM_UNLOCK_LOCK_MSG, dlm->key, owner);
358 if (dlm_is_host_down(tmpret)) { 359 if (dlm_is_host_down(tmpret)) {
359 /* NOTE: this seems strange, but it is what we want. 360 /* NOTE: this seems strange, but it is what we want.
360 * when the master goes down during a cancel or 361 * when the master goes down during a cancel or
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a5fbd9cea968..f74f1400eccd 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -278,10 +278,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
278 inode->i_atime = CURRENT_TIME; 278 inode->i_atime = CURRENT_TIME;
279 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 279 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
280 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 280 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
281 281 ocfs2_journal_dirty(handle, bh);
282 ret = ocfs2_journal_dirty(handle, bh);
283 if (ret < 0)
284 mlog_errno(ret);
285 282
286out_commit: 283out_commit:
287 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 284 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -430,9 +427,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
430 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); 427 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
431 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 428 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
432 429
433 status = ocfs2_journal_dirty(handle, fe_bh); 430 ocfs2_journal_dirty(handle, fe_bh);
434 if (status < 0)
435 mlog_errno(status);
436 431
437out_commit: 432out_commit:
438 ocfs2_commit_trans(osb, handle); 433 ocfs2_commit_trans(osb, handle);
@@ -449,7 +444,6 @@ static int ocfs2_truncate_file(struct inode *inode,
449 int status = 0; 444 int status = 0;
450 struct ocfs2_dinode *fe = NULL; 445 struct ocfs2_dinode *fe = NULL;
451 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 446 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
452 struct ocfs2_truncate_context *tc = NULL;
453 447
454 mlog_entry("(inode = %llu, new_i_size = %llu\n", 448 mlog_entry("(inode = %llu, new_i_size = %llu\n",
455 (unsigned long long)OCFS2_I(inode)->ip_blkno, 449 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -488,6 +482,9 @@ static int ocfs2_truncate_file(struct inode *inode,
488 482
489 down_write(&OCFS2_I(inode)->ip_alloc_sem); 483 down_write(&OCFS2_I(inode)->ip_alloc_sem);
490 484
485 ocfs2_resv_discard(&osb->osb_la_resmap,
486 &OCFS2_I(inode)->ip_la_data_resv);
487
491 /* 488 /*
492 * The inode lock forced other nodes to sync and drop their 489 * The inode lock forced other nodes to sync and drop their
493 * pages, which (correctly) happens even if we have a truncate 490 * pages, which (correctly) happens even if we have a truncate
@@ -517,13 +514,7 @@ static int ocfs2_truncate_file(struct inode *inode,
517 goto bail_unlock_sem; 514 goto bail_unlock_sem;
518 } 515 }
519 516
520 status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); 517 status = ocfs2_commit_truncate(osb, inode, di_bh);
521 if (status < 0) {
522 mlog_errno(status);
523 goto bail_unlock_sem;
524 }
525
526 status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
527 if (status < 0) { 518 if (status < 0) {
528 mlog_errno(status); 519 mlog_errno(status);
529 goto bail_unlock_sem; 520 goto bail_unlock_sem;
@@ -666,11 +657,7 @@ restarted_transaction:
666 goto leave; 657 goto leave;
667 } 658 }
668 659
669 status = ocfs2_journal_dirty(handle, bh); 660 ocfs2_journal_dirty(handle, bh);
670 if (status < 0) {
671 mlog_errno(status);
672 goto leave;
673 }
674 661
675 spin_lock(&OCFS2_I(inode)->ip_lock); 662 spin_lock(&OCFS2_I(inode)->ip_lock);
676 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); 663 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
@@ -1195,9 +1182,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1195 di = (struct ocfs2_dinode *) bh->b_data; 1182 di = (struct ocfs2_dinode *) bh->b_data;
1196 di->i_mode = cpu_to_le16(inode->i_mode); 1183 di->i_mode = cpu_to_le16(inode->i_mode);
1197 1184
1198 ret = ocfs2_journal_dirty(handle, bh); 1185 ocfs2_journal_dirty(handle, bh);
1199 if (ret < 0)
1200 mlog_errno(ret);
1201 1186
1202out_trans: 1187out_trans:
1203 ocfs2_commit_trans(osb, handle); 1188 ocfs2_commit_trans(osb, handle);
@@ -1434,16 +1419,90 @@ out:
1434 return ret; 1419 return ret;
1435} 1420}
1436 1421
1422static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
1423{
1424 int i;
1425 struct ocfs2_extent_rec *rec = NULL;
1426
1427 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1428
1429 rec = &el->l_recs[i];
1430
1431 if (le32_to_cpu(rec->e_cpos) < pos)
1432 break;
1433 }
1434
1435 return i;
1436}
1437
1438/*
1439 * Helper to calculate the punching pos and length in one run, we handle the
1440 * following three cases in order:
1441 *
1442 * - remove the entire record
1443 * - remove a partial record
1444 * - no record needs to be removed (hole-punching completed)
1445*/
1446static void ocfs2_calc_trunc_pos(struct inode *inode,
1447 struct ocfs2_extent_list *el,
1448 struct ocfs2_extent_rec *rec,
1449 u32 trunc_start, u32 *trunc_cpos,
1450 u32 *trunc_len, u32 *trunc_end,
1451 u64 *blkno, int *done)
1452{
1453 int ret = 0;
1454 u32 coff, range;
1455
1456 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1457
1458 if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
1459 *trunc_cpos = le32_to_cpu(rec->e_cpos);
1460 /*
1461 * Skip holes if any.
1462 */
1463 if (range < *trunc_end)
1464 *trunc_end = range;
1465 *trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
1466 *blkno = le64_to_cpu(rec->e_blkno);
1467 *trunc_end = le32_to_cpu(rec->e_cpos);
1468 } else if (range > trunc_start) {
1469 *trunc_cpos = trunc_start;
1470 *trunc_len = *trunc_end - trunc_start;
1471 coff = trunc_start - le32_to_cpu(rec->e_cpos);
1472 *blkno = le64_to_cpu(rec->e_blkno) +
1473 ocfs2_clusters_to_blocks(inode->i_sb, coff);
1474 *trunc_end = trunc_start;
1475 } else {
1476 /*
1477 * It may have two following possibilities:
1478 *
1479 * - last record has been removed
1480 * - trunc_start was within a hole
1481 *
1482 * both two cases mean the completion of hole punching.
1483 */
1484 ret = 1;
1485 }
1486
1487 *done = ret;
1488}
1489
1437static int ocfs2_remove_inode_range(struct inode *inode, 1490static int ocfs2_remove_inode_range(struct inode *inode,
1438 struct buffer_head *di_bh, u64 byte_start, 1491 struct buffer_head *di_bh, u64 byte_start,
1439 u64 byte_len) 1492 u64 byte_len)
1440{ 1493{
1441 int ret = 0; 1494 int ret = 0, flags = 0, done = 0, i;
1442 u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; 1495 u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
1496 u32 cluster_in_el;
1443 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1497 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1444 struct ocfs2_cached_dealloc_ctxt dealloc; 1498 struct ocfs2_cached_dealloc_ctxt dealloc;
1445 struct address_space *mapping = inode->i_mapping; 1499 struct address_space *mapping = inode->i_mapping;
1446 struct ocfs2_extent_tree et; 1500 struct ocfs2_extent_tree et;
1501 struct ocfs2_path *path = NULL;
1502 struct ocfs2_extent_list *el = NULL;
1503 struct ocfs2_extent_rec *rec = NULL;
1504 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1505 u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
1447 1506
1448 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 1507 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1449 ocfs2_init_dealloc_ctxt(&dealloc); 1508 ocfs2_init_dealloc_ctxt(&dealloc);
@@ -1469,17 +1528,35 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1469 goto out; 1528 goto out;
1470 } 1529 }
1471 1530
1531 /*
1532 * For reflinked files we may need to CoW up to two clusters that
1533 * might be partially zeroed later: the clusters containing the hole's
1534 * start and end offsets, when those offsets fall inside a cluster
1535 * (i.e. are not exactly aligned to the cluster size).
1535 */
1536
1537 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
1538
1539 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
1540 if (ret) {
1541 mlog_errno(ret);
1542 goto out;
1543 }
1544
1545 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
1546 if (ret) {
1547 mlog_errno(ret);
1548 goto out;
1549 }
1550 }
1551
1472 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1552 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1473 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1553 trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
1474 if (trunc_len >= trunc_start) 1554 cluster_in_el = trunc_end;
1475 trunc_len -= trunc_start;
1476 else
1477 trunc_len = 0;
1478 1555
1479 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", 1556 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
1480 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1557 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1481 (unsigned long long)byte_start, 1558 (unsigned long long)byte_start,
1482 (unsigned long long)byte_len, trunc_start, trunc_len); 1559 (unsigned long long)byte_len, trunc_start, trunc_end);
1483 1560
1484 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); 1561 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
1485 if (ret) { 1562 if (ret) {
@@ -1487,31 +1564,79 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1487 goto out; 1564 goto out;
1488 } 1565 }
1489 1566
1490 cpos = trunc_start; 1567 path = ocfs2_new_path_from_et(&et);
1491 while (trunc_len) { 1568 if (!path) {
1492 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, 1569 ret = -ENOMEM;
1493 &alloc_size, NULL); 1570 mlog_errno(ret);
1571 goto out;
1572 }
1573
1574 while (trunc_end > trunc_start) {
1575
1576 ret = ocfs2_find_path(INODE_CACHE(inode), path,
1577 cluster_in_el);
1494 if (ret) { 1578 if (ret) {
1495 mlog_errno(ret); 1579 mlog_errno(ret);
1496 goto out; 1580 goto out;
1497 } 1581 }
1498 1582
1499 if (alloc_size > trunc_len) 1583 el = path_leaf_el(path);
1500 alloc_size = trunc_len;
1501 1584
1502 /* Only do work for non-holes */ 1585 i = ocfs2_find_rec(el, trunc_end);
1503 if (phys_cpos != 0) { 1586 /*
1504 ret = ocfs2_remove_btree_range(inode, &et, cpos, 1587 * Need to go to previous extent block.
1505 phys_cpos, alloc_size, 1588 */
1506 &dealloc); 1589 if (i < 0) {
1590 if (path->p_tree_depth == 0)
1591 break;
1592
1593 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
1594 path,
1595 &cluster_in_el);
1507 if (ret) { 1596 if (ret) {
1508 mlog_errno(ret); 1597 mlog_errno(ret);
1509 goto out; 1598 goto out;
1510 } 1599 }
1600
1601 /*
1602 * We've reached the leftmost extent block,
1603 * it's safe to leave.
1604 */
1605 if (cluster_in_el == 0)
1606 break;
1607
1608 /*
1609 * The 'pos' searched for previous extent block is
1610 * always one cluster less than actual trunc_end.
1611 */
1612 trunc_end = cluster_in_el + 1;
1613
1614 ocfs2_reinit_path(path, 1);
1615
1616 continue;
1617
1618 } else
1619 rec = &el->l_recs[i];
1620
1621 ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
1622 &trunc_len, &trunc_end, &blkno, &done);
1623 if (done)
1624 break;
1625
1626 flags = rec->e_flags;
1627 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
1628
1629 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
1630 phys_cpos, trunc_len, flags,
1631 &dealloc, refcount_loc);
1632 if (ret < 0) {
1633 mlog_errno(ret);
1634 goto out;
1511 } 1635 }
1512 1636
1513 cpos += alloc_size; 1637 cluster_in_el = trunc_end;
1514 trunc_len -= alloc_size; 1638
1639 ocfs2_reinit_path(path, 1);
1515 } 1640 }
1516 1641
1517 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); 1642 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index af189887201c..abb0a95cc717 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -376,6 +376,10 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
376 376
377 OCFS2_I(inode)->ip_last_used_slot = 0; 377 OCFS2_I(inode)->ip_last_used_slot = 0;
378 OCFS2_I(inode)->ip_last_used_group = 0; 378 OCFS2_I(inode)->ip_last_used_group = 0;
379
380 if (S_ISDIR(inode->i_mode))
381 ocfs2_resv_set_type(&OCFS2_I(inode)->ip_la_data_resv,
382 OCFS2_RESV_FLAG_DIR);
379 mlog_exit_void(); 383 mlog_exit_void();
380} 384}
381 385
@@ -539,7 +543,6 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
539 struct buffer_head *fe_bh) 543 struct buffer_head *fe_bh)
540{ 544{
541 int status = 0; 545 int status = 0;
542 struct ocfs2_truncate_context *tc = NULL;
543 struct ocfs2_dinode *fe; 546 struct ocfs2_dinode *fe;
544 handle_t *handle = NULL; 547 handle_t *handle = NULL;
545 548
@@ -582,13 +585,7 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
582 ocfs2_commit_trans(osb, handle); 585 ocfs2_commit_trans(osb, handle);
583 handle = NULL; 586 handle = NULL;
584 587
585 status = ocfs2_prepare_truncate(osb, inode, fe_bh, &tc); 588 status = ocfs2_commit_truncate(osb, inode, fe_bh);
586 if (status < 0) {
587 mlog_errno(status);
588 goto out;
589 }
590
591 status = ocfs2_commit_truncate(osb, inode, fe_bh, tc);
592 if (status < 0) { 589 if (status < 0) {
593 mlog_errno(status); 590 mlog_errno(status);
594 goto out; 591 goto out;
@@ -659,12 +656,7 @@ static int ocfs2_remove_inode(struct inode *inode,
659 656
660 di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec); 657 di->i_dtime = cpu_to_le64(CURRENT_TIME.tv_sec);
661 di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL)); 658 di->i_flags &= cpu_to_le32(~(OCFS2_VALID_FL | OCFS2_ORPHANED_FL));
662 659 ocfs2_journal_dirty(handle, di_bh);
663 status = ocfs2_journal_dirty(handle, di_bh);
664 if (status < 0) {
665 mlog_errno(status);
666 goto bail_commit;
667 }
668 660
669 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh); 661 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
670 dquot_free_inode(inode); 662 dquot_free_inode(inode);
@@ -980,7 +972,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode,
980void ocfs2_delete_inode(struct inode *inode) 972void ocfs2_delete_inode(struct inode *inode)
981{ 973{
982 int wipe, status; 974 int wipe, status;
983 sigset_t blocked, oldset; 975 sigset_t oldset;
984 struct buffer_head *di_bh = NULL; 976 struct buffer_head *di_bh = NULL;
985 977
986 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 978 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
@@ -1007,13 +999,7 @@ void ocfs2_delete_inode(struct inode *inode)
1007 * messaging paths may return us -ERESTARTSYS. Which would 999 * messaging paths may return us -ERESTARTSYS. Which would
1008 * cause us to exit early, resulting in inodes being orphaned 1000 * cause us to exit early, resulting in inodes being orphaned
1009 * forever. */ 1001 * forever. */
1010 sigfillset(&blocked); 1002 ocfs2_block_signals(&oldset);
1011 status = sigprocmask(SIG_BLOCK, &blocked, &oldset);
1012 if (status < 0) {
1013 mlog_errno(status);
1014 ocfs2_cleanup_delete_inode(inode, 1);
1015 goto bail;
1016 }
1017 1003
1018 /* 1004 /*
1019 * Synchronize us against ocfs2_get_dentry. We take this in 1005 * Synchronize us against ocfs2_get_dentry. We take this in
@@ -1087,9 +1073,7 @@ bail_unlock_nfs_sync:
1087 ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0); 1073 ocfs2_nfs_sync_unlock(OCFS2_SB(inode->i_sb), 0);
1088 1074
1089bail_unblock: 1075bail_unblock:
1090 status = sigprocmask(SIG_SETMASK, &oldset, NULL); 1076 ocfs2_unblock_signals(&oldset);
1091 if (status < 0)
1092 mlog_errno(status);
1093bail: 1077bail:
1094 clear_inode(inode); 1078 clear_inode(inode);
1095 mlog_exit_void(); 1079 mlog_exit_void();
@@ -1123,6 +1107,10 @@ void ocfs2_clear_inode(struct inode *inode)
1123 ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); 1107 ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres);
1124 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); 1108 ocfs2_mark_lockres_freeing(&oi->ip_open_lockres);
1125 1109
1110 ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap,
1111 &oi->ip_la_data_resv);
1112 ocfs2_resv_init_once(&oi->ip_la_data_resv);
1113
1126 /* We very well may get a clear_inode before all an inodes 1114 /* We very well may get a clear_inode before all an inodes
1127 * metadata has hit disk. Of course, we can't drop any cluster 1115 * metadata has hit disk. Of course, we can't drop any cluster
1128 * locks until the journal has finished with it. The only 1116 * locks until the journal has finished with it. The only
@@ -1298,13 +1286,8 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1298 fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec); 1286 fe->i_mtime = cpu_to_le64(inode->i_mtime.tv_sec);
1299 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); 1287 fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
1300 1288
1301 status = ocfs2_journal_dirty(handle, bh); 1289 ocfs2_journal_dirty(handle, bh);
1302 if (status < 0)
1303 mlog_errno(status);
1304
1305 status = 0;
1306leave: 1290leave:
1307
1308 mlog_exit(status); 1291 mlog_exit(status);
1309 return status; 1292 return status;
1310} 1293}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 0b28e1921a39..9f5f5fcadc45 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -70,6 +70,8 @@ struct ocfs2_inode_info
70 /* Only valid if the inode is the dir. */ 70 /* Only valid if the inode is the dir. */
71 u32 ip_last_used_slot; 71 u32 ip_last_used_slot;
72 u64 ip_last_used_group; 72 u64 ip_last_used_group;
73
74 struct ocfs2_alloc_reservation ip_la_data_resv;
73}; 75};
74 76
75/* 77/*
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 9336c60e3a36..47878cf16418 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -402,9 +402,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
402} 402}
403 403
404/* 404/*
405 * 'nblocks' is what you want to add to the current 405 * 'nblocks' is what you want to add to the current transaction.
406 * transaction. extend_trans will either extend the current handle by
407 * nblocks, or commit it and start a new one with nblocks credits.
408 * 406 *
409 * This might call jbd2_journal_restart() which will commit dirty buffers 407 * This might call jbd2_journal_restart() which will commit dirty buffers
410 * and then restart the transaction. Before calling 408 * and then restart the transaction. Before calling
@@ -422,11 +420,15 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
422 */ 420 */
423int ocfs2_extend_trans(handle_t *handle, int nblocks) 421int ocfs2_extend_trans(handle_t *handle, int nblocks)
424{ 422{
425 int status; 423 int status, old_nblocks;
426 424
427 BUG_ON(!handle); 425 BUG_ON(!handle);
428 BUG_ON(!nblocks); 426 BUG_ON(nblocks < 0);
427
428 if (!nblocks)
429 return 0;
429 430
431 old_nblocks = handle->h_buffer_credits;
430 mlog_entry_void(); 432 mlog_entry_void();
431 433
432 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); 434 mlog(0, "Trying to extend transaction by %d blocks\n", nblocks);
@@ -445,7 +447,8 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
445 mlog(0, 447 mlog(0,
446 "jbd2_journal_extend failed, trying " 448 "jbd2_journal_extend failed, trying "
447 "jbd2_journal_restart\n"); 449 "jbd2_journal_restart\n");
448 status = jbd2_journal_restart(handle, nblocks); 450 status = jbd2_journal_restart(handle,
451 old_nblocks + nblocks);
449 if (status < 0) { 452 if (status < 0) {
450 mlog_errno(status); 453 mlog_errno(status);
451 goto bail; 454 goto bail;
@@ -734,8 +737,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
734 return __ocfs2_journal_access(handle, ci, bh, NULL, type); 737 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
735} 738}
736 739
737int ocfs2_journal_dirty(handle_t *handle, 740void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh)
738 struct buffer_head *bh)
739{ 741{
740 int status; 742 int status;
741 743
@@ -743,13 +745,9 @@ int ocfs2_journal_dirty(handle_t *handle,
743 (unsigned long long)bh->b_blocknr); 745 (unsigned long long)bh->b_blocknr);
744 746
745 status = jbd2_journal_dirty_metadata(handle, bh); 747 status = jbd2_journal_dirty_metadata(handle, bh);
746 if (status < 0) 748 BUG_ON(status);
747 mlog(ML_ERROR, "Could not dirty metadata buffer. "
748 "(bh->b_blocknr=%llu)\n",
749 (unsigned long long)bh->b_blocknr);
750 749
751 mlog_exit(status); 750 mlog_exit_void();
752 return status;
753} 751}
754 752
755#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) 753#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 3f74e09b0d80..b5baaa8e710f 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -325,8 +325,7 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
325 * <modify the bh> 325 * <modify the bh>
326 * ocfs2_journal_dirty(handle, bh); 326 * ocfs2_journal_dirty(handle, bh);
327 */ 327 */
328int ocfs2_journal_dirty(handle_t *handle, 328void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh);
329 struct buffer_head *bh);
330 329
331/* 330/*
332 * Credit Macros: 331 * Credit Macros:
@@ -562,6 +561,18 @@ static inline int ocfs2_calc_group_alloc_credits(struct super_block *sb,
562 return blocks; 561 return blocks;
563} 562}
564 563
/*
 * Extra journal credits for allocating a discontiguous block group.
 *
 * On top of the credits from ocfs2_calc_group_alloc_credits(), such an
 * allocation needs enough credits to fill the group descriptor's extent
 * list.  The caller has already started its transaction with
 * ocfs2_calc_group_alloc_credits() and extends it by the value returned
 * here.
 */
static inline int ocfs2_calc_bg_discontig_credits(struct super_block *sb)
{
	int gd_extent_recs = ocfs2_extent_recs_per_gd(sb);

	return gd_extent_recs;
}
575
565static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, 576static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
566 unsigned int clusters_to_del, 577 unsigned int clusters_to_del,
567 struct ocfs2_dinode *fe, 578 struct ocfs2_dinode *fe,
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index c983715d8d8c..3d7419682dc0 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -52,7 +52,8 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
52 52
53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
54 struct ocfs2_dinode *alloc, 54 struct ocfs2_dinode *alloc,
55 u32 numbits); 55 u32 *numbits,
56 struct ocfs2_alloc_reservation *resv);
56 57
57static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc); 58static void ocfs2_clear_local_alloc(struct ocfs2_dinode *alloc);
58 59
@@ -74,6 +75,144 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
75 struct inode *local_alloc_inode); 76 struct inode *local_alloc_inode);
76 77
78/*
79 * ocfs2_la_default_mb() - determine a default size, in megabytes of
80 * the local alloc.
81 *
82 * Generally, we'd like to pick as large a local alloc as
83 * possible. Performance on large workloads tends to scale
84 * proportionally to la size. In addition to that, the reservations
85 * code functions more efficiently as it can reserve more windows for
86 * write.
87 *
88 * Some things work against us when trying to choose a large local alloc:
89 *
90 * - We need to ensure our sizing is picked to leave enough space in
91 * group descriptors for other allocations (such as block groups,
92 * etc). Picking default sizes which are a multiple of 4 could help
93 * - block groups are allocated in 2mb and 4mb chunks.
94 *
95 * - Likewise, we don't want to starve other nodes of bits on small
96 * file systems. This can easily be taken care of by limiting our
97 * default to a reasonable size (256M) on larger cluster sizes.
98 *
99 * - Some file systems can't support very large sizes - 4k and 8k in
100 * particular are limited to less than 128 and 256 megabytes respectively.
101 *
102 * The following reference table shows group descriptor and local
103 * alloc maximums at various cluster sizes (4k blocksize)
104 *
105 * csize: 4K group: 126M la: 121M
106 * csize: 8K group: 252M la: 243M
107 * csize: 16K group: 504M la: 486M
108 * csize: 32K group: 1008M la: 972M
109 * csize: 64K group: 2016M la: 1944M
110 * csize: 128K group: 4032M la: 3888M
111 * csize: 256K group: 8064M la: 7776M
112 * csize: 512K group: 16128M la: 15552M
113 * csize: 1024K group: 32256M la: 31104M
114 */
115#define OCFS2_LA_MAX_DEFAULT_MB 256
116#define OCFS2_LA_OLD_DEFAULT 8
117unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb)
118{
119 unsigned int la_mb;
120 unsigned int gd_mb;
121 unsigned int megs_per_slot;
122 struct super_block *sb = osb->sb;
123
124 gd_mb = ocfs2_clusters_to_megabytes(osb->sb,
125 8 * ocfs2_group_bitmap_size(sb, 0, osb->s_feature_incompat));
126
127 /*
128 * This takes care of files systems with very small group
129 * descriptors - 512 byte blocksize at cluster sizes lower
130 * than 16K and also 1k blocksize with 4k cluster size.
131 */
132 if ((sb->s_blocksize == 512 && osb->s_clustersize <= 8192)
133 || (sb->s_blocksize == 1024 && osb->s_clustersize == 4096))
134 return OCFS2_LA_OLD_DEFAULT;
135
136 /*
137 * Leave enough room for some block groups and make the final
138 * value we work from a multiple of 4.
139 */
140 gd_mb -= 16;
141 gd_mb &= 0xFFFFFFFB;
142
143 la_mb = gd_mb;
144
145 /*
146 * Keep window sizes down to a reasonable default
147 */
148 if (la_mb > OCFS2_LA_MAX_DEFAULT_MB) {
149 /*
150 * Some clustersize / blocksize combinations will have
151 * given us a larger than OCFS2_LA_MAX_DEFAULT_MB
152 * default size, but get poor distribution when
153 * limited to exactly 256 megabytes.
154 *
155 * As an example, 16K clustersize at 4K blocksize
156 * gives us a cluster group size of 504M. Paring the
157 * local alloc size down to 256 however, would give us
158 * only one window and around 200MB left in the
159 * cluster group. Instead, find the first size below
160 * 256 which would give us an even distribution.
161 *
162 * Larger cluster group sizes actually work out pretty
163 * well when pared to 256, so we don't have to do this
164 * for any group that fits more than two
165 * OCFS2_LA_MAX_DEFAULT_MB windows.
166 */
167 if (gd_mb > (2 * OCFS2_LA_MAX_DEFAULT_MB))
168 la_mb = 256;
169 else {
170 unsigned int gd_mult = gd_mb;
171
172 while (gd_mult > 256)
173 gd_mult = gd_mult >> 1;
174
175 la_mb = gd_mult;
176 }
177 }
178
179 megs_per_slot = osb->osb_clusters_at_boot / osb->max_slots;
180 megs_per_slot = ocfs2_clusters_to_megabytes(osb->sb, megs_per_slot);
181 /* Too many nodes, too few disk clusters. */
182 if (megs_per_slot < la_mb)
183 la_mb = megs_per_slot;
184
185 return la_mb;
186}
187
188void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb)
189{
190 struct super_block *sb = osb->sb;
191 unsigned int la_default_mb = ocfs2_la_default_mb(osb);
192 unsigned int la_max_mb;
193
194 la_max_mb = ocfs2_clusters_to_megabytes(sb,
195 ocfs2_local_alloc_size(sb) * 8);
196
197 mlog(0, "requested: %dM, max: %uM, default: %uM\n",
198 requested_mb, la_max_mb, la_default_mb);
199
200 if (requested_mb == -1) {
201 /* No user request - use defaults */
202 osb->local_alloc_default_bits =
203 ocfs2_megabytes_to_clusters(sb, la_default_mb);
204 } else if (requested_mb > la_max_mb) {
205 /* Request is too big, we give the maximum available */
206 osb->local_alloc_default_bits =
207 ocfs2_megabytes_to_clusters(sb, la_max_mb);
208 } else {
209 osb->local_alloc_default_bits =
210 ocfs2_megabytes_to_clusters(sb, requested_mb);
211 }
212
213 osb->local_alloc_bits = osb->local_alloc_default_bits;
214}
215
77static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) 216static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
78{ 217{
79 return (osb->local_alloc_state == OCFS2_LA_THROTTLED || 218 return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
@@ -156,7 +295,7 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
156 osb->local_alloc_bits, (osb->bitmap_cpg - 1)); 295 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
157 osb->local_alloc_bits = 296 osb->local_alloc_bits =
158 ocfs2_megabytes_to_clusters(osb->sb, 297 ocfs2_megabytes_to_clusters(osb->sb,
159 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); 298 ocfs2_la_default_mb(osb));
160 } 299 }
161 300
162 /* read the alloc off disk */ 301 /* read the alloc off disk */
@@ -262,6 +401,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
262 401
263 osb->local_alloc_state = OCFS2_LA_DISABLED; 402 osb->local_alloc_state = OCFS2_LA_DISABLED;
264 403
404 ocfs2_resmap_uninit(&osb->osb_la_resmap);
405
265 main_bm_inode = ocfs2_get_system_file_inode(osb, 406 main_bm_inode = ocfs2_get_system_file_inode(osb,
266 GLOBAL_BITMAP_SYSTEM_INODE, 407 GLOBAL_BITMAP_SYSTEM_INODE,
267 OCFS2_INVALID_SLOT); 408 OCFS2_INVALID_SLOT);
@@ -305,12 +446,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
305 } 446 }
306 447
307 ocfs2_clear_local_alloc(alloc); 448 ocfs2_clear_local_alloc(alloc);
308 449 ocfs2_journal_dirty(handle, bh);
309 status = ocfs2_journal_dirty(handle, bh);
310 if (status < 0) {
311 mlog_errno(status);
312 goto out_commit;
313 }
314 450
315 brelse(bh); 451 brelse(bh);
316 osb->local_alloc_bh = NULL; 452 osb->local_alloc_bh = NULL;
@@ -481,46 +617,6 @@ out:
481 return status; 617 return status;
482} 618}
483 619
484/* Check to see if the local alloc window is within ac->ac_max_block */
485static int ocfs2_local_alloc_in_range(struct inode *inode,
486 struct ocfs2_alloc_context *ac,
487 u32 bits_wanted)
488{
489 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
490 struct ocfs2_dinode *alloc;
491 struct ocfs2_local_alloc *la;
492 int start;
493 u64 block_off;
494
495 if (!ac->ac_max_block)
496 return 1;
497
498 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
499 la = OCFS2_LOCAL_ALLOC(alloc);
500
501 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
502 if (start == -1) {
503 mlog_errno(-ENOSPC);
504 return 0;
505 }
506
507 /*
508 * Converting (bm_off + start + bits_wanted) to blocks gives us
509 * the blkno just past our actual allocation. This is perfect
510 * to compare with ac_max_block.
511 */
512 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
513 le32_to_cpu(la->la_bm_off) +
514 start + bits_wanted);
515 mlog(0, "Checking %llu against %llu\n",
516 (unsigned long long)block_off,
517 (unsigned long long)ac->ac_max_block);
518 if (block_off > ac->ac_max_block)
519 return 0;
520
521 return 1;
522}
523
524/* 620/*
525 * make sure we've got at least bits_wanted contiguous bits in the 621 * make sure we've got at least bits_wanted contiguous bits in the
526 * local alloc. You lose them when you drop i_mutex. 622 * local alloc. You lose them when you drop i_mutex.
@@ -613,17 +709,6 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
613 mlog(0, "Calling in_range for max block %llu\n", 709 mlog(0, "Calling in_range for max block %llu\n",
614 (unsigned long long)ac->ac_max_block); 710 (unsigned long long)ac->ac_max_block);
615 711
616 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
617 bits_wanted)) {
618 /*
619 * The window is outside ac->ac_max_block.
620 * This errno tells the caller to keep localalloc enabled
621 * but to get the allocation from the main bitmap.
622 */
623 status = -EFBIG;
624 goto bail;
625 }
626
627 ac->ac_inode = local_alloc_inode; 712 ac->ac_inode = local_alloc_inode;
628 /* We should never use localalloc from another slot */ 713 /* We should never use localalloc from another slot */
629 ac->ac_alloc_slot = osb->slot_num; 714 ac->ac_alloc_slot = osb->slot_num;
@@ -664,7 +749,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
664 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 749 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
665 la = OCFS2_LOCAL_ALLOC(alloc); 750 la = OCFS2_LOCAL_ALLOC(alloc);
666 751
667 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); 752 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, &bits_wanted,
753 ac->ac_resv);
668 if (start == -1) { 754 if (start == -1) {
669 /* TODO: Shouldn't we just BUG here? */ 755 /* TODO: Shouldn't we just BUG here? */
670 status = -ENOSPC; 756 status = -ENOSPC;
@@ -674,8 +760,6 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
674 760
675 bitmap = la->la_bitmap; 761 bitmap = la->la_bitmap;
676 *bit_off = le32_to_cpu(la->la_bm_off) + start; 762 *bit_off = le32_to_cpu(la->la_bm_off) + start;
677 /* local alloc is always contiguous by nature -- we never
678 * delete bits from it! */
679 *num_bits = bits_wanted; 763 *num_bits = bits_wanted;
680 764
681 status = ocfs2_journal_access_di(handle, 765 status = ocfs2_journal_access_di(handle,
@@ -687,18 +771,15 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
687 goto bail; 771 goto bail;
688 } 772 }
689 773
774 ocfs2_resmap_claimed_bits(&osb->osb_la_resmap, ac->ac_resv, start,
775 bits_wanted);
776
690 while(bits_wanted--) 777 while(bits_wanted--)
691 ocfs2_set_bit(start++, bitmap); 778 ocfs2_set_bit(start++, bitmap);
692 779
693 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits); 780 le32_add_cpu(&alloc->id1.bitmap1.i_used, *num_bits);
781 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
694 782
695 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
696 if (status < 0) {
697 mlog_errno(status);
698 goto bail;
699 }
700
701 status = 0;
702bail: 783bail:
703 mlog_exit(status); 784 mlog_exit(status);
704 return status; 785 return status;
@@ -722,13 +803,17 @@ static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
722} 803}
723 804
724static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 805static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
725 struct ocfs2_dinode *alloc, 806 struct ocfs2_dinode *alloc,
726 u32 numbits) 807 u32 *numbits,
808 struct ocfs2_alloc_reservation *resv)
727{ 809{
728 int numfound, bitoff, left, startoff, lastzero; 810 int numfound, bitoff, left, startoff, lastzero;
811 int local_resv = 0;
812 struct ocfs2_alloc_reservation r;
729 void *bitmap = NULL; 813 void *bitmap = NULL;
814 struct ocfs2_reservation_map *resmap = &osb->osb_la_resmap;
730 815
731 mlog_entry("(numbits wanted = %u)\n", numbits); 816 mlog_entry("(numbits wanted = %u)\n", *numbits);
732 817
733 if (!alloc->id1.bitmap1.i_total) { 818 if (!alloc->id1.bitmap1.i_total) {
734 mlog(0, "No bits in my window!\n"); 819 mlog(0, "No bits in my window!\n");
@@ -736,6 +821,30 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
736 goto bail; 821 goto bail;
737 } 822 }
738 823
824 if (!resv) {
825 local_resv = 1;
826 ocfs2_resv_init_once(&r);
827 ocfs2_resv_set_type(&r, OCFS2_RESV_FLAG_TMP);
828 resv = &r;
829 }
830
831 numfound = *numbits;
832 if (ocfs2_resmap_resv_bits(resmap, resv, &bitoff, &numfound) == 0) {
833 if (numfound < *numbits)
834 *numbits = numfound;
835 goto bail;
836 }
837
838 /*
839 * Code error. While reservations are enabled, local
840 * allocation should _always_ go through them.
841 */
842 BUG_ON(osb->osb_resv_level != 0);
843
844 /*
845 * Reservations are disabled. Handle this the old way.
846 */
847
739 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap; 848 bitmap = OCFS2_LOCAL_ALLOC(alloc)->la_bitmap;
740 849
741 numfound = bitoff = startoff = 0; 850 numfound = bitoff = startoff = 0;
@@ -761,7 +870,7 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
761 startoff = bitoff+1; 870 startoff = bitoff+1;
762 } 871 }
763 /* we got everything we needed */ 872 /* we got everything we needed */
764 if (numfound == numbits) { 873 if (numfound == *numbits) {
765 /* mlog(0, "Found it all!\n"); */ 874 /* mlog(0, "Found it all!\n"); */
766 break; 875 break;
767 } 876 }
@@ -770,12 +879,15 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
770 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff, 879 mlog(0, "Exiting loop, bitoff = %d, numfound = %d\n", bitoff,
771 numfound); 880 numfound);
772 881
773 if (numfound == numbits) 882 if (numfound == *numbits)
774 bitoff = startoff - numfound; 883 bitoff = startoff - numfound;
775 else 884 else
776 bitoff = -1; 885 bitoff = -1;
777 886
778bail: 887bail:
888 if (local_resv)
889 ocfs2_resv_discard(resmap, resv);
890
779 mlog_exit(bitoff); 891 mlog_exit(bitoff);
780 return bitoff; 892 return bitoff;
781} 893}
@@ -1049,7 +1161,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
1049 /* we used the generic suballoc reserve function, but we set 1161 /* we used the generic suballoc reserve function, but we set
1050 * everything up nicely, so there's no reason why we can't use 1162 * everything up nicely, so there's no reason why we can't use
1051 * the more specific cluster api to claim bits. */ 1163 * the more specific cluster api to claim bits. */
1052 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, 1164 status = ocfs2_claim_clusters(handle, ac, osb->local_alloc_bits,
1053 &cluster_off, &cluster_count); 1165 &cluster_off, &cluster_count);
1054 if (status == -ENOSPC) { 1166 if (status == -ENOSPC) {
1055retry_enospc: 1167retry_enospc:
@@ -1063,7 +1175,7 @@ retry_enospc:
1063 goto bail; 1175 goto bail;
1064 1176
1065 ac->ac_bits_wanted = osb->local_alloc_default_bits; 1177 ac->ac_bits_wanted = osb->local_alloc_default_bits;
1066 status = ocfs2_claim_clusters(osb, handle, ac, 1178 status = ocfs2_claim_clusters(handle, ac,
1067 osb->local_alloc_bits, 1179 osb->local_alloc_bits,
1068 &cluster_off, 1180 &cluster_off,
1069 &cluster_count); 1181 &cluster_count);
@@ -1098,6 +1210,9 @@ retry_enospc:
1098 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0, 1210 memset(OCFS2_LOCAL_ALLOC(alloc)->la_bitmap, 0,
1099 le16_to_cpu(la->la_size)); 1211 le16_to_cpu(la->la_size));
1100 1212
1213 ocfs2_resmap_restart(&osb->osb_la_resmap, cluster_count,
1214 OCFS2_LOCAL_ALLOC(alloc)->la_bitmap);
1215
1101 mlog(0, "New window allocated:\n"); 1216 mlog(0, "New window allocated:\n");
1102 mlog(0, "window la_bm_off = %u\n", 1217 mlog(0, "window la_bm_off = %u\n",
1103 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off); 1218 OCFS2_LOCAL_ALLOC(alloc)->la_bm_off);
@@ -1169,12 +1284,7 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1169 } 1284 }
1170 1285
1171 ocfs2_clear_local_alloc(alloc); 1286 ocfs2_clear_local_alloc(alloc);
1172 1287 ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1173 status = ocfs2_journal_dirty(handle, osb->local_alloc_bh);
1174 if (status < 0) {
1175 mlog_errno(status);
1176 goto bail;
1177 }
1178 1288
1179 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy, 1289 status = ocfs2_sync_local_to_main(osb, handle, alloc_copy,
1180 main_bm_inode, main_bm_bh); 1290 main_bm_inode, main_bm_bh);
@@ -1192,7 +1302,6 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1192 1302
1193 atomic_inc(&osb->alloc_stats.moves); 1303 atomic_inc(&osb->alloc_stats.moves);
1194 1304
1195 status = 0;
1196bail: 1305bail:
1197 if (handle) 1306 if (handle)
1198 ocfs2_commit_trans(osb, handle); 1307 ocfs2_commit_trans(osb, handle);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index ac5ea9f86653..1be9b5864460 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -30,6 +30,9 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb);
30 30
31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb); 31void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb);
32 32
33void ocfs2_la_set_sizes(struct ocfs2_super *osb, int requested_mb);
34unsigned int ocfs2_la_default_mb(struct ocfs2_super *osb);
35
33int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, 36int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
34 int node_num, 37 int node_num,
35 struct ocfs2_dinode **alloc_copy); 38 struct ocfs2_dinode **alloc_copy);
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 7898bd3a99f5..af2b8fe1f139 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -41,44 +41,20 @@
41#include "file.h" 41#include "file.h"
42#include "inode.h" 42#include "inode.h"
43#include "mmap.h" 43#include "mmap.h"
44#include "super.h"
44 45
45static inline int ocfs2_vm_op_block_sigs(sigset_t *blocked, sigset_t *oldset)
46{
47 /* The best way to deal with signals in the vm path is
48 * to block them upfront, rather than allowing the
49 * locking paths to return -ERESTARTSYS. */
50 sigfillset(blocked);
51
52 /* We should technically never get a bad return value
53 * from sigprocmask */
54 return sigprocmask(SIG_BLOCK, blocked, oldset);
55}
56
57static inline int ocfs2_vm_op_unblock_sigs(sigset_t *oldset)
58{
59 return sigprocmask(SIG_SETMASK, oldset, NULL);
60}
61 46
62static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) 47static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf)
63{ 48{
64 sigset_t blocked, oldset; 49 sigset_t oldset;
65 int error, ret; 50 int ret;
66 51
67 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff); 52 mlog_entry("(area=%p, page offset=%lu)\n", area, vmf->pgoff);
68 53
69 error = ocfs2_vm_op_block_sigs(&blocked, &oldset); 54 ocfs2_block_signals(&oldset);
70 if (error < 0) {
71 mlog_errno(error);
72 ret = VM_FAULT_SIGBUS;
73 goto out;
74 }
75
76 ret = filemap_fault(area, vmf); 55 ret = filemap_fault(area, vmf);
56 ocfs2_unblock_signals(&oldset);
77 57
78 error = ocfs2_vm_op_unblock_sigs(&oldset);
79 if (error < 0)
80 mlog_errno(error);
81out:
82 mlog_exit_ptr(vmf->page); 58 mlog_exit_ptr(vmf->page);
83 return ret; 59 return ret;
84} 60}
@@ -158,14 +134,10 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
158 struct page *page = vmf->page; 134 struct page *page = vmf->page;
159 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 135 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
160 struct buffer_head *di_bh = NULL; 136 struct buffer_head *di_bh = NULL;
161 sigset_t blocked, oldset; 137 sigset_t oldset;
162 int ret, ret2; 138 int ret;
163 139
164 ret = ocfs2_vm_op_block_sigs(&blocked, &oldset); 140 ocfs2_block_signals(&oldset);
165 if (ret < 0) {
166 mlog_errno(ret);
167 return ret;
168 }
169 141
170 /* 142 /*
171 * The cluster locks taken will block a truncate from another 143 * The cluster locks taken will block a truncate from another
@@ -193,9 +165,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
193 ocfs2_inode_unlock(inode, 1); 165 ocfs2_inode_unlock(inode, 1);
194 166
195out: 167out:
196 ret2 = ocfs2_vm_op_unblock_sigs(&oldset); 168 ocfs2_unblock_signals(&oldset);
197 if (ret2 < 0)
198 mlog_errno(ret2);
199 if (ret) 169 if (ret)
200 ret = VM_FAULT_SIGBUS; 170 ret = VM_FAULT_SIGBUS;
201 return ret; 171 return ret;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 4cbb18f26c5f..db5dd3ed4df4 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -239,6 +239,8 @@ static int ocfs2_mknod(struct inode *dir,
239 }; 239 };
240 int did_quota_inode = 0; 240 int did_quota_inode = 0;
241 struct ocfs2_dir_lookup_result lookup = { NULL, }; 241 struct ocfs2_dir_lookup_result lookup = { NULL, };
242 sigset_t oldset;
243 int did_block_signals = 0;
242 244
243 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode, 245 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
244 (unsigned long)dev, dentry->d_name.len, 246 (unsigned long)dev, dentry->d_name.len,
@@ -350,6 +352,10 @@ static int ocfs2_mknod(struct inode *dir,
350 goto leave; 352 goto leave;
351 } 353 }
352 354
355 /* Starting to change things, restart is no longer possible. */
356 ocfs2_block_signals(&oldset);
357 did_block_signals = 1;
358
353 status = dquot_alloc_inode(inode); 359 status = dquot_alloc_inode(inode);
354 if (status) 360 if (status)
355 goto leave; 361 goto leave;
@@ -384,11 +390,7 @@ static int ocfs2_mknod(struct inode *dir,
384 goto leave; 390 goto leave;
385 } 391 }
386 ocfs2_add_links_count(dirfe, 1); 392 ocfs2_add_links_count(dirfe, 1);
387 status = ocfs2_journal_dirty(handle, parent_fe_bh); 393 ocfs2_journal_dirty(handle, parent_fe_bh);
388 if (status < 0) {
389 mlog_errno(status);
390 goto leave;
391 }
392 inc_nlink(dir); 394 inc_nlink(dir);
393 } 395 }
394 396
@@ -439,6 +441,8 @@ leave:
439 ocfs2_commit_trans(osb, handle); 441 ocfs2_commit_trans(osb, handle);
440 442
441 ocfs2_inode_unlock(dir, 1); 443 ocfs2_inode_unlock(dir, 1);
444 if (did_block_signals)
445 ocfs2_unblock_signals(&oldset);
442 446
443 if (status == -ENOSPC) 447 if (status == -ENOSPC)
444 mlog(0, "Disk is full\n"); 448 mlog(0, "Disk is full\n");
@@ -487,14 +491,15 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
487 int status = 0; 491 int status = 0;
488 struct ocfs2_dinode *fe = NULL; 492 struct ocfs2_dinode *fe = NULL;
489 struct ocfs2_extent_list *fel; 493 struct ocfs2_extent_list *fel;
490 u64 fe_blkno = 0; 494 u64 suballoc_loc, fe_blkno = 0;
491 u16 suballoc_bit; 495 u16 suballoc_bit;
492 u16 feat; 496 u16 feat;
493 497
494 *new_fe_bh = NULL; 498 *new_fe_bh = NULL;
495 499
496 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, 500 status = ocfs2_claim_new_inode(handle, dir, parent_fe_bh,
497 inode_ac, &suballoc_bit, &fe_blkno); 501 inode_ac, &suballoc_loc,
502 &suballoc_bit, &fe_blkno);
498 if (status < 0) { 503 if (status < 0) {
499 mlog_errno(status); 504 mlog_errno(status);
500 goto leave; 505 goto leave;
@@ -531,6 +536,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
531 fe->i_generation = cpu_to_le32(inode->i_generation); 536 fe->i_generation = cpu_to_le32(inode->i_generation);
532 fe->i_fs_generation = cpu_to_le32(osb->fs_generation); 537 fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
533 fe->i_blkno = cpu_to_le64(fe_blkno); 538 fe->i_blkno = cpu_to_le64(fe_blkno);
539 fe->i_suballoc_loc = cpu_to_le64(suballoc_loc);
534 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit); 540 fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
535 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot); 541 fe->i_suballoc_slot = cpu_to_le16(inode_ac->ac_alloc_slot);
536 fe->i_uid = cpu_to_le32(inode->i_uid); 542 fe->i_uid = cpu_to_le32(inode->i_uid);
@@ -567,11 +573,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
567 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb)); 573 fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
568 } 574 }
569 575
570 status = ocfs2_journal_dirty(handle, *new_fe_bh); 576 ocfs2_journal_dirty(handle, *new_fe_bh);
571 if (status < 0) {
572 mlog_errno(status);
573 goto leave;
574 }
575 577
576 ocfs2_populate_inode(inode, fe, 1); 578 ocfs2_populate_inode(inode, fe, 1);
577 ocfs2_ci_set_new(osb, INODE_CACHE(inode)); 579 ocfs2_ci_set_new(osb, INODE_CACHE(inode));
@@ -637,6 +639,7 @@ static int ocfs2_link(struct dentry *old_dentry,
637 struct ocfs2_dinode *fe = NULL; 639 struct ocfs2_dinode *fe = NULL;
638 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb); 640 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
639 struct ocfs2_dir_lookup_result lookup = { NULL, }; 641 struct ocfs2_dir_lookup_result lookup = { NULL, };
642 sigset_t oldset;
640 643
641 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino, 644 mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
642 old_dentry->d_name.len, old_dentry->d_name.name, 645 old_dentry->d_name.len, old_dentry->d_name.name,
@@ -693,6 +696,9 @@ static int ocfs2_link(struct dentry *old_dentry,
693 goto out_unlock_inode; 696 goto out_unlock_inode;
694 } 697 }
695 698
699 /* Starting to change things, restart is no longer possible. */
700 ocfs2_block_signals(&oldset);
701
696 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh, 702 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
697 OCFS2_JOURNAL_ACCESS_WRITE); 703 OCFS2_JOURNAL_ACCESS_WRITE);
698 if (err < 0) { 704 if (err < 0) {
@@ -705,14 +711,7 @@ static int ocfs2_link(struct dentry *old_dentry,
705 ocfs2_set_links_count(fe, inode->i_nlink); 711 ocfs2_set_links_count(fe, inode->i_nlink);
706 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); 712 fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
707 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 713 fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
708 714 ocfs2_journal_dirty(handle, fe_bh);
709 err = ocfs2_journal_dirty(handle, fe_bh);
710 if (err < 0) {
711 ocfs2_add_links_count(fe, -1);
712 drop_nlink(inode);
713 mlog_errno(err);
714 goto out_commit;
715 }
716 715
717 err = ocfs2_add_entry(handle, dentry, inode, 716 err = ocfs2_add_entry(handle, dentry, inode,
718 OCFS2_I(inode)->ip_blkno, 717 OCFS2_I(inode)->ip_blkno,
@@ -736,6 +735,7 @@ static int ocfs2_link(struct dentry *old_dentry,
736 735
737out_commit: 736out_commit:
738 ocfs2_commit_trans(osb, handle); 737 ocfs2_commit_trans(osb, handle);
738 ocfs2_unblock_signals(&oldset);
739out_unlock_inode: 739out_unlock_inode:
740 ocfs2_inode_unlock(inode, 1); 740 ocfs2_inode_unlock(inode, 1);
741 741
@@ -909,12 +909,7 @@ static int ocfs2_unlink(struct inode *dir,
909 drop_nlink(inode); 909 drop_nlink(inode);
910 drop_nlink(inode); 910 drop_nlink(inode);
911 ocfs2_set_links_count(fe, inode->i_nlink); 911 ocfs2_set_links_count(fe, inode->i_nlink);
912 912 ocfs2_journal_dirty(handle, fe_bh);
913 status = ocfs2_journal_dirty(handle, fe_bh);
914 if (status < 0) {
915 mlog_errno(status);
916 goto leave;
917 }
918 913
919 dir->i_ctime = dir->i_mtime = CURRENT_TIME; 914 dir->i_ctime = dir->i_mtime = CURRENT_TIME;
920 if (S_ISDIR(inode->i_mode)) 915 if (S_ISDIR(inode->i_mode))
@@ -1332,12 +1327,7 @@ static int ocfs2_rename(struct inode *old_dir,
1332 ocfs2_set_links_count(newfe, 0); 1327 ocfs2_set_links_count(newfe, 0);
1333 else 1328 else
1334 ocfs2_add_links_count(newfe, -1); 1329 ocfs2_add_links_count(newfe, -1);
1335 1330 ocfs2_journal_dirty(handle, newfe_bh);
1336 status = ocfs2_journal_dirty(handle, newfe_bh);
1337 if (status < 0) {
1338 mlog_errno(status);
1339 goto bail;
1340 }
1341 } else { 1331 } else {
1342 /* if the name was not found in new_dir, add it now */ 1332 /* if the name was not found in new_dir, add it now */
1343 status = ocfs2_add_entry(handle, new_dentry, old_inode, 1333 status = ocfs2_add_entry(handle, new_dentry, old_inode,
@@ -1356,10 +1346,7 @@ static int ocfs2_rename(struct inode *old_dir,
1356 1346
1357 old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec); 1347 old_di->i_ctime = cpu_to_le64(old_inode->i_ctime.tv_sec);
1358 old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec); 1348 old_di->i_ctime_nsec = cpu_to_le32(old_inode->i_ctime.tv_nsec);
1359 1349 ocfs2_journal_dirty(handle, old_inode_bh);
1360 status = ocfs2_journal_dirty(handle, old_inode_bh);
1361 if (status < 0)
1362 mlog_errno(status);
1363 } else 1350 } else
1364 mlog_errno(status); 1351 mlog_errno(status);
1365 1352
@@ -1431,7 +1418,7 @@ static int ocfs2_rename(struct inode *old_dir,
1431 OCFS2_JOURNAL_ACCESS_WRITE); 1418 OCFS2_JOURNAL_ACCESS_WRITE);
1432 fe = (struct ocfs2_dinode *) old_dir_bh->b_data; 1419 fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1433 ocfs2_set_links_count(fe, old_dir->i_nlink); 1420 ocfs2_set_links_count(fe, old_dir->i_nlink);
1434 status = ocfs2_journal_dirty(handle, old_dir_bh); 1421 ocfs2_journal_dirty(handle, old_dir_bh);
1435 } 1422 }
1436 } 1423 }
1437 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir); 1424 ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
@@ -1563,11 +1550,7 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1563 (bytes_left > sb->s_blocksize) ? sb->s_blocksize : 1550 (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1564 bytes_left); 1551 bytes_left);
1565 1552
1566 status = ocfs2_journal_dirty(handle, bhs[virtual]); 1553 ocfs2_journal_dirty(handle, bhs[virtual]);
1567 if (status < 0) {
1568 mlog_errno(status);
1569 goto bail;
1570 }
1571 1554
1572 virtual++; 1555 virtual++;
1573 p_blkno++; 1556 p_blkno++;
@@ -1611,6 +1594,8 @@ static int ocfs2_symlink(struct inode *dir,
1611 }; 1594 };
1612 int did_quota = 0, did_quota_inode = 0; 1595 int did_quota = 0, did_quota_inode = 0;
1613 struct ocfs2_dir_lookup_result lookup = { NULL, }; 1596 struct ocfs2_dir_lookup_result lookup = { NULL, };
1597 sigset_t oldset;
1598 int did_block_signals = 0;
1614 1599
1615 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir, 1600 mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1616 dentry, symname, dentry->d_name.len, dentry->d_name.name); 1601 dentry, symname, dentry->d_name.len, dentry->d_name.name);
@@ -1706,6 +1691,10 @@ static int ocfs2_symlink(struct inode *dir,
1706 goto bail; 1691 goto bail;
1707 } 1692 }
1708 1693
1694 /* Starting to change things, restart is no longer possible. */
1695 ocfs2_block_signals(&oldset);
1696 did_block_signals = 1;
1697
1709 status = dquot_alloc_inode(inode); 1698 status = dquot_alloc_inode(inode);
1710 if (status) 1699 if (status)
1711 goto bail; 1700 goto bail;
@@ -1814,6 +1803,8 @@ bail:
1814 ocfs2_commit_trans(osb, handle); 1803 ocfs2_commit_trans(osb, handle);
1815 1804
1816 ocfs2_inode_unlock(dir, 1); 1805 ocfs2_inode_unlock(dir, 1);
1806 if (did_block_signals)
1807 ocfs2_unblock_signals(&oldset);
1817 1808
1818 brelse(new_fe_bh); 1809 brelse(new_fe_bh);
1819 brelse(parent_fe_bh); 1810 brelse(parent_fe_bh);
@@ -1961,12 +1952,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1961 if (S_ISDIR(inode->i_mode)) 1952 if (S_ISDIR(inode->i_mode))
1962 ocfs2_add_links_count(orphan_fe, 1); 1953 ocfs2_add_links_count(orphan_fe, 1);
1963 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 1954 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
1964 1955 ocfs2_journal_dirty(handle, orphan_dir_bh);
1965 status = ocfs2_journal_dirty(handle, orphan_dir_bh);
1966 if (status < 0) {
1967 mlog_errno(status);
1968 goto leave;
1969 }
1970 1956
1971 status = __ocfs2_add_entry(handle, orphan_dir_inode, name, 1957 status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
1972 OCFS2_ORPHAN_NAMELEN, inode, 1958 OCFS2_ORPHAN_NAMELEN, inode,
@@ -2065,12 +2051,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2065 if (S_ISDIR(inode->i_mode)) 2051 if (S_ISDIR(inode->i_mode))
2066 ocfs2_add_links_count(orphan_fe, -1); 2052 ocfs2_add_links_count(orphan_fe, -1);
2067 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe); 2053 orphan_dir_inode->i_nlink = ocfs2_read_links_count(orphan_fe);
2068 2054 ocfs2_journal_dirty(handle, orphan_dir_bh);
2069 status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2070 if (status < 0) {
2071 mlog_errno(status);
2072 goto leave;
2073 }
2074 2055
2075leave: 2056leave:
2076 ocfs2_free_dir_lookup_result(&lookup); 2057 ocfs2_free_dir_lookup_result(&lookup);
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index adf5e2ebc2c4..c67003b6b5a2 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -47,6 +47,7 @@
47/* For struct ocfs2_blockcheck_stats */ 47/* For struct ocfs2_blockcheck_stats */
48#include "blockcheck.h" 48#include "blockcheck.h"
49 49
50#include "reservations.h"
50 51
51/* Caching of metadata buffers */ 52/* Caching of metadata buffers */
52 53
@@ -341,6 +342,9 @@ struct ocfs2_super
341 */ 342 */
342 unsigned int local_alloc_bits; 343 unsigned int local_alloc_bits;
343 unsigned int local_alloc_default_bits; 344 unsigned int local_alloc_default_bits;
345 /* osb_clusters_at_boot can become stale! Do not trust it to
346 * be up to date. */
347 unsigned int osb_clusters_at_boot;
344 348
345 enum ocfs2_local_alloc_state local_alloc_state; /* protected 349 enum ocfs2_local_alloc_state local_alloc_state; /* protected
346 * by osb_lock */ 350 * by osb_lock */
@@ -349,6 +353,11 @@ struct ocfs2_super
349 353
350 u64 la_last_gd; 354 u64 la_last_gd;
351 355
356 struct ocfs2_reservation_map osb_la_resmap;
357
358 unsigned int osb_resv_level;
359 unsigned int osb_dir_resv_level;
360
352 /* Next three fields are for local node slot recovery during 361 /* Next three fields are for local node slot recovery during
353 * mount. */ 362 * mount. */
354 int dirty; 363 int dirty;
@@ -482,6 +491,13 @@ static inline int ocfs2_supports_indexed_dirs(struct ocfs2_super *osb)
482 return 0; 491 return 0;
483} 492}
484 493
494static inline int ocfs2_supports_discontig_bg(struct ocfs2_super *osb)
495{
496 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
497 return 1;
498 return 0;
499}
500
485static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb) 501static inline unsigned int ocfs2_link_max(struct ocfs2_super *osb)
486{ 502{
487 if (ocfs2_supports_indexed_dirs(osb)) 503 if (ocfs2_supports_indexed_dirs(osb))
@@ -763,6 +779,12 @@ static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
763 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); 779 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
764} 780}
765 781
782static inline unsigned int ocfs2_clusters_to_megabytes(struct super_block *sb,
783 unsigned int clusters)
784{
785 return clusters >> (20 - OCFS2_SB(sb)->s_clustersize_bits);
786}
787
766static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap) 788static inline void _ocfs2_set_bit(unsigned int bit, unsigned long *bitmap)
767{ 789{
768 ext2_set_bit(bit, bitmap); 790 ext2_set_bit(bit, bitmap);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index bb37218a7978..33f1c9a8258d 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -100,7 +100,8 @@
100 | OCFS2_FEATURE_INCOMPAT_XATTR \ 100 | OCFS2_FEATURE_INCOMPAT_XATTR \
101 | OCFS2_FEATURE_INCOMPAT_META_ECC \ 101 | OCFS2_FEATURE_INCOMPAT_META_ECC \
102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE) 103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \
104 | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG)
104#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 105#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
105 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 106 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
106 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 107 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -165,6 +166,9 @@
165/* Refcount tree support */ 166/* Refcount tree support */
166#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000 167#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000
167 168
169/* Discontigous block groups */
170#define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000
171
168/* 172/*
169 * backup superblock flag is used to indicate that this volume 173 * backup superblock flag is used to indicate that this volume
170 * has backup superblocks. 174 * has backup superblocks.
@@ -283,14 +287,6 @@
283#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) 287#define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024)
284 288
285/* 289/*
286 * Default local alloc size (in megabytes)
287 *
288 * The value chosen should be such that most allocations, including new
289 * block groups, use local alloc.
290 */
291#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
292
293/*
294 * Inline extended attribute size (in bytes) 290 * Inline extended attribute size (in bytes)
295 * The value chosen should be aligned to 16 byte boundaries. 291 * The value chosen should be aligned to 16 byte boundaries.
296 */ 292 */
@@ -512,7 +508,10 @@ struct ocfs2_extent_block
512 block group */ 508 block group */
513 __le32 h_fs_generation; /* Must match super block */ 509 __le32 h_fs_generation; /* Must match super block */
514 __le64 h_blkno; /* Offset on disk, in blocks */ 510 __le64 h_blkno; /* Offset on disk, in blocks */
515/*20*/ __le64 h_reserved3; 511/*20*/ __le64 h_suballoc_loc; /* Suballocator block group this
512 eb belongs to. Only valid
513 if allocated from a
514 discontiguous block group */
516 __le64 h_next_leaf_blk; /* Offset on disk, in blocks, 515 __le64 h_next_leaf_blk; /* Offset on disk, in blocks,
517 of next leaf header pointing 516 of next leaf header pointing
518 to data */ 517 to data */
@@ -679,7 +678,11 @@ struct ocfs2_dinode {
679/*80*/ struct ocfs2_block_check i_check; /* Error checking */ 678/*80*/ struct ocfs2_block_check i_check; /* Error checking */
680/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ 679/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
681/*90*/ __le64 i_refcount_loc; 680/*90*/ __le64 i_refcount_loc;
682 __le64 i_reserved2[4]; 681 __le64 i_suballoc_loc; /* Suballocator block group this
682 inode belongs to. Only valid
683 if allocated from a
684 discontiguous block group */
685/*A0*/ __le64 i_reserved2[3];
683/*B8*/ union { 686/*B8*/ union {
684 __le64 i_pad1; /* Generic way to refer to this 687 __le64 i_pad1; /* Generic way to refer to this
685 64bit union */ 688 64bit union */
@@ -814,7 +817,12 @@ struct ocfs2_dx_root_block {
814 __le32 dr_reserved2; 817 __le32 dr_reserved2;
815 __le64 dr_free_blk; /* Pointer to head of free 818 __le64 dr_free_blk; /* Pointer to head of free
816 * unindexed block list. */ 819 * unindexed block list. */
817 __le64 dr_reserved3[15]; 820 __le64 dr_suballoc_loc; /* Suballocator block group
821 this root belongs to.
822 Only valid if allocated
823 from a discontiguous
824 block group */
825 __le64 dr_reserved3[14];
818 union { 826 union {
819 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128 827 struct ocfs2_extent_list dr_list; /* Keep this aligned to 128
820 * bits for maximum space 828 * bits for maximum space
@@ -840,6 +848,13 @@ struct ocfs2_dx_leaf {
840}; 848};
841 849
842/* 850/*
851 * Largest bitmap for a block (suballocator) group in bytes. This limit
852 * does not affect cluster groups (global allocator). Cluster group
853 * bitmaps run to the end of the block.
854 */
855#define OCFS2_MAX_BG_BITMAP_SIZE 256
856
857/*
843 * On disk allocator group structure for OCFS2 858 * On disk allocator group structure for OCFS2
844 */ 859 */
845struct ocfs2_group_desc 860struct ocfs2_group_desc
@@ -860,7 +875,29 @@ struct ocfs2_group_desc
860 __le64 bg_blkno; /* Offset on disk, in blocks */ 875 __le64 bg_blkno; /* Offset on disk, in blocks */
861/*30*/ struct ocfs2_block_check bg_check; /* Error checking */ 876/*30*/ struct ocfs2_block_check bg_check; /* Error checking */
862 __le64 bg_reserved2; 877 __le64 bg_reserved2;
863/*40*/ __u8 bg_bitmap[0]; 878/*40*/ union {
879 __u8 bg_bitmap[0];
880 struct {
881 /*
882 * Block groups may be discontiguous when
883 * OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG is set.
884 * The extents of a discontigous block group are
885 * stored in bg_list. It is a flat list.
886 * l_tree_depth must always be zero. A
887 * discontiguous group is signified by a non-zero
888 * bg_list->l_next_free_rec. Only block groups
889 * can be discontiguous; Cluster groups cannot.
890 * We've never made a block group with more than
891 * 2048 blocks (256 bytes of bg_bitmap). This
892 * codifies that limit so that we can fit bg_list.
893 * bg_size of a discontiguous block group will
894 * be 256 to match bg_bitmap_filler.
895 */
896 __u8 bg_bitmap_filler[OCFS2_MAX_BG_BITMAP_SIZE];
897/*140*/ struct ocfs2_extent_list bg_list;
898 };
899 };
900/* Actual on-disk size is one block */
864}; 901};
865 902
866struct ocfs2_refcount_rec { 903struct ocfs2_refcount_rec {
@@ -905,7 +942,11 @@ struct ocfs2_refcount_block {
905/*40*/ __le32 rf_generation; /* generation number. all be the same 942/*40*/ __le32 rf_generation; /* generation number. all be the same
906 * for the same refcount tree. */ 943 * for the same refcount tree. */
907 __le32 rf_reserved0; 944 __le32 rf_reserved0;
908 __le64 rf_reserved1[7]; 945 __le64 rf_suballoc_loc; /* Suballocator block group this
946 refcount block belongs to. Only
947 valid if allocated from a
948 discontiguous block group */
949/*50*/ __le64 rf_reserved1[6];
909/*80*/ union { 950/*80*/ union {
910 struct ocfs2_refcount_list rf_records; /* List of refcount 951 struct ocfs2_refcount_list rf_records; /* List of refcount
911 records */ 952 records */
@@ -1017,7 +1058,10 @@ struct ocfs2_xattr_block {
1017 real xattr or a xattr tree. */ 1058 real xattr or a xattr tree. */
1018 __le16 xb_reserved0; 1059 __le16 xb_reserved0;
1019 __le32 xb_reserved1; 1060 __le32 xb_reserved1;
1020 __le64 xb_reserved2; 1061 __le64 xb_suballoc_loc; /* Suballocator block group this
1062 xattr block belongs to. Only
1063 valid if allocated from a
1064 discontiguous block group */
1021/*30*/ union { 1065/*30*/ union {
1022 struct ocfs2_xattr_header xb_header; /* xattr header if this 1066 struct ocfs2_xattr_header xb_header; /* xattr header if this
1023 block contains xattr */ 1067 block contains xattr */
@@ -1254,6 +1298,16 @@ static inline u16 ocfs2_extent_recs_per_eb(struct super_block *sb)
1254 return size / sizeof(struct ocfs2_extent_rec); 1298 return size / sizeof(struct ocfs2_extent_rec);
1255} 1299}
1256 1300
1301static inline u16 ocfs2_extent_recs_per_gd(struct super_block *sb)
1302{
1303 int size;
1304
1305 size = sb->s_blocksize -
1306 offsetof(struct ocfs2_group_desc, bg_list.l_recs);
1307
1308 return size / sizeof(struct ocfs2_extent_rec);
1309}
1310
1257static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb) 1311static inline int ocfs2_dx_entries_per_leaf(struct super_block *sb)
1258{ 1312{
1259 int size; 1313 int size;
@@ -1284,13 +1338,23 @@ static inline u16 ocfs2_local_alloc_size(struct super_block *sb)
1284 return size; 1338 return size;
1285} 1339}
1286 1340
1287static inline int ocfs2_group_bitmap_size(struct super_block *sb) 1341static inline int ocfs2_group_bitmap_size(struct super_block *sb,
1342 int suballocator,
1343 u32 feature_incompat)
1288{ 1344{
1289 int size; 1345 int size = sb->s_blocksize -
1290
1291 size = sb->s_blocksize -
1292 offsetof(struct ocfs2_group_desc, bg_bitmap); 1346 offsetof(struct ocfs2_group_desc, bg_bitmap);
1293 1347
1348 /*
1349 * The cluster allocator uses the entire block. Suballocators have
1350 * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
1351 * code expects bg_size set to the maximum. Thus we must keep
1352 * bg_size as-is unless discontig_bg is enabled.
1353 */
1354 if (suballocator &&
1355 (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
1356 size = OCFS2_MAX_BG_BITMAP_SIZE;
1357
1294 return size; 1358 return size;
1295} 1359}
1296 1360
@@ -1402,23 +1466,43 @@ static inline int ocfs2_extent_recs_per_eb(int blocksize)
1402 return size / sizeof(struct ocfs2_extent_rec); 1466 return size / sizeof(struct ocfs2_extent_rec);
1403} 1467}
1404 1468
1405static inline int ocfs2_local_alloc_size(int blocksize) 1469static inline int ocfs2_extent_recs_per_gd(int blocksize)
1406{ 1470{
1407 int size; 1471 int size;
1408 1472
1409 size = blocksize - 1473 size = blocksize -
1410 offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap); 1474 offsetof(struct ocfs2_group_desc, bg_list.l_recs);
1411 1475
1412 return size; 1476 return size / sizeof(struct ocfs2_extent_rec);
1413} 1477}
1414 1478
1415static inline int ocfs2_group_bitmap_size(int blocksize) 1479static inline int ocfs2_local_alloc_size(int blocksize)
1416{ 1480{
1417 int size; 1481 int size;
1418 1482
1419 size = blocksize - 1483 size = blocksize -
1484 offsetof(struct ocfs2_dinode, id2.i_lab.la_bitmap);
1485
1486 return size;
1487}
1488
1489static inline int ocfs2_group_bitmap_size(int blocksize,
1490 int suballocator,
1491 uint32_t feature_incompat)
1492{
1493 int size = sb->s_blocksize -
1420 offsetof(struct ocfs2_group_desc, bg_bitmap); 1494 offsetof(struct ocfs2_group_desc, bg_bitmap);
1421 1495
1496 /*
1497 * The cluster allocator uses the entire block. Suballocators have
1498 * never used more than OCFS2_MAX_BG_BITMAP_SIZE. Unfortunately, older
1499 * code expects bg_size set to the maximum. Thus we must keep
1500 * bg_size as-is unless discontig_bg is enabled.
1501 */
1502 if (suballocator &&
1503 (feature_incompat & OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG))
1504 size = OCFS2_MAX_BG_BITMAP_SIZE;
1505
1422 return size; 1506 return size;
1423} 1507}
1424 1508
@@ -1491,5 +1575,19 @@ static inline void ocfs2_set_de_type(struct ocfs2_dir_entry *de,
1491 de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; 1575 de->file_type = ocfs2_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
1492} 1576}
1493 1577
1578static inline int ocfs2_gd_is_discontig(struct ocfs2_group_desc *gd)
1579{
1580 if ((offsetof(struct ocfs2_group_desc, bg_bitmap) +
1581 le16_to_cpu(gd->bg_size)) !=
1582 offsetof(struct ocfs2_group_desc, bg_list))
1583 return 0;
1584 /*
1585 * Only valid to check l_next_free_rec if
1586 * bg_bitmap + bg_size == bg_list.
1587 */
1588 if (!gd->bg_list.l_next_free_rec)
1589 return 0;
1590 return 1;
1591}
1494#endif /* _OCFS2_FS_H */ 1592#endif /* _OCFS2_FS_H */
1495 1593
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index ab42a74c7539..04ae76d8c6ab 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -261,10 +261,8 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
261 brelse(bh); 261 brelse(bh);
262 goto out; 262 goto out;
263 } 263 }
264 err = ocfs2_journal_dirty(handle, bh); 264 ocfs2_journal_dirty(handle, bh);
265 brelse(bh); 265 brelse(bh);
266 if (err < 0)
267 goto out;
268out: 266out:
269 if (err) { 267 if (err) {
270 mutex_unlock(&gqinode->i_mutex); 268 mutex_unlock(&gqinode->i_mutex);
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 9ad49305f450..884b641f199e 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -119,12 +119,8 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
119 lock_buffer(bh); 119 lock_buffer(bh);
120 modify(bh, private); 120 modify(bh, private);
121 unlock_buffer(bh); 121 unlock_buffer(bh);
122 status = ocfs2_journal_dirty(handle, bh); 122 ocfs2_journal_dirty(handle, bh);
123 if (status < 0) { 123
124 mlog_errno(status);
125 ocfs2_commit_trans(OCFS2_SB(sb), handle);
126 return status;
127 }
128 status = ocfs2_commit_trans(OCFS2_SB(sb), handle); 124 status = ocfs2_commit_trans(OCFS2_SB(sb), handle);
129 if (status < 0) { 125 if (status < 0) {
130 mlog_errno(status); 126 mlog_errno(status);
@@ -523,9 +519,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
523 ocfs2_clear_bit(bit, dchunk->dqc_bitmap); 519 ocfs2_clear_bit(bit, dchunk->dqc_bitmap);
524 le32_add_cpu(&dchunk->dqc_free, 1); 520 le32_add_cpu(&dchunk->dqc_free, 1);
525 unlock_buffer(qbh); 521 unlock_buffer(qbh);
526 status = ocfs2_journal_dirty(handle, qbh); 522 ocfs2_journal_dirty(handle, qbh);
527 if (status < 0)
528 mlog_errno(status);
529out_commit: 523out_commit:
530 mutex_unlock(&sb_dqopt(sb)->dqio_mutex); 524 mutex_unlock(&sb_dqopt(sb)->dqio_mutex);
531 ocfs2_commit_trans(OCFS2_SB(sb), handle); 525 ocfs2_commit_trans(OCFS2_SB(sb), handle);
@@ -631,9 +625,7 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
631 lock_buffer(bh); 625 lock_buffer(bh);
632 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN); 626 ldinfo->dqi_flags = cpu_to_le32(flags | OLQF_CLEAN);
633 unlock_buffer(bh); 627 unlock_buffer(bh);
634 status = ocfs2_journal_dirty(handle, bh); 628 ocfs2_journal_dirty(handle, bh);
635 if (status < 0)
636 mlog_errno(status);
637out_trans: 629out_trans:
638 ocfs2_commit_trans(osb, handle); 630 ocfs2_commit_trans(osb, handle);
639out_bh: 631out_bh:
@@ -1009,11 +1001,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1009 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) - 1001 sb->s_blocksize - sizeof(struct ocfs2_local_disk_chunk) -
1010 OCFS2_QBLK_RESERVED_SPACE); 1002 OCFS2_QBLK_RESERVED_SPACE);
1011 unlock_buffer(bh); 1003 unlock_buffer(bh);
1012 status = ocfs2_journal_dirty(handle, bh); 1004 ocfs2_journal_dirty(handle, bh);
1013 if (status < 0) {
1014 mlog_errno(status);
1015 goto out_trans;
1016 }
1017 1005
1018 /* Initialize new block with structures */ 1006 /* Initialize new block with structures */
1019 down_read(&OCFS2_I(lqinode)->ip_alloc_sem); 1007 down_read(&OCFS2_I(lqinode)->ip_alloc_sem);
@@ -1040,11 +1028,7 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1040 lock_buffer(dbh); 1028 lock_buffer(dbh);
1041 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE); 1029 memset(dbh->b_data, 0, sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE);
1042 unlock_buffer(dbh); 1030 unlock_buffer(dbh);
1043 status = ocfs2_journal_dirty(handle, dbh); 1031 ocfs2_journal_dirty(handle, dbh);
1044 if (status < 0) {
1045 mlog_errno(status);
1046 goto out_trans;
1047 }
1048 1032
1049 /* Update local quotafile info */ 1033 /* Update local quotafile info */
1050 oinfo->dqi_blocks += 2; 1034 oinfo->dqi_blocks += 2;
@@ -1155,11 +1139,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1155 lock_buffer(bh); 1139 lock_buffer(bh);
1156 memset(bh->b_data, 0, sb->s_blocksize); 1140 memset(bh->b_data, 0, sb->s_blocksize);
1157 unlock_buffer(bh); 1141 unlock_buffer(bh);
1158 status = ocfs2_journal_dirty(handle, bh); 1142 ocfs2_journal_dirty(handle, bh);
1159 if (status < 0) { 1143
1160 mlog_errno(status);
1161 goto out_trans;
1162 }
1163 /* Update chunk header */ 1144 /* Update chunk header */
1164 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), 1145 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
1165 chunk->qc_headerbh, 1146 chunk->qc_headerbh,
@@ -1173,11 +1154,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1173 lock_buffer(chunk->qc_headerbh); 1154 lock_buffer(chunk->qc_headerbh);
1174 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb)); 1155 le32_add_cpu(&dchunk->dqc_free, ol_quota_entries_per_block(sb));
1175 unlock_buffer(chunk->qc_headerbh); 1156 unlock_buffer(chunk->qc_headerbh);
1176 status = ocfs2_journal_dirty(handle, chunk->qc_headerbh); 1157 ocfs2_journal_dirty(handle, chunk->qc_headerbh);
1177 if (status < 0) { 1158
1178 mlog_errno(status);
1179 goto out_trans;
1180 }
1181 /* Update file header */ 1159 /* Update file header */
1182 oinfo->dqi_blocks++; 1160 oinfo->dqi_blocks++;
1183 status = ocfs2_local_write_info(sb, type); 1161 status = ocfs2_local_write_info(sb, type);
@@ -1312,12 +1290,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
1312 ocfs2_clear_bit(offset, dchunk->dqc_bitmap); 1290 ocfs2_clear_bit(offset, dchunk->dqc_bitmap);
1313 le32_add_cpu(&dchunk->dqc_free, 1); 1291 le32_add_cpu(&dchunk->dqc_free, 1);
1314 unlock_buffer(od->dq_chunk->qc_headerbh); 1292 unlock_buffer(od->dq_chunk->qc_headerbh);
1315 status = ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh); 1293 ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
1316 if (status < 0) { 1294
1317 mlog_errno(status);
1318 goto out;
1319 }
1320 status = 0;
1321out: 1295out:
1322 /* Clear the read bit so that next time someone uses this 1296 /* Clear the read bit so that next time someone uses this
1323 * dquot he reads fresh info from disk and allocates local 1297 * dquot he reads fresh info from disk and allocates local
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 5cbcd0f008fc..4793f36f6518 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -570,7 +570,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
570 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL; 570 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
571 u16 suballoc_bit_start; 571 u16 suballoc_bit_start;
572 u32 num_got; 572 u32 num_got;
573 u64 first_blkno; 573 u64 suballoc_loc, first_blkno;
574 574
575 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL); 575 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
576 576
@@ -596,7 +596,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
596 goto out_commit; 596 goto out_commit;
597 } 597 }
598 598
599 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, 599 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
600 &suballoc_bit_start, &num_got, 600 &suballoc_bit_start, &num_got,
601 &first_blkno); 601 &first_blkno);
602 if (ret) { 602 if (ret) {
@@ -626,6 +626,7 @@ static int ocfs2_create_refcount_tree(struct inode *inode,
626 memset(rb, 0, inode->i_sb->s_blocksize); 626 memset(rb, 0, inode->i_sb->s_blocksize);
627 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 627 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
628 rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 628 rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
629 rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
629 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 630 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
630 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation); 631 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
631 rb->rf_blkno = cpu_to_le64(first_blkno); 632 rb->rf_blkno = cpu_to_le64(first_blkno);
@@ -790,7 +791,10 @@ int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
790 if (le32_to_cpu(rb->rf_count) == 1) { 791 if (le32_to_cpu(rb->rf_count) == 1) {
791 blk = le64_to_cpu(rb->rf_blkno); 792 blk = le64_to_cpu(rb->rf_blkno);
792 bit = le16_to_cpu(rb->rf_suballoc_bit); 793 bit = le16_to_cpu(rb->rf_suballoc_bit);
793 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 794 if (rb->rf_suballoc_loc)
795 bg_blkno = le64_to_cpu(rb->rf_suballoc_loc);
796 else
797 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
794 798
795 alloc_inode = ocfs2_get_system_file_inode(osb, 799 alloc_inode = ocfs2_get_system_file_inode(osb,
796 EXTENT_ALLOC_SYSTEM_INODE, 800 EXTENT_ALLOC_SYSTEM_INODE,
@@ -1268,9 +1272,7 @@ static int ocfs2_change_refcount_rec(handle_t *handle,
1268 } else if (merge) 1272 } else if (merge)
1269 ocfs2_refcount_rec_merge(rb, index); 1273 ocfs2_refcount_rec_merge(rb, index);
1270 1274
1271 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1275 ocfs2_journal_dirty(handle, ref_leaf_bh);
1272 if (ret)
1273 mlog_errno(ret);
1274out: 1276out:
1275 return ret; 1277 return ret;
1276} 1278}
@@ -1284,7 +1286,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1284 int ret; 1286 int ret;
1285 u16 suballoc_bit_start; 1287 u16 suballoc_bit_start;
1286 u32 num_got; 1288 u32 num_got;
1287 u64 blkno; 1289 u64 suballoc_loc, blkno;
1288 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1290 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1289 struct buffer_head *new_bh = NULL; 1291 struct buffer_head *new_bh = NULL;
1290 struct ocfs2_refcount_block *new_rb; 1292 struct ocfs2_refcount_block *new_rb;
@@ -1298,7 +1300,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1298 goto out; 1300 goto out;
1299 } 1301 }
1300 1302
1301 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1303 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
1302 &suballoc_bit_start, &num_got, 1304 &suballoc_bit_start, &num_got,
1303 &blkno); 1305 &blkno);
1304 if (ret) { 1306 if (ret) {
@@ -1330,6 +1332,7 @@ static int ocfs2_expand_inline_ref_root(handle_t *handle,
1330 1332
1331 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data; 1333 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
1332 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1334 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
1335 new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
1333 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1336 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1334 new_rb->rf_blkno = cpu_to_le64(blkno); 1337 new_rb->rf_blkno = cpu_to_le64(blkno);
1335 new_rb->rf_cpos = cpu_to_le32(0); 1338 new_rb->rf_cpos = cpu_to_le32(0);
@@ -1524,7 +1527,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1524 int ret; 1527 int ret;
1525 u16 suballoc_bit_start; 1528 u16 suballoc_bit_start;
1526 u32 num_got, new_cpos; 1529 u32 num_got, new_cpos;
1527 u64 blkno; 1530 u64 suballoc_loc, blkno;
1528 struct super_block *sb = ocfs2_metadata_cache_get_super(ci); 1531 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1529 struct ocfs2_refcount_block *root_rb = 1532 struct ocfs2_refcount_block *root_rb =
1530 (struct ocfs2_refcount_block *)ref_root_bh->b_data; 1533 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
@@ -1548,7 +1551,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1548 goto out; 1551 goto out;
1549 } 1552 }
1550 1553
1551 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1, 1554 ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
1552 &suballoc_bit_start, &num_got, 1555 &suballoc_bit_start, &num_got,
1553 &blkno); 1556 &blkno);
1554 if (ret) { 1557 if (ret) {
@@ -1576,6 +1579,7 @@ static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1576 memset(new_rb, 0, sb->s_blocksize); 1579 memset(new_rb, 0, sb->s_blocksize);
1577 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE); 1580 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
1578 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot); 1581 new_rb->rf_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
1582 new_rb->rf_suballoc_loc = cpu_to_le64(suballoc_loc);
1579 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start); 1583 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1580 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 1584 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
1581 new_rb->rf_blkno = cpu_to_le64(blkno); 1585 new_rb->rf_blkno = cpu_to_le64(blkno);
@@ -1694,7 +1698,7 @@ static int ocfs2_adjust_refcount_rec(handle_t *handle,
1694 * 2 more credits, one for the leaf refcount block, one for 1698 * 2 more credits, one for the leaf refcount block, one for
1695 * the extent block contains the extent rec. 1699 * the extent block contains the extent rec.
1696 */ 1700 */
1697 ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2); 1701 ret = ocfs2_extend_trans(handle, 2);
1698 if (ret < 0) { 1702 if (ret < 0) {
1699 mlog_errno(ret); 1703 mlog_errno(ret);
1700 goto out; 1704 goto out;
@@ -1802,11 +1806,7 @@ static int ocfs2_insert_refcount_rec(handle_t *handle,
1802 if (merge) 1806 if (merge)
1803 ocfs2_refcount_rec_merge(rb, index); 1807 ocfs2_refcount_rec_merge(rb, index);
1804 1808
1805 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1809 ocfs2_journal_dirty(handle, ref_leaf_bh);
1806 if (ret) {
1807 mlog_errno(ret);
1808 goto out;
1809 }
1810 1810
1811 if (index == 0) { 1811 if (index == 0) {
1812 ret = ocfs2_adjust_refcount_rec(handle, ci, 1812 ret = ocfs2_adjust_refcount_rec(handle, ci,
@@ -1977,9 +1977,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
1977 ocfs2_refcount_rec_merge(rb, index); 1977 ocfs2_refcount_rec_merge(rb, index);
1978 } 1978 }
1979 1979
1980 ret = ocfs2_journal_dirty(handle, ref_leaf_bh); 1980 ocfs2_journal_dirty(handle, ref_leaf_bh);
1981 if (ret)
1982 mlog_errno(ret);
1983 1981
1984out: 1982out:
1985 brelse(new_bh); 1983 brelse(new_bh);
@@ -2112,6 +2110,7 @@ static int ocfs2_remove_refcount_extent(handle_t *handle,
2112 */ 2110 */
2113 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE, 2111 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
2114 le16_to_cpu(rb->rf_suballoc_slot), 2112 le16_to_cpu(rb->rf_suballoc_slot),
2113 le64_to_cpu(rb->rf_suballoc_loc),
2115 le64_to_cpu(rb->rf_blkno), 2114 le64_to_cpu(rb->rf_blkno),
2116 le16_to_cpu(rb->rf_suballoc_bit)); 2115 le16_to_cpu(rb->rf_suballoc_bit));
2117 if (ret) { 2116 if (ret) {
@@ -2516,20 +2515,19 @@ out:
2516 * 2515 *
2517 * Normally the refcount blocks store these refcount should be 2516 * Normally the refcount blocks store these refcount should be
2518 * contiguous also, so that we can get the number easily. 2517 * contiguous also, so that we can get the number easily.
2519 * As for meta_ac, we will at most add split 2 refcount record and 2518 * We will at most add split 2 refcount records and 2 more
2520 * 2 more refcount block, so just check it in a rough way. 2519 * refcount blocks, so just check it in a rough way.
2521 * 2520 *
2522 * Caller must hold refcount tree lock. 2521 * Caller must hold refcount tree lock.
2523 */ 2522 */
2524int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 2523int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2525 struct buffer_head *di_bh, 2524 u64 refcount_loc,
2526 u64 phys_blkno, 2525 u64 phys_blkno,
2527 u32 clusters, 2526 u32 clusters,
2528 int *credits, 2527 int *credits,
2529 struct ocfs2_alloc_context **meta_ac) 2528 int *ref_blocks)
2530{ 2529{
2531 int ret, ref_blocks = 0; 2530 int ret;
2532 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2533 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2531 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2534 struct buffer_head *ref_root_bh = NULL; 2532 struct buffer_head *ref_root_bh = NULL;
2535 struct ocfs2_refcount_tree *tree; 2533 struct ocfs2_refcount_tree *tree;
@@ -2546,14 +2544,13 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2546 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); 2544 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2547 2545
2548 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), 2546 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2549 le64_to_cpu(di->i_refcount_loc), &tree); 2547 refcount_loc, &tree);
2550 if (ret) { 2548 if (ret) {
2551 mlog_errno(ret); 2549 mlog_errno(ret);
2552 goto out; 2550 goto out;
2553 } 2551 }
2554 2552
2555 ret = ocfs2_read_refcount_block(&tree->rf_ci, 2553 ret = ocfs2_read_refcount_block(&tree->rf_ci, refcount_loc,
2556 le64_to_cpu(di->i_refcount_loc),
2557 &ref_root_bh); 2554 &ref_root_bh);
2558 if (ret) { 2555 if (ret) {
2559 mlog_errno(ret); 2556 mlog_errno(ret);
@@ -2564,21 +2561,14 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2564 &tree->rf_ci, 2561 &tree->rf_ci,
2565 ref_root_bh, 2562 ref_root_bh,
2566 start_cpos, clusters, 2563 start_cpos, clusters,
2567 &ref_blocks, credits); 2564 ref_blocks, credits);
2568 if (ret) { 2565 if (ret) {
2569 mlog_errno(ret); 2566 mlog_errno(ret);
2570 goto out; 2567 goto out;
2571 } 2568 }
2572 2569
2573 mlog(0, "reserve new metadata %d, credits = %d\n", 2570 mlog(0, "reserve new metadata %d blocks, credits = %d\n",
2574 ref_blocks, *credits); 2571 *ref_blocks, *credits);
2575
2576 if (ref_blocks) {
2577 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2578 ref_blocks, meta_ac);
2579 if (ret)
2580 mlog_errno(ret);
2581 }
2582 2572
2583out: 2573out:
2584 brelse(ref_root_bh); 2574 brelse(ref_root_bh);
@@ -3040,11 +3030,7 @@ static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
3040 } 3030 }
3041 3031
3042 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize); 3032 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
3043 ret = ocfs2_journal_dirty(handle, new_bh); 3033 ocfs2_journal_dirty(handle, new_bh);
3044 if (ret) {
3045 mlog_errno(ret);
3046 break;
3047 }
3048 3034
3049 brelse(new_bh); 3035 brelse(new_bh);
3050 brelse(old_bh); 3036 brelse(old_bh);
@@ -3282,7 +3268,7 @@ static int ocfs2_make_clusters_writable(struct super_block *sb,
3282 } else { 3268 } else {
3283 delete = 1; 3269 delete = 1;
3284 3270
3285 ret = __ocfs2_claim_clusters(osb, handle, 3271 ret = __ocfs2_claim_clusters(handle,
3286 context->data_ac, 3272 context->data_ac,
3287 1, set_len, 3273 1, set_len,
3288 &new_bit, &new_len); 3274 &new_bit, &new_len);
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index c1d19b1d3ecc..9983ba1570e2 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -47,11 +47,11 @@ int ocfs2_decrease_refcount(struct inode *inode,
47 struct ocfs2_cached_dealloc_ctxt *dealloc, 47 struct ocfs2_cached_dealloc_ctxt *dealloc,
48 int delete); 48 int delete);
49int ocfs2_prepare_refcount_change_for_del(struct inode *inode, 49int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
50 struct buffer_head *di_bh, 50 u64 refcount_loc,
51 u64 phys_blkno, 51 u64 phys_blkno,
52 u32 clusters, 52 u32 clusters,
53 int *credits, 53 int *credits,
54 struct ocfs2_alloc_context **meta_ac); 54 int *ref_blocks);
55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, 55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
56 u32 cpos, u32 write_len, u32 max_cpos); 56 u32 cpos, u32 write_len, u32 max_cpos);
57 57
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c
new file mode 100644
index 000000000000..40650021fc24
--- /dev/null
+++ b/fs/ocfs2/reservations.c
@@ -0,0 +1,847 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.c
5 *
6 * Allocation reservations implementation
7 *
8 * Some code borrowed from fs/ext3/balloc.c and is:
9 *
10 * Copyright (C) 1992, 1993, 1994, 1995
11 * Remy Card (card@masi.ibp.fr)
12 * Laboratoire MASI - Institut Blaise Pascal
13 * Universite Pierre et Marie Curie (Paris VI)
14 *
15 * The rest is copyright (C) 2010 Novell. All rights reserved.
16 *
17 * This program is free software; you can redistribute it and/or
18 * modify it under the terms of the GNU General Public
19 * License version 2 as published by the Free Software Foundation.
20 *
21 * This program is distributed in the hope that it will be useful,
22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
24 * General Public License for more details.
25 */
26
27#include <linux/fs.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h>
31#include <linux/bitops.h>
32#include <linux/list.h>
33
34#define MLOG_MASK_PREFIX ML_RESERVATIONS
35#include <cluster/masklog.h>
36
37#include "ocfs2.h"
38
39#ifdef CONFIG_OCFS2_DEBUG_FS
40#define OCFS2_CHECK_RESERVATIONS
41#endif
42
43DEFINE_SPINLOCK(resv_lock);
44
45#define OCFS2_MIN_RESV_WINDOW_BITS 8
46#define OCFS2_MAX_RESV_WINDOW_BITS 1024
47
48int ocfs2_dir_resv_allowed(struct ocfs2_super *osb)
49{
50 return (osb->osb_resv_level && osb->osb_dir_resv_level);
51}
52
53static unsigned int ocfs2_resv_window_bits(struct ocfs2_reservation_map *resmap,
54 struct ocfs2_alloc_reservation *resv)
55{
56 struct ocfs2_super *osb = resmap->m_osb;
57 unsigned int bits;
58
59 if (!(resv->r_flags & OCFS2_RESV_FLAG_DIR)) {
60 /* 8, 16, 32, 64, 128, 256, 512, 1024 */
61 bits = 4 << osb->osb_resv_level;
62 } else {
63 bits = 4 << osb->osb_dir_resv_level;
64 }
65 return bits;
66}
67
68static inline unsigned int ocfs2_resv_end(struct ocfs2_alloc_reservation *resv)
69{
70 if (resv->r_len)
71 return resv->r_start + resv->r_len - 1;
72 return resv->r_start;
73}
74
75static inline int ocfs2_resv_empty(struct ocfs2_alloc_reservation *resv)
76{
77 return !!(resv->r_len == 0);
78}
79
80static inline int ocfs2_resmap_disabled(struct ocfs2_reservation_map *resmap)
81{
82 if (resmap->m_osb->osb_resv_level == 0)
83 return 1;
84 return 0;
85}
86
87static void ocfs2_dump_resv(struct ocfs2_reservation_map *resmap)
88{
89 struct ocfs2_super *osb = resmap->m_osb;
90 struct rb_node *node;
91 struct ocfs2_alloc_reservation *resv;
92 int i = 0;
93
94 mlog(ML_NOTICE, "Dumping resmap for device %s. Bitmap length: %u\n",
95 osb->dev_str, resmap->m_bitmap_len);
96
97 node = rb_first(&resmap->m_reservations);
98 while (node) {
99 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
100
101 mlog(ML_NOTICE, "start: %u\tend: %u\tlen: %u\tlast_start: %u"
102 "\tlast_len: %u\n", resv->r_start,
103 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
104 resv->r_last_len);
105
106 node = rb_next(node);
107 i++;
108 }
109
110 mlog(ML_NOTICE, "%d reservations found. LRU follows\n", i);
111
112 i = 0;
113 list_for_each_entry(resv, &resmap->m_lru, r_lru) {
114 mlog(ML_NOTICE, "LRU(%d) start: %u\tend: %u\tlen: %u\t"
115 "last_start: %u\tlast_len: %u\n", i, resv->r_start,
116 ocfs2_resv_end(resv), resv->r_len, resv->r_last_start,
117 resv->r_last_len);
118
119 i++;
120 }
121}
122
123#ifdef OCFS2_CHECK_RESERVATIONS
124static int ocfs2_validate_resmap_bits(struct ocfs2_reservation_map *resmap,
125 int i,
126 struct ocfs2_alloc_reservation *resv)
127{
128 char *disk_bitmap = resmap->m_disk_bitmap;
129 unsigned int start = resv->r_start;
130 unsigned int end = ocfs2_resv_end(resv);
131
132 while (start <= end) {
133 if (ocfs2_test_bit(start, disk_bitmap)) {
134 mlog(ML_ERROR,
135 "reservation %d covers an allocated area "
136 "starting at bit %u!\n", i, start);
137 return 1;
138 }
139
140 start++;
141 }
142 return 0;
143}
144
145static void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
146{
147 unsigned int off = 0;
148 int i = 0;
149 struct rb_node *node;
150 struct ocfs2_alloc_reservation *resv;
151
152 node = rb_first(&resmap->m_reservations);
153 while (node) {
154 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
155
156 if (i > 0 && resv->r_start <= off) {
157 mlog(ML_ERROR, "reservation %d has bad start off!\n",
158 i);
159 goto bad;
160 }
161
162 if (resv->r_len == 0) {
163 mlog(ML_ERROR, "reservation %d has no length!\n",
164 i);
165 goto bad;
166 }
167
168 if (resv->r_start > ocfs2_resv_end(resv)) {
169 mlog(ML_ERROR, "reservation %d has invalid range!\n",
170 i);
171 goto bad;
172 }
173
174 if (ocfs2_resv_end(resv) >= resmap->m_bitmap_len) {
175 mlog(ML_ERROR, "reservation %d extends past bitmap!\n",
176 i);
177 goto bad;
178 }
179
180 if (ocfs2_validate_resmap_bits(resmap, i, resv))
181 goto bad;
182
183 off = ocfs2_resv_end(resv);
184 node = rb_next(node);
185
186 i++;
187 }
188 return;
189
190bad:
191 ocfs2_dump_resv(resmap);
192 BUG();
193}
194#else
/* No-op variant used when OCFS2_CHECK_RESERVATIONS is not defined. */
static inline void ocfs2_check_resmap(struct ocfs2_reservation_map *resmap)
{
}
199#endif
200
201void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv)
202{
203 memset(resv, 0, sizeof(*resv));
204 INIT_LIST_HEAD(&resv->r_lru);
205}
206
207void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
208 unsigned int flags)
209{
210 BUG_ON(flags & ~OCFS2_RESV_TYPES);
211
212 resv->r_flags |= flags;
213}
214
215int ocfs2_resmap_init(struct ocfs2_super *osb,
216 struct ocfs2_reservation_map *resmap)
217{
218 memset(resmap, 0, sizeof(*resmap));
219
220 resmap->m_osb = osb;
221 resmap->m_reservations = RB_ROOT;
222 /* m_bitmap_len is initialized to zero by the above memset. */
223 INIT_LIST_HEAD(&resmap->m_lru);
224
225 return 0;
226}
227
228static void ocfs2_resv_mark_lru(struct ocfs2_reservation_map *resmap,
229 struct ocfs2_alloc_reservation *resv)
230{
231 assert_spin_locked(&resv_lock);
232
233 if (!list_empty(&resv->r_lru))
234 list_del_init(&resv->r_lru);
235
236 list_add_tail(&resv->r_lru, &resmap->m_lru);
237}
238
239static void __ocfs2_resv_trunc(struct ocfs2_alloc_reservation *resv)
240{
241 resv->r_len = 0;
242 resv->r_start = 0;
243}
244
245static void ocfs2_resv_remove(struct ocfs2_reservation_map *resmap,
246 struct ocfs2_alloc_reservation *resv)
247{
248 if (resv->r_flags & OCFS2_RESV_FLAG_INUSE) {
249 list_del_init(&resv->r_lru);
250 rb_erase(&resv->r_node, &resmap->m_reservations);
251 resv->r_flags &= ~OCFS2_RESV_FLAG_INUSE;
252 }
253}
254
255static void __ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
256 struct ocfs2_alloc_reservation *resv)
257{
258 assert_spin_locked(&resv_lock);
259
260 __ocfs2_resv_trunc(resv);
261 /*
262 * last_len and last_start no longer make sense if
263 * we're changing the range of our allocations.
264 */
265 resv->r_last_len = resv->r_last_start = 0;
266
267 ocfs2_resv_remove(resmap, resv);
268}
269
270/* does nothing if 'resv' is null */
271void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
272 struct ocfs2_alloc_reservation *resv)
273{
274 if (resv) {
275 spin_lock(&resv_lock);
276 __ocfs2_resv_discard(resmap, resv);
277 spin_unlock(&resv_lock);
278 }
279}
280
281static void ocfs2_resmap_clear_all_resv(struct ocfs2_reservation_map *resmap)
282{
283 struct rb_node *node;
284 struct ocfs2_alloc_reservation *resv;
285
286 assert_spin_locked(&resv_lock);
287
288 while ((node = rb_last(&resmap->m_reservations)) != NULL) {
289 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
290
291 __ocfs2_resv_discard(resmap, resv);
292 }
293}
294
295void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
296 unsigned int clen, char *disk_bitmap)
297{
298 if (ocfs2_resmap_disabled(resmap))
299 return;
300
301 spin_lock(&resv_lock);
302
303 ocfs2_resmap_clear_all_resv(resmap);
304 resmap->m_bitmap_len = clen;
305 resmap->m_disk_bitmap = disk_bitmap;
306
307 spin_unlock(&resv_lock);
308}
309
/*
 * Counterpart of ocfs2_resmap_init().  Currently has nothing to tear
 * down; it exists to keep the API symmetric.
 */
void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap)
{
}
314
315static void ocfs2_resv_insert(struct ocfs2_reservation_map *resmap,
316 struct ocfs2_alloc_reservation *new)
317{
318 struct rb_root *root = &resmap->m_reservations;
319 struct rb_node *parent = NULL;
320 struct rb_node **p = &root->rb_node;
321 struct ocfs2_alloc_reservation *tmp;
322
323 assert_spin_locked(&resv_lock);
324
325 mlog(0, "Insert reservation start: %u len: %u\n", new->r_start,
326 new->r_len);
327
328 while (*p) {
329 parent = *p;
330
331 tmp = rb_entry(parent, struct ocfs2_alloc_reservation, r_node);
332
333 if (new->r_start < tmp->r_start) {
334 p = &(*p)->rb_left;
335
336 /*
337 * This is a good place to check for
338 * overlapping reservations.
339 */
340 BUG_ON(ocfs2_resv_end(new) >= tmp->r_start);
341 } else if (new->r_start > ocfs2_resv_end(tmp)) {
342 p = &(*p)->rb_right;
343 } else {
344 /* This should never happen! */
345 mlog(ML_ERROR, "Duplicate reservation window!\n");
346 BUG();
347 }
348 }
349
350 rb_link_node(&new->r_node, parent, p);
351 rb_insert_color(&new->r_node, root);
352 new->r_flags |= OCFS2_RESV_FLAG_INUSE;
353
354 ocfs2_resv_mark_lru(resmap, new);
355
356 ocfs2_check_resmap(resmap);
357}
358
359/**
360 * ocfs2_find_resv_lhs() - find the window which contains goal
361 * @resmap: reservation map to search
362 * @goal: which bit to search for
363 *
364 * If a window containing that goal is not found, we return the window
365 * which comes before goal. Returns NULL on empty rbtree or no window
366 * before goal.
367 */
368static struct ocfs2_alloc_reservation *
369ocfs2_find_resv_lhs(struct ocfs2_reservation_map *resmap, unsigned int goal)
370{
371 struct ocfs2_alloc_reservation *resv = NULL;
372 struct ocfs2_alloc_reservation *prev_resv = NULL;
373 struct rb_node *node = resmap->m_reservations.rb_node;
374
375 assert_spin_locked(&resv_lock);
376
377 if (!node)
378 return NULL;
379
380 node = rb_first(&resmap->m_reservations);
381 while (node) {
382 resv = rb_entry(node, struct ocfs2_alloc_reservation, r_node);
383
384 if (resv->r_start <= goal && ocfs2_resv_end(resv) >= goal)
385 break;
386
387 /* Check if we overshot the reservation just before goal? */
388 if (resv->r_start > goal) {
389 resv = prev_resv;
390 break;
391 }
392
393 prev_resv = resv;
394 node = rb_next(node);
395 }
396
397 return resv;
398}
399
400/*
401 * We are given a range within the bitmap, which corresponds to a gap
402 * inside the reservations tree (search_start, search_len). The range
403 * can be anything from the whole bitmap, to a gap between
404 * reservations.
405 *
406 * The start value of *rstart is insignificant.
407 *
408 * This function searches the bitmap range starting at search_start
409 * with length search_len for a set of contiguous free bits. We try
410 * to find up to 'wanted' bits, but can sometimes return less.
411 *
412 * Returns the length of allocation, 0 if no free bits are found.
413 *
414 * *cstart and *clen will also be populated with the result.
415 */
416static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap,
417 unsigned int wanted,
418 unsigned int search_start,
419 unsigned int search_len,
420 unsigned int *rstart,
421 unsigned int *rlen)
422{
423 void *bitmap = resmap->m_disk_bitmap;
424 unsigned int best_start, best_len = 0;
425 int offset, start, found;
426
427 mlog(0, "Find %u bits within range (%u, len %u) resmap len: %u\n",
428 wanted, search_start, search_len, resmap->m_bitmap_len);
429
430 found = best_start = best_len = 0;
431
432 start = search_start;
433 while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len,
434 start)) != -1) {
435 /* Search reached end of the region */
436 if (offset >= (search_start + search_len))
437 break;
438
439 if (offset == start) {
440 /* we found a zero */
441 found++;
442 /* move start to the next bit to test */
443 start++;
444 } else {
445 /* got a zero after some ones */
446 found = 1;
447 start = offset + 1;
448 }
449 if (found > best_len) {
450 best_len = found;
451 best_start = start - found;
452 }
453
454 if (found >= wanted)
455 break;
456 }
457
458 if (best_len == 0)
459 return 0;
460
461 if (best_len >= wanted)
462 best_len = wanted;
463
464 *rlen = best_len;
465 *rstart = best_start;
466
467 mlog(0, "Found start: %u len: %u\n", best_start, best_len);
468
469 return *rlen;
470}
471
472static void __ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
473 struct ocfs2_alloc_reservation *resv,
474 unsigned int goal, unsigned int wanted)
475{
476 struct rb_root *root = &resmap->m_reservations;
477 unsigned int gap_start, gap_end, gap_len;
478 struct ocfs2_alloc_reservation *prev_resv, *next_resv;
479 struct rb_node *prev, *next;
480 unsigned int cstart, clen;
481 unsigned int best_start = 0, best_len = 0;
482
483 /*
484 * Nasty cases to consider:
485 *
486 * - rbtree is empty
487 * - our window should be first in all reservations
488 * - our window should be last in all reservations
489 * - need to make sure we don't go past end of bitmap
490 */
491
492 mlog(0, "resv start: %u resv end: %u goal: %u wanted: %u\n",
493 resv->r_start, ocfs2_resv_end(resv), goal, wanted);
494
495 assert_spin_locked(&resv_lock);
496
497 if (RB_EMPTY_ROOT(root)) {
498 /*
499 * Easiest case - empty tree. We can just take
500 * whatever window of free bits we want.
501 */
502
503 mlog(0, "Empty root\n");
504
505 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
506 resmap->m_bitmap_len - goal,
507 &cstart, &clen);
508
509 /*
510 * This should never happen - the local alloc window
511 * will always have free bits when we're called.
512 */
513 BUG_ON(goal == 0 && clen == 0);
514
515 if (clen == 0)
516 return;
517
518 resv->r_start = cstart;
519 resv->r_len = clen;
520
521 ocfs2_resv_insert(resmap, resv);
522 return;
523 }
524
525 prev_resv = ocfs2_find_resv_lhs(resmap, goal);
526
527 if (prev_resv == NULL) {
528 mlog(0, "Goal on LHS of leftmost window\n");
529
530 /*
531 * A NULL here means that the search code couldn't
532 * find a window that starts before goal.
533 *
534 * However, we can take the first window after goal,
535 * which is also by definition, the leftmost window in
536 * the entire tree. If we can find free bits in the
537 * gap between goal and the LHS window, then the
538 * reservation can safely be placed there.
539 *
540 * Otherwise we fall back to a linear search, checking
541 * the gaps in between windows for a place to
542 * allocate.
543 */
544
545 next = rb_first(root);
546 next_resv = rb_entry(next, struct ocfs2_alloc_reservation,
547 r_node);
548
549 /*
550 * The search should never return such a window. (see
551 * comment above
552 */
553 if (next_resv->r_start <= goal) {
554 mlog(ML_ERROR, "goal: %u next_resv: start %u len %u\n",
555 goal, next_resv->r_start, next_resv->r_len);
556 ocfs2_dump_resv(resmap);
557 BUG();
558 }
559
560 clen = ocfs2_resmap_find_free_bits(resmap, wanted, goal,
561 next_resv->r_start - goal,
562 &cstart, &clen);
563 if (clen) {
564 best_len = clen;
565 best_start = cstart;
566 if (best_len == wanted)
567 goto out_insert;
568 }
569
570 prev_resv = next_resv;
571 next_resv = NULL;
572 }
573
574 prev = &prev_resv->r_node;
575
576 /* Now we do a linear search for a window, starting at 'prev_rsv' */
577 while (1) {
578 next = rb_next(prev);
579 if (next) {
580 mlog(0, "One more resv found in linear search\n");
581 next_resv = rb_entry(next,
582 struct ocfs2_alloc_reservation,
583 r_node);
584
585 gap_start = ocfs2_resv_end(prev_resv) + 1;
586 gap_end = next_resv->r_start - 1;
587 gap_len = gap_end - gap_start + 1;
588 } else {
589 mlog(0, "No next node\n");
590 /*
591 * We're at the rightmost edge of the
592 * tree. See if a reservation between this
593 * window and the end of the bitmap will work.
594 */
595 gap_start = ocfs2_resv_end(prev_resv) + 1;
596 gap_len = resmap->m_bitmap_len - gap_start;
597 gap_end = resmap->m_bitmap_len - 1;
598 }
599
600 /*
601 * No need to check this gap if we have already found
602 * a larger region of free bits.
603 */
604 if (gap_len <= best_len)
605 goto next_resv;
606
607 clen = ocfs2_resmap_find_free_bits(resmap, wanted, gap_start,
608 gap_len, &cstart, &clen);
609 if (clen == wanted) {
610 best_len = clen;
611 best_start = cstart;
612 goto out_insert;
613 } else if (clen > best_len) {
614 best_len = clen;
615 best_start = cstart;
616 }
617
618next_resv:
619 if (!next)
620 break;
621
622 prev = next;
623 prev_resv = rb_entry(prev, struct ocfs2_alloc_reservation,
624 r_node);
625 }
626
627out_insert:
628 if (best_len) {
629 resv->r_start = best_start;
630 resv->r_len = best_len;
631 ocfs2_resv_insert(resmap, resv);
632 }
633}
634
635static void ocfs2_cannibalize_resv(struct ocfs2_reservation_map *resmap,
636 struct ocfs2_alloc_reservation *resv,
637 unsigned int wanted)
638{
639 struct ocfs2_alloc_reservation *lru_resv;
640 int tmpwindow = !!(resv->r_flags & OCFS2_RESV_FLAG_TMP);
641 unsigned int min_bits;
642
643 if (!tmpwindow)
644 min_bits = ocfs2_resv_window_bits(resmap, resv) >> 1;
645 else
646 min_bits = wanted; /* We at know the temp window will use all
647 * of these bits */
648
649 /*
650 * Take the first reservation off the LRU as our 'target'. We
651 * don't try to be smart about it. There might be a case for
652 * searching based on size but I don't have enough data to be
653 * sure. --Mark (3/16/2010)
654 */
655 lru_resv = list_first_entry(&resmap->m_lru,
656 struct ocfs2_alloc_reservation, r_lru);
657
658 mlog(0, "lru resv: start: %u len: %u end: %u\n", lru_resv->r_start,
659 lru_resv->r_len, ocfs2_resv_end(lru_resv));
660
661 /*
662 * Cannibalize (some or all) of the target reservation and
663 * feed it to the current window.
664 */
665 if (lru_resv->r_len <= min_bits) {
666 /*
667 * Discard completely if size is less than or equal to a
668 * reasonable threshold - 50% of window bits for non temporary
669 * windows.
670 */
671 resv->r_start = lru_resv->r_start;
672 resv->r_len = lru_resv->r_len;
673
674 __ocfs2_resv_discard(resmap, lru_resv);
675 } else {
676 unsigned int shrink;
677 if (tmpwindow)
678 shrink = min_bits;
679 else
680 shrink = lru_resv->r_len / 2;
681
682 lru_resv->r_len -= shrink;
683
684 resv->r_start = ocfs2_resv_end(lru_resv) + 1;
685 resv->r_len = shrink;
686 }
687
688 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
689 "r_len: %u r_last_start: %u r_last_len: %u\n",
690 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
691 resv->r_last_start, resv->r_last_len);
692
693 ocfs2_resv_insert(resmap, resv);
694}
695
696static void ocfs2_resv_find_window(struct ocfs2_reservation_map *resmap,
697 struct ocfs2_alloc_reservation *resv,
698 unsigned int wanted)
699{
700 unsigned int goal = 0;
701
702 BUG_ON(!ocfs2_resv_empty(resv));
703
704 /*
705 * Begin by trying to get a window as close to the previous
706 * one as possible. Using the most recent allocation as a
707 * start goal makes sense.
708 */
709 if (resv->r_last_len) {
710 goal = resv->r_last_start + resv->r_last_len;
711 if (goal >= resmap->m_bitmap_len)
712 goal = 0;
713 }
714
715 __ocfs2_resv_find_window(resmap, resv, goal, wanted);
716
717 /* Search from last alloc didn't work, try once more from beginning. */
718 if (ocfs2_resv_empty(resv) && goal != 0)
719 __ocfs2_resv_find_window(resmap, resv, 0, wanted);
720
721 if (ocfs2_resv_empty(resv)) {
722 /*
723 * Still empty? Pull oldest one off the LRU, remove it from
724 * tree, put this one in it's place.
725 */
726 ocfs2_cannibalize_resv(resmap, resv, wanted);
727 }
728
729 BUG_ON(ocfs2_resv_empty(resv));
730}
731
732int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
733 struct ocfs2_alloc_reservation *resv,
734 int *cstart, int *clen)
735{
736 unsigned int wanted = *clen;
737
738 if (resv == NULL || ocfs2_resmap_disabled(resmap))
739 return -ENOSPC;
740
741 spin_lock(&resv_lock);
742
743 /*
744 * We don't want to over-allocate for temporary
745 * windows. Otherwise, we run the risk of fragmenting the
746 * allocation space.
747 */
748 wanted = ocfs2_resv_window_bits(resmap, resv);
749 if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen)
750 wanted = *clen;
751
752 if (ocfs2_resv_empty(resv)) {
753 mlog(0, "empty reservation, find new window\n");
754
755 /*
756 * Try to get a window here. If it works, we must fall
757 * through and test the bitmap . This avoids some
758 * ping-ponging of windows due to non-reserved space
759 * being allocation before we initialize a window for
760 * that inode.
761 */
762 ocfs2_resv_find_window(resmap, resv, wanted);
763 }
764
765 BUG_ON(ocfs2_resv_empty(resv));
766
767 *cstart = resv->r_start;
768 *clen = resv->r_len;
769
770 spin_unlock(&resv_lock);
771 return 0;
772}
773
774static void
775 ocfs2_adjust_resv_from_alloc(struct ocfs2_reservation_map *resmap,
776 struct ocfs2_alloc_reservation *resv,
777 unsigned int start, unsigned int end)
778{
779 unsigned int rhs = 0;
780 unsigned int old_end = ocfs2_resv_end(resv);
781
782 BUG_ON(start != resv->r_start || old_end < end);
783
784 /*
785 * Completely used? We can remove it then.
786 */
787 if (old_end == end) {
788 __ocfs2_resv_discard(resmap, resv);
789 return;
790 }
791
792 rhs = old_end - end;
793
794 /*
795 * This should have been trapped above.
796 */
797 BUG_ON(rhs == 0);
798
799 resv->r_start = end + 1;
800 resv->r_len = old_end - resv->r_start + 1;
801}
802
803void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
804 struct ocfs2_alloc_reservation *resv,
805 u32 cstart, u32 clen)
806{
807 unsigned int cend = cstart + clen - 1;
808
809 if (resmap == NULL || ocfs2_resmap_disabled(resmap))
810 return;
811
812 if (resv == NULL)
813 return;
814
815 BUG_ON(cstart != resv->r_start);
816
817 spin_lock(&resv_lock);
818
819 mlog(0, "claim bits: cstart: %u cend: %u clen: %u r_start: %u "
820 "r_end: %u r_len: %u, r_last_start: %u r_last_len: %u\n",
821 cstart, cend, clen, resv->r_start, ocfs2_resv_end(resv),
822 resv->r_len, resv->r_last_start, resv->r_last_len);
823
824 BUG_ON(cstart < resv->r_start);
825 BUG_ON(cstart > ocfs2_resv_end(resv));
826 BUG_ON(cend > ocfs2_resv_end(resv));
827
828 ocfs2_adjust_resv_from_alloc(resmap, resv, cstart, cend);
829 resv->r_last_start = cstart;
830 resv->r_last_len = clen;
831
832 /*
833 * May have been discarded above from
834 * ocfs2_adjust_resv_from_alloc().
835 */
836 if (!ocfs2_resv_empty(resv))
837 ocfs2_resv_mark_lru(resmap, resv);
838
839 mlog(0, "Reservation now looks like: r_start: %u r_end: %u "
840 "r_len: %u r_last_start: %u r_last_len: %u\n",
841 resv->r_start, ocfs2_resv_end(resv), resv->r_len,
842 resv->r_last_start, resv->r_last_len);
843
844 ocfs2_check_resmap(resmap);
845
846 spin_unlock(&resv_lock);
847}
diff --git a/fs/ocfs2/reservations.h b/fs/ocfs2/reservations.h
new file mode 100644
index 000000000000..1e49cc29d06c
--- /dev/null
+++ b/fs/ocfs2/reservations.h
@@ -0,0 +1,159 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * reservations.h
5 *
6 * Allocation reservations function prototypes and structures.
7 *
8 * Copyright (C) 2010 Novell. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_RESERVATIONS_H
21#define OCFS2_RESERVATIONS_H
22
23#include <linux/rbtree.h>
24
25#define OCFS2_DEFAULT_RESV_LEVEL 2
26#define OCFS2_MAX_RESV_LEVEL 9
27#define OCFS2_MIN_RESV_LEVEL 0
28
/*
 * A single allocation reservation window. r_node links it into the
 * rbtree of a struct ocfs2_reservation_map and r_lru into that map's
 * LRU list.
 */
struct ocfs2_alloc_reservation {
	struct rb_node	r_node;

	unsigned int	r_start;	/* Beginning of current window */
	unsigned int	r_len;		/* Length of the window */

	unsigned int	r_last_len;	/* Length of most recent alloc */
	unsigned int	r_last_start;	/* Start of most recent alloc */
	struct list_head	r_lru;	/* LRU list head */

	unsigned int	r_flags;	/* OCFS2_RESV_FLAG_* bits */
};
41
#define	OCFS2_RESV_FLAG_INUSE	0x01	/* Set when r_node is part of a btree */
#define	OCFS2_RESV_FLAG_TMP	0x02	/* Temporary reservation, will be
					 * destroyed immediately after use */
#define	OCFS2_RESV_FLAG_DIR	0x04	/* Reservation is for an unindexed
					 * directory btree */
47
/*
 * Tracks all reservation windows that have been set up against one
 * disk bitmap.
 */
struct ocfs2_reservation_map {
	struct rb_root		m_reservations;	/* Windows, sorted by start */
	char			*m_disk_bitmap;	/* Bitmap searched for free
						 * bits */

	struct ocfs2_super	*m_osb;

	/* The following are not initialized to meaningful values until a disk
	 * bitmap is provided. */
	u32			m_bitmap_len;	/* Number of valid
						 * bits available */

	struct list_head	m_lru;		/* LRU of reservations
						 * structures. */

};
63
64void ocfs2_resv_init_once(struct ocfs2_alloc_reservation *resv);
65
66#define OCFS2_RESV_TYPES (OCFS2_RESV_FLAG_TMP|OCFS2_RESV_FLAG_DIR)
67void ocfs2_resv_set_type(struct ocfs2_alloc_reservation *resv,
68 unsigned int flags);
69
70int ocfs2_dir_resv_allowed(struct ocfs2_super *osb);
71
72/**
73 * ocfs2_resv_discard() - truncate a reservation
74 * @resmap:
75 * @resv: the reservation to truncate.
76 *
77 * After this function is called, the reservation will be empty, and
78 * unlinked from the rbtree.
79 */
80void ocfs2_resv_discard(struct ocfs2_reservation_map *resmap,
81 struct ocfs2_alloc_reservation *resv);
82
83
/**
 * ocfs2_resmap_init() - Initialize fields of a reservations bitmap
 * @osb: the ocfs2_super this reservation map is being initialized for
 * @resmap: struct ocfs2_reservation_map to initialize
 *
 * Only possible return value other than '0' is -ENOMEM for failure to
 * allocate mirror bitmap.
 */
94int ocfs2_resmap_init(struct ocfs2_super *osb,
95 struct ocfs2_reservation_map *resmap);
96
/**
 * ocfs2_resmap_restart() - "restart" a reservation bitmap
 * @resmap: reservations bitmap
 * @clen: Number of valid bits in the bitmap
 * @disk_bitmap: the disk bitmap this resmap should refer to.
 *
 * Re-initialize the parameters of a reservation bitmap. This is
 * useful for local alloc window slides.
 *
 * This function will discard all existing reservations. A future
 * version will recalculate existing reservations based on the new
 * bitmap.
 */
110void ocfs2_resmap_restart(struct ocfs2_reservation_map *resmap,
111 unsigned int clen, char *disk_bitmap);
112
113/**
114 * ocfs2_resmap_uninit() - uninitialize a reservation bitmap structure
115 * @resmap: the struct ocfs2_reservation_map to uninitialize
116 */
117void ocfs2_resmap_uninit(struct ocfs2_reservation_map *resmap);
118
119/**
120 * ocfs2_resmap_resv_bits() - Return still-valid reservation bits
121 * @resmap: reservations bitmap
122 * @resv: reservation to base search from
123 * @cstart: start of proposed allocation
124 * @clen: length (in clusters) of proposed allocation
125 *
126 * Using the reservation data from resv, this function will compare
127 * resmap and resmap->m_disk_bitmap to determine what part (if any) of
128 * the reservation window is still clear to use. If resv is empty,
129 * this function will try to allocate a window for it.
130 *
131 * On success, zero is returned and the valid allocation area is set in cstart
132 * and clen.
133 *
134 * Returns -ENOSPC if reservations are disabled.
135 */
136int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap,
137 struct ocfs2_alloc_reservation *resv,
138 int *cstart, int *clen);
139
/**
 * ocfs2_resmap_claimed_bits() - Tell the reservation code that bits were used.
 * @resmap: reservations bitmap
 * @resv: optional reservation to recalculate based on new bitmap
 * @cstart: start of allocation in clusters
 * @clen: length of allocation in clusters.
 *
 * Tell the reservation code that bits were used to fulfill allocation in
 * resmap. The bits don't have to have been part of any existing
 * reservation. But we must always call this function when bits are claimed.
 * Internally, the reservations code will use this information to mark the
 * reservations bitmap. If resv is passed, its next allocation window will be
 * calculated. It also expects that 'cstart' is the same as we passed back
 * from ocfs2_resmap_resv_bits().
 */
155void ocfs2_resmap_claimed_bits(struct ocfs2_reservation_map *resmap,
156 struct ocfs2_alloc_reservation *resv,
157 u32 cstart, u32 clen);
158
159#endif /* OCFS2_RESERVATIONS_H */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 3c3d673a4d20..dacd553d8617 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -134,11 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
134 le16_add_cpu(&group->bg_free_bits_count, -1 * backups); 134 le16_add_cpu(&group->bg_free_bits_count, -1 * backups);
135 } 135 }
136 136
137 ret = ocfs2_journal_dirty(handle, group_bh); 137 ocfs2_journal_dirty(handle, group_bh);
138 if (ret < 0) {
139 mlog_errno(ret);
140 goto out_rollback;
141 }
142 138
143 /* update the inode accordingly. */ 139 /* update the inode accordingly. */
144 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh, 140 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
@@ -319,7 +315,8 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
319 BUG_ON(!OCFS2_IS_VALID_DINODE(fe)); 315 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
320 316
321 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != 317 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
322 ocfs2_group_bitmap_size(osb->sb) * 8) { 318 ocfs2_group_bitmap_size(osb->sb, 0,
319 osb->s_feature_incompat) * 8) {
323 mlog(ML_ERROR, "The disk is too old and small. " 320 mlog(ML_ERROR, "The disk is too old and small. "
324 "Force to do offline resize."); 321 "Force to do offline resize.");
325 ret = -EINVAL; 322 ret = -EINVAL;
@@ -500,7 +497,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
500 fe = (struct ocfs2_dinode *)main_bm_bh->b_data; 497 fe = (struct ocfs2_dinode *)main_bm_bh->b_data;
501 498
502 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) != 499 if (le16_to_cpu(fe->id2.i_chain.cl_cpg) !=
503 ocfs2_group_bitmap_size(osb->sb) * 8) { 500 ocfs2_group_bitmap_size(osb->sb, 0,
501 osb->s_feature_incompat) * 8) {
504 mlog(ML_ERROR, "The disk is too old and small." 502 mlog(ML_ERROR, "The disk is too old and small."
505 " Force to do offline resize."); 503 " Force to do offline resize.");
506 ret = -EINVAL; 504 ret = -EINVAL;
@@ -545,12 +543,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
545 543
546 group = (struct ocfs2_group_desc *)group_bh->b_data; 544 group = (struct ocfs2_group_desc *)group_bh->b_data;
547 group->bg_next_group = cr->c_blkno; 545 group->bg_next_group = cr->c_blkno;
548 546 ocfs2_journal_dirty(handle, group_bh);
549 ret = ocfs2_journal_dirty(handle, group_bh);
550 if (ret < 0) {
551 mlog_errno(ret);
552 goto out_commit;
553 }
554 547
555 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode), 548 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
556 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE); 549 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 19ba00f28547..f4c2a9eb8c4d 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -53,6 +53,15 @@
53 53
54#define OCFS2_MAX_TO_STEAL 1024 54#define OCFS2_MAX_TO_STEAL 1024
55 55
/*
 * Describes the bits handed out by a suballocator search or claim. The
 * group search and claim helpers fill one of these in for the caller.
 */
struct ocfs2_suballoc_result {
	u64		sr_bg_blkno;	/* The bg we allocated from.  Set
					   to 0 when a block group is
					   contiguous. */
	u64		sr_blkno;	/* The first allocated block */
	unsigned int	sr_bit_offset;	/* The bit in the bg */
	unsigned int	sr_bits;	/* How many bits we claimed */
};
64
56static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg); 65static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
57static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe); 66static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
58static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl); 67static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
@@ -60,6 +69,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
60 struct inode *alloc_inode, 69 struct inode *alloc_inode,
61 struct buffer_head *bg_bh, 70 struct buffer_head *bg_bh,
62 u64 group_blkno, 71 u64 group_blkno,
72 unsigned int group_clusters,
63 u16 my_chain, 73 u16 my_chain,
64 struct ocfs2_chain_list *cl); 74 struct ocfs2_chain_list *cl);
65static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 75static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
@@ -73,20 +83,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
73 struct buffer_head *group_bh, 83 struct buffer_head *group_bh,
74 u32 bits_wanted, u32 min_bits, 84 u32 bits_wanted, u32 min_bits,
75 u64 max_block, 85 u64 max_block,
76 u16 *bit_off, u16 *bits_found); 86 struct ocfs2_suballoc_result *res);
77static int ocfs2_block_group_search(struct inode *inode, 87static int ocfs2_block_group_search(struct inode *inode,
78 struct buffer_head *group_bh, 88 struct buffer_head *group_bh,
79 u32 bits_wanted, u32 min_bits, 89 u32 bits_wanted, u32 min_bits,
80 u64 max_block, 90 u64 max_block,
81 u16 *bit_off, u16 *bits_found); 91 struct ocfs2_suballoc_result *res);
82static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 92static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
83 struct ocfs2_alloc_context *ac,
84 handle_t *handle, 93 handle_t *handle,
85 u32 bits_wanted, 94 u32 bits_wanted,
86 u32 min_bits, 95 u32 min_bits,
87 u16 *bit_off, 96 struct ocfs2_suballoc_result *res);
88 unsigned int *num_bits,
89 u64 *bg_blkno);
90static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh, 97static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
91 int nr); 98 int nr);
92static inline int ocfs2_block_group_set_bits(handle_t *handle, 99static inline int ocfs2_block_group_set_bits(handle_t *handle,
@@ -130,6 +137,7 @@ void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
130 } 137 }
131 brelse(ac->ac_bh); 138 brelse(ac->ac_bh);
132 ac->ac_bh = NULL; 139 ac->ac_bh = NULL;
140 ac->ac_resv = NULL;
133} 141}
134 142
135void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 143void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -325,14 +333,38 @@ out:
325 return rc; 333 return rc;
326} 334}
327 335
336static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
337 struct ocfs2_group_desc *bg,
338 struct ocfs2_chain_list *cl,
339 u64 p_blkno, u32 clusters)
340{
341 struct ocfs2_extent_list *el = &bg->bg_list;
342 struct ocfs2_extent_rec *rec;
343
344 BUG_ON(!ocfs2_supports_discontig_bg(osb));
345 if (!el->l_next_free_rec)
346 el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb));
347 rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)];
348 rec->e_blkno = cpu_to_le64(p_blkno);
349 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
350 le16_to_cpu(cl->cl_bpc));
351 rec->e_leaf_clusters = cpu_to_le32(clusters);
352 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
353 le16_add_cpu(&bg->bg_free_bits_count,
354 clusters * le16_to_cpu(cl->cl_bpc));
355 le16_add_cpu(&el->l_next_free_rec, 1);
356}
357
328static int ocfs2_block_group_fill(handle_t *handle, 358static int ocfs2_block_group_fill(handle_t *handle,
329 struct inode *alloc_inode, 359 struct inode *alloc_inode,
330 struct buffer_head *bg_bh, 360 struct buffer_head *bg_bh,
331 u64 group_blkno, 361 u64 group_blkno,
362 unsigned int group_clusters,
332 u16 my_chain, 363 u16 my_chain,
333 struct ocfs2_chain_list *cl) 364 struct ocfs2_chain_list *cl)
334{ 365{
335 int status = 0; 366 int status = 0;
367 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
336 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data; 368 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
337 struct super_block * sb = alloc_inode->i_sb; 369 struct super_block * sb = alloc_inode->i_sb;
338 370
@@ -359,19 +391,23 @@ static int ocfs2_block_group_fill(handle_t *handle,
359 memset(bg, 0, sb->s_blocksize); 391 memset(bg, 0, sb->s_blocksize);
360 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE); 392 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
361 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation); 393 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
362 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb)); 394 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1,
363 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl)); 395 osb->s_feature_incompat));
364 bg->bg_chain = cpu_to_le16(my_chain); 396 bg->bg_chain = cpu_to_le16(my_chain);
365 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno; 397 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
366 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno); 398 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
367 bg->bg_blkno = cpu_to_le64(group_blkno); 399 bg->bg_blkno = cpu_to_le64(group_blkno);
400 if (group_clusters == le16_to_cpu(cl->cl_cpg))
401 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
402 else
403 ocfs2_bg_discontig_add_extent(osb, bg, cl, group_blkno,
404 group_clusters);
405
368 /* set the 1st bit in the bitmap to account for the descriptor block */ 406 /* set the 1st bit in the bitmap to account for the descriptor block */
369 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap); 407 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
370 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1); 408 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
371 409
372 status = ocfs2_journal_dirty(handle, bg_bh); 410 ocfs2_journal_dirty(handle, bg_bh);
373 if (status < 0)
374 mlog_errno(status);
375 411
376 /* There is no need to zero out or otherwise initialize the 412 /* There is no need to zero out or otherwise initialize the
377 * other blocks in a group - All valid FS metadata in a block 413 * other blocks in a group - All valid FS metadata in a block
@@ -397,6 +433,238 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
397 return best; 433 return best;
398} 434}
399 435
436static struct buffer_head *
437ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
438 struct inode *alloc_inode,
439 struct ocfs2_alloc_context *ac,
440 struct ocfs2_chain_list *cl)
441{
442 int status;
443 u32 bit_off, num_bits;
444 u64 bg_blkno;
445 struct buffer_head *bg_bh;
446 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
447
448 status = ocfs2_claim_clusters(handle, ac,
449 le16_to_cpu(cl->cl_cpg), &bit_off,
450 &num_bits);
451 if (status < 0) {
452 if (status != -ENOSPC)
453 mlog_errno(status);
454 goto bail;
455 }
456
457 /* setup the group */
458 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
459 mlog(0, "new descriptor, record %u, at block %llu\n",
460 alloc_rec, (unsigned long long)bg_blkno);
461
462 bg_bh = sb_getblk(osb->sb, bg_blkno);
463 if (!bg_bh) {
464 status = -EIO;
465 mlog_errno(status);
466 goto bail;
467 }
468 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
469
470 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
471 bg_blkno, num_bits, alloc_rec, cl);
472 if (status < 0) {
473 brelse(bg_bh);
474 mlog_errno(status);
475 }
476
477bail:
478 return status ? ERR_PTR(status) : bg_bh;
479}
480
481static int ocfs2_block_group_claim_bits(struct ocfs2_super *osb,
482 handle_t *handle,
483 struct ocfs2_alloc_context *ac,
484 unsigned int min_bits,
485 u32 *bit_off, u32 *num_bits)
486{
487 int status = 0;
488
489 while (min_bits) {
490 status = ocfs2_claim_clusters(handle, ac, min_bits,
491 bit_off, num_bits);
492 if (status != -ENOSPC)
493 break;
494
495 min_bits >>= 1;
496 }
497
498 return status;
499}
500
501static int ocfs2_block_group_grow_discontig(handle_t *handle,
502 struct inode *alloc_inode,
503 struct buffer_head *bg_bh,
504 struct ocfs2_alloc_context *ac,
505 struct ocfs2_chain_list *cl,
506 unsigned int min_bits)
507{
508 int status;
509 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
510 struct ocfs2_group_desc *bg =
511 (struct ocfs2_group_desc *)bg_bh->b_data;
512 unsigned int needed = le16_to_cpu(cl->cl_cpg) -
513 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
514 u32 p_cpos, clusters;
515 u64 p_blkno;
516 struct ocfs2_extent_list *el = &bg->bg_list;
517
518 status = ocfs2_journal_access_gd(handle,
519 INODE_CACHE(alloc_inode),
520 bg_bh,
521 OCFS2_JOURNAL_ACCESS_CREATE);
522 if (status < 0) {
523 mlog_errno(status);
524 goto bail;
525 }
526
527 while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) <
528 le16_to_cpu(el->l_count))) {
529 if (min_bits > needed)
530 min_bits = needed;
531 status = ocfs2_block_group_claim_bits(osb, handle, ac,
532 min_bits, &p_cpos,
533 &clusters);
534 if (status < 0) {
535 if (status != -ENOSPC)
536 mlog_errno(status);
537 goto bail;
538 }
539 p_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cpos);
540 ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno,
541 clusters);
542
543 min_bits = clusters;
544 needed = le16_to_cpu(cl->cl_cpg) -
545 le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc);
546 }
547
548 if (needed > 0) {
549 /*
550 * We have used up all the extent rec but can't fill up
551 * the cpg. So bail out.
552 */
553 status = -ENOSPC;
554 goto bail;
555 }
556
557 ocfs2_journal_dirty(handle, bg_bh);
558
559bail:
560 return status;
561}
562
563static void ocfs2_bg_alloc_cleanup(handle_t *handle,
564 struct ocfs2_alloc_context *cluster_ac,
565 struct inode *alloc_inode,
566 struct buffer_head *bg_bh)
567{
568 int i, ret;
569 struct ocfs2_group_desc *bg;
570 struct ocfs2_extent_list *el;
571 struct ocfs2_extent_rec *rec;
572
573 if (!bg_bh)
574 return;
575
576 bg = (struct ocfs2_group_desc *)bg_bh->b_data;
577 el = &bg->bg_list;
578 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
579 rec = &el->l_recs[i];
580 ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode,
581 cluster_ac->ac_bh,
582 le64_to_cpu(rec->e_blkno),
583 le32_to_cpu(rec->e_leaf_clusters));
584 if (ret)
585 mlog_errno(ret);
586 /* Try all the clusters to free */
587 }
588
589 ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), bg_bh);
590 brelse(bg_bh);
591}
592
593static struct buffer_head *
594ocfs2_block_group_alloc_discontig(handle_t *handle,
595 struct inode *alloc_inode,
596 struct ocfs2_alloc_context *ac,
597 struct ocfs2_chain_list *cl)
598{
599 int status;
600 u32 bit_off, num_bits;
601 u64 bg_blkno;
602 unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1;
603 struct buffer_head *bg_bh = NULL;
604 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
605 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
606
607 if (!ocfs2_supports_discontig_bg(osb)) {
608 status = -ENOSPC;
609 goto bail;
610 }
611
612 status = ocfs2_extend_trans(handle,
613 ocfs2_calc_bg_discontig_credits(osb->sb));
614 if (status) {
615 mlog_errno(status);
616 goto bail;
617 }
618
619 /*
620 * We're going to be grabbing from multiple cluster groups.
621 * We don't have enough credits to relink them all, and the
622 * cluster groups will be staying in cache for the duration of
623 * this operation.
624 */
625 ac->ac_allow_chain_relink = 0;
626
627 /* Claim the first region */
628 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
629 &bit_off, &num_bits);
630 if (status < 0) {
631 if (status != -ENOSPC)
632 mlog_errno(status);
633 goto bail;
634 }
635 min_bits = num_bits;
636
637 /* setup the group */
638 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
639 mlog(0, "new descriptor, record %u, at block %llu\n",
640 alloc_rec, (unsigned long long)bg_blkno);
641
642 bg_bh = sb_getblk(osb->sb, bg_blkno);
643 if (!bg_bh) {
644 status = -EIO;
645 mlog_errno(status);
646 goto bail;
647 }
648 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
649
650 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
651 bg_blkno, num_bits, alloc_rec, cl);
652 if (status < 0) {
653 mlog_errno(status);
654 goto bail;
655 }
656
657 status = ocfs2_block_group_grow_discontig(handle, alloc_inode,
658 bg_bh, ac, cl, min_bits);
659 if (status)
660 mlog_errno(status);
661
662bail:
663 if (status)
664 ocfs2_bg_alloc_cleanup(handle, ac, alloc_inode, bg_bh);
665 return status ? ERR_PTR(status) : bg_bh;
666}
667
400/* 668/*
401 * We expect the block group allocator to already be locked. 669 * We expect the block group allocator to already be locked.
402 */ 670 */
@@ -412,9 +680,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
412 struct ocfs2_chain_list *cl; 680 struct ocfs2_chain_list *cl;
413 struct ocfs2_alloc_context *ac = NULL; 681 struct ocfs2_alloc_context *ac = NULL;
414 handle_t *handle = NULL; 682 handle_t *handle = NULL;
415 u32 bit_off, num_bits;
416 u16 alloc_rec; 683 u16 alloc_rec;
417 u64 bg_blkno;
418 struct buffer_head *bg_bh = NULL; 684 struct buffer_head *bg_bh = NULL;
419 struct ocfs2_group_desc *bg; 685 struct ocfs2_group_desc *bg;
420 686
@@ -447,44 +713,20 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
447 (unsigned long long)*last_alloc_group); 713 (unsigned long long)*last_alloc_group);
448 ac->ac_last_group = *last_alloc_group; 714 ac->ac_last_group = *last_alloc_group;
449 } 715 }
450 status = ocfs2_claim_clusters(osb, 716
451 handle, 717 bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
452 ac, 718 ac, cl);
453 le16_to_cpu(cl->cl_cpg), 719 if (IS_ERR(bg_bh) && (PTR_ERR(bg_bh) == -ENOSPC))
454 &bit_off, 720 bg_bh = ocfs2_block_group_alloc_discontig(handle,
455 &num_bits); 721 alloc_inode,
456 if (status < 0) { 722 ac, cl);
723 if (IS_ERR(bg_bh)) {
724 status = PTR_ERR(bg_bh);
725 bg_bh = NULL;
457 if (status != -ENOSPC) 726 if (status != -ENOSPC)
458 mlog_errno(status); 727 mlog_errno(status);
459 goto bail; 728 goto bail;
460 } 729 }
461
462 alloc_rec = ocfs2_find_smallest_chain(cl);
463
464 /* setup the group */
465 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
466 mlog(0, "new descriptor, record %u, at block %llu\n",
467 alloc_rec, (unsigned long long)bg_blkno);
468
469 bg_bh = sb_getblk(osb->sb, bg_blkno);
470 if (!bg_bh) {
471 status = -EIO;
472 mlog_errno(status);
473 goto bail;
474 }
475 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
476
477 status = ocfs2_block_group_fill(handle,
478 alloc_inode,
479 bg_bh,
480 bg_blkno,
481 alloc_rec,
482 cl);
483 if (status < 0) {
484 mlog_errno(status);
485 goto bail;
486 }
487
488 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 730 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
489 731
490 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), 732 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
@@ -494,10 +736,12 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
494 goto bail; 736 goto bail;
495 } 737 }
496 738
739 alloc_rec = le16_to_cpu(bg->bg_chain);
497 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free, 740 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
498 le16_to_cpu(bg->bg_free_bits_count)); 741 le16_to_cpu(bg->bg_free_bits_count));
499 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits)); 742 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total,
500 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno); 743 le16_to_cpu(bg->bg_bits));
744 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg->bg_blkno);
501 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count)) 745 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
502 le16_add_cpu(&cl->cl_next_free_rec, 1); 746 le16_add_cpu(&cl->cl_next_free_rec, 1);
503 747
@@ -506,11 +750,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
506 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits)); 750 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
507 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg)); 751 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
508 752
509 status = ocfs2_journal_dirty(handle, bh); 753 ocfs2_journal_dirty(handle, bh);
510 if (status < 0) {
511 mlog_errno(status);
512 goto bail;
513 }
514 754
515 spin_lock(&OCFS2_I(alloc_inode)->ip_lock); 755 spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
516 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters); 756 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
@@ -760,7 +1000,7 @@ int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
760 status = ocfs2_reserve_suballoc_bits(osb, (*ac), 1000 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
761 EXTENT_ALLOC_SYSTEM_INODE, 1001 EXTENT_ALLOC_SYSTEM_INODE,
762 (u32)osb->slot_num, NULL, 1002 (u32)osb->slot_num, NULL,
763 ALLOC_NEW_GROUP); 1003 ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP);
764 1004
765 1005
766 if (status >= 0) { 1006 if (status >= 0) {
@@ -946,11 +1186,7 @@ static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
946 status = ocfs2_reserve_local_alloc_bits(osb, 1186 status = ocfs2_reserve_local_alloc_bits(osb,
947 bits_wanted, 1187 bits_wanted,
948 *ac); 1188 *ac);
949 if (status == -EFBIG) { 1189 if ((status < 0) && (status != -ENOSPC)) {
950 /* The local alloc window is outside ac_max_block.
951 * use the main bitmap. */
952 status = -ENOSPC;
953 } else if ((status < 0) && (status != -ENOSPC)) {
954 mlog_errno(status); 1190 mlog_errno(status);
955 goto bail; 1191 goto bail;
956 } 1192 }
@@ -1033,8 +1269,7 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1033 struct buffer_head *bg_bh, 1269 struct buffer_head *bg_bh,
1034 unsigned int bits_wanted, 1270 unsigned int bits_wanted,
1035 unsigned int total_bits, 1271 unsigned int total_bits,
1036 u16 *bit_off, 1272 struct ocfs2_suballoc_result *res)
1037 u16 *bits_found)
1038{ 1273{
1039 void *bitmap; 1274 void *bitmap;
1040 u16 best_offset, best_size; 1275 u16 best_offset, best_size;
@@ -1078,14 +1313,9 @@ static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1078 } 1313 }
1079 } 1314 }
1080 1315
1081 /* XXX: I think the first clause is equivalent to the second 1316 if (best_size) {
1082 * - jlbec */ 1317 res->sr_bit_offset = best_offset;
1083 if (found == bits_wanted) { 1318 res->sr_bits = best_size;
1084 *bit_off = start - found;
1085 *bits_found = found;
1086 } else if (best_size) {
1087 *bit_off = best_offset;
1088 *bits_found = best_size;
1089 } else { 1319 } else {
1090 status = -ENOSPC; 1320 status = -ENOSPC;
1091 /* No error log here -- see the comment above 1321 /* No error log here -- see the comment above
@@ -1129,16 +1359,10 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1129 } 1359 }
1130 1360
1131 le16_add_cpu(&bg->bg_free_bits_count, -num_bits); 1361 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
1132
1133 while(num_bits--) 1362 while(num_bits--)
1134 ocfs2_set_bit(bit_off++, bitmap); 1363 ocfs2_set_bit(bit_off++, bitmap);
1135 1364
1136 status = ocfs2_journal_dirty(handle, 1365 ocfs2_journal_dirty(handle, group_bh);
1137 group_bh);
1138 if (status < 0) {
1139 mlog_errno(status);
1140 goto bail;
1141 }
1142 1366
1143bail: 1367bail:
1144 mlog_exit(status); 1368 mlog_exit(status);
@@ -1202,12 +1426,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
1202 } 1426 }
1203 1427
1204 prev_bg->bg_next_group = bg->bg_next_group; 1428 prev_bg->bg_next_group = bg->bg_next_group;
1205 1429 ocfs2_journal_dirty(handle, prev_bg_bh);
1206 status = ocfs2_journal_dirty(handle, prev_bg_bh);
1207 if (status < 0) {
1208 mlog_errno(status);
1209 goto out_rollback;
1210 }
1211 1430
1212 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode), 1431 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1213 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1432 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1217,12 +1436,7 @@ static int ocfs2_relink_block_group(handle_t *handle,
1217 } 1436 }
1218 1437
1219 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno; 1438 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
1220 1439 ocfs2_journal_dirty(handle, bg_bh);
1221 status = ocfs2_journal_dirty(handle, bg_bh);
1222 if (status < 0) {
1223 mlog_errno(status);
1224 goto out_rollback;
1225 }
1226 1440
1227 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode), 1441 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1228 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1442 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
@@ -1232,14 +1446,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1232 } 1446 }
1233 1447
1234 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno; 1448 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
1449 ocfs2_journal_dirty(handle, fe_bh);
1235 1450
1236 status = ocfs2_journal_dirty(handle, fe_bh);
1237 if (status < 0) {
1238 mlog_errno(status);
1239 goto out_rollback;
1240 }
1241
1242 status = 0;
1243out_rollback: 1451out_rollback:
1244 if (status < 0) { 1452 if (status < 0) {
1245 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr); 1453 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
@@ -1263,14 +1471,13 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1263 struct buffer_head *group_bh, 1471 struct buffer_head *group_bh,
1264 u32 bits_wanted, u32 min_bits, 1472 u32 bits_wanted, u32 min_bits,
1265 u64 max_block, 1473 u64 max_block,
1266 u16 *bit_off, u16 *bits_found) 1474 struct ocfs2_suballoc_result *res)
1267{ 1475{
1268 int search = -ENOSPC; 1476 int search = -ENOSPC;
1269 int ret; 1477 int ret;
1270 u64 blkoff; 1478 u64 blkoff;
1271 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; 1479 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1272 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1480 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1273 u16 tmp_off, tmp_found;
1274 unsigned int max_bits, gd_cluster_off; 1481 unsigned int max_bits, gd_cluster_off;
1275 1482
1276 BUG_ON(!ocfs2_is_cluster_bitmap(inode)); 1483 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
@@ -1297,15 +1504,15 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1297 1504
1298 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1505 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1299 group_bh, bits_wanted, 1506 group_bh, bits_wanted,
1300 max_bits, 1507 max_bits, res);
1301 &tmp_off, &tmp_found);
1302 if (ret) 1508 if (ret)
1303 return ret; 1509 return ret;
1304 1510
1305 if (max_block) { 1511 if (max_block) {
1306 blkoff = ocfs2_clusters_to_blocks(inode->i_sb, 1512 blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1307 gd_cluster_off + 1513 gd_cluster_off +
1308 tmp_off + tmp_found); 1514 res->sr_bit_offset +
1515 res->sr_bits);
1309 mlog(0, "Checking %llu against %llu\n", 1516 mlog(0, "Checking %llu against %llu\n",
1310 (unsigned long long)blkoff, 1517 (unsigned long long)blkoff,
1311 (unsigned long long)max_block); 1518 (unsigned long long)max_block);
@@ -1317,16 +1524,14 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1317 * return success, but we still want to return 1524 * return success, but we still want to return
1318 * -ENOSPC unless it found the minimum number 1525 * -ENOSPC unless it found the minimum number
1319 * of bits. */ 1526 * of bits. */
1320 if (min_bits <= tmp_found) { 1527 if (min_bits <= res->sr_bits)
1321 *bit_off = tmp_off;
1322 *bits_found = tmp_found;
1323 search = 0; /* success */ 1528 search = 0; /* success */
1324 } else if (tmp_found) { 1529 else if (res->sr_bits) {
1325 /* 1530 /*
1326 * Don't show bits which we'll be returning 1531 * Don't show bits which we'll be returning
1327 * for allocation to the local alloc bitmap. 1532 * for allocation to the local alloc bitmap.
1328 */ 1533 */
1329 ocfs2_local_alloc_seen_free_bits(osb, tmp_found); 1534 ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
1330 } 1535 }
1331 } 1536 }
1332 1537
@@ -1337,7 +1542,7 @@ static int ocfs2_block_group_search(struct inode *inode,
1337 struct buffer_head *group_bh, 1542 struct buffer_head *group_bh,
1338 u32 bits_wanted, u32 min_bits, 1543 u32 bits_wanted, u32 min_bits,
1339 u64 max_block, 1544 u64 max_block,
1340 u16 *bit_off, u16 *bits_found) 1545 struct ocfs2_suballoc_result *res)
1341{ 1546{
1342 int ret = -ENOSPC; 1547 int ret = -ENOSPC;
1343 u64 blkoff; 1548 u64 blkoff;
@@ -1350,10 +1555,10 @@ static int ocfs2_block_group_search(struct inode *inode,
1350 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1555 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1351 group_bh, bits_wanted, 1556 group_bh, bits_wanted,
1352 le16_to_cpu(bg->bg_bits), 1557 le16_to_cpu(bg->bg_bits),
1353 bit_off, bits_found); 1558 res);
1354 if (!ret && max_block) { 1559 if (!ret && max_block) {
1355 blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + 1560 blkoff = le64_to_cpu(bg->bg_blkno) +
1356 *bits_found; 1561 res->sr_bit_offset + res->sr_bits;
1357 mlog(0, "Checking %llu against %llu\n", 1562 mlog(0, "Checking %llu against %llu\n",
1358 (unsigned long long)blkoff, 1563 (unsigned long long)blkoff,
1359 (unsigned long long)max_block); 1564 (unsigned long long)max_block);
@@ -1386,33 +1591,76 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1386 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); 1591 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1387 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used); 1592 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1388 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits); 1593 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1389 1594 ocfs2_journal_dirty(handle, di_bh);
1390 ret = ocfs2_journal_dirty(handle, di_bh);
1391 if (ret < 0)
1392 mlog_errno(ret);
1393 1595
1394out: 1596out:
1395 return ret; 1597 return ret;
1396} 1598}
1397 1599
1600static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
1601 struct ocfs2_extent_rec *rec,
1602 struct ocfs2_chain_list *cl)
1603{
1604 unsigned int bpc = le16_to_cpu(cl->cl_bpc);
1605 unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
1606 unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
1607
1608 if (res->sr_bit_offset < bitoff)
1609 return 0;
1610 if (res->sr_bit_offset >= (bitoff + bitcount))
1611 return 0;
1612 res->sr_blkno = le64_to_cpu(rec->e_blkno) +
1613 (res->sr_bit_offset - bitoff);
1614 if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount))
1615 res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset;
1616 return 1;
1617}
1618
1619static void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac,
1620 struct ocfs2_group_desc *bg,
1621 struct ocfs2_suballoc_result *res)
1622{
1623 int i;
1624 u64 bg_blkno = res->sr_bg_blkno; /* Save off */
1625 struct ocfs2_extent_rec *rec;
1626 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1627 struct ocfs2_chain_list *cl = &di->id2.i_chain;
1628
1629 if (ocfs2_is_cluster_bitmap(ac->ac_inode)) {
1630 res->sr_blkno = 0;
1631 return;
1632 }
1633
1634 res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset;
1635 res->sr_bg_blkno = 0; /* Clear it for contig block groups */
1636 if (!ocfs2_supports_discontig_bg(OCFS2_SB(ac->ac_inode->i_sb)) ||
1637 !bg->bg_list.l_next_free_rec)
1638 return;
1639
1640 for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) {
1641 rec = &bg->bg_list.l_recs[i];
1642 if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) {
1643 res->sr_bg_blkno = bg_blkno; /* Restore */
1644 break;
1645 }
1646 }
1647}
1648
1398static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, 1649static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1399 handle_t *handle, 1650 handle_t *handle,
1400 u32 bits_wanted, 1651 u32 bits_wanted,
1401 u32 min_bits, 1652 u32 min_bits,
1402 u16 *bit_off, 1653 struct ocfs2_suballoc_result *res,
1403 unsigned int *num_bits,
1404 u64 gd_blkno,
1405 u16 *bits_left) 1654 u16 *bits_left)
1406{ 1655{
1407 int ret; 1656 int ret;
1408 u16 found;
1409 struct buffer_head *group_bh = NULL; 1657 struct buffer_head *group_bh = NULL;
1410 struct ocfs2_group_desc *gd; 1658 struct ocfs2_group_desc *gd;
1411 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data; 1659 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1412 struct inode *alloc_inode = ac->ac_inode; 1660 struct inode *alloc_inode = ac->ac_inode;
1413 1661
1414 ret = ocfs2_read_group_descriptor(alloc_inode, di, gd_blkno, 1662 ret = ocfs2_read_group_descriptor(alloc_inode, di,
1415 &group_bh); 1663 res->sr_bg_blkno, &group_bh);
1416 if (ret < 0) { 1664 if (ret < 0) {
1417 mlog_errno(ret); 1665 mlog_errno(ret);
1418 return ret; 1666 return ret;
@@ -1420,17 +1668,18 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1420 1668
1421 gd = (struct ocfs2_group_desc *) group_bh->b_data; 1669 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1422 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1670 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1423 ac->ac_max_block, bit_off, &found); 1671 ac->ac_max_block, res);
1424 if (ret < 0) { 1672 if (ret < 0) {
1425 if (ret != -ENOSPC) 1673 if (ret != -ENOSPC)
1426 mlog_errno(ret); 1674 mlog_errno(ret);
1427 goto out; 1675 goto out;
1428 } 1676 }
1429 1677
1430 *num_bits = found; 1678 if (!ret)
1679 ocfs2_bg_discontig_fix_result(ac, gd, res);
1431 1680
1432 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh, 1681 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1433 *num_bits, 1682 res->sr_bits,
1434 le16_to_cpu(gd->bg_chain)); 1683 le16_to_cpu(gd->bg_chain));
1435 if (ret < 0) { 1684 if (ret < 0) {
1436 mlog_errno(ret); 1685 mlog_errno(ret);
@@ -1438,7 +1687,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1438 } 1687 }
1439 1688
1440 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, 1689 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1441 *bit_off, *num_bits); 1690 res->sr_bit_offset, res->sr_bits);
1442 if (ret < 0) 1691 if (ret < 0)
1443 mlog_errno(ret); 1692 mlog_errno(ret);
1444 1693
@@ -1454,13 +1703,11 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1454 handle_t *handle, 1703 handle_t *handle,
1455 u32 bits_wanted, 1704 u32 bits_wanted,
1456 u32 min_bits, 1705 u32 min_bits,
1457 u16 *bit_off, 1706 struct ocfs2_suballoc_result *res,
1458 unsigned int *num_bits,
1459 u64 *bg_blkno,
1460 u16 *bits_left) 1707 u16 *bits_left)
1461{ 1708{
1462 int status; 1709 int status;
1463 u16 chain, tmp_bits; 1710 u16 chain;
1464 u32 tmp_used; 1711 u32 tmp_used;
1465 u64 next_group; 1712 u64 next_group;
1466 struct inode *alloc_inode = ac->ac_inode; 1713 struct inode *alloc_inode = ac->ac_inode;
@@ -1489,8 +1736,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1489 * the 1st group with any empty bits. */ 1736 * the 1st group with any empty bits. */
1490 while ((status = ac->ac_group_search(alloc_inode, group_bh, 1737 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1491 bits_wanted, min_bits, 1738 bits_wanted, min_bits,
1492 ac->ac_max_block, bit_off, 1739 ac->ac_max_block,
1493 &tmp_bits)) == -ENOSPC) { 1740 res)) == -ENOSPC) {
1494 if (!bg->bg_next_group) 1741 if (!bg->bg_next_group)
1495 break; 1742 break;
1496 1743
@@ -1515,11 +1762,14 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1515 } 1762 }
1516 1763
1517 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n", 1764 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1518 tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno)); 1765 res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
1519 1766
1520 *num_bits = tmp_bits; 1767 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
1768
1769 BUG_ON(res->sr_bits == 0);
1770 if (!status)
1771 ocfs2_bg_discontig_fix_result(ac, bg, res);
1521 1772
1522 BUG_ON(*num_bits == 0);
1523 1773
1524 /* 1774 /*
1525 * Keep track of previous block descriptor read. When 1775 * Keep track of previous block descriptor read. When
@@ -1536,7 +1786,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1536 */ 1786 */
1537 if (ac->ac_allow_chain_relink && 1787 if (ac->ac_allow_chain_relink &&
1538 (prev_group_bh) && 1788 (prev_group_bh) &&
1539 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) { 1789 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
1540 status = ocfs2_relink_block_group(handle, alloc_inode, 1790 status = ocfs2_relink_block_group(handle, alloc_inode,
1541 ac->ac_bh, group_bh, 1791 ac->ac_bh, group_bh,
1542 prev_group_bh, chain); 1792 prev_group_bh, chain);
@@ -1558,31 +1808,24 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1558 } 1808 }
1559 1809
1560 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 1810 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1561 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used); 1811 fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
1562 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits)); 1812 le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
1563 1813 ocfs2_journal_dirty(handle, ac->ac_bh);
1564 status = ocfs2_journal_dirty(handle,
1565 ac->ac_bh);
1566 if (status < 0) {
1567 mlog_errno(status);
1568 goto bail;
1569 }
1570 1814
1571 status = ocfs2_block_group_set_bits(handle, 1815 status = ocfs2_block_group_set_bits(handle,
1572 alloc_inode, 1816 alloc_inode,
1573 bg, 1817 bg,
1574 group_bh, 1818 group_bh,
1575 *bit_off, 1819 res->sr_bit_offset,
1576 *num_bits); 1820 res->sr_bits);
1577 if (status < 0) { 1821 if (status < 0) {
1578 mlog_errno(status); 1822 mlog_errno(status);
1579 goto bail; 1823 goto bail;
1580 } 1824 }
1581 1825
1582 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits, 1826 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
1583 (unsigned long long)le64_to_cpu(fe->i_blkno)); 1827 (unsigned long long)le64_to_cpu(fe->i_blkno));
1584 1828
1585 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1586 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1829 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1587bail: 1830bail:
1588 brelse(group_bh); 1831 brelse(group_bh);
@@ -1593,19 +1836,15 @@ bail:
1593} 1836}
1594 1837
1595/* will give out up to bits_wanted contiguous bits. */ 1838/* will give out up to bits_wanted contiguous bits. */
1596static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 1839static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
1597 struct ocfs2_alloc_context *ac,
1598 handle_t *handle, 1840 handle_t *handle,
1599 u32 bits_wanted, 1841 u32 bits_wanted,
1600 u32 min_bits, 1842 u32 min_bits,
1601 u16 *bit_off, 1843 struct ocfs2_suballoc_result *res)
1602 unsigned int *num_bits,
1603 u64 *bg_blkno)
1604{ 1844{
1605 int status; 1845 int status;
1606 u16 victim, i; 1846 u16 victim, i;
1607 u16 bits_left = 0; 1847 u16 bits_left = 0;
1608 u64 hint_blkno = ac->ac_last_group;
1609 struct ocfs2_chain_list *cl; 1848 struct ocfs2_chain_list *cl;
1610 struct ocfs2_dinode *fe; 1849 struct ocfs2_dinode *fe;
1611 1850
@@ -1623,7 +1862,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1623 1862
1624 if (le32_to_cpu(fe->id1.bitmap1.i_used) >= 1863 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1625 le32_to_cpu(fe->id1.bitmap1.i_total)) { 1864 le32_to_cpu(fe->id1.bitmap1.i_total)) {
1626 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used " 1865 ocfs2_error(ac->ac_inode->i_sb,
1866 "Chain allocator dinode %llu has %u used "
1627 "bits but only %u total.", 1867 "bits but only %u total.",
1628 (unsigned long long)le64_to_cpu(fe->i_blkno), 1868 (unsigned long long)le64_to_cpu(fe->i_blkno),
1629 le32_to_cpu(fe->id1.bitmap1.i_used), 1869 le32_to_cpu(fe->id1.bitmap1.i_used),
@@ -1632,22 +1872,16 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1632 goto bail; 1872 goto bail;
1633 } 1873 }
1634 1874
1635 if (hint_blkno) { 1875 res->sr_bg_blkno = ac->ac_last_group;
1876 if (res->sr_bg_blkno) {
1636 /* Attempt to short-circuit the usual search mechanism 1877 /* Attempt to short-circuit the usual search mechanism
1637 * by jumping straight to the most recently used 1878 * by jumping straight to the most recently used
1638 * allocation group. This helps us mantain some 1879 * allocation group. This helps us mantain some
1639 * contiguousness across allocations. */ 1880 * contiguousness across allocations. */
1640 status = ocfs2_search_one_group(ac, handle, bits_wanted, 1881 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1641 min_bits, bit_off, num_bits, 1882 min_bits, res, &bits_left);
1642 hint_blkno, &bits_left); 1883 if (!status)
1643 if (!status) {
1644 /* Be careful to update *bg_blkno here as the
1645 * caller is expecting it to be filled in, and
1646 * ocfs2_search_one_group() won't do that for
1647 * us. */
1648 *bg_blkno = hint_blkno;
1649 goto set_hint; 1884 goto set_hint;
1650 }
1651 if (status < 0 && status != -ENOSPC) { 1885 if (status < 0 && status != -ENOSPC) {
1652 mlog_errno(status); 1886 mlog_errno(status);
1653 goto bail; 1887 goto bail;
@@ -1660,8 +1894,8 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1660 ac->ac_chain = victim; 1894 ac->ac_chain = victim;
1661 ac->ac_allow_chain_relink = 1; 1895 ac->ac_allow_chain_relink = 1;
1662 1896
1663 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off, 1897 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1664 num_bits, bg_blkno, &bits_left); 1898 res, &bits_left);
1665 if (!status) 1899 if (!status)
1666 goto set_hint; 1900 goto set_hint;
1667 if (status < 0 && status != -ENOSPC) { 1901 if (status < 0 && status != -ENOSPC) {
@@ -1685,8 +1919,7 @@ static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1685 1919
1686 ac->ac_chain = i; 1920 ac->ac_chain = i;
1687 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, 1921 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1688 bit_off, num_bits, bg_blkno, 1922 res, &bits_left);
1689 &bits_left);
1690 if (!status) 1923 if (!status)
1691 break; 1924 break;
1692 if (status < 0 && status != -ENOSPC) { 1925 if (status < 0 && status != -ENOSPC) {
@@ -1703,7 +1936,7 @@ set_hint:
1703 if (bits_left < min_bits) 1936 if (bits_left < min_bits)
1704 ac->ac_last_group = 0; 1937 ac->ac_last_group = 0;
1705 else 1938 else
1706 ac->ac_last_group = *bg_blkno; 1939 ac->ac_last_group = res->sr_bg_blkno;
1707 } 1940 }
1708 1941
1709bail: 1942bail:
@@ -1711,37 +1944,37 @@ bail:
1711 return status; 1944 return status;
1712} 1945}
1713 1946
1714int ocfs2_claim_metadata(struct ocfs2_super *osb, 1947int ocfs2_claim_metadata(handle_t *handle,
1715 handle_t *handle,
1716 struct ocfs2_alloc_context *ac, 1948 struct ocfs2_alloc_context *ac,
1717 u32 bits_wanted, 1949 u32 bits_wanted,
1950 u64 *suballoc_loc,
1718 u16 *suballoc_bit_start, 1951 u16 *suballoc_bit_start,
1719 unsigned int *num_bits, 1952 unsigned int *num_bits,
1720 u64 *blkno_start) 1953 u64 *blkno_start)
1721{ 1954{
1722 int status; 1955 int status;
1723 u64 bg_blkno; 1956 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
1724 1957
1725 BUG_ON(!ac); 1958 BUG_ON(!ac);
1726 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted)); 1959 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1727 BUG_ON(ac->ac_which != OCFS2_AC_USE_META); 1960 BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
1728 1961
1729 status = ocfs2_claim_suballoc_bits(osb, 1962 status = ocfs2_claim_suballoc_bits(ac,
1730 ac,
1731 handle, 1963 handle,
1732 bits_wanted, 1964 bits_wanted,
1733 1, 1965 1,
1734 suballoc_bit_start, 1966 &res);
1735 num_bits,
1736 &bg_blkno);
1737 if (status < 0) { 1967 if (status < 0) {
1738 mlog_errno(status); 1968 mlog_errno(status);
1739 goto bail; 1969 goto bail;
1740 } 1970 }
1741 atomic_inc(&osb->alloc_stats.bg_allocs); 1971 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
1742 1972
1743 *blkno_start = bg_blkno + (u64) *suballoc_bit_start; 1973 *suballoc_loc = res.sr_bg_blkno;
1744 ac->ac_bits_given += (*num_bits); 1974 *suballoc_bit_start = res.sr_bit_offset;
1975 *blkno_start = res.sr_blkno;
1976 ac->ac_bits_given += res.sr_bits;
1977 *num_bits = res.sr_bits;
1745 status = 0; 1978 status = 0;
1746bail: 1979bail:
1747 mlog_exit(status); 1980 mlog_exit(status);
@@ -1749,10 +1982,10 @@ bail:
1749} 1982}
1750 1983
1751static void ocfs2_init_inode_ac_group(struct inode *dir, 1984static void ocfs2_init_inode_ac_group(struct inode *dir,
1752 struct buffer_head *parent_fe_bh, 1985 struct buffer_head *parent_di_bh,
1753 struct ocfs2_alloc_context *ac) 1986 struct ocfs2_alloc_context *ac)
1754{ 1987{
1755 struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data; 1988 struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_di_bh->b_data;
1756 /* 1989 /*
1757 * Try to allocate inodes from some specific group. 1990 * Try to allocate inodes from some specific group.
1758 * 1991 *
@@ -1766,10 +1999,14 @@ static void ocfs2_init_inode_ac_group(struct inode *dir,
1766 if (OCFS2_I(dir)->ip_last_used_group && 1999 if (OCFS2_I(dir)->ip_last_used_group &&
1767 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot) 2000 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
1768 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group; 2001 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
1769 else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot) 2002 else if (le16_to_cpu(di->i_suballoc_slot) == ac->ac_alloc_slot) {
1770 ac->ac_last_group = ocfs2_which_suballoc_group( 2003 if (di->i_suballoc_loc)
1771 le64_to_cpu(fe->i_blkno), 2004 ac->ac_last_group = le64_to_cpu(di->i_suballoc_loc);
1772 le16_to_cpu(fe->i_suballoc_bit)); 2005 else
2006 ac->ac_last_group = ocfs2_which_suballoc_group(
2007 le64_to_cpu(di->i_blkno),
2008 le16_to_cpu(di->i_suballoc_bit));
2009 }
1773} 2010}
1774 2011
1775static inline void ocfs2_save_inode_ac_group(struct inode *dir, 2012static inline void ocfs2_save_inode_ac_group(struct inode *dir,
@@ -1779,17 +2016,16 @@ static inline void ocfs2_save_inode_ac_group(struct inode *dir,
1779 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot; 2016 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
1780} 2017}
1781 2018
1782int ocfs2_claim_new_inode(struct ocfs2_super *osb, 2019int ocfs2_claim_new_inode(handle_t *handle,
1783 handle_t *handle,
1784 struct inode *dir, 2020 struct inode *dir,
1785 struct buffer_head *parent_fe_bh, 2021 struct buffer_head *parent_fe_bh,
1786 struct ocfs2_alloc_context *ac, 2022 struct ocfs2_alloc_context *ac,
2023 u64 *suballoc_loc,
1787 u16 *suballoc_bit, 2024 u16 *suballoc_bit,
1788 u64 *fe_blkno) 2025 u64 *fe_blkno)
1789{ 2026{
1790 int status; 2027 int status;
1791 unsigned int num_bits; 2028 struct ocfs2_suballoc_result res;
1792 u64 bg_blkno;
1793 2029
1794 mlog_entry_void(); 2030 mlog_entry_void();
1795 2031
@@ -1800,23 +2036,22 @@ int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1800 2036
1801 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac); 2037 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
1802 2038
1803 status = ocfs2_claim_suballoc_bits(osb, 2039 status = ocfs2_claim_suballoc_bits(ac,
1804 ac,
1805 handle, 2040 handle,
1806 1, 2041 1,
1807 1, 2042 1,
1808 suballoc_bit, 2043 &res);
1809 &num_bits,
1810 &bg_blkno);
1811 if (status < 0) { 2044 if (status < 0) {
1812 mlog_errno(status); 2045 mlog_errno(status);
1813 goto bail; 2046 goto bail;
1814 } 2047 }
1815 atomic_inc(&osb->alloc_stats.bg_allocs); 2048 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
1816 2049
1817 BUG_ON(num_bits != 1); 2050 BUG_ON(res.sr_bits != 1);
1818 2051
1819 *fe_blkno = bg_blkno + (u64) (*suballoc_bit); 2052 *suballoc_loc = res.sr_bg_blkno;
2053 *suballoc_bit = res.sr_bit_offset;
2054 *fe_blkno = res.sr_blkno;
1820 ac->ac_bits_given++; 2055 ac->ac_bits_given++;
1821 ocfs2_save_inode_ac_group(dir, ac); 2056 ocfs2_save_inode_ac_group(dir, ac);
1822 status = 0; 2057 status = 0;
@@ -1886,8 +2121,7 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1886 * contig. allocation, set to '1' to indicate we can deal with extents 2121 * contig. allocation, set to '1' to indicate we can deal with extents
1887 * of any size. 2122 * of any size.
1888 */ 2123 */
1889int __ocfs2_claim_clusters(struct ocfs2_super *osb, 2124int __ocfs2_claim_clusters(handle_t *handle,
1890 handle_t *handle,
1891 struct ocfs2_alloc_context *ac, 2125 struct ocfs2_alloc_context *ac,
1892 u32 min_clusters, 2126 u32 min_clusters,
1893 u32 max_clusters, 2127 u32 max_clusters,
@@ -1896,8 +2130,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1896{ 2130{
1897 int status; 2131 int status;
1898 unsigned int bits_wanted = max_clusters; 2132 unsigned int bits_wanted = max_clusters;
1899 u64 bg_blkno = 0; 2133 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
1900 u16 bg_bit_off; 2134 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
1901 2135
1902 mlog_entry_void(); 2136 mlog_entry_void();
1903 2137
@@ -1907,6 +2141,8 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1907 && ac->ac_which != OCFS2_AC_USE_MAIN); 2141 && ac->ac_which != OCFS2_AC_USE_MAIN);
1908 2142
1909 if (ac->ac_which == OCFS2_AC_USE_LOCAL) { 2143 if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
2144 WARN_ON(min_clusters > 1);
2145
1910 status = ocfs2_claim_local_alloc_bits(osb, 2146 status = ocfs2_claim_local_alloc_bits(osb,
1911 handle, 2147 handle,
1912 ac, 2148 ac,
@@ -1929,20 +2165,19 @@ int __ocfs2_claim_clusters(struct ocfs2_super *osb,
1929 if (bits_wanted > (osb->bitmap_cpg - 1)) 2165 if (bits_wanted > (osb->bitmap_cpg - 1))
1930 bits_wanted = osb->bitmap_cpg - 1; 2166 bits_wanted = osb->bitmap_cpg - 1;
1931 2167
1932 status = ocfs2_claim_suballoc_bits(osb, 2168 status = ocfs2_claim_suballoc_bits(ac,
1933 ac,
1934 handle, 2169 handle,
1935 bits_wanted, 2170 bits_wanted,
1936 min_clusters, 2171 min_clusters,
1937 &bg_bit_off, 2172 &res);
1938 num_clusters,
1939 &bg_blkno);
1940 if (!status) { 2173 if (!status) {
2174 BUG_ON(res.sr_blkno); /* cluster alloc can't set */
1941 *cluster_start = 2175 *cluster_start =
1942 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode, 2176 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1943 bg_blkno, 2177 res.sr_bg_blkno,
1944 bg_bit_off); 2178 res.sr_bit_offset);
1945 atomic_inc(&osb->alloc_stats.bitmap_data); 2179 atomic_inc(&osb->alloc_stats.bitmap_data);
2180 *num_clusters = res.sr_bits;
1946 } 2181 }
1947 } 2182 }
1948 if (status < 0) { 2183 if (status < 0) {
@@ -1958,8 +2193,7 @@ bail:
1958 return status; 2193 return status;
1959} 2194}
1960 2195
1961int ocfs2_claim_clusters(struct ocfs2_super *osb, 2196int ocfs2_claim_clusters(handle_t *handle,
1962 handle_t *handle,
1963 struct ocfs2_alloc_context *ac, 2197 struct ocfs2_alloc_context *ac,
1964 u32 min_clusters, 2198 u32 min_clusters,
1965 u32 *cluster_start, 2199 u32 *cluster_start,
@@ -1967,7 +2201,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
1967{ 2201{
1968 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given; 2202 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
1969 2203
1970 return __ocfs2_claim_clusters(osb, handle, ac, min_clusters, 2204 return __ocfs2_claim_clusters(handle, ac, min_clusters,
1971 bits_wanted, cluster_start, num_clusters); 2205 bits_wanted, cluster_start, num_clusters);
1972} 2206}
1973 2207
@@ -2023,9 +2257,7 @@ static int ocfs2_block_group_clear_bits(handle_t *handle,
2023 if (undo_fn) 2257 if (undo_fn)
2024 jbd_unlock_bh_state(group_bh); 2258 jbd_unlock_bh_state(group_bh);
2025 2259
2026 status = ocfs2_journal_dirty(handle, group_bh); 2260 ocfs2_journal_dirty(handle, group_bh);
2027 if (status < 0)
2028 mlog_errno(status);
2029bail: 2261bail:
2030 return status; 2262 return status;
2031} 2263}
@@ -2092,12 +2324,7 @@ static int _ocfs2_free_suballoc_bits(handle_t *handle,
2092 count); 2324 count);
2093 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used); 2325 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
2094 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count); 2326 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
2095 2327 ocfs2_journal_dirty(handle, alloc_bh);
2096 status = ocfs2_journal_dirty(handle, alloc_bh);
2097 if (status < 0) {
2098 mlog_errno(status);
2099 goto bail;
2100 }
2101 2328
2102bail: 2329bail:
2103 brelse(group_bh); 2330 brelse(group_bh);
@@ -2126,6 +2353,8 @@ int ocfs2_free_dinode(handle_t *handle,
2126 u16 bit = le16_to_cpu(di->i_suballoc_bit); 2353 u16 bit = le16_to_cpu(di->i_suballoc_bit);
2127 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2354 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2128 2355
2356 if (di->i_suballoc_loc)
2357 bg_blkno = le64_to_cpu(di->i_suballoc_loc);
2129 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode, 2358 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
2130 inode_alloc_bh, bit, bg_blkno, 1); 2359 inode_alloc_bh, bit, bg_blkno, 1);
2131} 2360}
@@ -2395,7 +2624,7 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2395 struct buffer_head *alloc_bh, u64 blkno, 2624 struct buffer_head *alloc_bh, u64 blkno,
2396 u16 bit, int *res) 2625 u16 bit, int *res)
2397{ 2626{
2398 struct ocfs2_dinode *alloc_fe; 2627 struct ocfs2_dinode *alloc_di;
2399 struct ocfs2_group_desc *group; 2628 struct ocfs2_group_desc *group;
2400 struct buffer_head *group_bh = NULL; 2629 struct buffer_head *group_bh = NULL;
2401 u64 bg_blkno; 2630 u64 bg_blkno;
@@ -2404,17 +2633,20 @@ static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2404 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno, 2633 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
2405 (unsigned int)bit); 2634 (unsigned int)bit);
2406 2635
2407 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data; 2636 alloc_di = (struct ocfs2_dinode *)alloc_bh->b_data;
2408 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) { 2637 if ((bit + 1) > ocfs2_bits_per_group(&alloc_di->id2.i_chain)) {
2409 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n", 2638 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
2410 (unsigned int)bit, 2639 (unsigned int)bit,
2411 ocfs2_bits_per_group(&alloc_fe->id2.i_chain)); 2640 ocfs2_bits_per_group(&alloc_di->id2.i_chain));
2412 status = -EINVAL; 2641 status = -EINVAL;
2413 goto bail; 2642 goto bail;
2414 } 2643 }
2415 2644
2416 bg_blkno = ocfs2_which_suballoc_group(blkno, bit); 2645 if (alloc_di->i_suballoc_loc)
2417 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno, 2646 bg_blkno = le64_to_cpu(alloc_di->i_suballoc_loc);
2647 else
2648 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2649 status = ocfs2_read_group_descriptor(suballoc, alloc_di, bg_blkno,
2418 &group_bh); 2650 &group_bh);
2419 if (status < 0) { 2651 if (status < 0) {
2420 mlog(ML_ERROR, "read group %llu failed %d\n", 2652 mlog(ML_ERROR, "read group %llu failed %d\n",
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index e0f46df357e6..a017dd3ee7d9 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -26,13 +26,14 @@
26#ifndef _CHAINALLOC_H_ 26#ifndef _CHAINALLOC_H_
27#define _CHAINALLOC_H_ 27#define _CHAINALLOC_H_
28 28
29struct ocfs2_suballoc_result;
29typedef int (group_search_t)(struct inode *, 30typedef int (group_search_t)(struct inode *,
30 struct buffer_head *, 31 struct buffer_head *,
31 u32, /* bits_wanted */ 32 u32, /* bits_wanted */
32 u32, /* min_bits */ 33 u32, /* min_bits */
33 u64, /* max_block */ 34 u64, /* max_block */
34 u16 *, /* *bit_off */ 35 struct ocfs2_suballoc_result *);
35 u16 *); /* *bits_found */ 36 /* found bits */
36 37
37struct ocfs2_alloc_context { 38struct ocfs2_alloc_context {
38 struct inode *ac_inode; /* which bitmap are we allocating from? */ 39 struct inode *ac_inode; /* which bitmap are we allocating from? */
@@ -54,6 +55,8 @@ struct ocfs2_alloc_context {
54 u64 ac_last_group; 55 u64 ac_last_group;
55 u64 ac_max_block; /* Highest block number to allocate. 0 is 56 u64 ac_max_block; /* Highest block number to allocate. 0 is
56 the same as ~0 - unlimited */ 57 the same as ~0 - unlimited */
58
59 struct ocfs2_alloc_reservation *ac_resv;
57}; 60};
58 61
59void ocfs2_init_steal_slots(struct ocfs2_super *osb); 62void ocfs2_init_steal_slots(struct ocfs2_super *osb);
@@ -80,22 +83,21 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
80 u32 bits_wanted, 83 u32 bits_wanted,
81 struct ocfs2_alloc_context **ac); 84 struct ocfs2_alloc_context **ac);
82 85
83int ocfs2_claim_metadata(struct ocfs2_super *osb, 86int ocfs2_claim_metadata(handle_t *handle,
84 handle_t *handle,
85 struct ocfs2_alloc_context *ac, 87 struct ocfs2_alloc_context *ac,
86 u32 bits_wanted, 88 u32 bits_wanted,
89 u64 *suballoc_loc,
87 u16 *suballoc_bit_start, 90 u16 *suballoc_bit_start,
88 u32 *num_bits, 91 u32 *num_bits,
89 u64 *blkno_start); 92 u64 *blkno_start);
90int ocfs2_claim_new_inode(struct ocfs2_super *osb, 93int ocfs2_claim_new_inode(handle_t *handle,
91 handle_t *handle,
92 struct inode *dir, 94 struct inode *dir,
93 struct buffer_head *parent_fe_bh, 95 struct buffer_head *parent_fe_bh,
94 struct ocfs2_alloc_context *ac, 96 struct ocfs2_alloc_context *ac,
97 u64 *suballoc_loc,
95 u16 *suballoc_bit, 98 u16 *suballoc_bit,
96 u64 *fe_blkno); 99 u64 *fe_blkno);
97int ocfs2_claim_clusters(struct ocfs2_super *osb, 100int ocfs2_claim_clusters(handle_t *handle,
98 handle_t *handle,
99 struct ocfs2_alloc_context *ac, 101 struct ocfs2_alloc_context *ac,
100 u32 min_clusters, 102 u32 min_clusters,
101 u32 *cluster_start, 103 u32 *cluster_start,
@@ -104,8 +106,7 @@ int ocfs2_claim_clusters(struct ocfs2_super *osb,
104 * Use this variant of ocfs2_claim_clusters to specify a maximum 106 * Use this variant of ocfs2_claim_clusters to specify a maximum
105 * number of clusters smaller than the allocation reserved. 107 * number of clusters smaller than the allocation reserved.
106 */ 108 */
107int __ocfs2_claim_clusters(struct ocfs2_super *osb, 109int __ocfs2_claim_clusters(handle_t *handle,
108 handle_t *handle,
109 struct ocfs2_alloc_context *ac, 110 struct ocfs2_alloc_context *ac,
110 u32 min_clusters, 111 u32 min_clusters,
111 u32 max_clusters, 112 u32 max_clusters,
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index dee03197a494..1c2c39f6f0b6 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -94,7 +94,9 @@ struct mount_options
94 unsigned long mount_opt; 94 unsigned long mount_opt;
95 unsigned int atime_quantum; 95 unsigned int atime_quantum;
96 signed short slot; 96 signed short slot;
97 unsigned int localalloc_opt; 97 int localalloc_opt;
98 unsigned int resv_level;
99 int dir_resv_level;
98 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; 100 char cluster_stack[OCFS2_STACK_LABEL_LEN + 1];
99}; 101};
100 102
@@ -176,6 +178,8 @@ enum {
176 Opt_noacl, 178 Opt_noacl,
177 Opt_usrquota, 179 Opt_usrquota,
178 Opt_grpquota, 180 Opt_grpquota,
181 Opt_resv_level,
182 Opt_dir_resv_level,
179 Opt_err, 183 Opt_err,
180}; 184};
181 185
@@ -202,6 +206,8 @@ static const match_table_t tokens = {
202 {Opt_noacl, "noacl"}, 206 {Opt_noacl, "noacl"},
203 {Opt_usrquota, "usrquota"}, 207 {Opt_usrquota, "usrquota"},
204 {Opt_grpquota, "grpquota"}, 208 {Opt_grpquota, "grpquota"},
209 {Opt_resv_level, "resv_level=%u"},
210 {Opt_dir_resv_level, "dir_resv_level=%u"},
205 {Opt_err, NULL} 211 {Opt_err, NULL}
206}; 212};
207 213
@@ -1028,8 +1034,14 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
1028 osb->s_atime_quantum = parsed_options.atime_quantum; 1034 osb->s_atime_quantum = parsed_options.atime_quantum;
1029 osb->preferred_slot = parsed_options.slot; 1035 osb->preferred_slot = parsed_options.slot;
1030 osb->osb_commit_interval = parsed_options.commit_interval; 1036 osb->osb_commit_interval = parsed_options.commit_interval;
1031 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); 1037
1032 osb->local_alloc_bits = osb->local_alloc_default_bits; 1038 ocfs2_la_set_sizes(osb, parsed_options.localalloc_opt);
1039 osb->osb_resv_level = parsed_options.resv_level;
1040 osb->osb_dir_resv_level = parsed_options.resv_level;
1041 if (parsed_options.dir_resv_level == -1)
1042 osb->osb_dir_resv_level = parsed_options.resv_level;
1043 else
1044 osb->osb_dir_resv_level = parsed_options.dir_resv_level;
1033 1045
1034 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 1046 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
1035 if (status) 1047 if (status)
@@ -1285,11 +1297,13 @@ static int ocfs2_parse_options(struct super_block *sb,
1285 options ? options : "(none)"); 1297 options ? options : "(none)");
1286 1298
1287 mopt->commit_interval = 0; 1299 mopt->commit_interval = 0;
1288 mopt->mount_opt = 0; 1300 mopt->mount_opt = OCFS2_MOUNT_NOINTR;
1289 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM; 1301 mopt->atime_quantum = OCFS2_DEFAULT_ATIME_QUANTUM;
1290 mopt->slot = OCFS2_INVALID_SLOT; 1302 mopt->slot = OCFS2_INVALID_SLOT;
1291 mopt->localalloc_opt = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 1303 mopt->localalloc_opt = -1;
1292 mopt->cluster_stack[0] = '\0'; 1304 mopt->cluster_stack[0] = '\0';
1305 mopt->resv_level = OCFS2_DEFAULT_RESV_LEVEL;
1306 mopt->dir_resv_level = -1;
1293 1307
1294 if (!options) { 1308 if (!options) {
1295 status = 1; 1309 status = 1;
@@ -1380,7 +1394,7 @@ static int ocfs2_parse_options(struct super_block *sb,
1380 status = 0; 1394 status = 0;
1381 goto bail; 1395 goto bail;
1382 } 1396 }
1383 if (option >= 0 && (option <= ocfs2_local_alloc_size(sb) * 8)) 1397 if (option >= 0)
1384 mopt->localalloc_opt = option; 1398 mopt->localalloc_opt = option;
1385 break; 1399 break;
1386 case Opt_localflocks: 1400 case Opt_localflocks:
@@ -1433,6 +1447,28 @@ static int ocfs2_parse_options(struct super_block *sb,
1433 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL; 1447 mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
1434 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL; 1448 mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
1435 break; 1449 break;
1450 case Opt_resv_level:
1451 if (is_remount)
1452 break;
1453 if (match_int(&args[0], &option)) {
1454 status = 0;
1455 goto bail;
1456 }
1457 if (option >= OCFS2_MIN_RESV_LEVEL &&
1458 option < OCFS2_MAX_RESV_LEVEL)
1459 mopt->resv_level = option;
1460 break;
1461 case Opt_dir_resv_level:
1462 if (is_remount)
1463 break;
1464 if (match_int(&args[0], &option)) {
1465 status = 0;
1466 goto bail;
1467 }
1468 if (option >= OCFS2_MIN_RESV_LEVEL &&
1469 option < OCFS2_MAX_RESV_LEVEL)
1470 mopt->dir_resv_level = option;
1471 break;
1436 default: 1472 default:
1437 mlog(ML_ERROR, 1473 mlog(ML_ERROR,
1438 "Unrecognized mount option \"%s\" " 1474 "Unrecognized mount option \"%s\" "
@@ -1487,7 +1523,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1487 (unsigned) (osb->osb_commit_interval / HZ)); 1523 (unsigned) (osb->osb_commit_interval / HZ));
1488 1524
1489 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); 1525 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
1490 if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 1526 if (local_alloc_megs != ocfs2_la_default_mb(osb))
1491 seq_printf(s, ",localalloc=%d", local_alloc_megs); 1527 seq_printf(s, ",localalloc=%d", local_alloc_megs);
1492 1528
1493 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 1529 if (opts & OCFS2_MOUNT_LOCALFLOCKS)
@@ -1514,6 +1550,12 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
1514 else 1550 else
1515 seq_printf(s, ",noacl"); 1551 seq_printf(s, ",noacl");
1516 1552
1553 if (osb->osb_resv_level != OCFS2_DEFAULT_RESV_LEVEL)
1554 seq_printf(s, ",resv_level=%d", osb->osb_resv_level);
1555
1556 if (osb->osb_dir_resv_level != osb->osb_resv_level)
1557 seq_printf(s, ",dir_resv_level=%d", osb->osb_resv_level);
1558
1517 return 0; 1559 return 0;
1518} 1560}
1519 1561
@@ -1688,6 +1730,8 @@ static void ocfs2_inode_init_once(void *data)
1688 oi->ip_blkno = 0ULL; 1730 oi->ip_blkno = 0ULL;
1689 oi->ip_clusters = 0; 1731 oi->ip_clusters = 0;
1690 1732
1733 ocfs2_resv_init_once(&oi->ip_la_data_resv);
1734
1691 ocfs2_lock_res_init_once(&oi->ip_rw_lockres); 1735 ocfs2_lock_res_init_once(&oi->ip_rw_lockres);
1692 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1736 ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
1693 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1737 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
@@ -2042,6 +2086,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
2042 2086
2043 init_waitqueue_head(&osb->osb_mount_event); 2087 init_waitqueue_head(&osb->osb_mount_event);
2044 2088
2089 status = ocfs2_resmap_init(osb, &osb->osb_la_resmap);
2090 if (status) {
2091 mlog_errno(status);
2092 goto bail;
2093 }
2094
2045 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL); 2095 osb->vol_label = kmalloc(OCFS2_MAX_VOL_LABEL_LEN, GFP_KERNEL);
2046 if (!osb->vol_label) { 2096 if (!osb->vol_label) {
2047 mlog(ML_ERROR, "unable to alloc vol label\n"); 2097 mlog(ML_ERROR, "unable to alloc vol label\n");
@@ -2224,9 +2274,11 @@ static int ocfs2_initialize_super(struct super_block *sb,
2224 } 2274 }
2225 2275
2226 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno; 2276 osb->bitmap_blkno = OCFS2_I(inode)->ip_blkno;
2277 osb->osb_clusters_at_boot = OCFS2_I(inode)->ip_clusters;
2227 iput(inode); 2278 iput(inode);
2228 2279
2229 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb) * 8; 2280 osb->bitmap_cpg = ocfs2_group_bitmap_size(sb, 0,
2281 osb->s_feature_incompat) * 8;
2230 2282
2231 status = ocfs2_init_slot_info(osb); 2283 status = ocfs2_init_slot_info(osb);
2232 if (status < 0) { 2284 if (status < 0) {
@@ -2509,5 +2561,25 @@ void __ocfs2_abort(struct super_block* sb,
2509 ocfs2_handle_error(sb); 2561 ocfs2_handle_error(sb);
2510} 2562}
2511 2563
2564/*
2565 * Void signal blockers, because in-kernel sigprocmask() only fails
2566 * when SIG_* is wrong.
2567 */
2568void ocfs2_block_signals(sigset_t *oldset)
2569{
2570 int rc;
2571 sigset_t blocked;
2572
2573 sigfillset(&blocked);
2574 rc = sigprocmask(SIG_BLOCK, &blocked, oldset);
2575 BUG_ON(rc);
2576}
2577
2578void ocfs2_unblock_signals(sigset_t *oldset)
2579{
2580 int rc = sigprocmask(SIG_SETMASK, oldset, NULL);
2581 BUG_ON(rc);
2582}
2583
2512module_init(ocfs2_init); 2584module_init(ocfs2_init);
2513module_exit(ocfs2_exit); 2585module_exit(ocfs2_exit);
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index 783f5270f2a1..40c7de084c10 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -45,4 +45,11 @@ void __ocfs2_abort(struct super_block *sb,
45 45
46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args) 46#define ocfs2_abort(sb, fmt, args...) __ocfs2_abort(sb, __PRETTY_FUNCTION__, fmt, ##args)
47 47
48/*
49 * Void signal blockers, because in-kernel sigprocmask() only fails
50 * when SIG_* is wrong.
51 */
52void ocfs2_block_signals(sigset_t *oldset);
53void ocfs2_unblock_signals(sigset_t *oldset);
54
48#endif /* OCFS2_SUPER_H */ 55#endif /* OCFS2_SUPER_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 3e7773089b96..98ee6c44102d 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -79,6 +79,7 @@ struct ocfs2_xattr_set_ctxt {
79 struct ocfs2_alloc_context *meta_ac; 79 struct ocfs2_alloc_context *meta_ac;
80 struct ocfs2_alloc_context *data_ac; 80 struct ocfs2_alloc_context *data_ac;
81 struct ocfs2_cached_dealloc_ctxt dealloc; 81 struct ocfs2_cached_dealloc_ctxt dealloc;
82 int set_abort;
82}; 83};
83 84
84#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) 85#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
@@ -739,11 +740,7 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
739 goto leave; 740 goto leave;
740 } 741 }
741 742
742 status = ocfs2_journal_dirty(handle, vb->vb_bh); 743 ocfs2_journal_dirty(handle, vb->vb_bh);
743 if (status < 0) {
744 mlog_errno(status);
745 goto leave;
746 }
747 744
748 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters; 745 clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
749 746
@@ -786,12 +783,7 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
786 } 783 }
787 784
788 le32_add_cpu(&vb->vb_xv->xr_clusters, -len); 785 le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
789 786 ocfs2_journal_dirty(handle, vb->vb_bh);
790 ret = ocfs2_journal_dirty(handle, vb->vb_bh);
791 if (ret) {
792 mlog_errno(ret);
793 goto out;
794 }
795 787
796 if (ext_flags & OCFS2_EXT_REFCOUNTED) 788 if (ext_flags & OCFS2_EXT_REFCOUNTED)
797 ret = ocfs2_decrease_refcount(inode, handle, 789 ret = ocfs2_decrease_refcount(inode, handle,
@@ -1374,11 +1366,7 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1374 memset(bh->b_data + cp_len, 0, 1366 memset(bh->b_data + cp_len, 0,
1375 blocksize - cp_len); 1367 blocksize - cp_len);
1376 1368
1377 ret = ocfs2_journal_dirty(handle, bh); 1369 ocfs2_journal_dirty(handle, bh);
1378 if (ret < 0) {
1379 mlog_errno(ret);
1380 goto out;
1381 }
1382 brelse(bh); 1370 brelse(bh);
1383 bh = NULL; 1371 bh = NULL;
1384 1372
@@ -2148,15 +2136,19 @@ alloc_value:
2148 orig_clusters = ocfs2_xa_value_clusters(loc); 2136 orig_clusters = ocfs2_xa_value_clusters(loc);
2149 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt); 2137 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2150 if (rc < 0) { 2138 if (rc < 0) {
2151 /* 2139 ctxt->set_abort = 1;
2152 * If we tried to grow an existing external value,
2153 * ocfs2_xa_cleanuP-value_truncate() is going to
2154 * let it stand. We have to restore its original
2155 * value size.
2156 */
2157 loc->xl_entry->xe_value_size = orig_value_size;
2158 ocfs2_xa_cleanup_value_truncate(loc, "growing", 2140 ocfs2_xa_cleanup_value_truncate(loc, "growing",
2159 orig_clusters); 2141 orig_clusters);
2142 /*
2143 * If we were growing an existing value,
2144 * ocfs2_xa_cleanup_value_truncate() won't remove
2145 * the entry. We need to restore the original value
2146 * size.
2147 */
2148 if (loc->xl_entry) {
2149 BUG_ON(!orig_value_size);
2150 loc->xl_entry->xe_value_size = orig_value_size;
2151 }
2160 mlog_errno(rc); 2152 mlog_errno(rc);
2161 } 2153 }
2162 } 2154 }
@@ -2479,7 +2471,10 @@ static int ocfs2_xattr_free_block(struct inode *inode,
2479 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 2471 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2480 blk = le64_to_cpu(xb->xb_blkno); 2472 blk = le64_to_cpu(xb->xb_blkno);
2481 bit = le16_to_cpu(xb->xb_suballoc_bit); 2473 bit = le16_to_cpu(xb->xb_suballoc_bit);
2482 bg_blkno = ocfs2_which_suballoc_group(blk, bit); 2474 if (xb->xb_suballoc_loc)
2475 bg_blkno = le64_to_cpu(xb->xb_suballoc_loc);
2476 else
2477 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2483 2478
2484 xb_alloc_inode = ocfs2_get_system_file_inode(osb, 2479 xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2485 EXTENT_ALLOC_SYSTEM_INODE, 2480 EXTENT_ALLOC_SYSTEM_INODE,
@@ -2594,9 +2589,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2594 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2589 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2595 spin_unlock(&oi->ip_lock); 2590 spin_unlock(&oi->ip_lock);
2596 2591
2597 ret = ocfs2_journal_dirty(handle, di_bh); 2592 ocfs2_journal_dirty(handle, di_bh);
2598 if (ret < 0)
2599 mlog_errno(ret);
2600out_commit: 2593out_commit:
2601 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2594 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2602out: 2595out:
@@ -2724,9 +2717,7 @@ static int ocfs2_xattr_ibody_init(struct inode *inode,
2724 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); 2717 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2725 spin_unlock(&oi->ip_lock); 2718 spin_unlock(&oi->ip_lock);
2726 2719
2727 ret = ocfs2_journal_dirty(ctxt->handle, di_bh); 2720 ocfs2_journal_dirty(ctxt->handle, di_bh);
2728 if (ret < 0)
2729 mlog_errno(ret);
2730 2721
2731out: 2722out:
2732 return ret; 2723 return ret;
@@ -2846,9 +2837,8 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2846 int ret; 2837 int ret;
2847 u16 suballoc_bit_start; 2838 u16 suballoc_bit_start;
2848 u32 num_got; 2839 u32 num_got;
2849 u64 first_blkno; 2840 u64 suballoc_loc, first_blkno;
2850 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data; 2841 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
2851 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2852 struct buffer_head *new_bh = NULL; 2842 struct buffer_head *new_bh = NULL;
2853 struct ocfs2_xattr_block *xblk; 2843 struct ocfs2_xattr_block *xblk;
2854 2844
@@ -2859,9 +2849,9 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2859 goto end; 2849 goto end;
2860 } 2850 }
2861 2851
2862 ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1, 2852 ret = ocfs2_claim_metadata(ctxt->handle, ctxt->meta_ac, 1,
2863 &suballoc_bit_start, &num_got, 2853 &suballoc_loc, &suballoc_bit_start,
2864 &first_blkno); 2854 &num_got, &first_blkno);
2865 if (ret < 0) { 2855 if (ret < 0) {
2866 mlog_errno(ret); 2856 mlog_errno(ret);
2867 goto end; 2857 goto end;
@@ -2883,8 +2873,10 @@ static int ocfs2_create_xattr_block(struct inode *inode,
2883 memset(xblk, 0, inode->i_sb->s_blocksize); 2873 memset(xblk, 0, inode->i_sb->s_blocksize);
2884 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); 2874 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2885 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot); 2875 xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2876 xblk->xb_suballoc_loc = cpu_to_le64(suballoc_loc);
2886 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); 2877 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2887 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); 2878 xblk->xb_fs_generation =
2879 cpu_to_le32(OCFS2_SB(inode->i_sb)->fs_generation);
2888 xblk->xb_blkno = cpu_to_le64(first_blkno); 2880 xblk->xb_blkno = cpu_to_le64(first_blkno);
2889 if (indexed) { 2881 if (indexed) {
2890 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root; 2882 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
@@ -2956,7 +2948,7 @@ static int ocfs2_xattr_block_set(struct inode *inode,
2956 ret = ocfs2_xa_set(&loc, xi, ctxt); 2948 ret = ocfs2_xa_set(&loc, xi, ctxt);
2957 if (!ret) 2949 if (!ret)
2958 xs->here = loc.xl_entry; 2950 xs->here = loc.xl_entry;
2959 else if (ret != -ENOSPC) 2951 else if ((ret != -ENOSPC) || ctxt->set_abort)
2960 goto end; 2952 goto end;
2961 else { 2953 else {
2962 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt); 2954 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
@@ -3312,14 +3304,13 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3312 goto out; 3304 goto out;
3313 } 3305 }
3314 3306
3315 ret = ocfs2_extend_trans(ctxt->handle, credits + 3307 ret = ocfs2_extend_trans(ctxt->handle, credits);
3316 ctxt->handle->h_buffer_credits);
3317 if (ret) { 3308 if (ret) {
3318 mlog_errno(ret); 3309 mlog_errno(ret);
3319 goto out; 3310 goto out;
3320 } 3311 }
3321 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt); 3312 ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3322 } else if (ret == -ENOSPC) { 3313 } else if ((ret == -ENOSPC) && !ctxt->set_abort) {
3323 if (di->i_xattr_loc && !xbs->xattr_bh) { 3314 if (di->i_xattr_loc && !xbs->xattr_bh) {
3324 ret = ocfs2_xattr_block_find(inode, 3315 ret = ocfs2_xattr_block_find(inode,
3325 xi->xi_name_index, 3316 xi->xi_name_index,
@@ -3343,8 +3334,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3343 goto out; 3334 goto out;
3344 } 3335 }
3345 3336
3346 ret = ocfs2_extend_trans(ctxt->handle, credits + 3337 ret = ocfs2_extend_trans(ctxt->handle, credits);
3347 ctxt->handle->h_buffer_credits);
3348 if (ret) { 3338 if (ret) {
3349 mlog_errno(ret); 3339 mlog_errno(ret);
3350 goto out; 3340 goto out;
@@ -3378,8 +3368,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
3378 goto out; 3368 goto out;
3379 } 3369 }
3380 3370
3381 ret = ocfs2_extend_trans(ctxt->handle, credits + 3371 ret = ocfs2_extend_trans(ctxt->handle, credits);
3382 ctxt->handle->h_buffer_credits);
3383 if (ret) { 3372 if (ret) {
3384 mlog_errno(ret); 3373 mlog_errno(ret);
3385 goto out; 3374 goto out;
@@ -4249,7 +4238,6 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4249 u32 bit_off, len; 4238 u32 bit_off, len;
4250 u64 blkno; 4239 u64 blkno;
4251 handle_t *handle = ctxt->handle; 4240 handle_t *handle = ctxt->handle;
4252 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4253 struct ocfs2_inode_info *oi = OCFS2_I(inode); 4241 struct ocfs2_inode_info *oi = OCFS2_I(inode);
4254 struct buffer_head *xb_bh = xs->xattr_bh; 4242 struct buffer_head *xb_bh = xs->xattr_bh;
4255 struct ocfs2_xattr_block *xb = 4243 struct ocfs2_xattr_block *xb =
@@ -4277,7 +4265,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
4277 goto out; 4265 goto out;
4278 } 4266 }
4279 4267
4280 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 4268 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac,
4281 1, 1, &bit_off, &len); 4269 1, 1, &bit_off, &len);
4282 if (ret) { 4270 if (ret) {
4283 mlog_errno(ret); 4271 mlog_errno(ret);
@@ -4887,8 +4875,7 @@ static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4887 * We need to update the first bucket of the old extent and all 4875 * We need to update the first bucket of the old extent and all
4888 * the buckets going to the new extent. 4876 * the buckets going to the new extent.
4889 */ 4877 */
4890 credits = ((num_buckets + 1) * blks_per_bucket) + 4878 credits = ((num_buckets + 1) * blks_per_bucket);
4891 handle->h_buffer_credits;
4892 ret = ocfs2_extend_trans(handle, credits); 4879 ret = ocfs2_extend_trans(handle, credits);
4893 if (ret) { 4880 if (ret) {
4894 mlog_errno(ret); 4881 mlog_errno(ret);
@@ -4958,7 +4945,7 @@ static int ocfs2_divide_xattr_cluster(struct inode *inode,
4958 u32 *first_hash) 4945 u32 *first_hash)
4959{ 4946{
4960 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 4947 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4961 int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits; 4948 int ret, credits = 2 * blk_per_bucket;
4962 4949
4963 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); 4950 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4964 4951
@@ -5099,7 +5086,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5099 goto leave; 5086 goto leave;
5100 } 5087 }
5101 5088
5102 ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1, 5089 ret = __ocfs2_claim_clusters(handle, ctxt->data_ac, 1,
5103 clusters_to_add, &bit_off, &num_bits); 5090 clusters_to_add, &bit_off, &num_bits);
5104 if (ret < 0) { 5091 if (ret < 0) {
5105 if (ret != -ENOSPC) 5092 if (ret != -ENOSPC)
@@ -5153,9 +5140,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5153 goto leave; 5140 goto leave;
5154 } 5141 }
5155 5142
5156 ret = ocfs2_journal_dirty(handle, root_bh); 5143 ocfs2_journal_dirty(handle, root_bh);
5157 if (ret < 0)
5158 mlog_errno(ret);
5159 5144
5160leave: 5145leave:
5161 return ret; 5146 return ret;
@@ -5200,8 +5185,7 @@ static int ocfs2_extend_xattr_bucket(struct inode *inode,
5200 * existing bucket. Then we add the last existing bucket, the 5185 * existing bucket. Then we add the last existing bucket, the
5201 * new bucket, and the first bucket (3 * blk_per_bucket). 5186 * new bucket, and the first bucket (3 * blk_per_bucket).
5202 */ 5187 */
5203 credits = (end_blk - target_blk) + (3 * blk_per_bucket) + 5188 credits = (end_blk - target_blk) + (3 * blk_per_bucket);
5204 handle->h_buffer_credits;
5205 ret = ocfs2_extend_trans(handle, credits); 5189 ret = ocfs2_extend_trans(handle, credits);
5206 if (ret) { 5190 if (ret) {
5207 mlog_errno(ret); 5191 mlog_errno(ret);
@@ -5477,12 +5461,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
5477 } 5461 }
5478 5462
5479 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); 5463 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5480 5464 ocfs2_journal_dirty(handle, root_bh);
5481 ret = ocfs2_journal_dirty(handle, root_bh);
5482 if (ret) {
5483 mlog_errno(ret);
5484 goto out_commit;
5485 }
5486 5465
5487 ret = ocfs2_truncate_log_append(osb, handle, blkno, len); 5466 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5488 if (ret) 5467 if (ret)
@@ -6935,7 +6914,7 @@ static int ocfs2_reflink_xattr_rec(struct inode *inode,
6935 goto out; 6914 goto out;
6936 } 6915 }
6937 6916
6938 ret = ocfs2_claim_clusters(osb, handle, data_ac, 6917 ret = ocfs2_claim_clusters(handle, data_ac,
6939 len, &p_cluster, &num_clusters); 6918 len, &p_cluster, &num_clusters);
6940 if (ret) { 6919 if (ret) {
6941 mlog_errno(ret); 6920 mlog_errno(ret);