aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/Makefile3
-rw-r--r--fs/ocfs2/alloc.c913
-rw-r--r--fs/ocfs2/alloc.h86
-rw-r--r--fs/ocfs2/aops.c60
-rw-r--r--fs/ocfs2/buffer_head_io.c134
-rw-r--r--fs/ocfs2/buffer_head_io.h23
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/dir.c109
-rw-r--r--fs/ocfs2/dlmglue.c9
-rw-r--r--fs/ocfs2/extent_map.c70
-rw-r--r--fs/ocfs2/extent_map.h4
-rw-r--r--fs/ocfs2/file.c333
-rw-r--r--fs/ocfs2/file.h32
-rw-r--r--fs/ocfs2/inode.c87
-rw-r--r--fs/ocfs2/inode.h6
-rw-r--r--fs/ocfs2/ioctl.c3
-rw-r--r--fs/ocfs2/journal.c89
-rw-r--r--fs/ocfs2/journal.h52
-rw-r--r--fs/ocfs2/localalloc.c384
-rw-r--r--fs/ocfs2/localalloc.h4
-rw-r--r--fs/ocfs2/locks.c15
-rw-r--r--fs/ocfs2/locks.h1
-rw-r--r--fs/ocfs2/namei.c101
-rw-r--r--fs/ocfs2/ocfs2.h56
-rw-r--r--fs/ocfs2/ocfs2_fs.h220
-rw-r--r--fs/ocfs2/ocfs2_jbd_compat.h82
-rw-r--r--fs/ocfs2/resize.c11
-rw-r--r--fs/ocfs2/slot_map.c7
-rw-r--r--fs/ocfs2/stack_user.c33
-rw-r--r--fs/ocfs2/stackglue.c20
-rw-r--r--fs/ocfs2/stackglue.h19
-rw-r--r--fs/ocfs2/suballoc.c248
-rw-r--r--fs/ocfs2/suballoc.h26
-rw-r--r--fs/ocfs2/super.c62
-rw-r--r--fs/ocfs2/symlink.c18
-rw-r--r--fs/ocfs2/uptodate.c38
-rw-r--r--fs/ocfs2/uptodate.h3
-rw-r--r--fs/ocfs2/xattr.c4834
-rw-r--r--fs/ocfs2/xattr.h68
40 files changed, 7331 insertions, 934 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index f6956de56fdb..589dcdfdfe3c 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -34,7 +34,8 @@ ocfs2-objs := \
34 symlink.o \ 34 symlink.o \
35 sysfile.o \ 35 sysfile.o \
36 uptodate.o \ 36 uptodate.o \
37 ver.o 37 ver.o \
38 xattr.o
38 39
39ocfs2_stackglue-objs := stackglue.o 40ocfs2_stackglue-objs := stackglue.o
40ocfs2_stack_o2cb-objs := stack_o2cb.o 41ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 29ff57ec5d1f..0cc2deb9394c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,6 +49,340 @@
49 49
50#include "buffer_head_io.h" 50#include "buffer_head_io.h"
51 51
52
53/*
54 * Operations for a specific extent tree type.
55 *
56 * To implement an on-disk btree (extent tree) type in ocfs2, add
57 * an ocfs2_extent_tree_operations structure and the matching
58 * ocfs2_init_<thingy>_extent_tree() function. That's pretty much it
59 * for the allocation portion of the extent tree.
60 */
61struct ocfs2_extent_tree_operations {
62 /*
63 * last_eb_blk is the block number of the right most leaf extent
64 * block. Most on-disk structures containing an extent tree store
65 * this value for fast access. The ->eo_set_last_eb_blk() and
66 * ->eo_get_last_eb_blk() operations access this value. They are
67 * both required.
68 */
69 void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et,
70 u64 blkno);
71 u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et);
72
73 /*
74 * The on-disk structure usually keeps track of how many total
75 * clusters are stored in this extent tree. This function updates
76 * that value. new_clusters is the delta, and must be
77 * added to the total. Required.
78 */
79 void (*eo_update_clusters)(struct inode *inode,
80 struct ocfs2_extent_tree *et,
81 u32 new_clusters);
82
83 /*
84 * If ->eo_insert_check() exists, it is called before rec is
85 * inserted into the extent tree. It is optional.
86 */
87 int (*eo_insert_check)(struct inode *inode,
88 struct ocfs2_extent_tree *et,
89 struct ocfs2_extent_rec *rec);
90 int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et);
91
92 /*
93 * --------------------------------------------------------------
94 * The remaining are internal to ocfs2_extent_tree and don't have
95 * accessor functions
96 */
97
98 /*
99 * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el.
100 * It is required.
101 */
102 void (*eo_fill_root_el)(struct ocfs2_extent_tree *et);
103
104 /*
105 * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if
106 * it exists. If it does not, et->et_max_leaf_clusters is set
107 * to 0 (unlimited). Optional.
108 */
109 void (*eo_fill_max_leaf_clusters)(struct inode *inode,
110 struct ocfs2_extent_tree *et);
111};
112
113
114/*
115 * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check
116 * in the methods.
117 */
118static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
119static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
120 u64 blkno);
121static void ocfs2_dinode_update_clusters(struct inode *inode,
122 struct ocfs2_extent_tree *et,
123 u32 clusters);
124static int ocfs2_dinode_insert_check(struct inode *inode,
125 struct ocfs2_extent_tree *et,
126 struct ocfs2_extent_rec *rec);
127static int ocfs2_dinode_sanity_check(struct inode *inode,
128 struct ocfs2_extent_tree *et);
129static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
130static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
131 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
132 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
133 .eo_update_clusters = ocfs2_dinode_update_clusters,
134 .eo_insert_check = ocfs2_dinode_insert_check,
135 .eo_sanity_check = ocfs2_dinode_sanity_check,
136 .eo_fill_root_el = ocfs2_dinode_fill_root_el,
137};
138
139static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
140 u64 blkno)
141{
142 struct ocfs2_dinode *di = et->et_object;
143
144 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
145 di->i_last_eb_blk = cpu_to_le64(blkno);
146}
147
148static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
149{
150 struct ocfs2_dinode *di = et->et_object;
151
152 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
153 return le64_to_cpu(di->i_last_eb_blk);
154}
155
156static void ocfs2_dinode_update_clusters(struct inode *inode,
157 struct ocfs2_extent_tree *et,
158 u32 clusters)
159{
160 struct ocfs2_dinode *di = et->et_object;
161
162 le32_add_cpu(&di->i_clusters, clusters);
163 spin_lock(&OCFS2_I(inode)->ip_lock);
164 OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
165 spin_unlock(&OCFS2_I(inode)->ip_lock);
166}
167
168static int ocfs2_dinode_insert_check(struct inode *inode,
169 struct ocfs2_extent_tree *et,
170 struct ocfs2_extent_rec *rec)
171{
172 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
173
174 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
175 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
176 (OCFS2_I(inode)->ip_clusters != rec->e_cpos),
177 "Device %s, asking for sparse allocation: inode %llu, "
178 "cpos %u, clusters %u\n",
179 osb->dev_str,
180 (unsigned long long)OCFS2_I(inode)->ip_blkno,
181 rec->e_cpos,
182 OCFS2_I(inode)->ip_clusters);
183
184 return 0;
185}
186
187static int ocfs2_dinode_sanity_check(struct inode *inode,
188 struct ocfs2_extent_tree *et)
189{
190 int ret = 0;
191 struct ocfs2_dinode *di;
192
193 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
194
195 di = et->et_object;
196 if (!OCFS2_IS_VALID_DINODE(di)) {
197 ret = -EIO;
198 ocfs2_error(inode->i_sb,
199 "Inode %llu has invalid path root",
200 (unsigned long long)OCFS2_I(inode)->ip_blkno);
201 }
202
203 return ret;
204}
205
206static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
207{
208 struct ocfs2_dinode *di = et->et_object;
209
210 et->et_root_el = &di->id2.i_list;
211}
212
213
214static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
215{
216 struct ocfs2_xattr_value_root *xv = et->et_object;
217
218 et->et_root_el = &xv->xr_list;
219}
220
221static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
222 u64 blkno)
223{
224 struct ocfs2_xattr_value_root *xv =
225 (struct ocfs2_xattr_value_root *)et->et_object;
226
227 xv->xr_last_eb_blk = cpu_to_le64(blkno);
228}
229
230static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
231{
232 struct ocfs2_xattr_value_root *xv =
233 (struct ocfs2_xattr_value_root *) et->et_object;
234
235 return le64_to_cpu(xv->xr_last_eb_blk);
236}
237
238static void ocfs2_xattr_value_update_clusters(struct inode *inode,
239 struct ocfs2_extent_tree *et,
240 u32 clusters)
241{
242 struct ocfs2_xattr_value_root *xv =
243 (struct ocfs2_xattr_value_root *)et->et_object;
244
245 le32_add_cpu(&xv->xr_clusters, clusters);
246}
247
248static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
249 .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk,
250 .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk,
251 .eo_update_clusters = ocfs2_xattr_value_update_clusters,
252 .eo_fill_root_el = ocfs2_xattr_value_fill_root_el,
253};
254
255static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
256{
257 struct ocfs2_xattr_block *xb = et->et_object;
258
259 et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
260}
261
262static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode,
263 struct ocfs2_extent_tree *et)
264{
265 et->et_max_leaf_clusters =
266 ocfs2_clusters_for_bytes(inode->i_sb,
267 OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
268}
269
270static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
271 u64 blkno)
272{
273 struct ocfs2_xattr_block *xb = et->et_object;
274 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
275
276 xt->xt_last_eb_blk = cpu_to_le64(blkno);
277}
278
279static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
280{
281 struct ocfs2_xattr_block *xb = et->et_object;
282 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
283
284 return le64_to_cpu(xt->xt_last_eb_blk);
285}
286
287static void ocfs2_xattr_tree_update_clusters(struct inode *inode,
288 struct ocfs2_extent_tree *et,
289 u32 clusters)
290{
291 struct ocfs2_xattr_block *xb = et->et_object;
292
293 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
294}
295
296static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
297 .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk,
298 .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk,
299 .eo_update_clusters = ocfs2_xattr_tree_update_clusters,
300 .eo_fill_root_el = ocfs2_xattr_tree_fill_root_el,
301 .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
302};
303
304static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
305 struct inode *inode,
306 struct buffer_head *bh,
307 void *obj,
308 struct ocfs2_extent_tree_operations *ops)
309{
310 et->et_ops = ops;
311 et->et_root_bh = bh;
312 if (!obj)
313 obj = (void *)bh->b_data;
314 et->et_object = obj;
315
316 et->et_ops->eo_fill_root_el(et);
317 if (!et->et_ops->eo_fill_max_leaf_clusters)
318 et->et_max_leaf_clusters = 0;
319 else
320 et->et_ops->eo_fill_max_leaf_clusters(inode, et);
321}
322
323void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
324 struct inode *inode,
325 struct buffer_head *bh)
326{
327 __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops);
328}
329
330void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
331 struct inode *inode,
332 struct buffer_head *bh)
333{
334 __ocfs2_init_extent_tree(et, inode, bh, NULL,
335 &ocfs2_xattr_tree_et_ops);
336}
337
338void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
339 struct inode *inode,
340 struct buffer_head *bh,
341 struct ocfs2_xattr_value_root *xv)
342{
343 __ocfs2_init_extent_tree(et, inode, bh, xv,
344 &ocfs2_xattr_value_et_ops);
345}
346
347static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
348 u64 new_last_eb_blk)
349{
350 et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk);
351}
352
353static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
354{
355 return et->et_ops->eo_get_last_eb_blk(et);
356}
357
358static inline void ocfs2_et_update_clusters(struct inode *inode,
359 struct ocfs2_extent_tree *et,
360 u32 clusters)
361{
362 et->et_ops->eo_update_clusters(inode, et, clusters);
363}
364
365static inline int ocfs2_et_insert_check(struct inode *inode,
366 struct ocfs2_extent_tree *et,
367 struct ocfs2_extent_rec *rec)
368{
369 int ret = 0;
370
371 if (et->et_ops->eo_insert_check)
372 ret = et->et_ops->eo_insert_check(inode, et, rec);
373 return ret;
374}
375
376static inline int ocfs2_et_sanity_check(struct inode *inode,
377 struct ocfs2_extent_tree *et)
378{
379 int ret = 0;
380
381 if (et->et_ops->eo_sanity_check)
382 ret = et->et_ops->eo_sanity_check(inode, et);
383 return ret;
384}
385
52static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 386static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
53static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 387static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
54 struct ocfs2_extent_block *eb); 388 struct ocfs2_extent_block *eb);
@@ -205,17 +539,6 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
205} 539}
206 540
207/* 541/*
208 * Allocate and initialize a new path based on a disk inode tree.
209 */
210static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh)
211{
212 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
213 struct ocfs2_extent_list *el = &di->id2.i_list;
214
215 return ocfs2_new_path(di_bh, el);
216}
217
218/*
219 * Convenience function to journal all components in a path. 542 * Convenience function to journal all components in a path.
220 */ 543 */
221static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, 544static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
@@ -368,39 +691,35 @@ struct ocfs2_merge_ctxt {
368 */ 691 */
369int ocfs2_num_free_extents(struct ocfs2_super *osb, 692int ocfs2_num_free_extents(struct ocfs2_super *osb,
370 struct inode *inode, 693 struct inode *inode,
371 struct ocfs2_dinode *fe) 694 struct ocfs2_extent_tree *et)
372{ 695{
373 int retval; 696 int retval;
374 struct ocfs2_extent_list *el; 697 struct ocfs2_extent_list *el = NULL;
375 struct ocfs2_extent_block *eb; 698 struct ocfs2_extent_block *eb;
376 struct buffer_head *eb_bh = NULL; 699 struct buffer_head *eb_bh = NULL;
700 u64 last_eb_blk = 0;
377 701
378 mlog_entry_void(); 702 mlog_entry_void();
379 703
380 if (!OCFS2_IS_VALID_DINODE(fe)) { 704 el = et->et_root_el;
381 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 705 last_eb_blk = ocfs2_et_get_last_eb_blk(et);
382 retval = -EIO;
383 goto bail;
384 }
385 706
386 if (fe->i_last_eb_blk) { 707 if (last_eb_blk) {
387 retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 708 retval = ocfs2_read_block(inode, last_eb_blk,
388 &eb_bh, OCFS2_BH_CACHED, inode); 709 &eb_bh);
389 if (retval < 0) { 710 if (retval < 0) {
390 mlog_errno(retval); 711 mlog_errno(retval);
391 goto bail; 712 goto bail;
392 } 713 }
393 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 714 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
394 el = &eb->h_list; 715 el = &eb->h_list;
395 } else 716 }
396 el = &fe->id2.i_list;
397 717
398 BUG_ON(el->l_tree_depth != 0); 718 BUG_ON(el->l_tree_depth != 0);
399 719
400 retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); 720 retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
401bail: 721bail:
402 if (eb_bh) 722 brelse(eb_bh);
403 brelse(eb_bh);
404 723
405 mlog_exit(retval); 724 mlog_exit(retval);
406 return retval; 725 return retval;
@@ -486,8 +805,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
486bail: 805bail:
487 if (status < 0) { 806 if (status < 0) {
488 for(i = 0; i < wanted; i++) { 807 for(i = 0; i < wanted; i++) {
489 if (bhs[i]) 808 brelse(bhs[i]);
490 brelse(bhs[i]);
491 bhs[i] = NULL; 809 bhs[i] = NULL;
492 } 810 }
493 } 811 }
@@ -531,7 +849,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
531static int ocfs2_add_branch(struct ocfs2_super *osb, 849static int ocfs2_add_branch(struct ocfs2_super *osb,
532 handle_t *handle, 850 handle_t *handle,
533 struct inode *inode, 851 struct inode *inode,
534 struct buffer_head *fe_bh, 852 struct ocfs2_extent_tree *et,
535 struct buffer_head *eb_bh, 853 struct buffer_head *eb_bh,
536 struct buffer_head **last_eb_bh, 854 struct buffer_head **last_eb_bh,
537 struct ocfs2_alloc_context *meta_ac) 855 struct ocfs2_alloc_context *meta_ac)
@@ -540,7 +858,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
540 u64 next_blkno, new_last_eb_blk; 858 u64 next_blkno, new_last_eb_blk;
541 struct buffer_head *bh; 859 struct buffer_head *bh;
542 struct buffer_head **new_eb_bhs = NULL; 860 struct buffer_head **new_eb_bhs = NULL;
543 struct ocfs2_dinode *fe;
544 struct ocfs2_extent_block *eb; 861 struct ocfs2_extent_block *eb;
545 struct ocfs2_extent_list *eb_el; 862 struct ocfs2_extent_list *eb_el;
546 struct ocfs2_extent_list *el; 863 struct ocfs2_extent_list *el;
@@ -550,13 +867,11 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
550 867
551 BUG_ON(!last_eb_bh || !*last_eb_bh); 868 BUG_ON(!last_eb_bh || !*last_eb_bh);
552 869
553 fe = (struct ocfs2_dinode *) fe_bh->b_data;
554
555 if (eb_bh) { 870 if (eb_bh) {
556 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 871 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
557 el = &eb->h_list; 872 el = &eb->h_list;
558 } else 873 } else
559 el = &fe->id2.i_list; 874 el = et->et_root_el;
560 875
561 /* we never add a branch to a leaf. */ 876 /* we never add a branch to a leaf. */
562 BUG_ON(!el->l_tree_depth); 877 BUG_ON(!el->l_tree_depth);
@@ -646,7 +961,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
646 mlog_errno(status); 961 mlog_errno(status);
647 goto bail; 962 goto bail;
648 } 963 }
649 status = ocfs2_journal_access(handle, inode, fe_bh, 964 status = ocfs2_journal_access(handle, inode, et->et_root_bh,
650 OCFS2_JOURNAL_ACCESS_WRITE); 965 OCFS2_JOURNAL_ACCESS_WRITE);
651 if (status < 0) { 966 if (status < 0) {
652 mlog_errno(status); 967 mlog_errno(status);
@@ -662,7 +977,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
662 } 977 }
663 978
664 /* Link the new branch into the rest of the tree (el will 979 /* Link the new branch into the rest of the tree (el will
665 * either be on the fe, or the extent block passed in. */ 980 * either be on the root_bh, or the extent block passed in. */
666 i = le16_to_cpu(el->l_next_free_rec); 981 i = le16_to_cpu(el->l_next_free_rec);
667 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 982 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
668 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 983 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
@@ -671,7 +986,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
671 986
672 /* fe needs a new last extent block pointer, as does the 987 /* fe needs a new last extent block pointer, as does the
673 * next_leaf on the previously last-extent-block. */ 988 * next_leaf on the previously last-extent-block. */
674 fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); 989 ocfs2_et_set_last_eb_blk(et, new_last_eb_blk);
675 990
676 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; 991 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
677 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 992 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
@@ -679,7 +994,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
679 status = ocfs2_journal_dirty(handle, *last_eb_bh); 994 status = ocfs2_journal_dirty(handle, *last_eb_bh);
680 if (status < 0) 995 if (status < 0)
681 mlog_errno(status); 996 mlog_errno(status);
682 status = ocfs2_journal_dirty(handle, fe_bh); 997 status = ocfs2_journal_dirty(handle, et->et_root_bh);
683 if (status < 0) 998 if (status < 0)
684 mlog_errno(status); 999 mlog_errno(status);
685 if (eb_bh) { 1000 if (eb_bh) {
@@ -700,8 +1015,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
700bail: 1015bail:
701 if (new_eb_bhs) { 1016 if (new_eb_bhs) {
702 for (i = 0; i < new_blocks; i++) 1017 for (i = 0; i < new_blocks; i++)
703 if (new_eb_bhs[i]) 1018 brelse(new_eb_bhs[i]);
704 brelse(new_eb_bhs[i]);
705 kfree(new_eb_bhs); 1019 kfree(new_eb_bhs);
706 } 1020 }
707 1021
@@ -717,16 +1031,15 @@ bail:
717static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 1031static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
718 handle_t *handle, 1032 handle_t *handle,
719 struct inode *inode, 1033 struct inode *inode,
720 struct buffer_head *fe_bh, 1034 struct ocfs2_extent_tree *et,
721 struct ocfs2_alloc_context *meta_ac, 1035 struct ocfs2_alloc_context *meta_ac,
722 struct buffer_head **ret_new_eb_bh) 1036 struct buffer_head **ret_new_eb_bh)
723{ 1037{
724 int status, i; 1038 int status, i;
725 u32 new_clusters; 1039 u32 new_clusters;
726 struct buffer_head *new_eb_bh = NULL; 1040 struct buffer_head *new_eb_bh = NULL;
727 struct ocfs2_dinode *fe;
728 struct ocfs2_extent_block *eb; 1041 struct ocfs2_extent_block *eb;
729 struct ocfs2_extent_list *fe_el; 1042 struct ocfs2_extent_list *root_el;
730 struct ocfs2_extent_list *eb_el; 1043 struct ocfs2_extent_list *eb_el;
731 1044
732 mlog_entry_void(); 1045 mlog_entry_void();
@@ -746,8 +1059,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
746 } 1059 }
747 1060
748 eb_el = &eb->h_list; 1061 eb_el = &eb->h_list;
749 fe = (struct ocfs2_dinode *) fe_bh->b_data; 1062 root_el = et->et_root_el;
750 fe_el = &fe->id2.i_list;
751 1063
752 status = ocfs2_journal_access(handle, inode, new_eb_bh, 1064 status = ocfs2_journal_access(handle, inode, new_eb_bh,
753 OCFS2_JOURNAL_ACCESS_CREATE); 1065 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -756,11 +1068,11 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
756 goto bail; 1068 goto bail;
757 } 1069 }
758 1070
759 /* copy the fe data into the new extent block */ 1071 /* copy the root extent list data into the new extent block */
760 eb_el->l_tree_depth = fe_el->l_tree_depth; 1072 eb_el->l_tree_depth = root_el->l_tree_depth;
761 eb_el->l_next_free_rec = fe_el->l_next_free_rec; 1073 eb_el->l_next_free_rec = root_el->l_next_free_rec;
762 for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) 1074 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
763 eb_el->l_recs[i] = fe_el->l_recs[i]; 1075 eb_el->l_recs[i] = root_el->l_recs[i];
764 1076
765 status = ocfs2_journal_dirty(handle, new_eb_bh); 1077 status = ocfs2_journal_dirty(handle, new_eb_bh);
766 if (status < 0) { 1078 if (status < 0) {
@@ -768,7 +1080,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
768 goto bail; 1080 goto bail;
769 } 1081 }
770 1082
771 status = ocfs2_journal_access(handle, inode, fe_bh, 1083 status = ocfs2_journal_access(handle, inode, et->et_root_bh,
772 OCFS2_JOURNAL_ACCESS_WRITE); 1084 OCFS2_JOURNAL_ACCESS_WRITE);
773 if (status < 0) { 1085 if (status < 0) {
774 mlog_errno(status); 1086 mlog_errno(status);
@@ -777,21 +1089,21 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
777 1089
778 new_clusters = ocfs2_sum_rightmost_rec(eb_el); 1090 new_clusters = ocfs2_sum_rightmost_rec(eb_el);
779 1091
780 /* update fe now */ 1092 /* update root_bh now */
781 le16_add_cpu(&fe_el->l_tree_depth, 1); 1093 le16_add_cpu(&root_el->l_tree_depth, 1);
782 fe_el->l_recs[0].e_cpos = 0; 1094 root_el->l_recs[0].e_cpos = 0;
783 fe_el->l_recs[0].e_blkno = eb->h_blkno; 1095 root_el->l_recs[0].e_blkno = eb->h_blkno;
784 fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); 1096 root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
785 for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) 1097 for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
786 memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); 1098 memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
787 fe_el->l_next_free_rec = cpu_to_le16(1); 1099 root_el->l_next_free_rec = cpu_to_le16(1);
788 1100
789 /* If this is our 1st tree depth shift, then last_eb_blk 1101 /* If this is our 1st tree depth shift, then last_eb_blk
790 * becomes the allocated extent block */ 1102 * becomes the allocated extent block */
791 if (fe_el->l_tree_depth == cpu_to_le16(1)) 1103 if (root_el->l_tree_depth == cpu_to_le16(1))
792 fe->i_last_eb_blk = eb->h_blkno; 1104 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
793 1105
794 status = ocfs2_journal_dirty(handle, fe_bh); 1106 status = ocfs2_journal_dirty(handle, et->et_root_bh);
795 if (status < 0) { 1107 if (status < 0) {
796 mlog_errno(status); 1108 mlog_errno(status);
797 goto bail; 1109 goto bail;
@@ -801,8 +1113,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
801 new_eb_bh = NULL; 1113 new_eb_bh = NULL;
802 status = 0; 1114 status = 0;
803bail: 1115bail:
804 if (new_eb_bh) 1116 brelse(new_eb_bh);
805 brelse(new_eb_bh);
806 1117
807 mlog_exit(status); 1118 mlog_exit(status);
808 return status; 1119 return status;
@@ -817,22 +1128,21 @@ bail:
817 * 1) a lowest extent block is found, then we pass it back in 1128 * 1) a lowest extent block is found, then we pass it back in
818 * *lowest_eb_bh and return '0' 1129 * *lowest_eb_bh and return '0'
819 * 1130 *
820 * 2) the search fails to find anything, but the dinode has room. We 1131 * 2) the search fails to find anything, but the root_el has room. We
821 * pass NULL back in *lowest_eb_bh, but still return '0' 1132 * pass NULL back in *lowest_eb_bh, but still return '0'
822 * 1133 *
823 * 3) the search fails to find anything AND the dinode is full, in 1134 * 3) the search fails to find anything AND the root_el is full, in
824 * which case we return > 0 1135 * which case we return > 0
825 * 1136 *
826 * return status < 0 indicates an error. 1137 * return status < 0 indicates an error.
827 */ 1138 */
828static int ocfs2_find_branch_target(struct ocfs2_super *osb, 1139static int ocfs2_find_branch_target(struct ocfs2_super *osb,
829 struct inode *inode, 1140 struct inode *inode,
830 struct buffer_head *fe_bh, 1141 struct ocfs2_extent_tree *et,
831 struct buffer_head **target_bh) 1142 struct buffer_head **target_bh)
832{ 1143{
833 int status = 0, i; 1144 int status = 0, i;
834 u64 blkno; 1145 u64 blkno;
835 struct ocfs2_dinode *fe;
836 struct ocfs2_extent_block *eb; 1146 struct ocfs2_extent_block *eb;
837 struct ocfs2_extent_list *el; 1147 struct ocfs2_extent_list *el;
838 struct buffer_head *bh = NULL; 1148 struct buffer_head *bh = NULL;
@@ -842,8 +1152,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
842 1152
843 *target_bh = NULL; 1153 *target_bh = NULL;
844 1154
845 fe = (struct ocfs2_dinode *) fe_bh->b_data; 1155 el = et->et_root_el;
846 el = &fe->id2.i_list;
847 1156
848 while(le16_to_cpu(el->l_tree_depth) > 1) { 1157 while(le16_to_cpu(el->l_tree_depth) > 1) {
849 if (le16_to_cpu(el->l_next_free_rec) == 0) { 1158 if (le16_to_cpu(el->l_next_free_rec) == 0) {
@@ -864,13 +1173,10 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
864 goto bail; 1173 goto bail;
865 } 1174 }
866 1175
867 if (bh) { 1176 brelse(bh);
868 brelse(bh); 1177 bh = NULL;
869 bh = NULL;
870 }
871 1178
872 status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, 1179 status = ocfs2_read_block(inode, blkno, &bh);
873 inode);
874 if (status < 0) { 1180 if (status < 0) {
875 mlog_errno(status); 1181 mlog_errno(status);
876 goto bail; 1182 goto bail;
@@ -886,8 +1192,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
886 1192
887 if (le16_to_cpu(el->l_next_free_rec) < 1193 if (le16_to_cpu(el->l_next_free_rec) <
888 le16_to_cpu(el->l_count)) { 1194 le16_to_cpu(el->l_count)) {
889 if (lowest_bh) 1195 brelse(lowest_bh);
890 brelse(lowest_bh);
891 lowest_bh = bh; 1196 lowest_bh = bh;
892 get_bh(lowest_bh); 1197 get_bh(lowest_bh);
893 } 1198 }
@@ -895,14 +1200,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
895 1200
896 /* If we didn't find one and the fe doesn't have any room, 1201 /* If we didn't find one and the fe doesn't have any room,
897 * then return '1' */ 1202 * then return '1' */
898 if (!lowest_bh 1203 el = et->et_root_el;
899 && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) 1204 if (!lowest_bh && (el->l_next_free_rec == el->l_count))
900 status = 1; 1205 status = 1;
901 1206
902 *target_bh = lowest_bh; 1207 *target_bh = lowest_bh;
903bail: 1208bail:
904 if (bh) 1209 brelse(bh);
905 brelse(bh);
906 1210
907 mlog_exit(status); 1211 mlog_exit(status);
908 return status; 1212 return status;
@@ -919,19 +1223,19 @@ bail:
919 * *last_eb_bh will be updated by ocfs2_add_branch(). 1223 * *last_eb_bh will be updated by ocfs2_add_branch().
920 */ 1224 */
921static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, 1225static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
922 struct buffer_head *di_bh, int *final_depth, 1226 struct ocfs2_extent_tree *et, int *final_depth,
923 struct buffer_head **last_eb_bh, 1227 struct buffer_head **last_eb_bh,
924 struct ocfs2_alloc_context *meta_ac) 1228 struct ocfs2_alloc_context *meta_ac)
925{ 1229{
926 int ret, shift; 1230 int ret, shift;
927 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1231 struct ocfs2_extent_list *el = et->et_root_el;
928 int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); 1232 int depth = le16_to_cpu(el->l_tree_depth);
929 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1233 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
930 struct buffer_head *bh = NULL; 1234 struct buffer_head *bh = NULL;
931 1235
932 BUG_ON(meta_ac == NULL); 1236 BUG_ON(meta_ac == NULL);
933 1237
934 shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); 1238 shift = ocfs2_find_branch_target(osb, inode, et, &bh);
935 if (shift < 0) { 1239 if (shift < 0) {
936 ret = shift; 1240 ret = shift;
937 mlog_errno(ret); 1241 mlog_errno(ret);
@@ -948,7 +1252,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
948 /* ocfs2_shift_tree_depth will return us a buffer with 1252 /* ocfs2_shift_tree_depth will return us a buffer with
949 * the new extent block (so we can pass that to 1253 * the new extent block (so we can pass that to
950 * ocfs2_add_branch). */ 1254 * ocfs2_add_branch). */
951 ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, 1255 ret = ocfs2_shift_tree_depth(osb, handle, inode, et,
952 meta_ac, &bh); 1256 meta_ac, &bh);
953 if (ret < 0) { 1257 if (ret < 0) {
954 mlog_errno(ret); 1258 mlog_errno(ret);
@@ -975,7 +1279,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
975 /* call ocfs2_add_branch to add the final part of the tree with 1279 /* call ocfs2_add_branch to add the final part of the tree with
976 * the new data. */ 1280 * the new data. */
977 mlog(0, "add branch. bh = %p\n", bh); 1281 mlog(0, "add branch. bh = %p\n", bh);
978 ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, 1282 ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh,
979 meta_ac); 1283 meta_ac);
980 if (ret < 0) { 1284 if (ret < 0) {
981 mlog_errno(ret); 1285 mlog_errno(ret);
@@ -1236,8 +1540,7 @@ static int __ocfs2_find_path(struct inode *inode,
1236 1540
1237 brelse(bh); 1541 brelse(bh);
1238 bh = NULL; 1542 bh = NULL;
1239 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, 1543 ret = ocfs2_read_block(inode, blkno, &bh);
1240 &bh, OCFS2_BH_CACHED, inode);
1241 if (ret) { 1544 if (ret) {
1242 mlog_errno(ret); 1545 mlog_errno(ret);
1243 goto out; 1546 goto out;
@@ -2058,11 +2361,11 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2058 struct ocfs2_path *right_path, 2361 struct ocfs2_path *right_path,
2059 int subtree_index, 2362 int subtree_index,
2060 struct ocfs2_cached_dealloc_ctxt *dealloc, 2363 struct ocfs2_cached_dealloc_ctxt *dealloc,
2061 int *deleted) 2364 int *deleted,
2365 struct ocfs2_extent_tree *et)
2062{ 2366{
2063 int ret, i, del_right_subtree = 0, right_has_empty = 0; 2367 int ret, i, del_right_subtree = 0, right_has_empty = 0;
2064 struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); 2368 struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
2065 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2066 struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; 2369 struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
2067 struct ocfs2_extent_block *eb; 2370 struct ocfs2_extent_block *eb;
2068 2371
@@ -2114,7 +2417,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2114 * We have to update i_last_eb_blk during the meta 2417 * We have to update i_last_eb_blk during the meta
2115 * data delete. 2418 * data delete.
2116 */ 2419 */
2117 ret = ocfs2_journal_access(handle, inode, di_bh, 2420 ret = ocfs2_journal_access(handle, inode, et_root_bh,
2118 OCFS2_JOURNAL_ACCESS_WRITE); 2421 OCFS2_JOURNAL_ACCESS_WRITE);
2119 if (ret) { 2422 if (ret) {
2120 mlog_errno(ret); 2423 mlog_errno(ret);
@@ -2189,7 +2492,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2189 ocfs2_update_edge_lengths(inode, handle, left_path); 2492 ocfs2_update_edge_lengths(inode, handle, left_path);
2190 2493
2191 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2494 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2192 di->i_last_eb_blk = eb->h_blkno; 2495 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
2193 2496
2194 /* 2497 /*
2195 * Removal of the extent in the left leaf was skipped 2498 * Removal of the extent in the left leaf was skipped
@@ -2199,7 +2502,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2199 if (right_has_empty) 2502 if (right_has_empty)
2200 ocfs2_remove_empty_extent(left_leaf_el); 2503 ocfs2_remove_empty_extent(left_leaf_el);
2201 2504
2202 ret = ocfs2_journal_dirty(handle, di_bh); 2505 ret = ocfs2_journal_dirty(handle, et_root_bh);
2203 if (ret) 2506 if (ret)
2204 mlog_errno(ret); 2507 mlog_errno(ret);
2205 2508
@@ -2322,7 +2625,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2322 handle_t *handle, int orig_credits, 2625 handle_t *handle, int orig_credits,
2323 struct ocfs2_path *path, 2626 struct ocfs2_path *path,
2324 struct ocfs2_cached_dealloc_ctxt *dealloc, 2627 struct ocfs2_cached_dealloc_ctxt *dealloc,
2325 struct ocfs2_path **empty_extent_path) 2628 struct ocfs2_path **empty_extent_path,
2629 struct ocfs2_extent_tree *et)
2326{ 2630{
2327 int ret, subtree_root, deleted; 2631 int ret, subtree_root, deleted;
2328 u32 right_cpos; 2632 u32 right_cpos;
@@ -2395,7 +2699,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2395 2699
2396 ret = ocfs2_rotate_subtree_left(inode, handle, left_path, 2700 ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
2397 right_path, subtree_root, 2701 right_path, subtree_root,
2398 dealloc, &deleted); 2702 dealloc, &deleted, et);
2399 if (ret == -EAGAIN) { 2703 if (ret == -EAGAIN) {
2400 /* 2704 /*
2401 * The rotation has to temporarily stop due to 2705 * The rotation has to temporarily stop due to
@@ -2438,29 +2742,20 @@ out:
2438} 2742}
2439 2743
2440static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, 2744static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2441 struct ocfs2_path *path, 2745 struct ocfs2_path *path,
2442 struct ocfs2_cached_dealloc_ctxt *dealloc) 2746 struct ocfs2_cached_dealloc_ctxt *dealloc,
2747 struct ocfs2_extent_tree *et)
2443{ 2748{
2444 int ret, subtree_index; 2749 int ret, subtree_index;
2445 u32 cpos; 2750 u32 cpos;
2446 struct ocfs2_path *left_path = NULL; 2751 struct ocfs2_path *left_path = NULL;
2447 struct ocfs2_dinode *di;
2448 struct ocfs2_extent_block *eb; 2752 struct ocfs2_extent_block *eb;
2449 struct ocfs2_extent_list *el; 2753 struct ocfs2_extent_list *el;
2450 2754
2451 /*
2452 * XXX: This code assumes that the root is an inode, which is
2453 * true for now but may change as tree code gets generic.
2454 */
2455 di = (struct ocfs2_dinode *)path_root_bh(path)->b_data;
2456 if (!OCFS2_IS_VALID_DINODE(di)) {
2457 ret = -EIO;
2458 ocfs2_error(inode->i_sb,
2459 "Inode %llu has invalid path root",
2460 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2461 goto out;
2462 }
2463 2755
2756 ret = ocfs2_et_sanity_check(inode, et);
2757 if (ret)
2758 goto out;
2464 /* 2759 /*
2465 * There's two ways we handle this depending on 2760 * There's two ways we handle this depending on
2466 * whether path is the only existing one. 2761 * whether path is the only existing one.
@@ -2517,7 +2812,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2517 ocfs2_update_edge_lengths(inode, handle, left_path); 2812 ocfs2_update_edge_lengths(inode, handle, left_path);
2518 2813
2519 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2814 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2520 di->i_last_eb_blk = eb->h_blkno; 2815 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
2521 } else { 2816 } else {
2522 /* 2817 /*
2523 * 'path' is also the leftmost path which 2818 * 'path' is also the leftmost path which
@@ -2528,12 +2823,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2528 */ 2823 */
2529 ocfs2_unlink_path(inode, handle, dealloc, path, 1); 2824 ocfs2_unlink_path(inode, handle, dealloc, path, 1);
2530 2825
2531 el = &di->id2.i_list; 2826 el = et->et_root_el;
2532 el->l_tree_depth = 0; 2827 el->l_tree_depth = 0;
2533 el->l_next_free_rec = 0; 2828 el->l_next_free_rec = 0;
2534 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2829 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2535 2830
2536 di->i_last_eb_blk = 0; 2831 ocfs2_et_set_last_eb_blk(et, 0);
2537 } 2832 }
2538 2833
2539 ocfs2_journal_dirty(handle, path_root_bh(path)); 2834 ocfs2_journal_dirty(handle, path_root_bh(path));
@@ -2561,7 +2856,8 @@ out:
2561 */ 2856 */
2562static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, 2857static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
2563 struct ocfs2_path *path, 2858 struct ocfs2_path *path,
2564 struct ocfs2_cached_dealloc_ctxt *dealloc) 2859 struct ocfs2_cached_dealloc_ctxt *dealloc,
2860 struct ocfs2_extent_tree *et)
2565{ 2861{
2566 int ret, orig_credits = handle->h_buffer_credits; 2862 int ret, orig_credits = handle->h_buffer_credits;
2567 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; 2863 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
@@ -2575,7 +2871,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
2575 if (path->p_tree_depth == 0) { 2871 if (path->p_tree_depth == 0) {
2576rightmost_no_delete: 2872rightmost_no_delete:
2577 /* 2873 /*
2578 * In-inode extents. This is trivially handled, so do 2874 * Inline extents. This is trivially handled, so do
2579 * it up front. 2875 * it up front.
2580 */ 2876 */
2581 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, 2877 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
@@ -2629,7 +2925,7 @@ rightmost_no_delete:
2629 */ 2925 */
2630 2926
2631 ret = ocfs2_remove_rightmost_path(inode, handle, path, 2927 ret = ocfs2_remove_rightmost_path(inode, handle, path,
2632 dealloc); 2928 dealloc, et);
2633 if (ret) 2929 if (ret)
2634 mlog_errno(ret); 2930 mlog_errno(ret);
2635 goto out; 2931 goto out;
@@ -2641,7 +2937,7 @@ rightmost_no_delete:
2641 */ 2937 */
2642try_rotate: 2938try_rotate:
2643 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, 2939 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
2644 dealloc, &restart_path); 2940 dealloc, &restart_path, et);
2645 if (ret && ret != -EAGAIN) { 2941 if (ret && ret != -EAGAIN) {
2646 mlog_errno(ret); 2942 mlog_errno(ret);
2647 goto out; 2943 goto out;
@@ -2653,7 +2949,7 @@ try_rotate:
2653 2949
2654 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, 2950 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
2655 tmp_path, dealloc, 2951 tmp_path, dealloc,
2656 &restart_path); 2952 &restart_path, et);
2657 if (ret && ret != -EAGAIN) { 2953 if (ret && ret != -EAGAIN) {
2658 mlog_errno(ret); 2954 mlog_errno(ret);
2659 goto out; 2955 goto out;
@@ -2939,6 +3235,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
2939 handle_t *handle, 3235 handle_t *handle,
2940 struct ocfs2_extent_rec *split_rec, 3236 struct ocfs2_extent_rec *split_rec,
2941 struct ocfs2_cached_dealloc_ctxt *dealloc, 3237 struct ocfs2_cached_dealloc_ctxt *dealloc,
3238 struct ocfs2_extent_tree *et,
2942 int index) 3239 int index)
2943{ 3240{
2944 int ret, i, subtree_index = 0, has_empty_extent = 0; 3241 int ret, i, subtree_index = 0, has_empty_extent = 0;
@@ -3059,7 +3356,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3059 le16_to_cpu(el->l_next_free_rec) == 1) { 3356 le16_to_cpu(el->l_next_free_rec) == 1) {
3060 3357
3061 ret = ocfs2_remove_rightmost_path(inode, handle, 3358 ret = ocfs2_remove_rightmost_path(inode, handle,
3062 right_path, dealloc); 3359 right_path,
3360 dealloc, et);
3063 if (ret) { 3361 if (ret) {
3064 mlog_errno(ret); 3362 mlog_errno(ret);
3065 goto out; 3363 goto out;
@@ -3086,7 +3384,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3086 int split_index, 3384 int split_index,
3087 struct ocfs2_extent_rec *split_rec, 3385 struct ocfs2_extent_rec *split_rec,
3088 struct ocfs2_cached_dealloc_ctxt *dealloc, 3386 struct ocfs2_cached_dealloc_ctxt *dealloc,
3089 struct ocfs2_merge_ctxt *ctxt) 3387 struct ocfs2_merge_ctxt *ctxt,
3388 struct ocfs2_extent_tree *et)
3090 3389
3091{ 3390{
3092 int ret = 0; 3391 int ret = 0;
@@ -3104,7 +3403,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3104 * illegal. 3403 * illegal.
3105 */ 3404 */
3106 ret = ocfs2_rotate_tree_left(inode, handle, path, 3405 ret = ocfs2_rotate_tree_left(inode, handle, path,
3107 dealloc); 3406 dealloc, et);
3108 if (ret) { 3407 if (ret) {
3109 mlog_errno(ret); 3408 mlog_errno(ret);
3110 goto out; 3409 goto out;
@@ -3147,7 +3446,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3147 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); 3446 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
3148 3447
3149 /* The merge left us with an empty extent, remove it. */ 3448 /* The merge left us with an empty extent, remove it. */
3150 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); 3449 ret = ocfs2_rotate_tree_left(inode, handle, path,
3450 dealloc, et);
3151 if (ret) { 3451 if (ret) {
3152 mlog_errno(ret); 3452 mlog_errno(ret);
3153 goto out; 3453 goto out;
@@ -3161,7 +3461,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3161 */ 3461 */
3162 ret = ocfs2_merge_rec_left(inode, path, 3462 ret = ocfs2_merge_rec_left(inode, path,
3163 handle, rec, 3463 handle, rec,
3164 dealloc, 3464 dealloc, et,
3165 split_index); 3465 split_index);
3166 3466
3167 if (ret) { 3467 if (ret) {
@@ -3170,7 +3470,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3170 } 3470 }
3171 3471
3172 ret = ocfs2_rotate_tree_left(inode, handle, path, 3472 ret = ocfs2_rotate_tree_left(inode, handle, path,
3173 dealloc); 3473 dealloc, et);
3174 /* 3474 /*
3175 * Error from this last rotate is not critical, so 3475 * Error from this last rotate is not critical, so
3176 * print but don't bubble it up. 3476 * print but don't bubble it up.
@@ -3190,7 +3490,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3190 ret = ocfs2_merge_rec_left(inode, 3490 ret = ocfs2_merge_rec_left(inode,
3191 path, 3491 path,
3192 handle, split_rec, 3492 handle, split_rec,
3193 dealloc, 3493 dealloc, et,
3194 split_index); 3494 split_index);
3195 if (ret) { 3495 if (ret) {
3196 mlog_errno(ret); 3496 mlog_errno(ret);
@@ -3213,7 +3513,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3213 * our leaf. Try to rotate it away. 3513 * our leaf. Try to rotate it away.
3214 */ 3514 */
3215 ret = ocfs2_rotate_tree_left(inode, handle, path, 3515 ret = ocfs2_rotate_tree_left(inode, handle, path,
3216 dealloc); 3516 dealloc, et);
3217 if (ret) 3517 if (ret)
3218 mlog_errno(ret); 3518 mlog_errno(ret);
3219 ret = 0; 3519 ret = 0;
@@ -3347,16 +3647,6 @@ rotate:
3347 ocfs2_rotate_leaf(el, insert_rec); 3647 ocfs2_rotate_leaf(el, insert_rec);
3348} 3648}
3349 3649
3350static inline void ocfs2_update_dinode_clusters(struct inode *inode,
3351 struct ocfs2_dinode *di,
3352 u32 clusters)
3353{
3354 le32_add_cpu(&di->i_clusters, clusters);
3355 spin_lock(&OCFS2_I(inode)->ip_lock);
3356 OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
3357 spin_unlock(&OCFS2_I(inode)->ip_lock);
3358}
3359
3360static void ocfs2_adjust_rightmost_records(struct inode *inode, 3650static void ocfs2_adjust_rightmost_records(struct inode *inode,
3361 handle_t *handle, 3651 handle_t *handle,
3362 struct ocfs2_path *path, 3652 struct ocfs2_path *path,
@@ -3558,8 +3848,8 @@ static void ocfs2_split_record(struct inode *inode,
3558} 3848}
3559 3849
3560/* 3850/*
3561 * This function only does inserts on an allocation b-tree. For dinode 3851 * This function only does inserts on an allocation b-tree. For tree
3562 * lists, ocfs2_insert_at_leaf() is called directly. 3852 * depth = 0, ocfs2_insert_at_leaf() is called directly.
3563 * 3853 *
3564 * right_path is the path we want to do the actual insert 3854 * right_path is the path we want to do the actual insert
3565 * in. left_path should only be passed in if we need to update that 3855 * in. left_path should only be passed in if we need to update that
@@ -3656,7 +3946,7 @@ out:
3656 3946
3657static int ocfs2_do_insert_extent(struct inode *inode, 3947static int ocfs2_do_insert_extent(struct inode *inode,
3658 handle_t *handle, 3948 handle_t *handle,
3659 struct buffer_head *di_bh, 3949 struct ocfs2_extent_tree *et,
3660 struct ocfs2_extent_rec *insert_rec, 3950 struct ocfs2_extent_rec *insert_rec,
3661 struct ocfs2_insert_type *type) 3951 struct ocfs2_insert_type *type)
3662{ 3952{
@@ -3664,13 +3954,11 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3664 u32 cpos; 3954 u32 cpos;
3665 struct ocfs2_path *right_path = NULL; 3955 struct ocfs2_path *right_path = NULL;
3666 struct ocfs2_path *left_path = NULL; 3956 struct ocfs2_path *left_path = NULL;
3667 struct ocfs2_dinode *di;
3668 struct ocfs2_extent_list *el; 3957 struct ocfs2_extent_list *el;
3669 3958
3670 di = (struct ocfs2_dinode *) di_bh->b_data; 3959 el = et->et_root_el;
3671 el = &di->id2.i_list;
3672 3960
3673 ret = ocfs2_journal_access(handle, inode, di_bh, 3961 ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
3674 OCFS2_JOURNAL_ACCESS_WRITE); 3962 OCFS2_JOURNAL_ACCESS_WRITE);
3675 if (ret) { 3963 if (ret) {
3676 mlog_errno(ret); 3964 mlog_errno(ret);
@@ -3682,7 +3970,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3682 goto out_update_clusters; 3970 goto out_update_clusters;
3683 } 3971 }
3684 3972
3685 right_path = ocfs2_new_inode_path(di_bh); 3973 right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
3686 if (!right_path) { 3974 if (!right_path) {
3687 ret = -ENOMEM; 3975 ret = -ENOMEM;
3688 mlog_errno(ret); 3976 mlog_errno(ret);
@@ -3732,7 +4020,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3732 * ocfs2_rotate_tree_right() might have extended the 4020 * ocfs2_rotate_tree_right() might have extended the
3733 * transaction without re-journaling our tree root. 4021 * transaction without re-journaling our tree root.
3734 */ 4022 */
3735 ret = ocfs2_journal_access(handle, inode, di_bh, 4023 ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
3736 OCFS2_JOURNAL_ACCESS_WRITE); 4024 OCFS2_JOURNAL_ACCESS_WRITE);
3737 if (ret) { 4025 if (ret) {
3738 mlog_errno(ret); 4026 mlog_errno(ret);
@@ -3757,10 +4045,10 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3757 4045
3758out_update_clusters: 4046out_update_clusters:
3759 if (type->ins_split == SPLIT_NONE) 4047 if (type->ins_split == SPLIT_NONE)
3760 ocfs2_update_dinode_clusters(inode, di, 4048 ocfs2_et_update_clusters(inode, et,
3761 le16_to_cpu(insert_rec->e_leaf_clusters)); 4049 le16_to_cpu(insert_rec->e_leaf_clusters));
3762 4050
3763 ret = ocfs2_journal_dirty(handle, di_bh); 4051 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
3764 if (ret) 4052 if (ret)
3765 mlog_errno(ret); 4053 mlog_errno(ret);
3766 4054
@@ -3890,7 +4178,8 @@ out:
3890static void ocfs2_figure_contig_type(struct inode *inode, 4178static void ocfs2_figure_contig_type(struct inode *inode,
3891 struct ocfs2_insert_type *insert, 4179 struct ocfs2_insert_type *insert,
3892 struct ocfs2_extent_list *el, 4180 struct ocfs2_extent_list *el,
3893 struct ocfs2_extent_rec *insert_rec) 4181 struct ocfs2_extent_rec *insert_rec,
4182 struct ocfs2_extent_tree *et)
3894{ 4183{
3895 int i; 4184 int i;
3896 enum ocfs2_contig_type contig_type = CONTIG_NONE; 4185 enum ocfs2_contig_type contig_type = CONTIG_NONE;
@@ -3906,6 +4195,21 @@ static void ocfs2_figure_contig_type(struct inode *inode,
3906 } 4195 }
3907 } 4196 }
3908 insert->ins_contig = contig_type; 4197 insert->ins_contig = contig_type;
4198
4199 if (insert->ins_contig != CONTIG_NONE) {
4200 struct ocfs2_extent_rec *rec =
4201 &el->l_recs[insert->ins_contig_index];
4202 unsigned int len = le16_to_cpu(rec->e_leaf_clusters) +
4203 le16_to_cpu(insert_rec->e_leaf_clusters);
4204
4205 /*
4206 * Caller might want us to limit the size of extents, don't
4207 * calculate contiguousness if we might exceed that limit.
4208 */
4209 if (et->et_max_leaf_clusters &&
4210 (len > et->et_max_leaf_clusters))
4211 insert->ins_contig = CONTIG_NONE;
4212 }
3909} 4213}
3910 4214
3911/* 4215/*
@@ -3914,8 +4218,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
3914 * ocfs2_figure_appending_type() will figure out whether we'll have to 4218 * ocfs2_figure_appending_type() will figure out whether we'll have to
3915 * insert at the tail of the rightmost leaf. 4219 * insert at the tail of the rightmost leaf.
3916 * 4220 *
3917 * This should also work against the dinode list for tree's with 0 4221 * This should also work against the root extent list for tree's with 0
3918 * depth. If we consider the dinode list to be the rightmost leaf node 4222 * depth. If we consider the root extent list to be the rightmost leaf node
3919 * then the logic here makes sense. 4223 * then the logic here makes sense.
3920 */ 4224 */
3921static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, 4225static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
@@ -3966,14 +4270,13 @@ set_tail_append:
3966 * structure. 4270 * structure.
3967 */ 4271 */
3968static int ocfs2_figure_insert_type(struct inode *inode, 4272static int ocfs2_figure_insert_type(struct inode *inode,
3969 struct buffer_head *di_bh, 4273 struct ocfs2_extent_tree *et,
3970 struct buffer_head **last_eb_bh, 4274 struct buffer_head **last_eb_bh,
3971 struct ocfs2_extent_rec *insert_rec, 4275 struct ocfs2_extent_rec *insert_rec,
3972 int *free_records, 4276 int *free_records,
3973 struct ocfs2_insert_type *insert) 4277 struct ocfs2_insert_type *insert)
3974{ 4278{
3975 int ret; 4279 int ret;
3976 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3977 struct ocfs2_extent_block *eb; 4280 struct ocfs2_extent_block *eb;
3978 struct ocfs2_extent_list *el; 4281 struct ocfs2_extent_list *el;
3979 struct ocfs2_path *path = NULL; 4282 struct ocfs2_path *path = NULL;
@@ -3981,7 +4284,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3981 4284
3982 insert->ins_split = SPLIT_NONE; 4285 insert->ins_split = SPLIT_NONE;
3983 4286
3984 el = &di->id2.i_list; 4287 el = et->et_root_el;
3985 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); 4288 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
3986 4289
3987 if (el->l_tree_depth) { 4290 if (el->l_tree_depth) {
@@ -3991,9 +4294,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3991 * ocfs2_figure_insert_type() and ocfs2_add_branch() 4294 * ocfs2_figure_insert_type() and ocfs2_add_branch()
3992 * may want it later. 4295 * may want it later.
3993 */ 4296 */
3994 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4297 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh);
3995 le64_to_cpu(di->i_last_eb_blk), &bh,
3996 OCFS2_BH_CACHED, inode);
3997 if (ret) { 4298 if (ret) {
3998 mlog_exit(ret); 4299 mlog_exit(ret);
3999 goto out; 4300 goto out;
@@ -4014,12 +4315,12 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4014 le16_to_cpu(el->l_next_free_rec); 4315 le16_to_cpu(el->l_next_free_rec);
4015 4316
4016 if (!insert->ins_tree_depth) { 4317 if (!insert->ins_tree_depth) {
4017 ocfs2_figure_contig_type(inode, insert, el, insert_rec); 4318 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
4018 ocfs2_figure_appending_type(insert, el, insert_rec); 4319 ocfs2_figure_appending_type(insert, el, insert_rec);
4019 return 0; 4320 return 0;
4020 } 4321 }
4021 4322
4022 path = ocfs2_new_inode_path(di_bh); 4323 path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
4023 if (!path) { 4324 if (!path) {
4024 ret = -ENOMEM; 4325 ret = -ENOMEM;
4025 mlog_errno(ret); 4326 mlog_errno(ret);
@@ -4048,7 +4349,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4048 * into two types of appends: simple record append, or a 4349 * into two types of appends: simple record append, or a
4049 * rotate inside the tail leaf. 4350 * rotate inside the tail leaf.
4050 */ 4351 */
4051 ocfs2_figure_contig_type(inode, insert, el, insert_rec); 4352 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
4052 4353
4053 /* 4354 /*
4054 * The insert code isn't quite ready to deal with all cases of 4355 * The insert code isn't quite ready to deal with all cases of
@@ -4069,7 +4370,8 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4069 * the case that we're doing a tail append, so maybe we can 4370 * the case that we're doing a tail append, so maybe we can
4070 * take advantage of that information somehow. 4371 * take advantage of that information somehow.
4071 */ 4372 */
4072 if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) { 4373 if (ocfs2_et_get_last_eb_blk(et) ==
4374 path_leaf_bh(path)->b_blocknr) {
4073 /* 4375 /*
4074 * Ok, ocfs2_find_path() returned us the rightmost 4376 * Ok, ocfs2_find_path() returned us the rightmost
4075 * tree path. This might be an appending insert. There are 4377 * tree path. This might be an appending insert. There are
@@ -4099,7 +4401,7 @@ out:
4099int ocfs2_insert_extent(struct ocfs2_super *osb, 4401int ocfs2_insert_extent(struct ocfs2_super *osb,
4100 handle_t *handle, 4402 handle_t *handle,
4101 struct inode *inode, 4403 struct inode *inode,
4102 struct buffer_head *fe_bh, 4404 struct ocfs2_extent_tree *et,
4103 u32 cpos, 4405 u32 cpos,
4104 u64 start_blk, 4406 u64 start_blk,
4105 u32 new_clusters, 4407 u32 new_clusters,
@@ -4112,26 +4414,21 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4112 struct ocfs2_insert_type insert = {0, }; 4414 struct ocfs2_insert_type insert = {0, };
4113 struct ocfs2_extent_rec rec; 4415 struct ocfs2_extent_rec rec;
4114 4416
4115 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
4116
4117 mlog(0, "add %u clusters at position %u to inode %llu\n", 4417 mlog(0, "add %u clusters at position %u to inode %llu\n",
4118 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4418 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4119 4419
4120 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
4121 (OCFS2_I(inode)->ip_clusters != cpos),
4122 "Device %s, asking for sparse allocation: inode %llu, "
4123 "cpos %u, clusters %u\n",
4124 osb->dev_str,
4125 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
4126 OCFS2_I(inode)->ip_clusters);
4127
4128 memset(&rec, 0, sizeof(rec)); 4420 memset(&rec, 0, sizeof(rec));
4129 rec.e_cpos = cpu_to_le32(cpos); 4421 rec.e_cpos = cpu_to_le32(cpos);
4130 rec.e_blkno = cpu_to_le64(start_blk); 4422 rec.e_blkno = cpu_to_le64(start_blk);
4131 rec.e_leaf_clusters = cpu_to_le16(new_clusters); 4423 rec.e_leaf_clusters = cpu_to_le16(new_clusters);
4132 rec.e_flags = flags; 4424 rec.e_flags = flags;
4425 status = ocfs2_et_insert_check(inode, et, &rec);
4426 if (status) {
4427 mlog_errno(status);
4428 goto bail;
4429 }
4133 4430
4134 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 4431 status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec,
4135 &free_records, &insert); 4432 &free_records, &insert);
4136 if (status < 0) { 4433 if (status < 0) {
4137 mlog_errno(status); 4434 mlog_errno(status);
@@ -4145,7 +4442,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4145 free_records, insert.ins_tree_depth); 4442 free_records, insert.ins_tree_depth);
4146 4443
4147 if (insert.ins_contig == CONTIG_NONE && free_records == 0) { 4444 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
4148 status = ocfs2_grow_tree(inode, handle, fe_bh, 4445 status = ocfs2_grow_tree(inode, handle, et,
4149 &insert.ins_tree_depth, &last_eb_bh, 4446 &insert.ins_tree_depth, &last_eb_bh,
4150 meta_ac); 4447 meta_ac);
4151 if (status) { 4448 if (status) {
@@ -4155,17 +4452,124 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4155 } 4452 }
4156 4453
4157 /* Finally, we can add clusters. This might rotate the tree for us. */ 4454 /* Finally, we can add clusters. This might rotate the tree for us. */
4158 status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); 4455 status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert);
4159 if (status < 0) 4456 if (status < 0)
4160 mlog_errno(status); 4457 mlog_errno(status);
4161 else 4458 else if (et->et_ops == &ocfs2_dinode_et_ops)
4162 ocfs2_extent_map_insert_rec(inode, &rec); 4459 ocfs2_extent_map_insert_rec(inode, &rec);
4163 4460
4164bail: 4461bail:
4165 if (last_eb_bh) 4462 brelse(last_eb_bh);
4166 brelse(last_eb_bh); 4463
4464 mlog_exit(status);
4465 return status;
4466}
4467
4468/*
4469 * Allcate and add clusters into the extent b-tree.
4470 * The new clusters(clusters_to_add) will be inserted at logical_offset.
4471 * The extent b-tree's root is specified by et, and
4472 * it is not limited to the file storage. Any extent tree can use this
4473 * function if it implements the proper ocfs2_extent_tree.
4474 */
4475int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4476 struct inode *inode,
4477 u32 *logical_offset,
4478 u32 clusters_to_add,
4479 int mark_unwritten,
4480 struct ocfs2_extent_tree *et,
4481 handle_t *handle,
4482 struct ocfs2_alloc_context *data_ac,
4483 struct ocfs2_alloc_context *meta_ac,
4484 enum ocfs2_alloc_restarted *reason_ret)
4485{
4486 int status = 0;
4487 int free_extents;
4488 enum ocfs2_alloc_restarted reason = RESTART_NONE;
4489 u32 bit_off, num_bits;
4490 u64 block;
4491 u8 flags = 0;
4492
4493 BUG_ON(!clusters_to_add);
4494
4495 if (mark_unwritten)
4496 flags = OCFS2_EXT_UNWRITTEN;
4497
4498 free_extents = ocfs2_num_free_extents(osb, inode, et);
4499 if (free_extents < 0) {
4500 status = free_extents;
4501 mlog_errno(status);
4502 goto leave;
4503 }
4504
4505 /* there are two cases which could cause us to EAGAIN in the
4506 * we-need-more-metadata case:
4507 * 1) we haven't reserved *any*
4508 * 2) we are so fragmented, we've needed to add metadata too
4509 * many times. */
4510 if (!free_extents && !meta_ac) {
4511 mlog(0, "we haven't reserved any metadata!\n");
4512 status = -EAGAIN;
4513 reason = RESTART_META;
4514 goto leave;
4515 } else if ((!free_extents)
4516 && (ocfs2_alloc_context_bits_left(meta_ac)
4517 < ocfs2_extend_meta_needed(et->et_root_el))) {
4518 mlog(0, "filesystem is really fragmented...\n");
4519 status = -EAGAIN;
4520 reason = RESTART_META;
4521 goto leave;
4522 }
4523
4524 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
4525 clusters_to_add, &bit_off, &num_bits);
4526 if (status < 0) {
4527 if (status != -ENOSPC)
4528 mlog_errno(status);
4529 goto leave;
4530 }
4531
4532 BUG_ON(num_bits > clusters_to_add);
4167 4533
4534 /* reserve our write early -- insert_extent may update the inode */
4535 status = ocfs2_journal_access(handle, inode, et->et_root_bh,
4536 OCFS2_JOURNAL_ACCESS_WRITE);
4537 if (status < 0) {
4538 mlog_errno(status);
4539 goto leave;
4540 }
4541
4542 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4543 mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
4544 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4545 status = ocfs2_insert_extent(osb, handle, inode, et,
4546 *logical_offset, block,
4547 num_bits, flags, meta_ac);
4548 if (status < 0) {
4549 mlog_errno(status);
4550 goto leave;
4551 }
4552
4553 status = ocfs2_journal_dirty(handle, et->et_root_bh);
4554 if (status < 0) {
4555 mlog_errno(status);
4556 goto leave;
4557 }
4558
4559 clusters_to_add -= num_bits;
4560 *logical_offset += num_bits;
4561
4562 if (clusters_to_add) {
4563 mlog(0, "need to alloc once more, wanted = %u\n",
4564 clusters_to_add);
4565 status = -EAGAIN;
4566 reason = RESTART_TRANS;
4567 }
4568
4569leave:
4168 mlog_exit(status); 4570 mlog_exit(status);
4571 if (reason_ret)
4572 *reason_ret = reason;
4169 return status; 4573 return status;
4170} 4574}
4171 4575
@@ -4192,7 +4596,7 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
4192static int ocfs2_split_and_insert(struct inode *inode, 4596static int ocfs2_split_and_insert(struct inode *inode,
4193 handle_t *handle, 4597 handle_t *handle,
4194 struct ocfs2_path *path, 4598 struct ocfs2_path *path,
4195 struct buffer_head *di_bh, 4599 struct ocfs2_extent_tree *et,
4196 struct buffer_head **last_eb_bh, 4600 struct buffer_head **last_eb_bh,
4197 int split_index, 4601 int split_index,
4198 struct ocfs2_extent_rec *orig_split_rec, 4602 struct ocfs2_extent_rec *orig_split_rec,
@@ -4206,7 +4610,6 @@ static int ocfs2_split_and_insert(struct inode *inode,
4206 struct ocfs2_extent_rec split_rec = *orig_split_rec; 4610 struct ocfs2_extent_rec split_rec = *orig_split_rec;
4207 struct ocfs2_insert_type insert; 4611 struct ocfs2_insert_type insert;
4208 struct ocfs2_extent_block *eb; 4612 struct ocfs2_extent_block *eb;
4209 struct ocfs2_dinode *di;
4210 4613
4211leftright: 4614leftright:
4212 /* 4615 /*
@@ -4215,8 +4618,7 @@ leftright:
4215 */ 4618 */
4216 rec = path_leaf_el(path)->l_recs[split_index]; 4619 rec = path_leaf_el(path)->l_recs[split_index];
4217 4620
4218 di = (struct ocfs2_dinode *)di_bh->b_data; 4621 rightmost_el = et->et_root_el;
4219 rightmost_el = &di->id2.i_list;
4220 4622
4221 depth = le16_to_cpu(rightmost_el->l_tree_depth); 4623 depth = le16_to_cpu(rightmost_el->l_tree_depth);
4222 if (depth) { 4624 if (depth) {
@@ -4227,8 +4629,8 @@ leftright:
4227 4629
4228 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4630 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4229 le16_to_cpu(rightmost_el->l_count)) { 4631 le16_to_cpu(rightmost_el->l_count)) {
4230 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, 4632 ret = ocfs2_grow_tree(inode, handle, et,
4231 meta_ac); 4633 &depth, last_eb_bh, meta_ac);
4232 if (ret) { 4634 if (ret) {
4233 mlog_errno(ret); 4635 mlog_errno(ret);
4234 goto out; 4636 goto out;
@@ -4265,8 +4667,7 @@ leftright:
4265 do_leftright = 1; 4667 do_leftright = 1;
4266 } 4668 }
4267 4669
4268 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, 4670 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
4269 &insert);
4270 if (ret) { 4671 if (ret) {
4271 mlog_errno(ret); 4672 mlog_errno(ret);
4272 goto out; 4673 goto out;
@@ -4308,8 +4709,9 @@ out:
4308 * of the tree is required. All other cases will degrade into a less 4709 * of the tree is required. All other cases will degrade into a less
4309 * optimal tree layout. 4710 * optimal tree layout.
4310 * 4711 *
4311 * last_eb_bh should be the rightmost leaf block for any inode with a 4712 * last_eb_bh should be the rightmost leaf block for any extent
4312 * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. 4713 * btree. Since a split may grow the tree or a merge might shrink it,
4714 * the caller cannot trust the contents of that buffer after this call.
4313 * 4715 *
4314 * This code is optimized for readability - several passes might be 4716 * This code is optimized for readability - several passes might be
4315 * made over certain portions of the tree. All of those blocks will 4717 * made over certain portions of the tree. All of those blocks will
@@ -4317,7 +4719,7 @@ out:
4317 * extra overhead is not expressed in terms of disk reads. 4719 * extra overhead is not expressed in terms of disk reads.
4318 */ 4720 */
4319static int __ocfs2_mark_extent_written(struct inode *inode, 4721static int __ocfs2_mark_extent_written(struct inode *inode,
4320 struct buffer_head *di_bh, 4722 struct ocfs2_extent_tree *et,
4321 handle_t *handle, 4723 handle_t *handle,
4322 struct ocfs2_path *path, 4724 struct ocfs2_path *path,
4323 int split_index, 4725 int split_index,
@@ -4357,11 +4759,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4357 */ 4759 */
4358 if (path->p_tree_depth) { 4760 if (path->p_tree_depth) {
4359 struct ocfs2_extent_block *eb; 4761 struct ocfs2_extent_block *eb;
4360 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4361 4762
4362 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4763 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
4363 le64_to_cpu(di->i_last_eb_blk), 4764 &last_eb_bh);
4364 &last_eb_bh, OCFS2_BH_CACHED, inode);
4365 if (ret) { 4765 if (ret) {
4366 mlog_exit(ret); 4766 mlog_exit(ret);
4367 goto out; 4767 goto out;
@@ -4394,7 +4794,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4394 if (ctxt.c_split_covers_rec) 4794 if (ctxt.c_split_covers_rec)
4395 el->l_recs[split_index] = *split_rec; 4795 el->l_recs[split_index] = *split_rec;
4396 else 4796 else
4397 ret = ocfs2_split_and_insert(inode, handle, path, di_bh, 4797 ret = ocfs2_split_and_insert(inode, handle, path, et,
4398 &last_eb_bh, split_index, 4798 &last_eb_bh, split_index,
4399 split_rec, meta_ac); 4799 split_rec, meta_ac);
4400 if (ret) 4800 if (ret)
@@ -4402,7 +4802,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4402 } else { 4802 } else {
4403 ret = ocfs2_try_to_merge_extent(inode, handle, path, 4803 ret = ocfs2_try_to_merge_extent(inode, handle, path,
4404 split_index, split_rec, 4804 split_index, split_rec,
4405 dealloc, &ctxt); 4805 dealloc, &ctxt, et);
4406 if (ret) 4806 if (ret)
4407 mlog_errno(ret); 4807 mlog_errno(ret);
4408 } 4808 }
@@ -4420,7 +4820,8 @@ out:
4420 * 4820 *
4421 * The caller is responsible for passing down meta_ac if we'll need it. 4821 * The caller is responsible for passing down meta_ac if we'll need it.
4422 */ 4822 */
4423int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, 4823int ocfs2_mark_extent_written(struct inode *inode,
4824 struct ocfs2_extent_tree *et,
4424 handle_t *handle, u32 cpos, u32 len, u32 phys, 4825 handle_t *handle, u32 cpos, u32 len, u32 phys,
4425 struct ocfs2_alloc_context *meta_ac, 4826 struct ocfs2_alloc_context *meta_ac,
4426 struct ocfs2_cached_dealloc_ctxt *dealloc) 4827 struct ocfs2_cached_dealloc_ctxt *dealloc)
@@ -4446,10 +4847,14 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
4446 /* 4847 /*
4447 * XXX: This should be fixed up so that we just re-insert the 4848 * XXX: This should be fixed up so that we just re-insert the
4448 * next extent records. 4849 * next extent records.
4850 *
4851 * XXX: This is a hack on the extent tree, maybe it should be
4852 * an op?
4449 */ 4853 */
4450 ocfs2_extent_map_trunc(inode, 0); 4854 if (et->et_ops == &ocfs2_dinode_et_ops)
4855 ocfs2_extent_map_trunc(inode, 0);
4451 4856
4452 left_path = ocfs2_new_inode_path(di_bh); 4857 left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
4453 if (!left_path) { 4858 if (!left_path) {
4454 ret = -ENOMEM; 4859 ret = -ENOMEM;
4455 mlog_errno(ret); 4860 mlog_errno(ret);
@@ -4480,8 +4885,9 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
4480 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; 4885 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
4481 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; 4886 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
4482 4887
4483 ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, 4888 ret = __ocfs2_mark_extent_written(inode, et, handle, left_path,
4484 index, &split_rec, meta_ac, dealloc); 4889 index, &split_rec, meta_ac,
4890 dealloc);
4485 if (ret) 4891 if (ret)
4486 mlog_errno(ret); 4892 mlog_errno(ret);
4487 4893
@@ -4490,13 +4896,12 @@ out:
4490 return ret; 4896 return ret;
4491} 4897}
4492 4898
4493static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, 4899static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
4494 handle_t *handle, struct ocfs2_path *path, 4900 handle_t *handle, struct ocfs2_path *path,
4495 int index, u32 new_range, 4901 int index, u32 new_range,
4496 struct ocfs2_alloc_context *meta_ac) 4902 struct ocfs2_alloc_context *meta_ac)
4497{ 4903{
4498 int ret, depth, credits = handle->h_buffer_credits; 4904 int ret, depth, credits = handle->h_buffer_credits;
4499 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4500 struct buffer_head *last_eb_bh = NULL; 4905 struct buffer_head *last_eb_bh = NULL;
4501 struct ocfs2_extent_block *eb; 4906 struct ocfs2_extent_block *eb;
4502 struct ocfs2_extent_list *rightmost_el, *el; 4907 struct ocfs2_extent_list *rightmost_el, *el;
@@ -4513,9 +4918,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4513 4918
4514 depth = path->p_tree_depth; 4919 depth = path->p_tree_depth;
4515 if (depth > 0) { 4920 if (depth > 0) {
4516 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4921 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
4517 le64_to_cpu(di->i_last_eb_blk), 4922 &last_eb_bh);
4518 &last_eb_bh, OCFS2_BH_CACHED, inode);
4519 if (ret < 0) { 4923 if (ret < 0) {
4520 mlog_errno(ret); 4924 mlog_errno(ret);
4521 goto out; 4925 goto out;
@@ -4526,7 +4930,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4526 } else 4930 } else
4527 rightmost_el = path_leaf_el(path); 4931 rightmost_el = path_leaf_el(path);
4528 4932
4529 credits += path->p_tree_depth + ocfs2_extend_meta_needed(di); 4933 credits += path->p_tree_depth +
4934 ocfs2_extend_meta_needed(et->et_root_el);
4530 ret = ocfs2_extend_trans(handle, credits); 4935 ret = ocfs2_extend_trans(handle, credits);
4531 if (ret) { 4936 if (ret) {
4532 mlog_errno(ret); 4937 mlog_errno(ret);
@@ -4535,7 +4940,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4535 4940
4536 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4941 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4537 le16_to_cpu(rightmost_el->l_count)) { 4942 le16_to_cpu(rightmost_el->l_count)) {
4538 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, 4943 ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh,
4539 meta_ac); 4944 meta_ac);
4540 if (ret) { 4945 if (ret) {
4541 mlog_errno(ret); 4946 mlog_errno(ret);
@@ -4549,7 +4954,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4549 insert.ins_split = SPLIT_RIGHT; 4954 insert.ins_split = SPLIT_RIGHT;
4550 insert.ins_tree_depth = depth; 4955 insert.ins_tree_depth = depth;
4551 4956
4552 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); 4957 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
4553 if (ret) 4958 if (ret)
4554 mlog_errno(ret); 4959 mlog_errno(ret);
4555 4960
@@ -4561,7 +4966,8 @@ out:
4561static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, 4966static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
4562 struct ocfs2_path *path, int index, 4967 struct ocfs2_path *path, int index,
4563 struct ocfs2_cached_dealloc_ctxt *dealloc, 4968 struct ocfs2_cached_dealloc_ctxt *dealloc,
4564 u32 cpos, u32 len) 4969 u32 cpos, u32 len,
4970 struct ocfs2_extent_tree *et)
4565{ 4971{
4566 int ret; 4972 int ret;
4567 u32 left_cpos, rec_range, trunc_range; 4973 u32 left_cpos, rec_range, trunc_range;
@@ -4573,7 +4979,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
4573 struct ocfs2_extent_block *eb; 4979 struct ocfs2_extent_block *eb;
4574 4980
4575 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { 4981 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
4576 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); 4982 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
4577 if (ret) { 4983 if (ret) {
4578 mlog_errno(ret); 4984 mlog_errno(ret);
4579 goto out; 4985 goto out;
@@ -4704,7 +5110,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
4704 5110
4705 ocfs2_journal_dirty(handle, path_leaf_bh(path)); 5111 ocfs2_journal_dirty(handle, path_leaf_bh(path));
4706 5112
4707 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); 5113 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
4708 if (ret) { 5114 if (ret) {
4709 mlog_errno(ret); 5115 mlog_errno(ret);
4710 goto out; 5116 goto out;
@@ -4715,7 +5121,8 @@ out:
4715 return ret; 5121 return ret;
4716} 5122}
4717 5123
4718int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, 5124int ocfs2_remove_extent(struct inode *inode,
5125 struct ocfs2_extent_tree *et,
4719 u32 cpos, u32 len, handle_t *handle, 5126 u32 cpos, u32 len, handle_t *handle,
4720 struct ocfs2_alloc_context *meta_ac, 5127 struct ocfs2_alloc_context *meta_ac,
4721 struct ocfs2_cached_dealloc_ctxt *dealloc) 5128 struct ocfs2_cached_dealloc_ctxt *dealloc)
@@ -4724,11 +5131,11 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
4724 u32 rec_range, trunc_range; 5131 u32 rec_range, trunc_range;
4725 struct ocfs2_extent_rec *rec; 5132 struct ocfs2_extent_rec *rec;
4726 struct ocfs2_extent_list *el; 5133 struct ocfs2_extent_list *el;
4727 struct ocfs2_path *path; 5134 struct ocfs2_path *path = NULL;
4728 5135
4729 ocfs2_extent_map_trunc(inode, 0); 5136 ocfs2_extent_map_trunc(inode, 0);
4730 5137
4731 path = ocfs2_new_inode_path(di_bh); 5138 path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
4732 if (!path) { 5139 if (!path) {
4733 ret = -ENOMEM; 5140 ret = -ENOMEM;
4734 mlog_errno(ret); 5141 mlog_errno(ret);
@@ -4781,13 +5188,13 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
4781 5188
4782 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { 5189 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
4783 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5190 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
4784 cpos, len); 5191 cpos, len, et);
4785 if (ret) { 5192 if (ret) {
4786 mlog_errno(ret); 5193 mlog_errno(ret);
4787 goto out; 5194 goto out;
4788 } 5195 }
4789 } else { 5196 } else {
4790 ret = ocfs2_split_tree(inode, di_bh, handle, path, index, 5197 ret = ocfs2_split_tree(inode, et, handle, path, index,
4791 trunc_range, meta_ac); 5198 trunc_range, meta_ac);
4792 if (ret) { 5199 if (ret) {
4793 mlog_errno(ret); 5200 mlog_errno(ret);
@@ -4836,7 +5243,7 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
4836 } 5243 }
4837 5244
4838 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5245 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
4839 cpos, len); 5246 cpos, len, et);
4840 if (ret) { 5247 if (ret) {
4841 mlog_errno(ret); 5248 mlog_errno(ret);
4842 goto out; 5249 goto out;
@@ -5179,8 +5586,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
5179 goto bail; 5586 goto bail;
5180 } 5587 }
5181 5588
5182 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 5589 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
5183 OCFS2_BH_CACHED, inode);
5184 if (status < 0) { 5590 if (status < 0) {
5185 iput(inode); 5591 iput(inode);
5186 mlog_errno(status); 5592 mlog_errno(status);
@@ -5255,8 +5661,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
5255bail: 5661bail:
5256 if (tl_inode) 5662 if (tl_inode)
5257 iput(tl_inode); 5663 iput(tl_inode);
5258 if (tl_bh) 5664 brelse(tl_bh);
5259 brelse(tl_bh);
5260 5665
5261 if (status < 0 && (*tl_copy)) { 5666 if (status < 0 && (*tl_copy)) {
5262 kfree(*tl_copy); 5667 kfree(*tl_copy);
@@ -5999,20 +6404,13 @@ bail:
5999 return status; 6404 return status;
6000} 6405}
6001 6406
6002static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) 6407static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
6003{ 6408{
6004 set_buffer_uptodate(bh); 6409 set_buffer_uptodate(bh);
6005 mark_buffer_dirty(bh); 6410 mark_buffer_dirty(bh);
6006 return 0; 6411 return 0;
6007} 6412}
6008 6413
6009static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
6010{
6011 set_buffer_uptodate(bh);
6012 mark_buffer_dirty(bh);
6013 return ocfs2_journal_dirty_data(handle, bh);
6014}
6015
6016static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, 6414static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6017 unsigned int from, unsigned int to, 6415 unsigned int from, unsigned int to,
6018 struct page *page, int zero, u64 *phys) 6416 struct page *page, int zero, u64 *phys)
@@ -6031,17 +6429,18 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6031 * here if they aren't - ocfs2_map_page_blocks() 6429 * here if they aren't - ocfs2_map_page_blocks()
6032 * might've skipped some 6430 * might've skipped some
6033 */ 6431 */
6034 if (ocfs2_should_order_data(inode)) { 6432 ret = walk_page_buffers(handle, page_buffers(page),
6035 ret = walk_page_buffers(handle, 6433 from, to, &partial,
6036 page_buffers(page), 6434 ocfs2_zero_func);
6037 from, to, &partial, 6435 if (ret < 0)
6038 ocfs2_ordered_zero_func); 6436 mlog_errno(ret);
6039 if (ret < 0) 6437 else if (ocfs2_should_order_data(inode)) {
6040 mlog_errno(ret); 6438 ret = ocfs2_jbd2_file_inode(handle, inode);
6041 } else { 6439#ifdef CONFIG_OCFS2_COMPAT_JBD
6042 ret = walk_page_buffers(handle, page_buffers(page), 6440 ret = walk_page_buffers(handle, page_buffers(page),
6043 from, to, &partial, 6441 from, to, &partial,
6044 ocfs2_writeback_zero_func); 6442 ocfs2_journal_dirty_data);
6443#endif
6045 if (ret < 0) 6444 if (ret < 0)
6046 mlog_errno(ret); 6445 mlog_errno(ret);
6047 } 6446 }
@@ -6206,20 +6605,29 @@ out:
6206 return ret; 6605 return ret;
6207} 6606}
6208 6607
6209static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di) 6608static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode,
6609 struct ocfs2_dinode *di)
6210{ 6610{
6211 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; 6611 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
6612 unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
6212 6613
6213 memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2)); 6614 if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
6615 memset(&di->id2, 0, blocksize -
6616 offsetof(struct ocfs2_dinode, id2) -
6617 xattrsize);
6618 else
6619 memset(&di->id2, 0, blocksize -
6620 offsetof(struct ocfs2_dinode, id2));
6214} 6621}
6215 6622
6216void ocfs2_dinode_new_extent_list(struct inode *inode, 6623void ocfs2_dinode_new_extent_list(struct inode *inode,
6217 struct ocfs2_dinode *di) 6624 struct ocfs2_dinode *di)
6218{ 6625{
6219 ocfs2_zero_dinode_id2(inode, di); 6626 ocfs2_zero_dinode_id2_with_xattr(inode, di);
6220 di->id2.i_list.l_tree_depth = 0; 6627 di->id2.i_list.l_tree_depth = 0;
6221 di->id2.i_list.l_next_free_rec = 0; 6628 di->id2.i_list.l_next_free_rec = 0;
6222 di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb)); 6629 di->id2.i_list.l_count = cpu_to_le16(
6630 ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di));
6223} 6631}
6224 6632
6225void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) 6633void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
@@ -6236,9 +6644,10 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
6236 * We clear the entire i_data structure here so that all 6644 * We clear the entire i_data structure here so that all
6237 * fields can be properly initialized. 6645 * fields can be properly initialized.
6238 */ 6646 */
6239 ocfs2_zero_dinode_id2(inode, di); 6647 ocfs2_zero_dinode_id2_with_xattr(inode, di);
6240 6648
6241 idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb)); 6649 idata->id_count = cpu_to_le16(
6650 ocfs2_max_inline_data_with_xattr(inode->i_sb, di));
6242} 6651}
6243 6652
6244int ocfs2_convert_inline_data_to_extents(struct inode *inode, 6653int ocfs2_convert_inline_data_to_extents(struct inode *inode,
@@ -6253,6 +6662,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6253 struct ocfs2_alloc_context *data_ac = NULL; 6662 struct ocfs2_alloc_context *data_ac = NULL;
6254 struct page **pages = NULL; 6663 struct page **pages = NULL;
6255 loff_t end = osb->s_clustersize; 6664 loff_t end = osb->s_clustersize;
6665 struct ocfs2_extent_tree et;
6256 6666
6257 has_data = i_size_read(inode) ? 1 : 0; 6667 has_data = i_size_read(inode) ? 1 : 0;
6258 6668
@@ -6352,7 +6762,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6352 * this proves to be false, we could always re-build 6762 * this proves to be false, we could always re-build
6353 * the in-inode data from our pages. 6763 * the in-inode data from our pages.
6354 */ 6764 */
6355 ret = ocfs2_insert_extent(osb, handle, inode, di_bh, 6765 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
6766 ret = ocfs2_insert_extent(osb, handle, inode, &et,
6356 0, block, 1, 0, NULL); 6767 0, block, 1, 0, NULL);
6357 if (ret) { 6768 if (ret) {
6358 mlog_errno(ret); 6769 mlog_errno(ret);
@@ -6395,13 +6806,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
6395 handle_t *handle = NULL; 6806 handle_t *handle = NULL;
6396 struct inode *tl_inode = osb->osb_tl_inode; 6807 struct inode *tl_inode = osb->osb_tl_inode;
6397 struct ocfs2_path *path = NULL; 6808 struct ocfs2_path *path = NULL;
6809 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6398 6810
6399 mlog_entry_void(); 6811 mlog_entry_void();
6400 6812
6401 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 6813 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
6402 i_size_read(inode)); 6814 i_size_read(inode));
6403 6815
6404 path = ocfs2_new_inode_path(fe_bh); 6816 path = ocfs2_new_path(fe_bh, &di->id2.i_list);
6405 if (!path) { 6817 if (!path) {
6406 status = -ENOMEM; 6818 status = -ENOMEM;
6407 mlog_errno(status); 6819 mlog_errno(status);
@@ -6572,8 +6984,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
6572 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 6984 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
6573 6985
6574 if (fe->id2.i_list.l_tree_depth) { 6986 if (fe->id2.i_list.l_tree_depth) {
6575 status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 6987 status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk),
6576 &last_eb_bh, OCFS2_BH_CACHED, inode); 6988 &last_eb_bh);
6577 if (status < 0) { 6989 if (status < 0) {
6578 mlog_errno(status); 6990 mlog_errno(status);
6579 goto bail; 6991 goto bail;
@@ -6686,8 +7098,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
6686 mlog(ML_NOTICE, 7098 mlog(ML_NOTICE,
6687 "Truncate completion has non-empty dealloc context\n"); 7099 "Truncate completion has non-empty dealloc context\n");
6688 7100
6689 if (tc->tc_last_eb_bh) 7101 brelse(tc->tc_last_eb_bh);
6690 brelse(tc->tc_last_eb_bh);
6691 7102
6692 kfree(tc); 7103 kfree(tc);
6693} 7104}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 60cd3d59230c..70257c84cfbe 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -26,30 +26,102 @@
26#ifndef OCFS2_ALLOC_H 26#ifndef OCFS2_ALLOC_H
27#define OCFS2_ALLOC_H 27#define OCFS2_ALLOC_H
28 28
29
30/*
31 * For xattr tree leaf, we limit the leaf byte size to be 64K.
32 */
33#define OCFS2_MAX_XATTR_TREE_LEAF_SIZE 65536
34
35/*
36 * ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract
37 * the b-tree operations in ocfs2. Now all the b-tree operations are not
38 * limited to ocfs2_dinode only. Any data which need to allocate clusters
39 * to store can use b-tree. And it only needs to implement its ocfs2_extent_tree
40 * and operation.
41 *
42 * ocfs2_extent_tree becomes the first-class object for extent tree
43 * manipulation. Callers of the alloc.c code need to fill it via one of
44 * the ocfs2_init_*_extent_tree() operations below.
45 *
46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
48 * functions.
49 * ocfs2_extent_tree_operations abstract the normal operations we do for
50 * the root of extent b-tree.
51 */
52struct ocfs2_extent_tree_operations;
53struct ocfs2_extent_tree {
54 struct ocfs2_extent_tree_operations *et_ops;
55 struct buffer_head *et_root_bh;
56 struct ocfs2_extent_list *et_root_el;
57 void *et_object;
58 unsigned int et_max_leaf_clusters;
59};
60
61/*
62 * ocfs2_init_*_extent_tree() will fill an ocfs2_extent_tree from the
63 * specified object buffer.
64 */
65void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
66 struct inode *inode,
67 struct buffer_head *bh);
68void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
69 struct inode *inode,
70 struct buffer_head *bh);
71void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
72 struct inode *inode,
73 struct buffer_head *bh,
74 struct ocfs2_xattr_value_root *xv);
75
29struct ocfs2_alloc_context; 76struct ocfs2_alloc_context;
30int ocfs2_insert_extent(struct ocfs2_super *osb, 77int ocfs2_insert_extent(struct ocfs2_super *osb,
31 handle_t *handle, 78 handle_t *handle,
32 struct inode *inode, 79 struct inode *inode,
33 struct buffer_head *fe_bh, 80 struct ocfs2_extent_tree *et,
34 u32 cpos, 81 u32 cpos,
35 u64 start_blk, 82 u64 start_blk,
36 u32 new_clusters, 83 u32 new_clusters,
37 u8 flags, 84 u8 flags,
38 struct ocfs2_alloc_context *meta_ac); 85 struct ocfs2_alloc_context *meta_ac);
86
87enum ocfs2_alloc_restarted {
88 RESTART_NONE = 0,
89 RESTART_TRANS,
90 RESTART_META
91};
92int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
93 struct inode *inode,
94 u32 *logical_offset,
95 u32 clusters_to_add,
96 int mark_unwritten,
97 struct ocfs2_extent_tree *et,
98 handle_t *handle,
99 struct ocfs2_alloc_context *data_ac,
100 struct ocfs2_alloc_context *meta_ac,
101 enum ocfs2_alloc_restarted *reason_ret);
39struct ocfs2_cached_dealloc_ctxt; 102struct ocfs2_cached_dealloc_ctxt;
40int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, 103int ocfs2_mark_extent_written(struct inode *inode,
104 struct ocfs2_extent_tree *et,
41 handle_t *handle, u32 cpos, u32 len, u32 phys, 105 handle_t *handle, u32 cpos, u32 len, u32 phys,
42 struct ocfs2_alloc_context *meta_ac, 106 struct ocfs2_alloc_context *meta_ac,
43 struct ocfs2_cached_dealloc_ctxt *dealloc); 107 struct ocfs2_cached_dealloc_ctxt *dealloc);
44int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, 108int ocfs2_remove_extent(struct inode *inode,
109 struct ocfs2_extent_tree *et,
45 u32 cpos, u32 len, handle_t *handle, 110 u32 cpos, u32 len, handle_t *handle,
46 struct ocfs2_alloc_context *meta_ac, 111 struct ocfs2_alloc_context *meta_ac,
47 struct ocfs2_cached_dealloc_ctxt *dealloc); 112 struct ocfs2_cached_dealloc_ctxt *dealloc);
48int ocfs2_num_free_extents(struct ocfs2_super *osb, 113int ocfs2_num_free_extents(struct ocfs2_super *osb,
49 struct inode *inode, 114 struct inode *inode,
50 struct ocfs2_dinode *fe); 115 struct ocfs2_extent_tree *et);
51/* how many new metadata chunks would an allocation need at maximum? */ 116
52static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) 117/*
118 * how many new metadata chunks would an allocation need at maximum?
119 *
120 * Please note that the caller must make sure that root_el is the root
121 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
122 * the result may be wrong.
123 */
124static inline int ocfs2_extend_meta_needed(struct ocfs2_extent_list *root_el)
53{ 125{
54 /* 126 /*
55 * Rather than do all the work of determining how much we need 127 * Rather than do all the work of determining how much we need
@@ -59,7 +131,7 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
59 * new tree_depth==0 extent_block, and one block at the new 131 * new tree_depth==0 extent_block, and one block at the new
60 * top-of-the tree. 132 * top-of-the tree.
61 */ 133 */
62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; 134 return le16_to_cpu(root_el->l_tree_depth) + 2;
63} 135}
64 136
65void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); 137void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a53da1466277..c22543b33420 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -68,9 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
68 goto bail; 68 goto bail;
69 } 69 }
70 70
71 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 71 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
72 OCFS2_I(inode)->ip_blkno,
73 &bh, OCFS2_BH_CACHED, inode);
74 if (status < 0) { 72 if (status < 0) {
75 mlog_errno(status); 73 mlog_errno(status);
76 goto bail; 74 goto bail;
@@ -128,8 +126,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
128 err = 0; 126 err = 0;
129 127
130bail: 128bail:
131 if (bh) 129 brelse(bh);
132 brelse(bh);
133 130
134 mlog_exit(err); 131 mlog_exit(err);
135 return err; 132 return err;
@@ -261,13 +258,11 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
261{ 258{
262 int ret; 259 int ret;
263 struct buffer_head *di_bh = NULL; 260 struct buffer_head *di_bh = NULL;
264 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
265 261
266 BUG_ON(!PageLocked(page)); 262 BUG_ON(!PageLocked(page));
267 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); 263 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
268 264
269 ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, 265 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
270 OCFS2_BH_CACHED, inode);
271 if (ret) { 266 if (ret) {
272 mlog_errno(ret); 267 mlog_errno(ret);
273 goto out; 268 goto out;
@@ -485,11 +480,14 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
485 } 480 }
486 481
487 if (ocfs2_should_order_data(inode)) { 482 if (ocfs2_should_order_data(inode)) {
483 ret = ocfs2_jbd2_file_inode(handle, inode);
484#ifdef CONFIG_OCFS2_COMPAT_JBD
488 ret = walk_page_buffers(handle, 485 ret = walk_page_buffers(handle,
489 page_buffers(page), 486 page_buffers(page),
490 from, to, NULL, 487 from, to, NULL,
491 ocfs2_journal_dirty_data); 488 ocfs2_journal_dirty_data);
492 if (ret < 0) 489#endif
490 if (ret < 0)
493 mlog_errno(ret); 491 mlog_errno(ret);
494 } 492 }
495out: 493out:
@@ -669,7 +667,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset)
669{ 667{
670 journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; 668 journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
671 669
672 journal_invalidatepage(journal, page, offset); 670 jbd2_journal_invalidatepage(journal, page, offset);
673} 671}
674 672
675static int ocfs2_releasepage(struct page *page, gfp_t wait) 673static int ocfs2_releasepage(struct page *page, gfp_t wait)
@@ -678,7 +676,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
678 676
679 if (!page_has_buffers(page)) 677 if (!page_has_buffers(page))
680 return 0; 678 return 0;
681 return journal_try_to_free_buffers(journal, page, wait); 679 return jbd2_journal_try_to_free_buffers(journal, page, wait);
682} 680}
683 681
684static ssize_t ocfs2_direct_IO(int rw, 682static ssize_t ocfs2_direct_IO(int rw,
@@ -1074,11 +1072,15 @@ static void ocfs2_write_failure(struct inode *inode,
1074 tmppage = wc->w_pages[i]; 1072 tmppage = wc->w_pages[i];
1075 1073
1076 if (page_has_buffers(tmppage)) { 1074 if (page_has_buffers(tmppage)) {
1077 if (ocfs2_should_order_data(inode)) 1075 if (ocfs2_should_order_data(inode)) {
1076 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1077#ifdef CONFIG_OCFS2_COMPAT_JBD
1078 walk_page_buffers(wc->w_handle, 1078 walk_page_buffers(wc->w_handle,
1079 page_buffers(tmppage), 1079 page_buffers(tmppage),
1080 from, to, NULL, 1080 from, to, NULL,
1081 ocfs2_journal_dirty_data); 1081 ocfs2_journal_dirty_data);
1082#endif
1083 }
1082 1084
1083 block_commit_write(tmppage, from, to); 1085 block_commit_write(tmppage, from, to);
1084 } 1086 }
@@ -1242,6 +1244,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1242 int ret, i, new, should_zero = 0; 1244 int ret, i, new, should_zero = 0;
1243 u64 v_blkno, p_blkno; 1245 u64 v_blkno, p_blkno;
1244 struct inode *inode = mapping->host; 1246 struct inode *inode = mapping->host;
1247 struct ocfs2_extent_tree et;
1245 1248
1246 new = phys == 0 ? 1 : 0; 1249 new = phys == 0 ? 1 : 0;
1247 if (new || unwritten) 1250 if (new || unwritten)
@@ -1255,10 +1258,10 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1255 * any additional semaphores or cluster locks. 1258 * any additional semaphores or cluster locks.
1256 */ 1259 */
1257 tmp_pos = cpos; 1260 tmp_pos = cpos;
1258 ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, 1261 ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
1259 &tmp_pos, 1, 0, wc->w_di_bh, 1262 &tmp_pos, 1, 0, wc->w_di_bh,
1260 wc->w_handle, data_ac, 1263 wc->w_handle, data_ac,
1261 meta_ac, NULL); 1264 meta_ac, NULL);
1262 /* 1265 /*
1263 * This shouldn't happen because we must have already 1266 * This shouldn't happen because we must have already
1264 * calculated the correct meta data allocation required. The 1267 * calculated the correct meta data allocation required. The
@@ -1276,7 +1279,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1276 goto out; 1279 goto out;
1277 } 1280 }
1278 } else if (unwritten) { 1281 } else if (unwritten) {
1279 ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, 1282 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
1283 ret = ocfs2_mark_extent_written(inode, &et,
1280 wc->w_handle, cpos, 1, phys, 1284 wc->w_handle, cpos, 1, phys,
1281 meta_ac, &wc->w_dealloc); 1285 meta_ac, &wc->w_dealloc);
1282 if (ret < 0) { 1286 if (ret < 0) {
@@ -1665,6 +1669,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1665 struct ocfs2_alloc_context *data_ac = NULL; 1669 struct ocfs2_alloc_context *data_ac = NULL;
1666 struct ocfs2_alloc_context *meta_ac = NULL; 1670 struct ocfs2_alloc_context *meta_ac = NULL;
1667 handle_t *handle; 1671 handle_t *handle;
1672 struct ocfs2_extent_tree et;
1668 1673
1669 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); 1674 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
1670 if (ret) { 1675 if (ret) {
@@ -1712,14 +1717,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1712 * ocfs2_lock_allocators(). It greatly over-estimates 1717 * ocfs2_lock_allocators(). It greatly over-estimates
1713 * the work to be done. 1718 * the work to be done.
1714 */ 1719 */
1715 ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, 1720 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
1716 extents_to_split, &data_ac, &meta_ac); 1721 " clusters_to_add = %u, extents_to_split = %u\n",
1722 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1723 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
1724 clusters_to_alloc, extents_to_split);
1725
1726 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
1727 ret = ocfs2_lock_allocators(inode, &et,
1728 clusters_to_alloc, extents_to_split,
1729 &data_ac, &meta_ac);
1717 if (ret) { 1730 if (ret) {
1718 mlog_errno(ret); 1731 mlog_errno(ret);
1719 goto out; 1732 goto out;
1720 } 1733 }
1721 1734
1722 credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1735 credits = ocfs2_calc_extend_credits(inode->i_sb,
1736 &di->id2.i_list,
1723 clusters_to_alloc); 1737 clusters_to_alloc);
1724 1738
1725 } 1739 }
@@ -1905,11 +1919,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1905 } 1919 }
1906 1920
1907 if (page_has_buffers(tmppage)) { 1921 if (page_has_buffers(tmppage)) {
1908 if (ocfs2_should_order_data(inode)) 1922 if (ocfs2_should_order_data(inode)) {
1923 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1924#ifdef CONFIG_OCFS2_COMPAT_JBD
1909 walk_page_buffers(wc->w_handle, 1925 walk_page_buffers(wc->w_handle,
1910 page_buffers(tmppage), 1926 page_buffers(tmppage),
1911 from, to, NULL, 1927 from, to, NULL,
1912 ocfs2_journal_dirty_data); 1928 ocfs2_journal_dirty_data);
1929#endif
1930 }
1913 block_commit_write(tmppage, from, to); 1931 block_commit_write(tmppage, from, to);
1914 } 1932 }
1915 } 1933 }
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index f136639f5b41..7e947c672469 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -66,7 +66,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
66 /* remove from dirty list before I/O. */ 66 /* remove from dirty list before I/O. */
67 clear_buffer_dirty(bh); 67 clear_buffer_dirty(bh);
68 68
69 get_bh(bh); /* for end_buffer_write_sync() */ 69 get_bh(bh); /* for end_buffer_write_sync() */
70 bh->b_end_io = end_buffer_write_sync; 70 bh->b_end_io = end_buffer_write_sync;
71 submit_bh(WRITE, bh); 71 submit_bh(WRITE, bh);
72 72
@@ -88,22 +88,103 @@ out:
88 return ret; 88 return ret;
89} 89}
90 90
91int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, 91int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
92 struct buffer_head *bhs[], int flags, 92 unsigned int nr, struct buffer_head *bhs[])
93 struct inode *inode) 93{
94 int status = 0;
95 unsigned int i;
96 struct buffer_head *bh;
97
98 if (!nr) {
99 mlog(ML_BH_IO, "No buffers will be read!\n");
100 goto bail;
101 }
102
103 for (i = 0 ; i < nr ; i++) {
104 if (bhs[i] == NULL) {
105 bhs[i] = sb_getblk(osb->sb, block++);
106 if (bhs[i] == NULL) {
107 status = -EIO;
108 mlog_errno(status);
109 goto bail;
110 }
111 }
112 bh = bhs[i];
113
114 if (buffer_jbd(bh)) {
115 mlog(ML_ERROR,
116 "trying to sync read a jbd "
117 "managed bh (blocknr = %llu), skipping\n",
118 (unsigned long long)bh->b_blocknr);
119 continue;
120 }
121
122 if (buffer_dirty(bh)) {
123 /* This should probably be a BUG, or
124 * at least return an error. */
125 mlog(ML_ERROR,
126 "trying to sync read a dirty "
127 "buffer! (blocknr = %llu), skipping\n",
128 (unsigned long long)bh->b_blocknr);
129 continue;
130 }
131
132 lock_buffer(bh);
133 if (buffer_jbd(bh)) {
134 mlog(ML_ERROR,
135 "block %llu had the JBD bit set "
136 "while I was in lock_buffer!",
137 (unsigned long long)bh->b_blocknr);
138 BUG();
139 }
140
141 clear_buffer_uptodate(bh);
142 get_bh(bh); /* for end_buffer_read_sync() */
143 bh->b_end_io = end_buffer_read_sync;
144 submit_bh(READ, bh);
145 }
146
147 for (i = nr; i > 0; i--) {
148 bh = bhs[i - 1];
149
150 if (buffer_jbd(bh)) {
151 mlog(ML_ERROR,
152 "the journal got the buffer while it was "
153 "locked for io! (blocknr = %llu)\n",
154 (unsigned long long)bh->b_blocknr);
155 BUG();
156 }
157
158 wait_on_buffer(bh);
159 if (!buffer_uptodate(bh)) {
160 /* Status won't be cleared from here on out,
161 * so we can safely record this and loop back
162 * to cleanup the other buffers. */
163 status = -EIO;
164 put_bh(bh);
165 bhs[i - 1] = NULL;
166 }
167 }
168
169bail:
170 return status;
171}
172
173int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
174 struct buffer_head *bhs[], int flags)
94{ 175{
95 int status = 0; 176 int status = 0;
96 struct super_block *sb;
97 int i, ignore_cache = 0; 177 int i, ignore_cache = 0;
98 struct buffer_head *bh; 178 struct buffer_head *bh;
99 179
100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", 180 mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
101 (unsigned long long)block, nr, flags, inode); 181 inode, (unsigned long long)block, nr, flags);
102 182
183 BUG_ON(!inode);
103 BUG_ON((flags & OCFS2_BH_READAHEAD) && 184 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
104 (!inode || !(flags & OCFS2_BH_CACHED))); 185 (flags & OCFS2_BH_IGNORE_CACHE));
105 186
106 if (osb == NULL || osb->sb == NULL || bhs == NULL) { 187 if (bhs == NULL) {
107 status = -EINVAL; 188 status = -EINVAL;
108 mlog_errno(status); 189 mlog_errno(status);
109 goto bail; 190 goto bail;
@@ -122,26 +203,19 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
122 goto bail; 203 goto bail;
123 } 204 }
124 205
125 sb = osb->sb; 206 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
126
127 if (flags & OCFS2_BH_CACHED && !inode)
128 flags &= ~OCFS2_BH_CACHED;
129
130 if (inode)
131 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
132 for (i = 0 ; i < nr ; i++) { 207 for (i = 0 ; i < nr ; i++) {
133 if (bhs[i] == NULL) { 208 if (bhs[i] == NULL) {
134 bhs[i] = sb_getblk(sb, block++); 209 bhs[i] = sb_getblk(inode->i_sb, block++);
135 if (bhs[i] == NULL) { 210 if (bhs[i] == NULL) {
136 if (inode) 211 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
137 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
138 status = -EIO; 212 status = -EIO;
139 mlog_errno(status); 213 mlog_errno(status);
140 goto bail; 214 goto bail;
141 } 215 }
142 } 216 }
143 bh = bhs[i]; 217 bh = bhs[i];
144 ignore_cache = 0; 218 ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);
145 219
146 /* There are three read-ahead cases here which we need to 220 /* There are three read-ahead cases here which we need to
147 * be concerned with. All three assume a buffer has 221 * be concerned with. All three assume a buffer has
@@ -167,26 +241,27 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
167 * before our is-it-in-flight check. 241 * before our is-it-in-flight check.
168 */ 242 */
169 243
170 if (flags & OCFS2_BH_CACHED && 244 if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
171 !ocfs2_buffer_uptodate(inode, bh)) {
172 mlog(ML_UPTODATE, 245 mlog(ML_UPTODATE,
173 "bh (%llu), inode %llu not uptodate\n", 246 "bh (%llu), inode %llu not uptodate\n",
174 (unsigned long long)bh->b_blocknr, 247 (unsigned long long)bh->b_blocknr,
175 (unsigned long long)OCFS2_I(inode)->ip_blkno); 248 (unsigned long long)OCFS2_I(inode)->ip_blkno);
249 /* We're using ignore_cache here to say
250 * "go to disk" */
176 ignore_cache = 1; 251 ignore_cache = 1;
177 } 252 }
178 253
179 /* XXX: Can we ever get this and *not* have the cached 254 /* XXX: Can we ever get this and *not* have the cached
180 * flag set? */ 255 * flag set? */
181 if (buffer_jbd(bh)) { 256 if (buffer_jbd(bh)) {
182 if (!(flags & OCFS2_BH_CACHED) || ignore_cache) 257 if (ignore_cache)
183 mlog(ML_BH_IO, "trying to sync read a jbd " 258 mlog(ML_BH_IO, "trying to sync read a jbd "
184 "managed bh (blocknr = %llu)\n", 259 "managed bh (blocknr = %llu)\n",
185 (unsigned long long)bh->b_blocknr); 260 (unsigned long long)bh->b_blocknr);
186 continue; 261 continue;
187 } 262 }
188 263
189 if (!(flags & OCFS2_BH_CACHED) || ignore_cache) { 264 if (ignore_cache) {
190 if (buffer_dirty(bh)) { 265 if (buffer_dirty(bh)) {
191 /* This should probably be a BUG, or 266 /* This should probably be a BUG, or
192 * at least return an error. */ 267 * at least return an error. */
@@ -221,7 +296,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
221 * previously read-ahead buffer may have 296 * previously read-ahead buffer may have
222 * completed I/O while we were waiting for the 297 * completed I/O while we were waiting for the
223 * buffer lock. */ 298 * buffer lock. */
224 if ((flags & OCFS2_BH_CACHED) 299 if (!(flags & OCFS2_BH_IGNORE_CACHE)
225 && !(flags & OCFS2_BH_READAHEAD) 300 && !(flags & OCFS2_BH_READAHEAD)
226 && ocfs2_buffer_uptodate(inode, bh)) { 301 && ocfs2_buffer_uptodate(inode, bh)) {
227 unlock_buffer(bh); 302 unlock_buffer(bh);
@@ -265,15 +340,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
265 /* Always set the buffer in the cache, even if it was 340 /* Always set the buffer in the cache, even if it was
266 * a forced read, or read-ahead which hasn't yet 341 * a forced read, or read-ahead which hasn't yet
267 * completed. */ 342 * completed. */
268 if (inode) 343 ocfs2_set_buffer_uptodate(inode, bh);
269 ocfs2_set_buffer_uptodate(inode, bh);
270 } 344 }
271 if (inode) 345 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
272 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
273 346
274 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 347 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
275 (unsigned long long)block, nr, 348 (unsigned long long)block, nr,
276 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); 349 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
350 flags);
277 351
278bail: 352bail:
279 353
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index c2e78614c3e5..75e1dcb1ade7 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -31,31 +31,29 @@
31void ocfs2_end_buffer_io_sync(struct buffer_head *bh, 31void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
32 int uptodate); 32 int uptodate);
33 33
34static inline int ocfs2_read_block(struct ocfs2_super *osb, 34static inline int ocfs2_read_block(struct inode *inode,
35 u64 off, 35 u64 off,
36 struct buffer_head **bh, 36 struct buffer_head **bh);
37 int flags,
38 struct inode *inode);
39 37
40int ocfs2_write_block(struct ocfs2_super *osb, 38int ocfs2_write_block(struct ocfs2_super *osb,
41 struct buffer_head *bh, 39 struct buffer_head *bh,
42 struct inode *inode); 40 struct inode *inode);
43int ocfs2_read_blocks(struct ocfs2_super *osb, 41int ocfs2_read_blocks(struct inode *inode,
44 u64 block, 42 u64 block,
45 int nr, 43 int nr,
46 struct buffer_head *bhs[], 44 struct buffer_head *bhs[],
47 int flags, 45 int flags);
48 struct inode *inode); 46int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
47 unsigned int nr, struct buffer_head *bhs[]);
49 48
50int ocfs2_write_super_or_backup(struct ocfs2_super *osb, 49int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
51 struct buffer_head *bh); 50 struct buffer_head *bh);
52 51
53#define OCFS2_BH_CACHED 1 52#define OCFS2_BH_IGNORE_CACHE 1
54#define OCFS2_BH_READAHEAD 8 53#define OCFS2_BH_READAHEAD 8
55 54
56static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, 55static inline int ocfs2_read_block(struct inode *inode, u64 off,
57 struct buffer_head **bh, int flags, 56 struct buffer_head **bh)
58 struct inode *inode)
59{ 57{
60 int status = 0; 58 int status = 0;
61 59
@@ -65,8 +63,7 @@ static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
65 goto bail; 63 goto bail;
66 } 64 }
67 65
68 status = ocfs2_read_blocks(osb, off, 1, bh, 66 status = ocfs2_read_blocks(inode, off, 1, bh, 0);
69 flags, inode);
70 67
71bail: 68bail:
72 return status; 69 return status;
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 23c732f27529..d8a0cb92cef6 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -109,6 +109,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
109 define_mask(CONN), 109 define_mask(CONN),
110 define_mask(QUORUM), 110 define_mask(QUORUM),
111 define_mask(EXPORT), 111 define_mask(EXPORT),
112 define_mask(XATTR),
112 define_mask(ERROR), 113 define_mask(ERROR),
113 define_mask(NOTICE), 114 define_mask(NOTICE),
114 define_mask(KTHREAD), 115 define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 597e064bb94f..57670c680471 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -112,6 +112,7 @@
112#define ML_CONN 0x0000000004000000ULL /* net connection management */ 112#define ML_CONN 0x0000000004000000ULL /* net connection management */
113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ 113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ 114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
115#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115/* bits that are infrequently given and frequently matched in the high word */ 116/* bits that are infrequently given and frequently matched in the high word */
116#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 117#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
117#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 118#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 9cce563fd627..026e6eb85187 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -82,6 +82,49 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
82 struct ocfs2_alloc_context *meta_ac, 82 struct ocfs2_alloc_context *meta_ac,
83 struct buffer_head **new_bh); 83 struct buffer_head **new_bh);
84 84
85static struct buffer_head *ocfs2_bread(struct inode *inode,
86 int block, int *err, int reada)
87{
88 struct buffer_head *bh = NULL;
89 int tmperr;
90 u64 p_blkno;
91 int readflags = 0;
92
93 if (reada)
94 readflags |= OCFS2_BH_READAHEAD;
95
96 if (((u64)block << inode->i_sb->s_blocksize_bits) >=
97 i_size_read(inode)) {
98 BUG_ON(!reada);
99 return NULL;
100 }
101
102 down_read(&OCFS2_I(inode)->ip_alloc_sem);
103 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
104 NULL);
105 up_read(&OCFS2_I(inode)->ip_alloc_sem);
106 if (tmperr < 0) {
107 mlog_errno(tmperr);
108 goto fail;
109 }
110
111 tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
112 if (tmperr < 0)
113 goto fail;
114
115 tmperr = 0;
116
117 *err = 0;
118 return bh;
119
120fail:
121 brelse(bh);
122 bh = NULL;
123
124 *err = -EIO;
125 return NULL;
126}
127
85/* 128/*
86 * bh passed here can be an inode block or a dir data block, depending 129 * bh passed here can be an inode block or a dir data block, depending
87 * on the inode inline data flag. 130 * on the inode inline data flag.
@@ -188,8 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
188 struct ocfs2_dinode *di; 231 struct ocfs2_dinode *di;
189 struct ocfs2_inline_data *data; 232 struct ocfs2_inline_data *data;
190 233
191 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, 234 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
192 &di_bh, OCFS2_BH_CACHED, dir);
193 if (ret) { 235 if (ret) {
194 mlog_errno(ret); 236 mlog_errno(ret);
195 goto out; 237 goto out;
@@ -260,14 +302,13 @@ restart:
260 } 302 }
261 if ((bh = bh_use[ra_ptr++]) == NULL) 303 if ((bh = bh_use[ra_ptr++]) == NULL)
262 goto next; 304 goto next;
263 wait_on_buffer(bh); 305 if (ocfs2_read_block(dir, block, &bh)) {
264 if (!buffer_uptodate(bh)) { 306 /* read error, skip block & hope for the best.
265 /* read error, skip block & hope for the best */ 307 * ocfs2_read_block() has released the bh. */
266 ocfs2_error(dir->i_sb, "reading directory %llu, " 308 ocfs2_error(dir->i_sb, "reading directory %llu, "
267 "offset %lu\n", 309 "offset %lu\n",
268 (unsigned long long)OCFS2_I(dir)->ip_blkno, 310 (unsigned long long)OCFS2_I(dir)->ip_blkno,
269 block); 311 block);
270 brelse(bh);
271 goto next; 312 goto next;
272 } 313 }
273 i = ocfs2_search_dirblock(bh, dir, name, namelen, 314 i = ocfs2_search_dirblock(bh, dir, name, namelen,
@@ -417,8 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
417 struct ocfs2_dinode *di; 458 struct ocfs2_dinode *di;
418 struct ocfs2_inline_data *data; 459 struct ocfs2_inline_data *data;
419 460
420 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, 461 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
421 &di_bh, OCFS2_BH_CACHED, dir);
422 if (ret) { 462 if (ret) {
423 mlog_errno(ret); 463 mlog_errno(ret);
424 goto out; 464 goto out;
@@ -596,8 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
596 struct ocfs2_inline_data *data; 636 struct ocfs2_inline_data *data;
597 struct ocfs2_dir_entry *de; 637 struct ocfs2_dir_entry *de;
598 638
599 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, 639 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
600 &di_bh, OCFS2_BH_CACHED, inode);
601 if (ret) { 640 if (ret) {
602 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", 641 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
603 (unsigned long long)OCFS2_I(inode)->ip_blkno); 642 (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -716,8 +755,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
716 for (i = ra_sectors >> (sb->s_blocksize_bits - 9); 755 for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
717 i > 0; i--) { 756 i > 0; i--) {
718 tmp = ocfs2_bread(inode, ++blk, &err, 1); 757 tmp = ocfs2_bread(inode, ++blk, &err, 1);
719 if (tmp) 758 brelse(tmp);
720 brelse(tmp);
721 } 759 }
722 last_ra_blk = blk; 760 last_ra_blk = blk;
723 ra_sectors = 8; 761 ra_sectors = 8;
@@ -899,10 +937,8 @@ int ocfs2_find_files_on_disk(const char *name,
899leave: 937leave:
900 if (status < 0) { 938 if (status < 0) {
901 *dirent = NULL; 939 *dirent = NULL;
902 if (*dirent_bh) { 940 brelse(*dirent_bh);
903 brelse(*dirent_bh); 941 *dirent_bh = NULL;
904 *dirent_bh = NULL;
905 }
906 } 942 }
907 943
908 mlog_exit(status); 944 mlog_exit(status);
@@ -951,8 +987,7 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
951 987
952 ret = 0; 988 ret = 0;
953bail: 989bail:
954 if (dirent_bh) 990 brelse(dirent_bh);
955 brelse(dirent_bh);
956 991
957 mlog_exit(ret); 992 mlog_exit(ret);
958 return ret; 993 return ret;
@@ -1127,8 +1162,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1127 1162
1128 status = 0; 1163 status = 0;
1129bail: 1164bail:
1130 if (new_bh) 1165 brelse(new_bh);
1131 brelse(new_bh);
1132 1166
1133 mlog_exit(status); 1167 mlog_exit(status);
1134 return status; 1168 return status;
@@ -1192,6 +1226,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1192 struct buffer_head *dirdata_bh = NULL; 1226 struct buffer_head *dirdata_bh = NULL;
1193 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1227 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1194 handle_t *handle; 1228 handle_t *handle;
1229 struct ocfs2_extent_tree et;
1230
1231 ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
1195 1232
1196 alloc = ocfs2_clusters_for_bytes(sb, bytes); 1233 alloc = ocfs2_clusters_for_bytes(sb, bytes);
1197 1234
@@ -1305,8 +1342,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1305 * This should never fail as our extent list is empty and all 1342 * This should never fail as our extent list is empty and all
1306 * related blocks have been journaled already. 1343 * related blocks have been journaled already.
1307 */ 1344 */
1308 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0, 1345 ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
1309 NULL); 1346 0, NULL);
1310 if (ret) { 1347 if (ret) {
1311 mlog_errno(ret); 1348 mlog_errno(ret);
1312 goto out_commit; 1349 goto out_commit;
@@ -1337,8 +1374,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1337 } 1374 }
1338 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); 1375 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
1339 1376
1340 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno, 1377 ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
1341 len, 0, NULL); 1378 blkno, len, 0, NULL);
1342 if (ret) { 1379 if (ret) {
1343 mlog_errno(ret); 1380 mlog_errno(ret);
1344 goto out_commit; 1381 goto out_commit;
@@ -1383,9 +1420,9 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1383 if (extend) { 1420 if (extend) {
1384 u32 offset = OCFS2_I(dir)->ip_clusters; 1421 u32 offset = OCFS2_I(dir)->ip_clusters;
1385 1422
1386 status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, 1423 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1387 1, 0, parent_fe_bh, handle, 1424 1, 0, parent_fe_bh, handle,
1388 data_ac, meta_ac, NULL); 1425 data_ac, meta_ac, NULL);
1389 BUG_ON(status == -EAGAIN); 1426 BUG_ON(status == -EAGAIN);
1390 if (status < 0) { 1427 if (status < 0) {
1391 mlog_errno(status); 1428 mlog_errno(status);
@@ -1430,12 +1467,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1430 int credits, num_free_extents, drop_alloc_sem = 0; 1467 int credits, num_free_extents, drop_alloc_sem = 0;
1431 loff_t dir_i_size; 1468 loff_t dir_i_size;
1432 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 1469 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1470 struct ocfs2_extent_list *el = &fe->id2.i_list;
1433 struct ocfs2_alloc_context *data_ac = NULL; 1471 struct ocfs2_alloc_context *data_ac = NULL;
1434 struct ocfs2_alloc_context *meta_ac = NULL; 1472 struct ocfs2_alloc_context *meta_ac = NULL;
1435 handle_t *handle = NULL; 1473 handle_t *handle = NULL;
1436 struct buffer_head *new_bh = NULL; 1474 struct buffer_head *new_bh = NULL;
1437 struct ocfs2_dir_entry * de; 1475 struct ocfs2_dir_entry * de;
1438 struct super_block *sb = osb->sb; 1476 struct super_block *sb = osb->sb;
1477 struct ocfs2_extent_tree et;
1439 1478
1440 mlog_entry_void(); 1479 mlog_entry_void();
1441 1480
@@ -1479,7 +1518,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1479 spin_lock(&OCFS2_I(dir)->ip_lock); 1518 spin_lock(&OCFS2_I(dir)->ip_lock);
1480 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { 1519 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
1481 spin_unlock(&OCFS2_I(dir)->ip_lock); 1520 spin_unlock(&OCFS2_I(dir)->ip_lock);
1482 num_free_extents = ocfs2_num_free_extents(osb, dir, fe); 1521 ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
1522 num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
1483 if (num_free_extents < 0) { 1523 if (num_free_extents < 0) {
1484 status = num_free_extents; 1524 status = num_free_extents;
1485 mlog_errno(status); 1525 mlog_errno(status);
@@ -1487,7 +1527,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1487 } 1527 }
1488 1528
1489 if (!num_free_extents) { 1529 if (!num_free_extents) {
1490 status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); 1530 status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
1491 if (status < 0) { 1531 if (status < 0) {
1492 if (status != -ENOSPC) 1532 if (status != -ENOSPC)
1493 mlog_errno(status); 1533 mlog_errno(status);
@@ -1502,7 +1542,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1502 goto bail; 1542 goto bail;
1503 } 1543 }
1504 1544
1505 credits = ocfs2_calc_extend_credits(sb, fe, 1); 1545 credits = ocfs2_calc_extend_credits(sb, el, 1);
1506 } else { 1546 } else {
1507 spin_unlock(&OCFS2_I(dir)->ip_lock); 1547 spin_unlock(&OCFS2_I(dir)->ip_lock);
1508 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 1548 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
@@ -1568,8 +1608,7 @@ bail:
1568 if (meta_ac) 1608 if (meta_ac)
1569 ocfs2_free_alloc_context(meta_ac); 1609 ocfs2_free_alloc_context(meta_ac);
1570 1610
1571 if (new_bh) 1611 brelse(new_bh);
1572 brelse(new_bh);
1573 1612
1574 mlog_exit(status); 1613 mlog_exit(status);
1575 return status; 1614 return status;
@@ -1696,8 +1735,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1696 1735
1697 status = 0; 1736 status = 0;
1698bail: 1737bail:
1699 if (bh) 1738 brelse(bh);
1700 brelse(bh);
1701 1739
1702 mlog_exit(status); 1740 mlog_exit(status);
1703 return status; 1741 return status;
@@ -1756,7 +1794,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
1756 *ret_de_bh = bh; 1794 *ret_de_bh = bh;
1757 bh = NULL; 1795 bh = NULL;
1758out: 1796out:
1759 if (bh) 1797 brelse(bh);
1760 brelse(bh);
1761 return ret; 1798 return ret;
1762} 1799}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index eae3d643a5e4..ec684426034b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2024,8 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2024 } else { 2024 } else {
2025 /* Boo, we have to go to disk. */ 2025 /* Boo, we have to go to disk. */
2026 /* read bh, cast, ocfs2_refresh_inode */ 2026 /* read bh, cast, ocfs2_refresh_inode */
2027 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, 2027 status = ocfs2_read_block(inode, oi->ip_blkno, bh);
2028 bh, OCFS2_BH_CACHED, inode);
2029 if (status < 0) { 2028 if (status < 0) {
2030 mlog_errno(status); 2029 mlog_errno(status);
2031 goto bail_refresh; 2030 goto bail_refresh;
@@ -2086,11 +2085,7 @@ static int ocfs2_assign_bh(struct inode *inode,
2086 return 0; 2085 return 0;
2087 } 2086 }
2088 2087
2089 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 2088 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
2090 OCFS2_I(inode)->ip_blkno,
2091 ret_bh,
2092 OCFS2_BH_CACHED,
2093 inode);
2094 if (status < 0) 2089 if (status < 0)
2095 mlog_errno(status); 2090 mlog_errno(status);
2096 2091
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index aed268e80b49..2baedac58234 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,8 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
293 struct ocfs2_extent_block *eb; 293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el; 294 struct ocfs2_extent_list *el;
295 295
296 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk, 296 ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
297 &eb_bh, OCFS2_BH_CACHED, inode);
298 if (ret) { 297 if (ret) {
299 mlog_errno(ret); 298 mlog_errno(ret);
300 goto out; 299 goto out;
@@ -382,9 +381,9 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
382 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 381 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
383 goto no_more_extents; 382 goto no_more_extents;
384 383
385 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 384 ret = ocfs2_read_block(inode,
386 le64_to_cpu(eb->h_next_leaf_blk), 385 le64_to_cpu(eb->h_next_leaf_blk),
387 &next_eb_bh, OCFS2_BH_CACHED, inode); 386 &next_eb_bh);
388 if (ret) { 387 if (ret) {
389 mlog_errno(ret); 388 mlog_errno(ret);
390 goto out; 389 goto out;
@@ -551,6 +550,66 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
551 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; 550 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
552} 551}
553 552
553int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
554 u32 *p_cluster, u32 *num_clusters,
555 struct ocfs2_extent_list *el)
556{
557 int ret = 0, i;
558 struct buffer_head *eb_bh = NULL;
559 struct ocfs2_extent_block *eb;
560 struct ocfs2_extent_rec *rec;
561 u32 coff;
562
563 if (el->l_tree_depth) {
564 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
565 if (ret) {
566 mlog_errno(ret);
567 goto out;
568 }
569
570 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
571 el = &eb->h_list;
572
573 if (el->l_tree_depth) {
574 ocfs2_error(inode->i_sb,
575 "Inode %lu has non zero tree depth in "
576 "xattr leaf block %llu\n", inode->i_ino,
577 (unsigned long long)eb_bh->b_blocknr);
578 ret = -EROFS;
579 goto out;
580 }
581 }
582
583 i = ocfs2_search_extent_list(el, v_cluster);
584 if (i == -1) {
585 ret = -EROFS;
586 mlog_errno(ret);
587 goto out;
588 } else {
589 rec = &el->l_recs[i];
590 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
591
592 if (!rec->e_blkno) {
593 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
594 "record (%u, %u, 0) in xattr", inode->i_ino,
595 le32_to_cpu(rec->e_cpos),
596 ocfs2_rec_clusters(el, rec));
597 ret = -EROFS;
598 goto out;
599 }
600 coff = v_cluster - le32_to_cpu(rec->e_cpos);
601 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
602 le64_to_cpu(rec->e_blkno));
603 *p_cluster = *p_cluster + coff;
604 if (num_clusters)
605 *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
606 }
607out:
608 if (eb_bh)
609 brelse(eb_bh);
610 return ret;
611}
612
554int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 613int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
555 u32 *p_cluster, u32 *num_clusters, 614 u32 *p_cluster, u32 *num_clusters,
556 unsigned int *extent_flags) 615 unsigned int *extent_flags)
@@ -571,8 +630,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
571 if (ret == 0) 630 if (ret == 0)
572 goto out; 631 goto out;
573 632
574 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, 633 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
575 &di_bh, OCFS2_BH_CACHED, inode);
576 if (ret) { 634 if (ret) {
577 mlog_errno(ret); 635 mlog_errno(ret);
578 goto out; 636 goto out;
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1b97490e1ea8..1c4aa8b06f34 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -53,4 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
54 u64 map_start, u64 map_len); 54 u64 map_start, u64 map_len);
55 55
56int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
57 u32 *p_cluster, u32 *num_clusters,
58 struct ocfs2_extent_list *el);
59
56#endif /* _EXTENT_MAP_H */ 60#endif /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ed38796052d2..8d3225a78073 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -55,6 +55,7 @@
55#include "mmap.h" 55#include "mmap.h"
56#include "suballoc.h" 56#include "suballoc.h"
57#include "super.h" 57#include "super.h"
58#include "xattr.h"
58 59
59#include "buffer_head_io.h" 60#include "buffer_head_io.h"
60 61
@@ -184,7 +185,7 @@ static int ocfs2_sync_file(struct file *file,
184 goto bail; 185 goto bail;
185 186
186 journal = osb->journal->j_journal; 187 journal = osb->journal->j_journal;
187 err = journal_force_commit(journal); 188 err = jbd2_journal_force_commit(journal);
188 189
189bail: 190bail:
190 mlog_exit(err); 191 mlog_exit(err);
@@ -488,7 +489,7 @@ bail:
488} 489}
489 490
490/* 491/*
491 * extend allocation only here. 492 * extend file allocation only here.
492 * we'll update all the disk stuff, and oip->alloc_size 493 * we'll update all the disk stuff, and oip->alloc_size
493 * 494 *
494 * expect stuff to be locked, a transaction started and enough data / 495 * expect stuff to be locked, a transaction started and enough data /
@@ -497,189 +498,25 @@ bail:
497 * Will return -EAGAIN, and a reason if a restart is needed. 498 * Will return -EAGAIN, and a reason if a restart is needed.
498 * If passed in, *reason will always be set, even in error. 499 * If passed in, *reason will always be set, even in error.
499 */ 500 */
500int ocfs2_do_extend_allocation(struct ocfs2_super *osb, 501int ocfs2_add_inode_data(struct ocfs2_super *osb,
501 struct inode *inode, 502 struct inode *inode,
502 u32 *logical_offset, 503 u32 *logical_offset,
503 u32 clusters_to_add, 504 u32 clusters_to_add,
504 int mark_unwritten, 505 int mark_unwritten,
505 struct buffer_head *fe_bh, 506 struct buffer_head *fe_bh,
506 handle_t *handle, 507 handle_t *handle,
507 struct ocfs2_alloc_context *data_ac, 508 struct ocfs2_alloc_context *data_ac,
508 struct ocfs2_alloc_context *meta_ac, 509 struct ocfs2_alloc_context *meta_ac,
509 enum ocfs2_alloc_restarted *reason_ret) 510 enum ocfs2_alloc_restarted *reason_ret)
510{ 511{
511 int status = 0; 512 int ret;
512 int free_extents; 513 struct ocfs2_extent_tree et;
513 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
514 enum ocfs2_alloc_restarted reason = RESTART_NONE;
515 u32 bit_off, num_bits;
516 u64 block;
517 u8 flags = 0;
518
519 BUG_ON(!clusters_to_add);
520
521 if (mark_unwritten)
522 flags = OCFS2_EXT_UNWRITTEN;
523
524 free_extents = ocfs2_num_free_extents(osb, inode, fe);
525 if (free_extents < 0) {
526 status = free_extents;
527 mlog_errno(status);
528 goto leave;
529 }
530
531 /* there are two cases which could cause us to EAGAIN in the
532 * we-need-more-metadata case:
533 * 1) we haven't reserved *any*
534 * 2) we are so fragmented, we've needed to add metadata too
535 * many times. */
536 if (!free_extents && !meta_ac) {
537 mlog(0, "we haven't reserved any metadata!\n");
538 status = -EAGAIN;
539 reason = RESTART_META;
540 goto leave;
541 } else if ((!free_extents)
542 && (ocfs2_alloc_context_bits_left(meta_ac)
543 < ocfs2_extend_meta_needed(fe))) {
544 mlog(0, "filesystem is really fragmented...\n");
545 status = -EAGAIN;
546 reason = RESTART_META;
547 goto leave;
548 }
549
550 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
551 clusters_to_add, &bit_off, &num_bits);
552 if (status < 0) {
553 if (status != -ENOSPC)
554 mlog_errno(status);
555 goto leave;
556 }
557
558 BUG_ON(num_bits > clusters_to_add);
559
560 /* reserve our write early -- insert_extent may update the inode */
561 status = ocfs2_journal_access(handle, inode, fe_bh,
562 OCFS2_JOURNAL_ACCESS_WRITE);
563 if (status < 0) {
564 mlog_errno(status);
565 goto leave;
566 }
567
568 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
569 mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
570 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
571 status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
572 *logical_offset, block, num_bits,
573 flags, meta_ac);
574 if (status < 0) {
575 mlog_errno(status);
576 goto leave;
577 }
578
579 status = ocfs2_journal_dirty(handle, fe_bh);
580 if (status < 0) {
581 mlog_errno(status);
582 goto leave;
583 }
584
585 clusters_to_add -= num_bits;
586 *logical_offset += num_bits;
587
588 if (clusters_to_add) {
589 mlog(0, "need to alloc once more, clusters = %u, wanted = "
590 "%u\n", fe->i_clusters, clusters_to_add);
591 status = -EAGAIN;
592 reason = RESTART_TRANS;
593 }
594
595leave:
596 mlog_exit(status);
597 if (reason_ret)
598 *reason_ret = reason;
599 return status;
600}
601
602/*
603 * For a given allocation, determine which allocators will need to be
604 * accessed, and lock them, reserving the appropriate number of bits.
605 *
606 * Sparse file systems call this from ocfs2_write_begin_nolock()
607 * and ocfs2_allocate_unwritten_extents().
608 *
609 * File systems which don't support holes call this from
610 * ocfs2_extend_allocation().
611 */
612int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
613 u32 clusters_to_add, u32 extents_to_split,
614 struct ocfs2_alloc_context **data_ac,
615 struct ocfs2_alloc_context **meta_ac)
616{
617 int ret = 0, num_free_extents;
618 unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
619 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
620
621 *meta_ac = NULL;
622 if (data_ac)
623 *data_ac = NULL;
624
625 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
626
627 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
628 "clusters_to_add = %u, extents_to_split = %u\n",
629 (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
630 le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
631
632 num_free_extents = ocfs2_num_free_extents(osb, inode, di);
633 if (num_free_extents < 0) {
634 ret = num_free_extents;
635 mlog_errno(ret);
636 goto out;
637 }
638
639 /*
640 * Sparse allocation file systems need to be more conservative
641 * with reserving room for expansion - the actual allocation
642 * happens while we've got a journal handle open so re-taking
643 * a cluster lock (because we ran out of room for another
644 * extent) will violate ordering rules.
645 *
646 * Most of the time we'll only be seeing this 1 cluster at a time
647 * anyway.
648 *
649 * Always lock for any unwritten extents - we might want to
650 * add blocks during a split.
651 */
652 if (!num_free_extents ||
653 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
654 ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
655 if (ret < 0) {
656 if (ret != -ENOSPC)
657 mlog_errno(ret);
658 goto out;
659 }
660 }
661
662 if (clusters_to_add == 0)
663 goto out;
664
665 ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
666 if (ret < 0) {
667 if (ret != -ENOSPC)
668 mlog_errno(ret);
669 goto out;
670 }
671
672out:
673 if (ret) {
674 if (*meta_ac) {
675 ocfs2_free_alloc_context(*meta_ac);
676 *meta_ac = NULL;
677 }
678 514
679 /* 515 ocfs2_init_dinode_extent_tree(&et, inode, fe_bh);
680 * We cannot have an error and a non null *data_ac. 516 ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset,
681 */ 517 clusters_to_add, mark_unwritten,
682 } 518 &et, handle,
519 data_ac, meta_ac, reason_ret);
683 520
684 return ret; 521 return ret;
685} 522}
@@ -698,6 +535,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
698 struct ocfs2_alloc_context *meta_ac = NULL; 535 struct ocfs2_alloc_context *meta_ac = NULL;
699 enum ocfs2_alloc_restarted why; 536 enum ocfs2_alloc_restarted why;
700 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
538 struct ocfs2_extent_tree et;
701 539
702 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); 540 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
703 541
@@ -707,8 +545,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
707 */ 545 */
708 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); 546 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
709 547
710 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 548 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
711 OCFS2_BH_CACHED, inode);
712 if (status < 0) { 549 if (status < 0) {
713 mlog_errno(status); 550 mlog_errno(status);
714 goto leave; 551 goto leave;
@@ -724,14 +561,21 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
724restart_all: 561restart_all:
725 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); 562 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
726 563
727 status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, 564 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
728 &meta_ac); 565 "clusters_to_add = %u\n",
566 (unsigned long long)OCFS2_I(inode)->ip_blkno,
567 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
568 clusters_to_add);
569 ocfs2_init_dinode_extent_tree(&et, inode, bh);
570 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
571 &data_ac, &meta_ac);
729 if (status) { 572 if (status) {
730 mlog_errno(status); 573 mlog_errno(status);
731 goto leave; 574 goto leave;
732 } 575 }
733 576
734 credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); 577 credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
578 clusters_to_add);
735 handle = ocfs2_start_trans(osb, credits); 579 handle = ocfs2_start_trans(osb, credits);
736 if (IS_ERR(handle)) { 580 if (IS_ERR(handle)) {
737 status = PTR_ERR(handle); 581 status = PTR_ERR(handle);
@@ -753,16 +597,16 @@ restarted_transaction:
753 597
754 prev_clusters = OCFS2_I(inode)->ip_clusters; 598 prev_clusters = OCFS2_I(inode)->ip_clusters;
755 599
756 status = ocfs2_do_extend_allocation(osb, 600 status = ocfs2_add_inode_data(osb,
757 inode, 601 inode,
758 &logical_start, 602 &logical_start,
759 clusters_to_add, 603 clusters_to_add,
760 mark_unwritten, 604 mark_unwritten,
761 bh, 605 bh,
762 handle, 606 handle,
763 data_ac, 607 data_ac,
764 meta_ac, 608 meta_ac,
765 &why); 609 &why);
766 if ((status < 0) && (status != -EAGAIN)) { 610 if ((status < 0) && (status != -EAGAIN)) {
767 if (status != -ENOSPC) 611 if (status != -ENOSPC)
768 mlog_errno(status); 612 mlog_errno(status);
@@ -789,7 +633,7 @@ restarted_transaction:
789 mlog(0, "restarting transaction.\n"); 633 mlog(0, "restarting transaction.\n");
790 /* TODO: This can be more intelligent. */ 634 /* TODO: This can be more intelligent. */
791 credits = ocfs2_calc_extend_credits(osb->sb, 635 credits = ocfs2_calc_extend_credits(osb->sb,
792 fe, 636 &fe->id2.i_list,
793 clusters_to_add); 637 clusters_to_add);
794 status = ocfs2_extend_trans(handle, credits); 638 status = ocfs2_extend_trans(handle, credits);
795 if (status < 0) { 639 if (status < 0) {
@@ -826,10 +670,8 @@ leave:
826 restart_func = 0; 670 restart_func = 0;
827 goto restart_all; 671 goto restart_all;
828 } 672 }
829 if (bh) { 673 brelse(bh);
830 brelse(bh); 674 bh = NULL;
831 bh = NULL;
832 }
833 675
834 mlog_exit(status); 676 mlog_exit(status);
835 return status; 677 return status;
@@ -1096,9 +938,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1096 goto bail_unlock; 938 goto bail_unlock;
1097 } 939 }
1098 940
1099 if (i_size_read(inode) > attr->ia_size) 941 if (i_size_read(inode) > attr->ia_size) {
942 if (ocfs2_should_order_data(inode)) {
943 status = ocfs2_begin_ordered_truncate(inode,
944 attr->ia_size);
945 if (status)
946 goto bail_unlock;
947 }
1100 status = ocfs2_truncate_file(inode, bh, attr->ia_size); 948 status = ocfs2_truncate_file(inode, bh, attr->ia_size);
1101 else 949 } else
1102 status = ocfs2_extend_file(inode, bh, attr->ia_size); 950 status = ocfs2_extend_file(inode, bh, attr->ia_size);
1103 if (status < 0) { 951 if (status < 0) {
1104 if (status != -ENOSPC) 952 if (status != -ENOSPC)
@@ -1140,8 +988,7 @@ bail_unlock_rw:
1140 if (size_change) 988 if (size_change)
1141 ocfs2_rw_unlock(inode, 1); 989 ocfs2_rw_unlock(inode, 1);
1142bail: 990bail:
1143 if (bh) 991 brelse(bh);
1144 brelse(bh);
1145 992
1146 mlog_exit(status); 993 mlog_exit(status);
1147 return status; 994 return status;
@@ -1284,8 +1131,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
1284 struct buffer_head *bh = NULL; 1131 struct buffer_head *bh = NULL;
1285 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1132 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1286 1133
1287 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 1134 ret = ocfs2_read_block(inode, oi->ip_blkno, &bh);
1288 oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
1289 if (ret < 0) { 1135 if (ret < 0) {
1290 mlog_errno(ret); 1136 mlog_errno(ret);
1291 goto out; 1137 goto out;
@@ -1311,9 +1157,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1311 struct buffer_head *di_bh = NULL; 1157 struct buffer_head *di_bh = NULL;
1312 1158
1313 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1159 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1314 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 1160 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno,
1315 OCFS2_I(inode)->ip_blkno, &di_bh, 1161 &di_bh);
1316 OCFS2_BH_CACHED, inode);
1317 if (ret) { 1162 if (ret) {
1318 mlog_errno(ret); 1163 mlog_errno(ret);
1319 goto out; 1164 goto out;
@@ -1394,8 +1239,11 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
1394 handle_t *handle; 1239 handle_t *handle;
1395 struct ocfs2_alloc_context *meta_ac = NULL; 1240 struct ocfs2_alloc_context *meta_ac = NULL;
1396 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1241 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1242 struct ocfs2_extent_tree et;
1397 1243
1398 ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); 1244 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
1245
1246 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
1399 if (ret) { 1247 if (ret) {
1400 mlog_errno(ret); 1248 mlog_errno(ret);
1401 return ret; 1249 return ret;
@@ -1425,7 +1273,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
1425 goto out; 1273 goto out;
1426 } 1274 }
1427 1275
1428 ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, 1276 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
1429 dealloc); 1277 dealloc);
1430 if (ret) { 1278 if (ret) {
1431 mlog_errno(ret); 1279 mlog_errno(ret);
@@ -2040,7 +1888,7 @@ out_dio:
2040 */ 1888 */
2041 if (old_size != i_size_read(inode) || 1889 if (old_size != i_size_read(inode) ||
2042 old_clusters != OCFS2_I(inode)->ip_clusters) { 1890 old_clusters != OCFS2_I(inode)->ip_clusters) {
2043 ret = journal_force_commit(osb->journal->j_journal); 1891 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2044 if (ret < 0) 1892 if (ret < 0)
2045 written = ret; 1893 written = ret;
2046 } 1894 }
@@ -2227,6 +2075,10 @@ const struct inode_operations ocfs2_file_iops = {
2227 .setattr = ocfs2_setattr, 2075 .setattr = ocfs2_setattr,
2228 .getattr = ocfs2_getattr, 2076 .getattr = ocfs2_getattr,
2229 .permission = ocfs2_permission, 2077 .permission = ocfs2_permission,
2078 .setxattr = generic_setxattr,
2079 .getxattr = generic_getxattr,
2080 .listxattr = ocfs2_listxattr,
2081 .removexattr = generic_removexattr,
2230 .fallocate = ocfs2_fallocate, 2082 .fallocate = ocfs2_fallocate,
2231 .fiemap = ocfs2_fiemap, 2083 .fiemap = ocfs2_fiemap,
2232}; 2084};
@@ -2237,6 +2089,10 @@ const struct inode_operations ocfs2_special_file_iops = {
2237 .permission = ocfs2_permission, 2089 .permission = ocfs2_permission,
2238}; 2090};
2239 2091
2092/*
2093 * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
2094 * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
2095 */
2240const struct file_operations ocfs2_fops = { 2096const struct file_operations ocfs2_fops = {
2241 .llseek = generic_file_llseek, 2097 .llseek = generic_file_llseek,
2242 .read = do_sync_read, 2098 .read = do_sync_read,
@@ -2251,6 +2107,7 @@ const struct file_operations ocfs2_fops = {
2251#ifdef CONFIG_COMPAT 2107#ifdef CONFIG_COMPAT
2252 .compat_ioctl = ocfs2_compat_ioctl, 2108 .compat_ioctl = ocfs2_compat_ioctl,
2253#endif 2109#endif
2110 .lock = ocfs2_lock,
2254 .flock = ocfs2_flock, 2111 .flock = ocfs2_flock,
2255 .splice_read = ocfs2_file_splice_read, 2112 .splice_read = ocfs2_file_splice_read,
2256 .splice_write = ocfs2_file_splice_write, 2113 .splice_write = ocfs2_file_splice_write,
@@ -2267,5 +2124,51 @@ const struct file_operations ocfs2_dops = {
2267#ifdef CONFIG_COMPAT 2124#ifdef CONFIG_COMPAT
2268 .compat_ioctl = ocfs2_compat_ioctl, 2125 .compat_ioctl = ocfs2_compat_ioctl,
2269#endif 2126#endif
2127 .lock = ocfs2_lock,
2128 .flock = ocfs2_flock,
2129};
2130
2131/*
2132 * POSIX-lockless variants of our file_operations.
2133 *
2134 * These will be used if the underlying cluster stack does not support
2135 * posix file locking, if the user passes the "localflocks" mount
2136 * option, or if we have a local-only fs.
2137 *
2138 * ocfs2_flock is in here because all stacks handle UNIX file locks,
2139 * so we still want it in the case of no stack support for
2140 * plocks. Internally, it will do the right thing when asked to ignore
2141 * the cluster.
2142 */
2143const struct file_operations ocfs2_fops_no_plocks = {
2144 .llseek = generic_file_llseek,
2145 .read = do_sync_read,
2146 .write = do_sync_write,
2147 .mmap = ocfs2_mmap,
2148 .fsync = ocfs2_sync_file,
2149 .release = ocfs2_file_release,
2150 .open = ocfs2_file_open,
2151 .aio_read = ocfs2_file_aio_read,
2152 .aio_write = ocfs2_file_aio_write,
2153 .unlocked_ioctl = ocfs2_ioctl,
2154#ifdef CONFIG_COMPAT
2155 .compat_ioctl = ocfs2_compat_ioctl,
2156#endif
2157 .flock = ocfs2_flock,
2158 .splice_read = ocfs2_file_splice_read,
2159 .splice_write = ocfs2_file_splice_write,
2160};
2161
2162const struct file_operations ocfs2_dops_no_plocks = {
2163 .llseek = generic_file_llseek,
2164 .read = generic_read_dir,
2165 .readdir = ocfs2_readdir,
2166 .fsync = ocfs2_sync_file,
2167 .release = ocfs2_dir_release,
2168 .open = ocfs2_dir_open,
2169 .unlocked_ioctl = ocfs2_ioctl,
2170#ifdef CONFIG_COMPAT
2171 .compat_ioctl = ocfs2_compat_ioctl,
2172#endif
2270 .flock = ocfs2_flock, 2173 .flock = ocfs2_flock,
2271}; 2174};
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 1e27b4d017ea..e92382cbca5f 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -28,9 +28,12 @@
28 28
29extern const struct file_operations ocfs2_fops; 29extern const struct file_operations ocfs2_fops;
30extern const struct file_operations ocfs2_dops; 30extern const struct file_operations ocfs2_dops;
31extern const struct file_operations ocfs2_fops_no_plocks;
32extern const struct file_operations ocfs2_dops_no_plocks;
31extern const struct inode_operations ocfs2_file_iops; 33extern const struct inode_operations ocfs2_file_iops;
32extern const struct inode_operations ocfs2_special_file_iops; 34extern const struct inode_operations ocfs2_special_file_iops;
33struct ocfs2_alloc_context; 35struct ocfs2_alloc_context;
36enum ocfs2_alloc_restarted;
34 37
35struct ocfs2_file_private { 38struct ocfs2_file_private {
36 struct file *fp_file; 39 struct file *fp_file;
@@ -38,27 +41,18 @@ struct ocfs2_file_private {
38 struct ocfs2_lock_res fp_flock; 41 struct ocfs2_lock_res fp_flock;
39}; 42};
40 43
41enum ocfs2_alloc_restarted { 44int ocfs2_add_inode_data(struct ocfs2_super *osb,
42 RESTART_NONE = 0, 45 struct inode *inode,
43 RESTART_TRANS, 46 u32 *logical_offset,
44 RESTART_META 47 u32 clusters_to_add,
45}; 48 int mark_unwritten,
46int ocfs2_do_extend_allocation(struct ocfs2_super *osb, 49 struct buffer_head *fe_bh,
47 struct inode *inode, 50 handle_t *handle,
48 u32 *logical_offset, 51 struct ocfs2_alloc_context *data_ac,
49 u32 clusters_to_add, 52 struct ocfs2_alloc_context *meta_ac,
50 int mark_unwritten, 53 enum ocfs2_alloc_restarted *reason_ret);
51 struct buffer_head *fe_bh,
52 handle_t *handle,
53 struct ocfs2_alloc_context *data_ac,
54 struct ocfs2_alloc_context *meta_ac,
55 enum ocfs2_alloc_restarted *reason_ret);
56int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 54int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
57 u64 zero_to); 55 u64 zero_to);
58int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
59 u32 clusters_to_add, u32 extents_to_split,
60 struct ocfs2_alloc_context **data_ac,
61 struct ocfs2_alloc_context **meta_ac);
62int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 56int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
63int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 57int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
64 struct kstat *stat); 58 struct kstat *stat);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7e9e4c79aec7..4903688f72a9 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -49,6 +49,7 @@
49#include "symlink.h" 49#include "symlink.h"
50#include "sysfile.h" 50#include "sysfile.h"
51#include "uptodate.h" 51#include "uptodate.h"
52#include "xattr.h"
52 53
53#include "buffer_head_io.h" 54#include "buffer_head_io.h"
54 55
@@ -219,6 +220,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
219 struct super_block *sb; 220 struct super_block *sb;
220 struct ocfs2_super *osb; 221 struct ocfs2_super *osb;
221 int status = -EINVAL; 222 int status = -EINVAL;
223 int use_plocks = 1;
222 224
223 mlog_entry("(0x%p, size:%llu)\n", inode, 225 mlog_entry("(0x%p, size:%llu)\n", inode,
224 (unsigned long long)le64_to_cpu(fe->i_size)); 226 (unsigned long long)le64_to_cpu(fe->i_size));
@@ -226,6 +228,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
226 sb = inode->i_sb; 228 sb = inode->i_sb;
227 osb = OCFS2_SB(sb); 229 osb = OCFS2_SB(sb);
228 230
231 if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
232 ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
233 use_plocks = 0;
234
229 /* this means that read_inode cannot create a superblock inode 235 /* this means that read_inode cannot create a superblock inode
230 * today. change if needed. */ 236 * today. change if needed. */
231 if (!OCFS2_IS_VALID_DINODE(fe) || 237 if (!OCFS2_IS_VALID_DINODE(fe) ||
@@ -295,13 +301,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
295 301
296 switch (inode->i_mode & S_IFMT) { 302 switch (inode->i_mode & S_IFMT) {
297 case S_IFREG: 303 case S_IFREG:
298 inode->i_fop = &ocfs2_fops; 304 if (use_plocks)
305 inode->i_fop = &ocfs2_fops;
306 else
307 inode->i_fop = &ocfs2_fops_no_plocks;
299 inode->i_op = &ocfs2_file_iops; 308 inode->i_op = &ocfs2_file_iops;
300 i_size_write(inode, le64_to_cpu(fe->i_size)); 309 i_size_write(inode, le64_to_cpu(fe->i_size));
301 break; 310 break;
302 case S_IFDIR: 311 case S_IFDIR:
303 inode->i_op = &ocfs2_dir_iops; 312 inode->i_op = &ocfs2_dir_iops;
304 inode->i_fop = &ocfs2_dops; 313 if (use_plocks)
314 inode->i_fop = &ocfs2_dops;
315 else
316 inode->i_fop = &ocfs2_dops_no_plocks;
305 i_size_write(inode, le64_to_cpu(fe->i_size)); 317 i_size_write(inode, le64_to_cpu(fe->i_size));
306 break; 318 break;
307 case S_IFLNK: 319 case S_IFLNK:
@@ -448,8 +460,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
448 } 460 }
449 } 461 }
450 462
451 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, 463 if (can_lock)
452 can_lock ? inode : NULL); 464 status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
465 OCFS2_BH_IGNORE_CACHE);
466 else
467 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
453 if (status < 0) { 468 if (status < 0) {
454 mlog_errno(status); 469 mlog_errno(status);
455 goto bail; 470 goto bail;
@@ -522,6 +537,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
522 * data and fast symlinks. 537 * data and fast symlinks.
523 */ 538 */
524 if (fe->i_clusters) { 539 if (fe->i_clusters) {
540 if (ocfs2_should_order_data(inode))
541 ocfs2_begin_ordered_truncate(inode, 0);
542
525 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 543 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
526 if (IS_ERR(handle)) { 544 if (IS_ERR(handle)) {
527 status = PTR_ERR(handle); 545 status = PTR_ERR(handle);
@@ -730,6 +748,13 @@ static int ocfs2_wipe_inode(struct inode *inode,
730 goto bail_unlock_dir; 748 goto bail_unlock_dir;
731 } 749 }
732 750
751 /*Free extended attribute resources associated with this inode.*/
752 status = ocfs2_xattr_remove(inode, di_bh);
753 if (status < 0) {
754 mlog_errno(status);
755 goto bail_unlock_dir;
756 }
757
733 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, 758 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
734 orphan_dir_bh); 759 orphan_dir_bh);
735 if (status < 0) 760 if (status < 0)
@@ -1081,6 +1106,8 @@ void ocfs2_clear_inode(struct inode *inode)
1081 oi->ip_last_trans = 0; 1106 oi->ip_last_trans = 0;
1082 oi->ip_dir_start_lookup = 0; 1107 oi->ip_dir_start_lookup = 0;
1083 oi->ip_blkno = 0ULL; 1108 oi->ip_blkno = 0ULL;
1109 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1110 &oi->ip_jinode);
1084 1111
1085bail: 1112bail:
1086 mlog_exit_void(); 1113 mlog_exit_void();
@@ -1107,58 +1134,6 @@ void ocfs2_drop_inode(struct inode *inode)
1107} 1134}
1108 1135
1109/* 1136/*
1110 * TODO: this should probably be merged into ocfs2_get_block
1111 *
1112 * However, you now need to pay attention to the cont_prepare_write()
1113 * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much
1114 * expects never to extend).
1115 */
1116struct buffer_head *ocfs2_bread(struct inode *inode,
1117 int block, int *err, int reada)
1118{
1119 struct buffer_head *bh = NULL;
1120 int tmperr;
1121 u64 p_blkno;
1122 int readflags = OCFS2_BH_CACHED;
1123
1124 if (reada)
1125 readflags |= OCFS2_BH_READAHEAD;
1126
1127 if (((u64)block << inode->i_sb->s_blocksize_bits) >=
1128 i_size_read(inode)) {
1129 BUG_ON(!reada);
1130 return NULL;
1131 }
1132
1133 down_read(&OCFS2_I(inode)->ip_alloc_sem);
1134 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
1135 NULL);
1136 up_read(&OCFS2_I(inode)->ip_alloc_sem);
1137 if (tmperr < 0) {
1138 mlog_errno(tmperr);
1139 goto fail;
1140 }
1141
1142 tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh,
1143 readflags, inode);
1144 if (tmperr < 0)
1145 goto fail;
1146
1147 tmperr = 0;
1148
1149 *err = 0;
1150 return bh;
1151
1152fail:
1153 if (bh) {
1154 brelse(bh);
1155 bh = NULL;
1156 }
1157 *err = -EIO;
1158 return NULL;
1159}
1160
1161/*
1162 * This is called from our getattr. 1137 * This is called from our getattr.
1163 */ 1138 */
1164int ocfs2_inode_revalidate(struct dentry *dentry) 1139int ocfs2_inode_revalidate(struct dentry *dentry)
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 390a85596aa0..2f37af9bcc4a 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -40,6 +40,9 @@ struct ocfs2_inode_info
40 /* protects allocation changes on this inode. */ 40 /* protects allocation changes on this inode. */
41 struct rw_semaphore ip_alloc_sem; 41 struct rw_semaphore ip_alloc_sem;
42 42
43 /* protects extended attribute changes on this inode */
44 struct rw_semaphore ip_xattr_sem;
45
43 /* These fields are protected by ip_lock */ 46 /* These fields are protected by ip_lock */
44 spinlock_t ip_lock; 47 spinlock_t ip_lock;
45 u32 ip_open_count; 48 u32 ip_open_count;
@@ -68,6 +71,7 @@ struct ocfs2_inode_info
68 struct ocfs2_extent_map ip_extent_map; 71 struct ocfs2_extent_map ip_extent_map;
69 72
70 struct inode vfs_inode; 73 struct inode vfs_inode;
74 struct jbd2_inode ip_jinode;
71}; 75};
72 76
73/* 77/*
@@ -113,8 +117,6 @@ extern struct kmem_cache *ocfs2_inode_cache;
113 117
114extern const struct address_space_operations ocfs2_aops; 118extern const struct address_space_operations ocfs2_aops;
115 119
116struct buffer_head *ocfs2_bread(struct inode *inode, int block,
117 int *err, int reada);
118void ocfs2_clear_inode(struct inode *inode); 120void ocfs2_clear_inode(struct inode *inode);
119void ocfs2_delete_inode(struct inode *inode); 121void ocfs2_delete_inode(struct inode *inode);
120void ocfs2_drop_inode(struct inode *inode); 122void ocfs2_drop_inode(struct inode *inode);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7b142f0ce995..9fcd36dcc9a0 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -102,8 +102,7 @@ bail_unlock:
102bail: 102bail:
103 mutex_unlock(&inode->i_mutex); 103 mutex_unlock(&inode->i_mutex);
104 104
105 if (bh) 105 brelse(bh);
106 brelse(bh);
107 106
108 mlog_exit(status); 107 mlog_exit(status);
109 return status; 108 return status;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c47bc2a809c2..81e40677eecb 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -215,9 +215,9 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
215 goto finally; 215 goto finally;
216 } 216 }
217 217
218 journal_lock_updates(journal->j_journal); 218 jbd2_journal_lock_updates(journal->j_journal);
219 status = journal_flush(journal->j_journal); 219 status = jbd2_journal_flush(journal->j_journal);
220 journal_unlock_updates(journal->j_journal); 220 jbd2_journal_unlock_updates(journal->j_journal);
221 if (status < 0) { 221 if (status < 0) {
222 up_write(&journal->j_trans_barrier); 222 up_write(&journal->j_trans_barrier);
223 mlog_errno(status); 223 mlog_errno(status);
@@ -264,7 +264,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
264 264
265 down_read(&osb->journal->j_trans_barrier); 265 down_read(&osb->journal->j_trans_barrier);
266 266
267 handle = journal_start(journal, max_buffs); 267 handle = jbd2_journal_start(journal, max_buffs);
268 if (IS_ERR(handle)) { 268 if (IS_ERR(handle)) {
269 up_read(&osb->journal->j_trans_barrier); 269 up_read(&osb->journal->j_trans_barrier);
270 270
@@ -290,7 +290,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
290 290
291 BUG_ON(!handle); 291 BUG_ON(!handle);
292 292
293 ret = journal_stop(handle); 293 ret = jbd2_journal_stop(handle);
294 if (ret < 0) 294 if (ret < 0)
295 mlog_errno(ret); 295 mlog_errno(ret);
296 296
@@ -304,7 +304,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
304 * transaction. extend_trans will either extend the current handle by 304 * transaction. extend_trans will either extend the current handle by
305 * nblocks, or commit it and start a new one with nblocks credits. 305 * nblocks, or commit it and start a new one with nblocks credits.
306 * 306 *
307 * This might call journal_restart() which will commit dirty buffers 307 * This might call jbd2_journal_restart() which will commit dirty buffers
308 * and then restart the transaction. Before calling 308 * and then restart the transaction. Before calling
309 * ocfs2_extend_trans(), any changed blocks should have been 309 * ocfs2_extend_trans(), any changed blocks should have been
310 * dirtied. After calling it, all blocks which need to be changed must 310 * dirtied. After calling it, all blocks which need to be changed must
@@ -332,7 +332,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
332#ifdef CONFIG_OCFS2_DEBUG_FS 332#ifdef CONFIG_OCFS2_DEBUG_FS
333 status = 1; 333 status = 1;
334#else 334#else
335 status = journal_extend(handle, nblocks); 335 status = jbd2_journal_extend(handle, nblocks);
336 if (status < 0) { 336 if (status < 0) {
337 mlog_errno(status); 337 mlog_errno(status);
338 goto bail; 338 goto bail;
@@ -340,8 +340,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
340#endif 340#endif
341 341
342 if (status > 0) { 342 if (status > 0) {
343 mlog(0, "journal_extend failed, trying journal_restart\n"); 343 mlog(0,
344 status = journal_restart(handle, nblocks); 344 "jbd2_journal_extend failed, trying "
345 "jbd2_journal_restart\n");
346 status = jbd2_journal_restart(handle, nblocks);
345 if (status < 0) { 347 if (status < 0) {
346 mlog_errno(status); 348 mlog_errno(status);
347 goto bail; 349 goto bail;
@@ -393,11 +395,11 @@ int ocfs2_journal_access(handle_t *handle,
393 switch (type) { 395 switch (type) {
394 case OCFS2_JOURNAL_ACCESS_CREATE: 396 case OCFS2_JOURNAL_ACCESS_CREATE:
395 case OCFS2_JOURNAL_ACCESS_WRITE: 397 case OCFS2_JOURNAL_ACCESS_WRITE:
396 status = journal_get_write_access(handle, bh); 398 status = jbd2_journal_get_write_access(handle, bh);
397 break; 399 break;
398 400
399 case OCFS2_JOURNAL_ACCESS_UNDO: 401 case OCFS2_JOURNAL_ACCESS_UNDO:
400 status = journal_get_undo_access(handle, bh); 402 status = jbd2_journal_get_undo_access(handle, bh);
401 break; 403 break;
402 404
403 default: 405 default:
@@ -422,7 +424,7 @@ int ocfs2_journal_dirty(handle_t *handle,
422 mlog_entry("(bh->b_blocknr=%llu)\n", 424 mlog_entry("(bh->b_blocknr=%llu)\n",
423 (unsigned long long)bh->b_blocknr); 425 (unsigned long long)bh->b_blocknr);
424 426
425 status = journal_dirty_metadata(handle, bh); 427 status = jbd2_journal_dirty_metadata(handle, bh);
426 if (status < 0) 428 if (status < 0)
427 mlog(ML_ERROR, "Could not dirty metadata buffer. " 429 mlog(ML_ERROR, "Could not dirty metadata buffer. "
428 "(bh->b_blocknr=%llu)\n", 430 "(bh->b_blocknr=%llu)\n",
@@ -432,6 +434,7 @@ int ocfs2_journal_dirty(handle_t *handle,
432 return status; 434 return status;
433} 435}
434 436
437#ifdef CONFIG_OCFS2_COMPAT_JBD
435int ocfs2_journal_dirty_data(handle_t *handle, 438int ocfs2_journal_dirty_data(handle_t *handle,
436 struct buffer_head *bh) 439 struct buffer_head *bh)
437{ 440{
@@ -443,8 +446,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
443 446
444 return err; 447 return err;
445} 448}
449#endif
446 450
447#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) 451#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
448 452
449void ocfs2_set_journal_params(struct ocfs2_super *osb) 453void ocfs2_set_journal_params(struct ocfs2_super *osb)
450{ 454{
@@ -457,9 +461,9 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
457 spin_lock(&journal->j_state_lock); 461 spin_lock(&journal->j_state_lock);
458 journal->j_commit_interval = commit_interval; 462 journal->j_commit_interval = commit_interval;
459 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) 463 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
460 journal->j_flags |= JFS_BARRIER; 464 journal->j_flags |= JBD2_BARRIER;
461 else 465 else
462 journal->j_flags &= ~JFS_BARRIER; 466 journal->j_flags &= ~JBD2_BARRIER;
463 spin_unlock(&journal->j_state_lock); 467 spin_unlock(&journal->j_state_lock);
464} 468}
465 469
@@ -524,14 +528,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
524 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); 528 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
525 529
526 /* call the kernels journal init function now */ 530 /* call the kernels journal init function now */
527 j_journal = journal_init_inode(inode); 531 j_journal = jbd2_journal_init_inode(inode);
528 if (j_journal == NULL) { 532 if (j_journal == NULL) {
529 mlog(ML_ERROR, "Linux journal layer error\n"); 533 mlog(ML_ERROR, "Linux journal layer error\n");
530 status = -EINVAL; 534 status = -EINVAL;
531 goto done; 535 goto done;
532 } 536 }
533 537
534 mlog(0, "Returned from journal_init_inode\n"); 538 mlog(0, "Returned from jbd2_journal_init_inode\n");
535 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); 539 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
536 540
537 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & 541 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
@@ -550,8 +554,7 @@ done:
550 if (status < 0) { 554 if (status < 0) {
551 if (inode_lock) 555 if (inode_lock)
552 ocfs2_inode_unlock(inode, 1); 556 ocfs2_inode_unlock(inode, 1);
553 if (bh != NULL) 557 brelse(bh);
554 brelse(bh);
555 if (inode) { 558 if (inode) {
556 OCFS2_I(inode)->ip_open_count--; 559 OCFS2_I(inode)->ip_open_count--;
557 iput(inode); 560 iput(inode);
@@ -639,7 +642,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
639 if (journal->j_state != OCFS2_JOURNAL_LOADED) 642 if (journal->j_state != OCFS2_JOURNAL_LOADED)
640 goto done; 643 goto done;
641 644
642 /* need to inc inode use count as journal_destroy will iput. */ 645 /* need to inc inode use count - jbd2_journal_destroy will iput. */
643 if (!igrab(inode)) 646 if (!igrab(inode))
644 BUG(); 647 BUG();
645 648
@@ -668,9 +671,9 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
668 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); 671 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
669 672
670 if (ocfs2_mount_local(osb)) { 673 if (ocfs2_mount_local(osb)) {
671 journal_lock_updates(journal->j_journal); 674 jbd2_journal_lock_updates(journal->j_journal);
672 status = journal_flush(journal->j_journal); 675 status = jbd2_journal_flush(journal->j_journal);
673 journal_unlock_updates(journal->j_journal); 676 jbd2_journal_unlock_updates(journal->j_journal);
674 if (status < 0) 677 if (status < 0)
675 mlog_errno(status); 678 mlog_errno(status);
676 } 679 }
@@ -686,7 +689,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
686 } 689 }
687 690
688 /* Shutdown the kernel journal system */ 691 /* Shutdown the kernel journal system */
689 journal_destroy(journal->j_journal); 692 jbd2_journal_destroy(journal->j_journal);
690 693
691 OCFS2_I(inode)->ip_open_count--; 694 OCFS2_I(inode)->ip_open_count--;
692 695
@@ -711,15 +714,15 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
711{ 714{
712 int olderr; 715 int olderr;
713 716
714 olderr = journal_errno(journal); 717 olderr = jbd2_journal_errno(journal);
715 if (olderr) { 718 if (olderr) {
716 mlog(ML_ERROR, "File system error %d recorded in " 719 mlog(ML_ERROR, "File system error %d recorded in "
717 "journal %u.\n", olderr, slot); 720 "journal %u.\n", olderr, slot);
718 mlog(ML_ERROR, "File system on device %s needs checking.\n", 721 mlog(ML_ERROR, "File system on device %s needs checking.\n",
719 sb->s_id); 722 sb->s_id);
720 723
721 journal_ack_err(journal); 724 jbd2_journal_ack_err(journal);
722 journal_clear_err(journal); 725 jbd2_journal_clear_err(journal);
723 } 726 }
724} 727}
725 728
@@ -734,7 +737,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
734 737
735 osb = journal->j_osb; 738 osb = journal->j_osb;
736 739
737 status = journal_load(journal->j_journal); 740 status = jbd2_journal_load(journal->j_journal);
738 if (status < 0) { 741 if (status < 0) {
739 mlog(ML_ERROR, "Failed to load journal!\n"); 742 mlog(ML_ERROR, "Failed to load journal!\n");
740 goto done; 743 goto done;
@@ -778,7 +781,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
778 781
779 BUG_ON(!journal); 782 BUG_ON(!journal);
780 783
781 status = journal_wipe(journal->j_journal, full); 784 status = jbd2_journal_wipe(journal->j_journal, full);
782 if (status < 0) { 785 if (status < 0) {
783 mlog_errno(status); 786 mlog_errno(status);
784 goto bail; 787 goto bail;
@@ -847,9 +850,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
847 850
848 /* We are reading journal data which should not 851 /* We are reading journal data which should not
849 * be put in the uptodate cache */ 852 * be put in the uptodate cache */
850 status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), 853 status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
851 p_blkno, p_blocks, bhs, 0, 854 p_blkno, p_blocks, bhs);
852 NULL);
853 if (status < 0) { 855 if (status < 0) {
854 mlog_errno(status); 856 mlog_errno(status);
855 goto bail; 857 goto bail;
@@ -865,8 +867,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
865 867
866bail: 868bail:
867 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) 869 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
868 if (bhs[i]) 870 brelse(bhs[i]);
869 brelse(bhs[i]);
870 mlog_exit(status); 871 mlog_exit(status);
871 return status; 872 return status;
872} 873}
@@ -1133,7 +1134,8 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1133 } 1134 }
1134 SET_INODE_JOURNAL(inode); 1135 SET_INODE_JOURNAL(inode);
1135 1136
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); 1137 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
1138 OCFS2_BH_IGNORE_CACHE);
1137 if (status < 0) { 1139 if (status < 0) {
1138 mlog_errno(status); 1140 mlog_errno(status);
1139 goto bail; 1141 goto bail;
@@ -1229,19 +1231,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1229 } 1231 }
1230 1232
1231 mlog(0, "calling journal_init_inode\n"); 1233 mlog(0, "calling journal_init_inode\n");
1232 journal = journal_init_inode(inode); 1234 journal = jbd2_journal_init_inode(inode);
1233 if (journal == NULL) { 1235 if (journal == NULL) {
1234 mlog(ML_ERROR, "Linux journal layer error\n"); 1236 mlog(ML_ERROR, "Linux journal layer error\n");
1235 status = -EIO; 1237 status = -EIO;
1236 goto done; 1238 goto done;
1237 } 1239 }
1238 1240
1239 status = journal_load(journal); 1241 status = jbd2_journal_load(journal);
1240 if (status < 0) { 1242 if (status < 0) {
1241 mlog_errno(status); 1243 mlog_errno(status);
1242 if (!igrab(inode)) 1244 if (!igrab(inode))
1243 BUG(); 1245 BUG();
1244 journal_destroy(journal); 1246 jbd2_journal_destroy(journal);
1245 goto done; 1247 goto done;
1246 } 1248 }
1247 1249
@@ -1249,9 +1251,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1249 1251
1250 /* wipe the journal */ 1252 /* wipe the journal */
1251 mlog(0, "flushing the journal.\n"); 1253 mlog(0, "flushing the journal.\n");
1252 journal_lock_updates(journal); 1254 jbd2_journal_lock_updates(journal);
1253 status = journal_flush(journal); 1255 status = jbd2_journal_flush(journal);
1254 journal_unlock_updates(journal); 1256 jbd2_journal_unlock_updates(journal);
1255 if (status < 0) 1257 if (status < 0)
1256 mlog_errno(status); 1258 mlog_errno(status);
1257 1259
@@ -1272,7 +1274,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1272 if (!igrab(inode)) 1274 if (!igrab(inode))
1273 BUG(); 1275 BUG();
1274 1276
1275 journal_destroy(journal); 1277 jbd2_journal_destroy(journal);
1276 1278
1277done: 1279done:
1278 /* drop the lock on this nodes journal */ 1280 /* drop the lock on this nodes journal */
@@ -1282,8 +1284,7 @@ done:
1282 if (inode) 1284 if (inode)
1283 iput(inode); 1285 iput(inode);
1284 1286
1285 if (bh) 1287 brelse(bh);
1286 brelse(bh);
1287 1288
1288 mlog_exit(status); 1289 mlog_exit(status);
1289 return status; 1290 return status;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2178ebffa05f..d4d14e9a3cea 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -27,7 +27,12 @@
27#define OCFS2_JOURNAL_H 27#define OCFS2_JOURNAL_H
28 28
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/jbd.h> 30#ifndef CONFIG_OCFS2_COMPAT_JBD
31# include <linux/jbd2.h>
32#else
33# include <linux/jbd.h>
34# include "ocfs2_jbd_compat.h"
35#endif
31 36
32enum ocfs2_journal_state { 37enum ocfs2_journal_state {
33 OCFS2_JOURNAL_FREE = 0, 38 OCFS2_JOURNAL_FREE = 0,
@@ -215,8 +220,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
215 * buffer. Will have to call ocfs2_journal_dirty once 220 * buffer. Will have to call ocfs2_journal_dirty once
216 * we've actually dirtied it. Type is one of . or . 221 * we've actually dirtied it. Type is one of . or .
217 * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. 222 * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
218 * ocfs2_journal_dirty_data - Indicate that a data buffer should go out before 223 * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
219 * the current handle commits. 224 * the current handle commits.
220 */ 225 */
221 226
222/* You must always start_trans with a number of buffs > 0, but it's 227/* You must always start_trans with a number of buffs > 0, but it's
@@ -268,8 +273,10 @@ int ocfs2_journal_access(handle_t *handle,
268 */ 273 */
269int ocfs2_journal_dirty(handle_t *handle, 274int ocfs2_journal_dirty(handle_t *handle,
270 struct buffer_head *bh); 275 struct buffer_head *bh);
276#ifdef CONFIG_OCFS2_COMPAT_JBD
271int ocfs2_journal_dirty_data(handle_t *handle, 277int ocfs2_journal_dirty_data(handle_t *handle,
272 struct buffer_head *bh); 278 struct buffer_head *bh);
279#endif
273 280
274/* 281/*
275 * Credit Macros: 282 * Credit Macros:
@@ -283,6 +290,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
283/* simple file updates like chmod, etc. */ 290/* simple file updates like chmod, etc. */
284#define OCFS2_INODE_UPDATE_CREDITS 1 291#define OCFS2_INODE_UPDATE_CREDITS 1
285 292
293/* extended attribute block update */
294#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
295
286/* group extend. inode update and last group update. */ 296/* group extend. inode update and last group update. */
287#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 297#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
288 298
@@ -340,11 +350,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
340#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ 350#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
341 + OCFS2_UNLINK_CREDITS) 351 + OCFS2_UNLINK_CREDITS)
342 352
353/* global bitmap dinode, group desc., relinked group,
354 * suballocator dinode, group desc., relinked group,
355 * dinode, xattr block */
356#define OCFS2_XATTR_BLOCK_CREATE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + \
357 + OCFS2_INODE_UPDATE_CREDITS \
358 + OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
359
360/*
361 * Please note that the caller must make sure that root_el is the root
362 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
363 * the result may be wrong.
364 */
343static inline int ocfs2_calc_extend_credits(struct super_block *sb, 365static inline int ocfs2_calc_extend_credits(struct super_block *sb,
344 struct ocfs2_dinode *fe, 366 struct ocfs2_extent_list *root_el,
345 u32 bits_wanted) 367 u32 bits_wanted)
346{ 368{
347 int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks; 369 int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks;
348 370
349 /* bitmap dinode, group desc. + relinked group. */ 371 /* bitmap dinode, group desc. + relinked group. */
350 bitmap_blocks = OCFS2_SUBALLOC_ALLOC; 372 bitmap_blocks = OCFS2_SUBALLOC_ALLOC;
@@ -355,16 +377,16 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
355 * however many metadata chunks needed * a remaining suballoc 377 * however many metadata chunks needed * a remaining suballoc
356 * alloc. */ 378 * alloc. */
357 sysfile_bitmap_blocks = 1 + 379 sysfile_bitmap_blocks = 1 +
358 (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe); 380 (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(root_el);
359 381
360 /* this does not include *new* metadata blocks, which are 382 /* this does not include *new* metadata blocks, which are
361 * accounted for in sysfile_bitmap_blocks. fe + 383 * accounted for in sysfile_bitmap_blocks. root_el +
362 * prev. last_eb_blk + blocks along edge of tree. 384 * prev. last_eb_blk + blocks along edge of tree.
363 * calc_symlink_credits passes because we just need 1 385 * calc_symlink_credits passes because we just need 1
364 * credit for the dinode there. */ 386 * credit for the dinode there. */
365 dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth); 387 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
366 388
367 return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks; 389 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
368} 390}
369 391
370static inline int ocfs2_calc_symlink_credits(struct super_block *sb) 392static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
@@ -415,4 +437,16 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
415 return credits; 437 return credits;
416} 438}
417 439
440static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
441{
442 return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode);
443}
444
445static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
446 loff_t new_size)
447{
448 return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
449 new_size);
450}
451
418#endif /* OCFS2_JOURNAL_H */ 452#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 28e492e4ec88..687b28713c32 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -28,6 +28,7 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31#include <linux/debugfs.h>
31 32
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC 33#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
@@ -47,8 +48,6 @@
47 48
48#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 49#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
49 50
50static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb);
51
52static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 51static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
53 52
54static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
@@ -75,24 +74,129 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
76 struct inode *local_alloc_inode); 75 struct inode *local_alloc_inode);
77 76
78static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) 77#ifdef CONFIG_OCFS2_FS_STATS
78
79static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
80{
81 file->private_data = inode->i_private;
82 return 0;
83}
84
85#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
86#define LA_DEBUG_VER 1
87static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
88 size_t count, loff_t *ppos)
89{
90 static DEFINE_MUTEX(la_debug_mutex);
91 struct ocfs2_super *osb = file->private_data;
92 int written, ret;
93 char *buf = osb->local_alloc_debug_buf;
94
95 mutex_lock(&la_debug_mutex);
96 memset(buf, 0, LA_DEBUG_BUF_SZ);
97
98 written = snprintf(buf, LA_DEBUG_BUF_SZ,
99 "0x%x\t0x%llx\t%u\t%u\t0x%x\n",
100 LA_DEBUG_VER,
101 (unsigned long long)osb->la_last_gd,
102 osb->local_alloc_default_bits,
103 osb->local_alloc_bits, osb->local_alloc_state);
104
105 ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
106
107 mutex_unlock(&la_debug_mutex);
108 return ret;
109}
110
111static const struct file_operations ocfs2_la_debug_fops = {
112 .open = ocfs2_la_debug_open,
113 .read = ocfs2_la_debug_read,
114};
115
116static void ocfs2_init_la_debug(struct ocfs2_super *osb)
117{
118 osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
119 if (!osb->local_alloc_debug_buf)
120 return;
121
122 osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
123 S_IFREG|S_IRUSR,
124 osb->osb_debug_root,
125 osb,
126 &ocfs2_la_debug_fops);
127 if (!osb->local_alloc_debug) {
128 kfree(osb->local_alloc_debug_buf);
129 osb->local_alloc_debug_buf = NULL;
130 }
131}
132
133static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
134{
135 if (osb->local_alloc_debug)
136 debugfs_remove(osb->local_alloc_debug);
137
138 if (osb->local_alloc_debug_buf)
139 kfree(osb->local_alloc_debug_buf);
140
141 osb->local_alloc_debug_buf = NULL;
142 osb->local_alloc_debug = NULL;
143}
144#else /* CONFIG_OCFS2_FS_STATS */
145static void ocfs2_init_la_debug(struct ocfs2_super *osb)
146{
147 return;
148}
149static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
150{
151 return;
152}
153#endif
154
155static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
79{ 156{
80 BUG_ON(osb->s_clustersize_bits > 20); 157 return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
158 osb->local_alloc_state == OCFS2_LA_ENABLED);
159}
81 160
82 /* Size local alloc windows by the megabyte */ 161void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
83 return osb->local_alloc_size << (20 - osb->s_clustersize_bits); 162 unsigned int num_clusters)
163{
164 spin_lock(&osb->osb_lock);
165 if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
166 osb->local_alloc_state == OCFS2_LA_THROTTLED)
167 if (num_clusters >= osb->local_alloc_default_bits) {
168 cancel_delayed_work(&osb->la_enable_wq);
169 osb->local_alloc_state = OCFS2_LA_ENABLED;
170 }
171 spin_unlock(&osb->osb_lock);
172}
173
174void ocfs2_la_enable_worker(struct work_struct *work)
175{
176 struct ocfs2_super *osb =
177 container_of(work, struct ocfs2_super,
178 la_enable_wq.work);
179 spin_lock(&osb->osb_lock);
180 osb->local_alloc_state = OCFS2_LA_ENABLED;
181 spin_unlock(&osb->osb_lock);
84} 182}
85 183
86/* 184/*
87 * Tell us whether a given allocation should use the local alloc 185 * Tell us whether a given allocation should use the local alloc
88 * file. Otherwise, it has to go to the main bitmap. 186 * file. Otherwise, it has to go to the main bitmap.
187 *
188 * This function does semi-dirty reads of local alloc size and state!
189 * This is ok however, as the values are re-checked once under mutex.
89 */ 190 */
90int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 191int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
91{ 192{
92 int la_bits = ocfs2_local_alloc_window_bits(osb);
93 int ret = 0; 193 int ret = 0;
194 int la_bits;
195
196 spin_lock(&osb->osb_lock);
197 la_bits = osb->local_alloc_bits;
94 198
95 if (osb->local_alloc_state != OCFS2_LA_ENABLED) 199 if (!ocfs2_la_state_enabled(osb))
96 goto bail; 200 goto bail;
97 201
98 /* la_bits should be at least twice the size (in clusters) of 202 /* la_bits should be at least twice the size (in clusters) of
@@ -106,6 +210,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
106bail: 210bail:
107 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 211 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
108 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 212 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
213 spin_unlock(&osb->osb_lock);
109 return ret; 214 return ret;
110} 215}
111 216
@@ -120,14 +225,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
120 225
121 mlog_entry_void(); 226 mlog_entry_void();
122 227
123 if (osb->local_alloc_size == 0) 228 ocfs2_init_la_debug(osb);
229
230 if (osb->local_alloc_bits == 0)
124 goto bail; 231 goto bail;
125 232
126 if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { 233 if (osb->local_alloc_bits >= osb->bitmap_cpg) {
127 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 234 mlog(ML_NOTICE, "Requested local alloc window %d is larger "
128 "than max possible %u. Using defaults.\n", 235 "than max possible %u. Using defaults.\n",
129 ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); 236 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
130 osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 237 osb->local_alloc_bits =
238 ocfs2_megabytes_to_clusters(osb->sb,
239 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
131 } 240 }
132 241
133 /* read the alloc off disk */ 242 /* read the alloc off disk */
@@ -139,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
139 goto bail; 248 goto bail;
140 } 249 }
141 250
142 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, 251 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
143 &alloc_bh, 0, inode); 252 &alloc_bh, OCFS2_BH_IGNORE_CACHE);
144 if (status < 0) { 253 if (status < 0) {
145 mlog_errno(status); 254 mlog_errno(status);
146 goto bail; 255 goto bail;
@@ -185,13 +294,14 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
185 294
186bail: 295bail:
187 if (status < 0) 296 if (status < 0)
188 if (alloc_bh) 297 brelse(alloc_bh);
189 brelse(alloc_bh);
190 if (inode) 298 if (inode)
191 iput(inode); 299 iput(inode);
192 300
193 mlog(0, "Local alloc window bits = %d\n", 301 if (status < 0)
194 ocfs2_local_alloc_window_bits(osb)); 302 ocfs2_shutdown_la_debug(osb);
303
304 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
195 305
196 mlog_exit(status); 306 mlog_exit(status);
197 return status; 307 return status;
@@ -217,6 +327,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
217 327
218 mlog_entry_void(); 328 mlog_entry_void();
219 329
330 cancel_delayed_work(&osb->la_enable_wq);
331 flush_workqueue(ocfs2_wq);
332
333 ocfs2_shutdown_la_debug(osb);
334
220 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 335 if (osb->local_alloc_state == OCFS2_LA_UNUSED)
221 goto out; 336 goto out;
222 337
@@ -295,8 +410,7 @@ out_commit:
295 ocfs2_commit_trans(osb, handle); 410 ocfs2_commit_trans(osb, handle);
296 411
297out_unlock: 412out_unlock:
298 if (main_bm_bh) 413 brelse(main_bm_bh);
299 brelse(main_bm_bh);
300 414
301 ocfs2_inode_unlock(main_bm_inode, 1); 415 ocfs2_inode_unlock(main_bm_inode, 1);
302 416
@@ -345,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
345 459
346 mutex_lock(&inode->i_mutex); 460 mutex_lock(&inode->i_mutex);
347 461
348 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, 462 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
349 &alloc_bh, 0, inode); 463 &alloc_bh, OCFS2_BH_IGNORE_CACHE);
350 if (status < 0) { 464 if (status < 0) {
351 mlog_errno(status); 465 mlog_errno(status);
352 goto bail; 466 goto bail;
@@ -372,8 +486,7 @@ bail:
372 *alloc_copy = NULL; 486 *alloc_copy = NULL;
373 } 487 }
374 488
375 if (alloc_bh) 489 brelse(alloc_bh);
376 brelse(alloc_bh);
377 490
378 if (inode) { 491 if (inode) {
379 mutex_unlock(&inode->i_mutex); 492 mutex_unlock(&inode->i_mutex);
@@ -441,8 +554,7 @@ out_unlock:
441out_mutex: 554out_mutex:
442 mutex_unlock(&main_bm_inode->i_mutex); 555 mutex_unlock(&main_bm_inode->i_mutex);
443 556
444 if (main_bm_bh) 557 brelse(main_bm_bh);
445 brelse(main_bm_bh);
446 558
447 iput(main_bm_inode); 559 iput(main_bm_inode);
448 560
@@ -453,8 +565,48 @@ out:
453 return status; 565 return status;
454} 566}
455 567
568/* Check to see if the local alloc window is within ac->ac_max_block */
569static int ocfs2_local_alloc_in_range(struct inode *inode,
570 struct ocfs2_alloc_context *ac,
571 u32 bits_wanted)
572{
573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
574 struct ocfs2_dinode *alloc;
575 struct ocfs2_local_alloc *la;
576 int start;
577 u64 block_off;
578
579 if (!ac->ac_max_block)
580 return 1;
581
582 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
583 la = OCFS2_LOCAL_ALLOC(alloc);
584
585 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
586 if (start == -1) {
587 mlog_errno(-ENOSPC);
588 return 0;
589 }
590
591 /*
592 * Converting (bm_off + start + bits_wanted) to blocks gives us
593 * the blkno just past our actual allocation. This is perfect
594 * to compare with ac_max_block.
595 */
596 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
597 le32_to_cpu(la->la_bm_off) +
598 start + bits_wanted);
599 mlog(0, "Checking %llu against %llu\n",
600 (unsigned long long)block_off,
601 (unsigned long long)ac->ac_max_block);
602 if (block_off > ac->ac_max_block)
603 return 0;
604
605 return 1;
606}
607
456/* 608/*
457 * make sure we've got at least bitswanted contiguous bits in the 609 * make sure we've got at least bits_wanted contiguous bits in the
458 * local alloc. You lose them when you drop i_mutex. 610 * local alloc. You lose them when you drop i_mutex.
459 * 611 *
460 * We will add ourselves to the transaction passed in, but may start 612 * We will add ourselves to the transaction passed in, but may start
@@ -485,16 +637,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
485 637
486 mutex_lock(&local_alloc_inode->i_mutex); 638 mutex_lock(&local_alloc_inode->i_mutex);
487 639
488 if (osb->local_alloc_state != OCFS2_LA_ENABLED) { 640 /*
489 status = -ENOSPC; 641 * We must double check state and allocator bits because
490 goto bail; 642 * another process may have changed them while holding i_mutex.
491 } 643 */
492 644 spin_lock(&osb->osb_lock);
493 if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) { 645 if (!ocfs2_la_state_enabled(osb) ||
494 mlog(0, "Asking for more than my max window size!\n"); 646 (bits_wanted > osb->local_alloc_bits)) {
647 spin_unlock(&osb->osb_lock);
495 status = -ENOSPC; 648 status = -ENOSPC;
496 goto bail; 649 goto bail;
497 } 650 }
651 spin_unlock(&osb->osb_lock);
498 652
499 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 653 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
500 654
@@ -522,6 +676,36 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
522 mlog_errno(status); 676 mlog_errno(status);
523 goto bail; 677 goto bail;
524 } 678 }
679
680 /*
681 * Under certain conditions, the window slide code
682 * might have reduced the number of bits available or
683 * disabled the the local alloc entirely. Re-check
684 * here and return -ENOSPC if necessary.
685 */
686 status = -ENOSPC;
687 if (!ocfs2_la_state_enabled(osb))
688 goto bail;
689
690 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
691 le32_to_cpu(alloc->id1.bitmap1.i_used);
692 if (bits_wanted > free_bits)
693 goto bail;
694 }
695
696 if (ac->ac_max_block)
697 mlog(0, "Calling in_range for max block %llu\n",
698 (unsigned long long)ac->ac_max_block);
699
700 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
701 bits_wanted)) {
702 /*
703 * The window is outside ac->ac_max_block.
704 * This errno tells the caller to keep localalloc enabled
705 * but to get the allocation from the main bitmap.
706 */
707 status = -EFBIG;
708 goto bail;
525 } 709 }
526 710
527 ac->ac_inode = local_alloc_inode; 711 ac->ac_inode = local_alloc_inode;
@@ -789,6 +973,85 @@ bail:
789 return status; 973 return status;
790} 974}
791 975
976enum ocfs2_la_event {
977 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */
978 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has
979 * enough bits theoretically
980 * free, but a contiguous
981 * allocation could not be
982 * found. */
983 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have
984 * enough bits free to satisfy
985 * our request. */
986};
987#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
988/*
989 * Given an event, calculate the size of our next local alloc window.
990 *
991 * This should always be called under i_mutex of the local alloc inode
992 * so that local alloc disabling doesn't race with processes trying to
993 * use the allocator.
994 *
995 * Returns the state which the local alloc was left in. This value can
996 * be ignored by some paths.
997 */
998static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
999 enum ocfs2_la_event event)
1000{
1001 unsigned int bits;
1002 int state;
1003
1004 spin_lock(&osb->osb_lock);
1005 if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
1006 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
1007 goto out_unlock;
1008 }
1009
1010 /*
1011 * ENOSPC and fragmentation are treated similarly for now.
1012 */
1013 if (event == OCFS2_LA_EVENT_ENOSPC ||
1014 event == OCFS2_LA_EVENT_FRAGMENTED) {
1015 /*
1016 * We ran out of contiguous space in the primary
1017 * bitmap. Drastically reduce the number of bits used
1018 * by local alloc until we have to disable it.
1019 */
1020 bits = osb->local_alloc_bits >> 1;
1021 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
1022 /*
1023 * By setting state to THROTTLED, we'll keep
1024 * the number of local alloc bits used down
1025 * until an event occurs which would give us
1026 * reason to assume the bitmap situation might
1027 * have changed.
1028 */
1029 osb->local_alloc_state = OCFS2_LA_THROTTLED;
1030 osb->local_alloc_bits = bits;
1031 } else {
1032 osb->local_alloc_state = OCFS2_LA_DISABLED;
1033 }
1034 queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
1035 OCFS2_LA_ENABLE_INTERVAL);
1036 goto out_unlock;
1037 }
1038
1039 /*
1040 * Don't increase the size of the local alloc window until we
1041 * know we might be able to fulfill the request. Otherwise, we
1042 * risk bouncing around the global bitmap during periods of
1043 * low space.
1044 */
1045 if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
1046 osb->local_alloc_bits = osb->local_alloc_default_bits;
1047
1048out_unlock:
1049 state = osb->local_alloc_state;
1050 spin_unlock(&osb->osb_lock);
1051
1052 return state;
1053}
1054
792static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 1055static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
793 struct ocfs2_alloc_context **ac, 1056 struct ocfs2_alloc_context **ac,
794 struct inode **bitmap_inode, 1057 struct inode **bitmap_inode,
@@ -803,12 +1066,21 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
803 goto bail; 1066 goto bail;
804 } 1067 }
805 1068
806 (*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb); 1069retry_enospc:
1070 (*ac)->ac_bits_wanted = osb->local_alloc_bits;
807 1071
808 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1072 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
1073 if (status == -ENOSPC) {
1074 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
1075 OCFS2_LA_DISABLED)
1076 goto bail;
1077
1078 ocfs2_free_ac_resource(*ac);
1079 memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
1080 goto retry_enospc;
1081 }
809 if (status < 0) { 1082 if (status < 0) {
810 if (status != -ENOSPC) 1083 mlog_errno(status);
811 mlog_errno(status);
812 goto bail; 1084 goto bail;
813 } 1085 }
814 1086
@@ -849,7 +1121,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
849 "one\n"); 1121 "one\n");
850 1122
851 mlog(0, "Allocating %u clusters for a new window.\n", 1123 mlog(0, "Allocating %u clusters for a new window.\n",
852 ocfs2_local_alloc_window_bits(osb)); 1124 osb->local_alloc_bits);
853 1125
854 /* Instruct the allocation code to try the most recently used 1126 /* Instruct the allocation code to try the most recently used
855 * cluster group. We'll re-record the group used this pass 1127 * cluster group. We'll re-record the group used this pass
@@ -859,9 +1131,36 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
859 /* we used the generic suballoc reserve function, but we set 1131 /* we used the generic suballoc reserve function, but we set
860 * everything up nicely, so there's no reason why we can't use 1132 * everything up nicely, so there's no reason why we can't use
861 * the more specific cluster api to claim bits. */ 1133 * the more specific cluster api to claim bits. */
862 status = ocfs2_claim_clusters(osb, handle, ac, 1134 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
863 ocfs2_local_alloc_window_bits(osb),
864 &cluster_off, &cluster_count); 1135 &cluster_off, &cluster_count);
1136 if (status == -ENOSPC) {
1137retry_enospc:
1138 /*
1139 * Note: We could also try syncing the journal here to
1140 * allow use of any free bits which the current
1141 * transaction can't give us access to. --Mark
1142 */
1143 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
1144 OCFS2_LA_DISABLED)
1145 goto bail;
1146
1147 status = ocfs2_claim_clusters(osb, handle, ac,
1148 osb->local_alloc_bits,
1149 &cluster_off,
1150 &cluster_count);
1151 if (status == -ENOSPC)
1152 goto retry_enospc;
1153 /*
1154 * We only shrunk the *minimum* number of in our
1155 * request - it's entirely possible that the allocator
1156 * might give us more than we asked for.
1157 */
1158 if (status == 0) {
1159 spin_lock(&osb->osb_lock);
1160 osb->local_alloc_bits = cluster_count;
1161 spin_unlock(&osb->osb_lock);
1162 }
1163 }
865 if (status < 0) { 1164 if (status < 0) {
866 if (status != -ENOSPC) 1165 if (status != -ENOSPC)
867 mlog_errno(status); 1166 mlog_errno(status);
@@ -905,6 +1204,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
905 1204
906 mlog_entry_void(); 1205 mlog_entry_void();
907 1206
1207 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
1208
908 /* This will lock the main bitmap for us. */ 1209 /* This will lock the main bitmap for us. */
909 status = ocfs2_local_alloc_reserve_for_window(osb, 1210 status = ocfs2_local_alloc_reserve_for_window(osb,
910 &ac, 1211 &ac,
@@ -976,8 +1277,7 @@ bail:
976 if (handle) 1277 if (handle)
977 ocfs2_commit_trans(osb, handle); 1278 ocfs2_commit_trans(osb, handle);
978 1279
979 if (main_bm_bh) 1280 brelse(main_bm_bh);
980 brelse(main_bm_bh);
981 1281
982 if (main_bm_inode) 1282 if (main_bm_inode)
983 iput(main_bm_inode); 1283 iput(main_bm_inode);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 3f76631e110c..ac5ea9f86653 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
52 u32 *bit_off, 52 u32 *bit_off,
53 u32 *num_bits); 53 u32 *num_bits);
54 54
55void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
56 unsigned int num_clusters);
57void ocfs2_la_enable_worker(struct work_struct *work);
58
55#endif /* OCFS2_LOCALALLOC_H */ 59#endif /* OCFS2_LOCALALLOC_H */
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 203f87143877..544ac6245175 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -24,6 +24,7 @@
24 */ 24 */
25 25
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/fcntl.h>
27 28
28#define MLOG_MASK_PREFIX ML_INODE 29#define MLOG_MASK_PREFIX ML_INODE
29#include <cluster/masklog.h> 30#include <cluster/masklog.h>
@@ -32,6 +33,7 @@
32 33
33#include "dlmglue.h" 34#include "dlmglue.h"
34#include "file.h" 35#include "file.h"
36#include "inode.h"
35#include "locks.h" 37#include "locks.h"
36 38
37static int ocfs2_do_flock(struct file *file, struct inode *inode, 39static int ocfs2_do_flock(struct file *file, struct inode *inode,
@@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
123 else 125 else
124 return ocfs2_do_flock(file, inode, cmd, fl); 126 return ocfs2_do_flock(file, inode, cmd, fl);
125} 127}
128
129int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
130{
131 struct inode *inode = file->f_mapping->host;
132 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
133
134 if (!(fl->fl_flags & FL_POSIX))
135 return -ENOLCK;
136 if (__mandatory_lock(inode))
137 return -ENOLCK;
138
139 return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
140}
diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h
index 9743ef2324ec..496d488b271f 100644
--- a/fs/ocfs2/locks.h
+++ b/fs/ocfs2/locks.h
@@ -27,5 +27,6 @@
27#define OCFS2_LOCKS_H 27#define OCFS2_LOCKS_H
28 28
29int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); 29int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
30int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl);
30 31
31#endif /* OCFS2_LOCKS_H */ 32#endif /* OCFS2_LOCKS_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d5d808fe0140..485a6aa0ad39 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -60,6 +60,7 @@
60#include "symlink.h" 60#include "symlink.h"
61#include "sysfile.h" 61#include "sysfile.h"
62#include "uptodate.h" 62#include "uptodate.h"
63#include "xattr.h"
63 64
64#include "buffer_head_io.h" 65#include "buffer_head_io.h"
65 66
@@ -327,14 +328,9 @@ leave:
327 if (status == -ENOSPC) 328 if (status == -ENOSPC)
328 mlog(0, "Disk is full\n"); 329 mlog(0, "Disk is full\n");
329 330
330 if (new_fe_bh) 331 brelse(new_fe_bh);
331 brelse(new_fe_bh); 332 brelse(de_bh);
332 333 brelse(parent_fe_bh);
333 if (de_bh)
334 brelse(de_bh);
335
336 if (parent_fe_bh)
337 brelse(parent_fe_bh);
338 334
339 if ((status < 0) && inode) 335 if ((status < 0) && inode)
340 iput(inode); 336 iput(inode);
@@ -647,12 +643,9 @@ out_unlock_inode:
647out: 643out:
648 ocfs2_inode_unlock(dir, 1); 644 ocfs2_inode_unlock(dir, 1);
649 645
650 if (de_bh) 646 brelse(de_bh);
651 brelse(de_bh); 647 brelse(fe_bh);
652 if (fe_bh) 648 brelse(parent_fe_bh);
653 brelse(fe_bh);
654 if (parent_fe_bh)
655 brelse(parent_fe_bh);
656 649
657 mlog_exit(err); 650 mlog_exit(err);
658 651
@@ -851,17 +844,10 @@ leave:
851 iput(orphan_dir); 844 iput(orphan_dir);
852 } 845 }
853 846
854 if (fe_bh) 847 brelse(fe_bh);
855 brelse(fe_bh); 848 brelse(dirent_bh);
856 849 brelse(parent_node_bh);
857 if (dirent_bh) 850 brelse(orphan_entry_bh);
858 brelse(dirent_bh);
859
860 if (parent_node_bh)
861 brelse(parent_node_bh);
862
863 if (orphan_entry_bh)
864 brelse(orphan_entry_bh);
865 851
866 mlog_exit(status); 852 mlog_exit(status);
867 853
@@ -1372,24 +1358,15 @@ bail:
1372 1358
1373 if (new_inode) 1359 if (new_inode)
1374 iput(new_inode); 1360 iput(new_inode);
1375 if (newfe_bh) 1361 brelse(newfe_bh);
1376 brelse(newfe_bh); 1362 brelse(old_inode_bh);
1377 if (old_inode_bh) 1363 brelse(old_dir_bh);
1378 brelse(old_inode_bh); 1364 brelse(new_dir_bh);
1379 if (old_dir_bh) 1365 brelse(new_de_bh);
1380 brelse(old_dir_bh); 1366 brelse(old_de_bh);
1381 if (new_dir_bh) 1367 brelse(old_inode_de_bh);
1382 brelse(new_dir_bh); 1368 brelse(orphan_entry_bh);
1383 if (new_de_bh) 1369 brelse(insert_entry_bh);
1384 brelse(new_de_bh);
1385 if (old_de_bh)
1386 brelse(old_de_bh);
1387 if (old_inode_de_bh)
1388 brelse(old_inode_de_bh);
1389 if (orphan_entry_bh)
1390 brelse(orphan_entry_bh);
1391 if (insert_entry_bh)
1392 brelse(insert_entry_bh);
1393 1370
1394 mlog_exit(status); 1371 mlog_exit(status);
1395 1372
@@ -1492,8 +1469,7 @@ bail:
1492 1469
1493 if (bhs) { 1470 if (bhs) {
1494 for(i = 0; i < blocks; i++) 1471 for(i = 0; i < blocks; i++)
1495 if (bhs[i]) 1472 brelse(bhs[i]);
1496 brelse(bhs[i]);
1497 kfree(bhs); 1473 kfree(bhs);
1498 } 1474 }
1499 1475
@@ -1598,10 +1574,10 @@ static int ocfs2_symlink(struct inode *dir,
1598 u32 offset = 0; 1574 u32 offset = 0;
1599 1575
1600 inode->i_op = &ocfs2_symlink_inode_operations; 1576 inode->i_op = &ocfs2_symlink_inode_operations;
1601 status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, 1577 status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
1602 new_fe_bh, 1578 new_fe_bh,
1603 handle, data_ac, NULL, 1579 handle, data_ac, NULL,
1604 NULL); 1580 NULL);
1605 if (status < 0) { 1581 if (status < 0) {
1606 if (status != -ENOSPC && status != -EINTR) { 1582 if (status != -ENOSPC && status != -EINTR) {
1607 mlog(ML_ERROR, 1583 mlog(ML_ERROR,
@@ -1659,12 +1635,9 @@ bail:
1659 1635
1660 ocfs2_inode_unlock(dir, 1); 1636 ocfs2_inode_unlock(dir, 1);
1661 1637
1662 if (new_fe_bh) 1638 brelse(new_fe_bh);
1663 brelse(new_fe_bh); 1639 brelse(parent_fe_bh);
1664 if (parent_fe_bh) 1640 brelse(de_bh);
1665 brelse(parent_fe_bh);
1666 if (de_bh)
1667 brelse(de_bh);
1668 if (inode_ac) 1641 if (inode_ac)
1669 ocfs2_free_alloc_context(inode_ac); 1642 ocfs2_free_alloc_context(inode_ac);
1670 if (data_ac) 1643 if (data_ac)
@@ -1759,8 +1732,7 @@ leave:
1759 iput(orphan_dir_inode); 1732 iput(orphan_dir_inode);
1760 } 1733 }
1761 1734
1762 if (orphan_dir_bh) 1735 brelse(orphan_dir_bh);
1763 brelse(orphan_dir_bh);
1764 1736
1765 mlog_exit(status); 1737 mlog_exit(status);
1766 return status; 1738 return status;
@@ -1780,10 +1752,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1780 1752
1781 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 1753 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
1782 1754
1783 status = ocfs2_read_block(osb, 1755 status = ocfs2_read_block(orphan_dir_inode,
1784 OCFS2_I(orphan_dir_inode)->ip_blkno, 1756 OCFS2_I(orphan_dir_inode)->ip_blkno,
1785 &orphan_dir_bh, OCFS2_BH_CACHED, 1757 &orphan_dir_bh);
1786 orphan_dir_inode);
1787 if (status < 0) { 1758 if (status < 0) {
1788 mlog_errno(status); 1759 mlog_errno(status);
1789 goto leave; 1760 goto leave;
@@ -1829,8 +1800,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1829 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); 1800 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
1830 1801
1831leave: 1802leave:
1832 if (orphan_dir_bh) 1803 brelse(orphan_dir_bh);
1833 brelse(orphan_dir_bh);
1834 1804
1835 mlog_exit(status); 1805 mlog_exit(status);
1836 return status; 1806 return status;
@@ -1898,8 +1868,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
1898 } 1868 }
1899 1869
1900leave: 1870leave:
1901 if (target_de_bh) 1871 brelse(target_de_bh);
1902 brelse(target_de_bh);
1903 1872
1904 mlog_exit(status); 1873 mlog_exit(status);
1905 return status; 1874 return status;
@@ -1918,4 +1887,8 @@ const struct inode_operations ocfs2_dir_iops = {
1918 .setattr = ocfs2_setattr, 1887 .setattr = ocfs2_setattr,
1919 .getattr = ocfs2_getattr, 1888 .getattr = ocfs2_getattr,
1920 .permission = ocfs2_permission, 1889 .permission = ocfs2_permission,
1890 .setxattr = generic_setxattr,
1891 .getxattr = generic_getxattr,
1892 .listxattr = ocfs2_listxattr,
1893 .removexattr = generic_removexattr,
1921}; 1894};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7f625f2b1117..a21a465490c4 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,7 +34,12 @@
34#include <linux/workqueue.h> 34#include <linux/workqueue.h>
35#include <linux/kref.h> 35#include <linux/kref.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/jbd.h> 37#ifndef CONFIG_OCFS2_COMPAT_JBD
38# include <linux/jbd2.h>
39#else
40# include <linux/jbd.h>
41# include "ocfs2_jbd_compat.h"
42#endif
38 43
39/* For union ocfs2_dlm_lksb */ 44/* For union ocfs2_dlm_lksb */
40#include "stackglue.h" 45#include "stackglue.h"
@@ -171,9 +176,13 @@ struct ocfs2_alloc_stats
171 176
172enum ocfs2_local_alloc_state 177enum ocfs2_local_alloc_state
173{ 178{
174 OCFS2_LA_UNUSED = 0, 179 OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for
175 OCFS2_LA_ENABLED, 180 * this mountpoint. */
176 OCFS2_LA_DISABLED 181 OCFS2_LA_ENABLED, /* Local alloc is in use. */
182 OCFS2_LA_THROTTLED, /* Local alloc is in use, but number
183 * of bits has been reduced. */
184 OCFS2_LA_DISABLED /* Local alloc has temporarily been
185 * disabled. */
177}; 186};
178 187
179enum ocfs2_mount_options 188enum ocfs2_mount_options
@@ -184,6 +193,8 @@ enum ocfs2_mount_options
184 OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ 193 OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
185 OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ 194 OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
186 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ 195 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
196 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
197 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
187}; 198};
188 199
189#define OCFS2_OSB_SOFT_RO 0x0001 200#define OCFS2_OSB_SOFT_RO 0x0001
@@ -214,6 +225,7 @@ struct ocfs2_super
214 u32 bitmap_cpg; 225 u32 bitmap_cpg;
215 u8 *uuid; 226 u8 *uuid;
216 char *uuid_str; 227 char *uuid_str;
228 u32 uuid_hash;
217 u8 *vol_label; 229 u8 *vol_label;
218 u64 first_cluster_group_blkno; 230 u64 first_cluster_group_blkno;
219 u32 fs_generation; 231 u32 fs_generation;
@@ -241,6 +253,7 @@ struct ocfs2_super
241 int s_sectsize_bits; 253 int s_sectsize_bits;
242 int s_clustersize; 254 int s_clustersize;
243 int s_clustersize_bits; 255 int s_clustersize_bits;
256 unsigned int s_xattr_inline_size;
244 257
245 atomic_t vol_state; 258 atomic_t vol_state;
246 struct mutex recovery_lock; 259 struct mutex recovery_lock;
@@ -252,11 +265,27 @@ struct ocfs2_super
252 struct ocfs2_journal *journal; 265 struct ocfs2_journal *journal;
253 unsigned long osb_commit_interval; 266 unsigned long osb_commit_interval;
254 267
255 int local_alloc_size; 268 struct delayed_work la_enable_wq;
256 enum ocfs2_local_alloc_state local_alloc_state; 269
270 /*
271 * Must hold local alloc i_mutex and osb->osb_lock to change
272 * local_alloc_bits. Reads can be done under either lock.
273 */
274 unsigned int local_alloc_bits;
275 unsigned int local_alloc_default_bits;
276
277 enum ocfs2_local_alloc_state local_alloc_state; /* protected
278 * by osb_lock */
279
257 struct buffer_head *local_alloc_bh; 280 struct buffer_head *local_alloc_bh;
281
258 u64 la_last_gd; 282 u64 la_last_gd;
259 283
284#ifdef CONFIG_OCFS2_FS_STATS
285 struct dentry *local_alloc_debug;
286 char *local_alloc_debug_buf;
287#endif
288
260 /* Next two fields are for local node slot recovery during 289 /* Next two fields are for local node slot recovery during
261 * mount. */ 290 * mount. */
262 int dirty; 291 int dirty;
@@ -340,6 +369,13 @@ static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
340 return 0; 369 return 0;
341} 370}
342 371
372static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
373{
374 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)
375 return 1;
376 return 0;
377}
378
343/* set / clear functions because cluster events can make these happen 379/* set / clear functions because cluster events can make these happen
344 * in parallel so we want the transitions to be atomic. this also 380 * in parallel so we want the transitions to be atomic. this also
345 * means that any future flags osb_flags must be protected by spinlock 381 * means that any future flags osb_flags must be protected by spinlock
@@ -554,6 +590,14 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
554 return pages_per_cluster; 590 return pages_per_cluster;
555} 591}
556 592
593static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
594 unsigned int megs)
595{
596 BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576);
597
598 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
599}
600
557static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) 601static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
558{ 602{
559 spin_lock(&osb->osb_lock); 603 spin_lock(&osb->osb_lock);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 4f619850ccf7..f24ce3d3f956 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -64,6 +64,7 @@
64#define OCFS2_INODE_SIGNATURE "INODE01" 64#define OCFS2_INODE_SIGNATURE "INODE01"
65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" 65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" 66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
67#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
67 68
68/* Compatibility flags */ 69/* Compatibility flags */
69#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ 70#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@@ -90,7 +91,8 @@
90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ 91 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
91 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ 92 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
92 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ 93 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
93 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) 94 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
95 | OCFS2_FEATURE_INCOMPAT_XATTR)
94#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 96#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
95 97
96/* 98/*
@@ -127,10 +129,6 @@
127/* Support for data packed into inode blocks */ 129/* Support for data packed into inode blocks */
128#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 130#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
129 131
130/* Support for the extended slot map */
131#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
132
133
134/* 132/*
135 * Support for alternate, userspace cluster stacks. If set, the superblock 133 * Support for alternate, userspace cluster stacks. If set, the superblock
136 * field s_cluster_info contains a tag for the alternate stack in use as 134 * field s_cluster_info contains a tag for the alternate stack in use as
@@ -142,6 +140,12 @@
142 */ 140 */
143#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 141#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
144 142
143/* Support for the extended slot map */
144#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
145
146/* Support for extended attributes */
147#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
148
145/* 149/*
146 * backup superblock flag is used to indicate that this volume 150 * backup superblock flag is used to indicate that this volume
147 * has backup superblocks. 151 * has backup superblocks.
@@ -299,6 +303,12 @@ struct ocfs2_new_group_input {
299 */ 303 */
300#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 304#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
301 305
306/*
307 * Inline extended attribute size (in bytes)
308 * The value chosen should be aligned to 16 byte boundaries.
309 */
310#define OCFS2_MIN_XATTR_INLINE_SIZE 256
311
302struct ocfs2_system_inode_info { 312struct ocfs2_system_inode_info {
303 char *si_name; 313 char *si_name;
304 int si_iflags; 314 int si_iflags;
@@ -563,7 +573,7 @@ struct ocfs2_super_block {
563/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts 573/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
564 before tunefs required */ 574 before tunefs required */
565 __le16 s_tunefs_flag; 575 __le16 s_tunefs_flag;
566 __le32 s_reserved1; 576 __le32 s_uuid_hash; /* hash value of uuid */
567 __le64 s_first_cluster_group; /* Block offset of 1st cluster 577 __le64 s_first_cluster_group; /* Block offset of 1st cluster
568 * group header */ 578 * group header */
569/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ 579/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
@@ -571,7 +581,11 @@ struct ocfs2_super_block {
571/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace 581/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace
572 stack. Only valid 582 stack. Only valid
573 with INCOMPAT flag. */ 583 with INCOMPAT flag. */
574/*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ 584/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
585 for this fs*/
586 __le16 s_reserved0;
587 __le32 s_reserved1;
588/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */
575/*140*/ 589/*140*/
576 590
577 /* 591 /*
@@ -621,7 +635,8 @@ struct ocfs2_dinode {
621 belongs to */ 635 belongs to */
622 __le16 i_suballoc_bit; /* Bit offset in suballocator 636 __le16 i_suballoc_bit; /* Bit offset in suballocator
623 block group */ 637 block group */
624/*10*/ __le32 i_reserved0; 638/*10*/ __le16 i_reserved0;
639 __le16 i_xattr_inline_size;
625 __le32 i_clusters; /* Cluster count */ 640 __le32 i_clusters; /* Cluster count */
626 __le32 i_uid; /* Owner UID */ 641 __le32 i_uid; /* Owner UID */
627 __le32 i_gid; /* Owning GID */ 642 __le32 i_gid; /* Owning GID */
@@ -640,11 +655,12 @@ struct ocfs2_dinode {
640 __le32 i_atime_nsec; 655 __le32 i_atime_nsec;
641 __le32 i_ctime_nsec; 656 __le32 i_ctime_nsec;
642 __le32 i_mtime_nsec; 657 __le32 i_mtime_nsec;
643 __le32 i_attr; 658/*70*/ __le32 i_attr;
644 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL 659 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
645 was set in i_flags */ 660 was set in i_flags */
646 __le16 i_dyn_features; 661 __le16 i_dyn_features;
647/*70*/ __le64 i_reserved2[8]; 662 __le64 i_xattr_loc;
663/*80*/ __le64 i_reserved2[7];
648/*B8*/ union { 664/*B8*/ union {
649 __le64 i_pad1; /* Generic way to refer to this 665 __le64 i_pad1; /* Generic way to refer to this
650 64bit union */ 666 64bit union */
@@ -715,6 +731,136 @@ struct ocfs2_group_desc
715/*40*/ __u8 bg_bitmap[0]; 731/*40*/ __u8 bg_bitmap[0];
716}; 732};
717 733
734/*
735 * On disk extended attribute structure for OCFS2.
736 */
737
738/*
739 * ocfs2_xattr_entry indicates one extend attribute.
740 *
741 * Note that it can be stored in inode, one block or one xattr bucket.
742 */
743struct ocfs2_xattr_entry {
744 __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */
745 __le16 xe_name_offset; /* byte offset from the 1st etnry in the local
746 local xattr storage(inode, xattr block or
747 xattr bucket). */
748 __u8 xe_name_len; /* xattr name len, does't include prefix. */
749 __u8 xe_type; /* the low 7 bits indicates the name prefix's
750 * type and the highest 1 bits indicate whether
751 * the EA is stored in the local storage. */
752 __le64 xe_value_size; /* real xattr value length. */
753};
754
755/*
756 * On disk structure for xattr header.
757 *
758 * One ocfs2_xattr_header describes how many ocfs2_xattr_entry records in
759 * the local xattr storage.
760 */
761struct ocfs2_xattr_header {
762 __le16 xh_count; /* contains the count of how
763 many records are in the
764 local xattr storage. */
765 __le16 xh_free_start; /* current offset for storing
766 xattr. */
767 __le16 xh_name_value_len; /* total length of name/value
768 length in this bucket. */
769 __le16 xh_num_buckets; /* bucket nums in one extent
770 record, only valid in the
771 first bucket. */
772 __le64 xh_csum;
773 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
774};
775
776/*
777 * On disk structure for xattr value root.
778 *
779 * It is used when one extended attribute's size is larger, and we will save it
780 * in an outside cluster. It will stored in a b-tree like file content.
781 */
782struct ocfs2_xattr_value_root {
783/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */
784 __le32 xr_reserved0;
785 __le64 xr_last_eb_blk; /* Pointer to last extent block */
786/*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */
787};
788
789/*
790 * On disk structure for xattr tree root.
791 *
792 * It is used when there are too many extended attributes for one file. These
793 * attributes will be organized and stored in an indexed-btree.
794 */
795struct ocfs2_xattr_tree_root {
796/*00*/ __le32 xt_clusters; /* clusters covered by xattr. */
797 __le32 xt_reserved0;
798 __le64 xt_last_eb_blk; /* Pointer to last extent block */
799/*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */
800};
801
802#define OCFS2_XATTR_INDEXED 0x1
803#define OCFS2_HASH_SHIFT 5
804#define OCFS2_XATTR_ROUND 3
805#define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \
806 ~(OCFS2_XATTR_ROUND))
807
808#define OCFS2_XATTR_BUCKET_SIZE 4096
809#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \
810 / OCFS2_MIN_BLOCKSIZE)
811
812/*
813 * On disk structure for xattr block.
814 */
815struct ocfs2_xattr_block {
816/*00*/ __u8 xb_signature[8]; /* Signature for verification */
817 __le16 xb_suballoc_slot; /* Slot suballocator this
818 block belongs to. */
819 __le16 xb_suballoc_bit; /* Bit offset in suballocator
820 block group */
821 __le32 xb_fs_generation; /* Must match super block */
822/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */
823 __le64 xb_csum;
824/*20*/ __le16 xb_flags; /* Indicates whether this block contains
825 real xattr or a xattr tree. */
826 __le16 xb_reserved0;
827 __le32 xb_reserved1;
828 __le64 xb_reserved2;
829/*30*/ union {
830 struct ocfs2_xattr_header xb_header; /* xattr header if this
831 block contains xattr */
832 struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this
833 block cotains xattr
834 tree. */
835 } xb_attrs;
836};
837
838#define OCFS2_XATTR_ENTRY_LOCAL 0x80
839#define OCFS2_XATTR_TYPE_MASK 0x7F
840static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe,
841 int local)
842{
843 if (local)
844 xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL;
845 else
846 xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL;
847}
848
849static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe)
850{
851 return xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL;
852}
853
854static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type)
855{
856 xe->xe_type |= type & OCFS2_XATTR_TYPE_MASK;
857}
858
859static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
860{
861 return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
862}
863
718#ifdef __KERNEL__ 864#ifdef __KERNEL__
719static inline int ocfs2_fast_symlink_chars(struct super_block *sb) 865static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
720{ 866{
@@ -728,6 +874,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb)
728 offsetof(struct ocfs2_dinode, id2.i_data.id_data); 874 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
729} 875}
730 876
877static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
878 struct ocfs2_dinode *di)
879{
880 unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
881
882 if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
883 return sb->s_blocksize -
884 offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
885 xattrsize;
886 else
887 return sb->s_blocksize -
888 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
889}
890
731static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) 891static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
732{ 892{
733 int size; 893 int size;
@@ -738,6 +898,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
738 return size / sizeof(struct ocfs2_extent_rec); 898 return size / sizeof(struct ocfs2_extent_rec);
739} 899}
740 900
901static inline int ocfs2_extent_recs_per_inode_with_xattr(
902 struct super_block *sb,
903 struct ocfs2_dinode *di)
904{
905 int size;
906 unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
907
908 if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
909 size = sb->s_blocksize -
910 offsetof(struct ocfs2_dinode, id2.i_list.l_recs) -
911 xattrsize;
912 else
913 size = sb->s_blocksize -
914 offsetof(struct ocfs2_dinode, id2.i_list.l_recs);
915
916 return size / sizeof(struct ocfs2_extent_rec);
917}
918
741static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) 919static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
742{ 920{
743 int size; 921 int size;
@@ -801,6 +979,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index)
801 return 0; 979 return 0;
802 980
803} 981}
982
983static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
984{
985 int size;
986
987 size = sb->s_blocksize -
988 offsetof(struct ocfs2_xattr_block,
989 xb_attrs.xb_root.xt_list.l_recs);
990
991 return size / sizeof(struct ocfs2_extent_rec);
992}
804#else 993#else
805static inline int ocfs2_fast_symlink_chars(int blocksize) 994static inline int ocfs2_fast_symlink_chars(int blocksize)
806{ 995{
@@ -884,6 +1073,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index)
884 1073
885 return 0; 1074 return 0;
886} 1075}
1076
1077static inline int ocfs2_xattr_recs_per_xb(int blocksize)
1078{
1079 int size;
1080
1081 size = blocksize -
1082 offsetof(struct ocfs2_xattr_block,
1083 xb_attrs.xb_root.xt_list.l_recs);
1084
1085 return size / sizeof(struct ocfs2_extent_rec);
1086}
887#endif /* __KERNEL__ */ 1087#endif /* __KERNEL__ */
888 1088
889 1089
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h
new file mode 100644
index 000000000000..b91c78f8f558
--- /dev/null
+++ b/fs/ocfs2/ocfs2_jbd_compat.h
@@ -0,0 +1,82 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * ocfs2_jbd_compat.h
5 *
6 * Compatibility defines for JBD.
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_JBD_COMPAT_H
21#define OCFS2_JBD_COMPAT_H
22
23#ifndef CONFIG_OCFS2_COMPAT_JBD
24# error Should not have been included
25#endif
26
27struct jbd2_inode {
28 unsigned int dummy;
29};
30
31#define JBD2_BARRIER JFS_BARRIER
32#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
33
34#define jbd2_journal_ack_err journal_ack_err
35#define jbd2_journal_clear_err journal_clear_err
36#define jbd2_journal_destroy journal_destroy
37#define jbd2_journal_dirty_metadata journal_dirty_metadata
38#define jbd2_journal_errno journal_errno
39#define jbd2_journal_extend journal_extend
40#define jbd2_journal_flush journal_flush
41#define jbd2_journal_force_commit journal_force_commit
42#define jbd2_journal_get_write_access journal_get_write_access
43#define jbd2_journal_get_undo_access journal_get_undo_access
44#define jbd2_journal_init_inode journal_init_inode
45#define jbd2_journal_invalidatepage journal_invalidatepage
46#define jbd2_journal_load journal_load
47#define jbd2_journal_lock_updates journal_lock_updates
48#define jbd2_journal_restart journal_restart
49#define jbd2_journal_start journal_start
50#define jbd2_journal_start_commit journal_start_commit
51#define jbd2_journal_stop journal_stop
52#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
53#define jbd2_journal_unlock_updates journal_unlock_updates
54#define jbd2_journal_wipe journal_wipe
55#define jbd2_log_wait_commit log_wait_commit
56
57static inline int jbd2_journal_file_inode(handle_t *handle,
58 struct jbd2_inode *inode)
59{
60 return 0;
61}
62
63static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
64 loff_t new_size)
65{
66 return 0;
67}
68
69static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
70 struct inode *inode)
71{
72 return;
73}
74
75static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
76 struct jbd2_inode *jinode)
77{
78 return;
79}
80
81
82#endif /* OCFS2_JBD_COMPAT_H */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 8166968e9015..ffd48db229a7 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -200,7 +200,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data)
200 if (cluster > clusters) 200 if (cluster > clusters)
201 break; 201 break;
202 202
203 ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); 203 ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
204 if (ret < 0) { 204 if (ret < 0) {
205 mlog_errno(ret); 205 mlog_errno(ret);
206 break; 206 break;
@@ -236,8 +236,8 @@ static void ocfs2_update_super_and_backups(struct inode *inode,
236 * update the superblock last. 236 * update the superblock last.
237 * It doesn't matter if the write failed. 237 * It doesn't matter if the write failed.
238 */ 238 */
239 ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, 239 ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1,
240 &super_bh, 0, NULL); 240 &super_bh);
241 if (ret < 0) { 241 if (ret < 0) {
242 mlog_errno(ret); 242 mlog_errno(ret);
243 goto out; 243 goto out;
@@ -332,8 +332,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
332 lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, 332 lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
333 first_new_cluster - 1); 333 first_new_cluster - 1);
334 334
335 ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, 335 ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
336 main_bm_inode);
337 if (ret < 0) { 336 if (ret < 0) {
338 mlog_errno(ret); 337 mlog_errno(ret);
339 goto out_unlock; 338 goto out_unlock;
@@ -540,7 +539,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
540 goto out_unlock; 539 goto out_unlock;
541 } 540 }
542 541
543 ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); 542 ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh);
544 if (ret < 0) { 543 if (ret < 0) {
545 mlog(ML_ERROR, "Can't read the group descriptor # %llu " 544 mlog(ML_ERROR, "Can't read the group descriptor # %llu "
546 "from the device.", (unsigned long long)input->group); 545 "from the device.", (unsigned long long)input->group);
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bb5ff8939bf1..bdda2d8f8508 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If 150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
151 * this is not true, the read of -1 (UINT64_MAX) will fail. 151 * this is not true, the read of -1 (UINT64_MAX) will fail.
152 */ 152 */
153 ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, 153 ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
154 si->si_inode); 154 OCFS2_BH_IGNORE_CACHE);
155 if (ret == 0) { 155 if (ret == 0) {
156 spin_lock(&osb->osb_lock); 156 spin_lock(&osb->osb_lock);
157 ocfs2_update_slot_info(si); 157 ocfs2_update_slot_info(si);
@@ -404,7 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
404 (unsigned long long)blkno); 404 (unsigned long long)blkno);
405 405
406 bh = NULL; /* Acquire a fresh bh */ 406 bh = NULL; /* Acquire a fresh bh */
407 status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); 407 status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
408 OCFS2_BH_IGNORE_CACHE);
408 if (status < 0) { 409 if (status < 0) {
409 mlog_errno(status); 410 mlog_errno(status);
410 goto bail; 411 goto bail;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 353fc35c6748..faec2d879357 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -28,6 +28,7 @@
28#include "ocfs2.h" /* For struct ocfs2_lock_res */ 28#include "ocfs2.h" /* For struct ocfs2_lock_res */
29#include "stackglue.h" 29#include "stackglue.h"
30 30
31#include <linux/dlm_plock.h>
31 32
32/* 33/*
33 * The control protocol starts with a handshake. Until the handshake 34 * The control protocol starts with a handshake. Until the handshake
@@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
746{ 747{
747} 748}
748 749
750static int user_plock(struct ocfs2_cluster_connection *conn,
751 u64 ino,
752 struct file *file,
753 int cmd,
754 struct file_lock *fl)
755{
756 /*
757 * This more or less just demuxes the plock request into any
758 * one of three dlm calls.
759 *
760 * Internally, fs/dlm will pass these to a misc device, which
761 * a userspace daemon will read and write to.
762 *
763 * For now, cancel requests (which happen internally only),
764 * are turned into unlocks. Most of this function taken from
765 * gfs2_lock.
766 */
767
768 if (cmd == F_CANCELLK) {
769 cmd = F_SETLK;
770 fl->fl_type = F_UNLCK;
771 }
772
773 if (IS_GETLK(cmd))
774 return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
775 else if (fl->fl_type == F_UNLCK)
776 return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
777 else
778 return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
779}
780
749/* 781/*
750 * Compare a requested locking protocol version against the current one. 782 * Compare a requested locking protocol version against the current one.
751 * 783 *
@@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
839 .dlm_unlock = user_dlm_unlock, 871 .dlm_unlock = user_dlm_unlock,
840 .lock_status = user_dlm_lock_status, 872 .lock_status = user_dlm_lock_status,
841 .lock_lvb = user_dlm_lvb, 873 .lock_lvb = user_dlm_lvb,
874 .plock = user_plock,
842 .dump_lksb = user_dlm_dump_lksb, 875 .dump_lksb = user_dlm_dump_lksb,
843}; 876};
844 877
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 07f348b8d721..68b668b0e60a 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -288,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
288} 288}
289EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); 289EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
290 290
291int ocfs2_stack_supports_plocks(void)
292{
293 return active_stack && active_stack->sp_ops->plock;
294}
295EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks);
296
297/*
298 * ocfs2_plock() can only be safely called if
299 * ocfs2_stack_supports_plocks() returned true
300 */
301int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
302 struct file *file, int cmd, struct file_lock *fl)
303{
304 WARN_ON_ONCE(active_stack->sp_ops->plock == NULL);
305 if (active_stack->sp_ops->plock)
306 return active_stack->sp_ops->plock(conn, ino, file, cmd, fl);
307 return -EOPNOTSUPP;
308}
309EXPORT_SYMBOL_GPL(ocfs2_plock);
310
291int ocfs2_cluster_connect(const char *stack_name, 311int ocfs2_cluster_connect(const char *stack_name,
292 const char *group, 312 const char *group,
293 int grouplen, 313 int grouplen,
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index db56281dd1be..c571af375ef8 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -28,6 +28,10 @@
28#include "dlm/dlmapi.h" 28#include "dlm/dlmapi.h"
29#include <linux/dlm.h> 29#include <linux/dlm.h>
30 30
31/* Needed for plock-related prototypes */
32struct file;
33struct file_lock;
34
31/* 35/*
32 * dlmconstants.h does not have a LOCAL flag. We hope to remove it 36 * dlmconstants.h does not have a LOCAL flag. We hope to remove it
33 * some day, but right now we need it. Let's fake it. This value is larger 37 * some day, but right now we need it. Let's fake it. This value is larger
@@ -187,6 +191,17 @@ struct ocfs2_stack_operations {
187 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); 191 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
188 192
189 /* 193 /*
194 * Cluster-aware posix locks
195 *
196 * This is NULL for stacks which do not support posix locks.
197 */
198 int (*plock)(struct ocfs2_cluster_connection *conn,
199 u64 ino,
200 struct file *file,
201 int cmd,
202 struct file_lock *fl);
203
204 /*
190 * This is an optoinal debugging hook. If provided, the 205 * This is an optoinal debugging hook. If provided, the
191 * stack can dump debugging information about this lock. 206 * stack can dump debugging information about this lock.
192 */ 207 */
@@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
240void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); 255void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
241void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); 256void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
242 257
258int ocfs2_stack_supports_plocks(void);
259int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
260 struct file *file, int cmd, struct file_lock *fl);
261
243void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); 262void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto);
244 263
245 264
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d2d278fb9819..c5ff18b46b57 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle,
62 struct ocfs2_chain_list *cl); 62 struct ocfs2_chain_list *cl);
63static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 63static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
64 struct inode *alloc_inode, 64 struct inode *alloc_inode,
65 struct buffer_head *bh); 65 struct buffer_head *bh,
66 u64 max_block);
66 67
67static int ocfs2_cluster_group_search(struct inode *inode, 68static int ocfs2_cluster_group_search(struct inode *inode,
68 struct buffer_head *group_bh, 69 struct buffer_head *group_bh,
69 u32 bits_wanted, u32 min_bits, 70 u32 bits_wanted, u32 min_bits,
71 u64 max_block,
70 u16 *bit_off, u16 *bits_found); 72 u16 *bit_off, u16 *bits_found);
71static int ocfs2_block_group_search(struct inode *inode, 73static int ocfs2_block_group_search(struct inode *inode,
72 struct buffer_head *group_bh, 74 struct buffer_head *group_bh,
73 u32 bits_wanted, u32 min_bits, 75 u32 bits_wanted, u32 min_bits,
76 u64 max_block,
74 u16 *bit_off, u16 *bits_found); 77 u16 *bit_off, u16 *bits_found);
75static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 78static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
76 struct ocfs2_alloc_context *ac, 79 struct ocfs2_alloc_context *ac,
@@ -110,8 +113,11 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
110 u64 data_blkno, 113 u64 data_blkno,
111 u64 *bg_blkno, 114 u64 *bg_blkno,
112 u16 *bg_bit_off); 115 u16 *bg_bit_off);
116static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
117 u32 bits_wanted, u64 max_block,
118 struct ocfs2_alloc_context **ac);
113 119
114static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) 120void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
115{ 121{
116 struct inode *inode = ac->ac_inode; 122 struct inode *inode = ac->ac_inode;
117 123
@@ -124,10 +130,8 @@ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
124 iput(inode); 130 iput(inode);
125 ac->ac_inode = NULL; 131 ac->ac_inode = NULL;
126 } 132 }
127 if (ac->ac_bh) { 133 brelse(ac->ac_bh);
128 brelse(ac->ac_bh); 134 ac->ac_bh = NULL;
129 ac->ac_bh = NULL;
130 }
131} 135}
132 136
133void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 137void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -276,7 +280,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
276 */ 280 */
277static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 281static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
278 struct inode *alloc_inode, 282 struct inode *alloc_inode,
279 struct buffer_head *bh) 283 struct buffer_head *bh,
284 u64 max_block)
280{ 285{
281 int status, credits; 286 int status, credits;
282 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 287 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
@@ -294,9 +299,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
294 mlog_entry_void(); 299 mlog_entry_void();
295 300
296 cl = &fe->id2.i_chain; 301 cl = &fe->id2.i_chain;
297 status = ocfs2_reserve_clusters(osb, 302 status = ocfs2_reserve_clusters_with_limit(osb,
298 le16_to_cpu(cl->cl_cpg), 303 le16_to_cpu(cl->cl_cpg),
299 &ac); 304 max_block, &ac);
300 if (status < 0) { 305 if (status < 0) {
301 if (status != -ENOSPC) 306 if (status != -ENOSPC)
302 mlog_errno(status); 307 mlog_errno(status);
@@ -394,8 +399,7 @@ bail:
394 if (ac) 399 if (ac)
395 ocfs2_free_alloc_context(ac); 400 ocfs2_free_alloc_context(ac);
396 401
397 if (bg_bh) 402 brelse(bg_bh);
398 brelse(bg_bh);
399 403
400 mlog_exit(status); 404 mlog_exit(status);
401 return status; 405 return status;
@@ -469,7 +473,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
469 goto bail; 473 goto bail;
470 } 474 }
471 475
472 status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 476 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
477 ac->ac_max_block);
473 if (status < 0) { 478 if (status < 0) {
474 if (status != -ENOSPC) 479 if (status != -ENOSPC)
475 mlog_errno(status); 480 mlog_errno(status);
@@ -486,16 +491,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
486 get_bh(bh); 491 get_bh(bh);
487 ac->ac_bh = bh; 492 ac->ac_bh = bh;
488bail: 493bail:
489 if (bh) 494 brelse(bh);
490 brelse(bh);
491 495
492 mlog_exit(status); 496 mlog_exit(status);
493 return status; 497 return status;
494} 498}
495 499
496int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 500int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
497 struct ocfs2_dinode *fe, 501 int blocks,
498 struct ocfs2_alloc_context **ac) 502 struct ocfs2_alloc_context **ac)
499{ 503{
500 int status; 504 int status;
501 u32 slot; 505 u32 slot;
@@ -507,7 +511,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
507 goto bail; 511 goto bail;
508 } 512 }
509 513
510 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); 514 (*ac)->ac_bits_wanted = blocks;
511 (*ac)->ac_which = OCFS2_AC_USE_META; 515 (*ac)->ac_which = OCFS2_AC_USE_META;
512 slot = osb->slot_num; 516 slot = osb->slot_num;
513 (*ac)->ac_group_search = ocfs2_block_group_search; 517 (*ac)->ac_group_search = ocfs2_block_group_search;
@@ -532,6 +536,15 @@ bail:
532 return status; 536 return status;
533} 537}
534 538
539int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
540 struct ocfs2_extent_list *root_el,
541 struct ocfs2_alloc_context **ac)
542{
543 return ocfs2_reserve_new_metadata_blocks(osb,
544 ocfs2_extend_meta_needed(root_el),
545 ac);
546}
547
535static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, 548static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
536 struct ocfs2_alloc_context *ac) 549 struct ocfs2_alloc_context *ac)
537{ 550{
@@ -582,6 +595,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
582 (*ac)->ac_group_search = ocfs2_block_group_search; 595 (*ac)->ac_group_search = ocfs2_block_group_search;
583 596
584 /* 597 /*
598 * stat(2) can't handle i_ino > 32bits, so we tell the
599 * lower levels not to allocate us a block group past that
600 * limit. The 'inode64' mount option avoids this behavior.
601 */
602 if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
603 (*ac)->ac_max_block = (u32)~0U;
604
605 /*
585 * slot is set when we successfully steal inode from other nodes. 606 * slot is set when we successfully steal inode from other nodes.
586 * It is reset in 3 places: 607 * It is reset in 3 places:
587 * 1. when we flush the truncate log 608 * 1. when we flush the truncate log
@@ -661,9 +682,9 @@ bail:
661/* Callers don't need to care which bitmap (local alloc or main) to 682/* Callers don't need to care which bitmap (local alloc or main) to
662 * use so we figure it out for them, but unfortunately this clutters 683 * use so we figure it out for them, but unfortunately this clutters
663 * things a bit. */ 684 * things a bit. */
664int ocfs2_reserve_clusters(struct ocfs2_super *osb, 685static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
665 u32 bits_wanted, 686 u32 bits_wanted, u64 max_block,
666 struct ocfs2_alloc_context **ac) 687 struct ocfs2_alloc_context **ac)
667{ 688{
668 int status; 689 int status;
669 690
@@ -677,24 +698,20 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
677 } 698 }
678 699
679 (*ac)->ac_bits_wanted = bits_wanted; 700 (*ac)->ac_bits_wanted = bits_wanted;
701 (*ac)->ac_max_block = max_block;
680 702
681 status = -ENOSPC; 703 status = -ENOSPC;
682 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { 704 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
683 status = ocfs2_reserve_local_alloc_bits(osb, 705 status = ocfs2_reserve_local_alloc_bits(osb,
684 bits_wanted, 706 bits_wanted,
685 *ac); 707 *ac);
686 if ((status < 0) && (status != -ENOSPC)) { 708 if (status == -EFBIG) {
709 /* The local alloc window is outside ac_max_block.
710 * use the main bitmap. */
711 status = -ENOSPC;
712 } else if ((status < 0) && (status != -ENOSPC)) {
687 mlog_errno(status); 713 mlog_errno(status);
688 goto bail; 714 goto bail;
689 } else if (status == -ENOSPC) {
690 /* reserve_local_bits will return enospc with
691 * the local alloc inode still locked, so we
692 * can change this safely here. */
693 mlog(0, "Disabling local alloc\n");
694 /* We set to OCFS2_LA_DISABLED so that umount
695 * can clean up what's left of the local
696 * allocation */
697 osb->local_alloc_state = OCFS2_LA_DISABLED;
698 } 715 }
699 } 716 }
700 717
@@ -718,6 +735,13 @@ bail:
718 return status; 735 return status;
719} 736}
720 737
738int ocfs2_reserve_clusters(struct ocfs2_super *osb,
739 u32 bits_wanted,
740 struct ocfs2_alloc_context **ac)
741{
742 return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
743}
744
721/* 745/*
722 * More or less lifted from ext3. I'll leave their description below: 746 * More or less lifted from ext3. I'll leave their description below:
723 * 747 *
@@ -1000,11 +1024,14 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
1000static int ocfs2_cluster_group_search(struct inode *inode, 1024static int ocfs2_cluster_group_search(struct inode *inode,
1001 struct buffer_head *group_bh, 1025 struct buffer_head *group_bh,
1002 u32 bits_wanted, u32 min_bits, 1026 u32 bits_wanted, u32 min_bits,
1027 u64 max_block,
1003 u16 *bit_off, u16 *bits_found) 1028 u16 *bit_off, u16 *bits_found)
1004{ 1029{
1005 int search = -ENOSPC; 1030 int search = -ENOSPC;
1006 int ret; 1031 int ret;
1032 u64 blkoff;
1007 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; 1033 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1034 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1008 u16 tmp_off, tmp_found; 1035 u16 tmp_off, tmp_found;
1009 unsigned int max_bits, gd_cluster_off; 1036 unsigned int max_bits, gd_cluster_off;
1010 1037
@@ -1037,6 +1064,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1037 if (ret) 1064 if (ret)
1038 return ret; 1065 return ret;
1039 1066
1067 if (max_block) {
1068 blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1069 gd_cluster_off +
1070 tmp_off + tmp_found);
1071 mlog(0, "Checking %llu against %llu\n",
1072 (unsigned long long)blkoff,
1073 (unsigned long long)max_block);
1074 if (blkoff > max_block)
1075 return -ENOSPC;
1076 }
1077
1040 /* ocfs2_block_group_find_clear_bits() might 1078 /* ocfs2_block_group_find_clear_bits() might
1041 * return success, but we still want to return 1079 * return success, but we still want to return
1042 * -ENOSPC unless it found the minimum number 1080 * -ENOSPC unless it found the minimum number
@@ -1045,6 +1083,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1045 *bit_off = tmp_off; 1083 *bit_off = tmp_off;
1046 *bits_found = tmp_found; 1084 *bits_found = tmp_found;
1047 search = 0; /* success */ 1085 search = 0; /* success */
1086 } else if (tmp_found) {
1087 /*
1088 * Don't show bits which we'll be returning
1089 * for allocation to the local alloc bitmap.
1090 */
1091 ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
1048 } 1092 }
1049 } 1093 }
1050 1094
@@ -1054,19 +1098,31 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1054static int ocfs2_block_group_search(struct inode *inode, 1098static int ocfs2_block_group_search(struct inode *inode,
1055 struct buffer_head *group_bh, 1099 struct buffer_head *group_bh,
1056 u32 bits_wanted, u32 min_bits, 1100 u32 bits_wanted, u32 min_bits,
1101 u64 max_block,
1057 u16 *bit_off, u16 *bits_found) 1102 u16 *bit_off, u16 *bits_found)
1058{ 1103{
1059 int ret = -ENOSPC; 1104 int ret = -ENOSPC;
1105 u64 blkoff;
1060 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 1106 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
1061 1107
1062 BUG_ON(min_bits != 1); 1108 BUG_ON(min_bits != 1);
1063 BUG_ON(ocfs2_is_cluster_bitmap(inode)); 1109 BUG_ON(ocfs2_is_cluster_bitmap(inode));
1064 1110
1065 if (bg->bg_free_bits_count) 1111 if (bg->bg_free_bits_count) {
1066 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1112 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1067 group_bh, bits_wanted, 1113 group_bh, bits_wanted,
1068 le16_to_cpu(bg->bg_bits), 1114 le16_to_cpu(bg->bg_bits),
1069 bit_off, bits_found); 1115 bit_off, bits_found);
1116 if (!ret && max_block) {
1117 blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
1118 *bits_found;
1119 mlog(0, "Checking %llu against %llu\n",
1120 (unsigned long long)blkoff,
1121 (unsigned long long)max_block);
1122 if (blkoff > max_block)
1123 ret = -ENOSPC;
1124 }
1125 }
1070 1126
1071 return ret; 1127 return ret;
1072} 1128}
@@ -1116,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1116 struct ocfs2_group_desc *gd; 1172 struct ocfs2_group_desc *gd;
1117 struct inode *alloc_inode = ac->ac_inode; 1173 struct inode *alloc_inode = ac->ac_inode;
1118 1174
1119 ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, 1175 ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
1120 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1121 if (ret < 0) { 1176 if (ret < 0) {
1122 mlog_errno(ret); 1177 mlog_errno(ret);
1123 return ret; 1178 return ret;
@@ -1131,7 +1186,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1131 } 1186 }
1132 1187
1133 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1188 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1134 bit_off, &found); 1189 ac->ac_max_block, bit_off, &found);
1135 if (ret < 0) { 1190 if (ret < 0) {
1136 if (ret != -ENOSPC) 1191 if (ret != -ENOSPC)
1137 mlog_errno(ret); 1192 mlog_errno(ret);
@@ -1186,9 +1241,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1186 bits_wanted, chain, 1241 bits_wanted, chain,
1187 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 1242 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
1188 1243
1189 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1244 status = ocfs2_read_block(alloc_inode,
1190 le64_to_cpu(cl->cl_recs[chain].c_blkno), 1245 le64_to_cpu(cl->cl_recs[chain].c_blkno),
1191 &group_bh, OCFS2_BH_CACHED, alloc_inode); 1246 &group_bh);
1192 if (status < 0) { 1247 if (status < 0) {
1193 mlog_errno(status); 1248 mlog_errno(status);
1194 goto bail; 1249 goto bail;
@@ -1204,21 +1259,20 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1204 /* for now, the chain search is a bit simplistic. We just use 1259 /* for now, the chain search is a bit simplistic. We just use
1205 * the 1st group with any empty bits. */ 1260 * the 1st group with any empty bits. */
1206 while ((status = ac->ac_group_search(alloc_inode, group_bh, 1261 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1207 bits_wanted, min_bits, bit_off, 1262 bits_wanted, min_bits,
1263 ac->ac_max_block, bit_off,
1208 &tmp_bits)) == -ENOSPC) { 1264 &tmp_bits)) == -ENOSPC) {
1209 if (!bg->bg_next_group) 1265 if (!bg->bg_next_group)
1210 break; 1266 break;
1211 1267
1212 if (prev_group_bh) { 1268 brelse(prev_group_bh);
1213 brelse(prev_group_bh); 1269 prev_group_bh = NULL;
1214 prev_group_bh = NULL; 1270
1215 }
1216 next_group = le64_to_cpu(bg->bg_next_group); 1271 next_group = le64_to_cpu(bg->bg_next_group);
1217 prev_group_bh = group_bh; 1272 prev_group_bh = group_bh;
1218 group_bh = NULL; 1273 group_bh = NULL;
1219 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1274 status = ocfs2_read_block(alloc_inode,
1220 next_group, &group_bh, 1275 next_group, &group_bh);
1221 OCFS2_BH_CACHED, alloc_inode);
1222 if (status < 0) { 1276 if (status < 0) {
1223 mlog_errno(status); 1277 mlog_errno(status);
1224 goto bail; 1278 goto bail;
@@ -1307,10 +1361,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1307 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1361 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1308 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1362 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1309bail: 1363bail:
1310 if (group_bh) 1364 brelse(group_bh);
1311 brelse(group_bh); 1365 brelse(prev_group_bh);
1312 if (prev_group_bh)
1313 brelse(prev_group_bh);
1314 1366
1315 mlog_exit(status); 1367 mlog_exit(status);
1316 return status; 1368 return status;
@@ -1723,7 +1775,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1723{ 1775{
1724 int status = 0; 1776 int status = 0;
1725 u32 tmp_used; 1777 u32 tmp_used;
1726 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
1727 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; 1778 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
1728 struct ocfs2_chain_list *cl = &fe->id2.i_chain; 1779 struct ocfs2_chain_list *cl = &fe->id2.i_chain;
1729 struct buffer_head *group_bh = NULL; 1780 struct buffer_head *group_bh = NULL;
@@ -1742,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1742 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 1793 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1743 (unsigned long long)bg_blkno, start_bit); 1794 (unsigned long long)bg_blkno, start_bit);
1744 1795
1745 status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, 1796 status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
1746 alloc_inode);
1747 if (status < 0) { 1797 if (status < 0) {
1748 mlog_errno(status); 1798 mlog_errno(status);
1749 goto bail; 1799 goto bail;
@@ -1784,8 +1834,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1784 } 1834 }
1785 1835
1786bail: 1836bail:
1787 if (group_bh) 1837 brelse(group_bh);
1788 brelse(group_bh);
1789 1838
1790 mlog_exit(status); 1839 mlog_exit(status);
1791 return status; 1840 return status;
@@ -1838,9 +1887,15 @@ int ocfs2_free_clusters(handle_t *handle,
1838 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 1887 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
1839 bg_start_bit, bg_blkno, 1888 bg_start_bit, bg_blkno,
1840 num_clusters); 1889 num_clusters);
1841 if (status < 0) 1890 if (status < 0) {
1842 mlog_errno(status); 1891 mlog_errno(status);
1892 goto out;
1893 }
1843 1894
1895 ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
1896 num_clusters);
1897
1898out:
1844 mlog_exit(status); 1899 mlog_exit(status);
1845 return status; 1900 return status;
1846} 1901}
@@ -1891,3 +1946,84 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
1891 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); 1946 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
1892 } 1947 }
1893} 1948}
1949
1950/*
1951 * For a given allocation, determine which allocators will need to be
1952 * accessed, and lock them, reserving the appropriate number of bits.
1953 *
1954 * Sparse file systems call this from ocfs2_write_begin_nolock()
1955 * and ocfs2_allocate_unwritten_extents().
1956 *
1957 * File systems which don't support holes call this from
1958 * ocfs2_extend_allocation().
1959 */
1960int ocfs2_lock_allocators(struct inode *inode,
1961 struct ocfs2_extent_tree *et,
1962 u32 clusters_to_add, u32 extents_to_split,
1963 struct ocfs2_alloc_context **data_ac,
1964 struct ocfs2_alloc_context **meta_ac)
1965{
1966 int ret = 0, num_free_extents;
1967 unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
1968 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1969
1970 *meta_ac = NULL;
1971 if (data_ac)
1972 *data_ac = NULL;
1973
1974 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
1975
1976 num_free_extents = ocfs2_num_free_extents(osb, inode, et);
1977 if (num_free_extents < 0) {
1978 ret = num_free_extents;
1979 mlog_errno(ret);
1980 goto out;
1981 }
1982
1983 /*
1984 * Sparse allocation file systems need to be more conservative
1985 * with reserving room for expansion - the actual allocation
1986 * happens while we've got a journal handle open so re-taking
1987 * a cluster lock (because we ran out of room for another
1988 * extent) will violate ordering rules.
1989 *
1990 * Most of the time we'll only be seeing this 1 cluster at a time
1991 * anyway.
1992 *
1993 * Always lock for any unwritten extents - we might want to
1994 * add blocks during a split.
1995 */
1996 if (!num_free_extents ||
1997 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
1998 ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
1999 if (ret < 0) {
2000 if (ret != -ENOSPC)
2001 mlog_errno(ret);
2002 goto out;
2003 }
2004 }
2005
2006 if (clusters_to_add == 0)
2007 goto out;
2008
2009 ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
2010 if (ret < 0) {
2011 if (ret != -ENOSPC)
2012 mlog_errno(ret);
2013 goto out;
2014 }
2015
2016out:
2017 if (ret) {
2018 if (*meta_ac) {
2019 ocfs2_free_alloc_context(*meta_ac);
2020 *meta_ac = NULL;
2021 }
2022
2023 /*
2024 * We cannot have an error and a non null *data_ac.
2025 */
2026 }
2027
2028 return ret;
2029}
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 544c600662bd..4df159d8f450 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -28,10 +28,11 @@
28 28
29typedef int (group_search_t)(struct inode *, 29typedef int (group_search_t)(struct inode *,
30 struct buffer_head *, 30 struct buffer_head *,
31 u32, 31 u32, /* bits_wanted */
32 u32, 32 u32, /* min_bits */
33 u16 *, 33 u64, /* max_block */
34 u16 *); 34 u16 *, /* *bit_off */
35 u16 *); /* *bits_found */
35 36
36struct ocfs2_alloc_context { 37struct ocfs2_alloc_context {
37 struct inode *ac_inode; /* which bitmap are we allocating from? */ 38 struct inode *ac_inode; /* which bitmap are we allocating from? */
@@ -51,6 +52,8 @@ struct ocfs2_alloc_context {
51 group_search_t *ac_group_search; 52 group_search_t *ac_group_search;
52 53
53 u64 ac_last_group; 54 u64 ac_last_group;
55 u64 ac_max_block; /* Highest block number to allocate. 0 is
56 is the same as ~0 - unlimited */
54}; 57};
55 58
56void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); 59void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
@@ -59,9 +62,17 @@ static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac)
59 return ac->ac_bits_wanted - ac->ac_bits_given; 62 return ac->ac_bits_wanted - ac->ac_bits_given;
60} 63}
61 64
65/*
66 * Please note that the caller must make sure that root_el is the root
67 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
68 * the result may be wrong.
69 */
62int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 70int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
63 struct ocfs2_dinode *fe, 71 struct ocfs2_extent_list *root_el,
64 struct ocfs2_alloc_context **ac); 72 struct ocfs2_alloc_context **ac);
73int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
74 int blocks,
75 struct ocfs2_alloc_context **ac);
65int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 76int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
66 struct ocfs2_alloc_context **ac); 77 struct ocfs2_alloc_context **ac);
67int ocfs2_reserve_clusters(struct ocfs2_super *osb, 78int ocfs2_reserve_clusters(struct ocfs2_super *osb,
@@ -147,6 +158,7 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
147 * apis above. */ 158 * apis above. */
148int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, 159int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
149 struct ocfs2_alloc_context *ac); 160 struct ocfs2_alloc_context *ac);
161void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
150 162
151/* given a cluster offset, calculate which block group it belongs to 163/* given a cluster offset, calculate which block group it belongs to
152 * and return that block offset. */ 164 * and return that block offset. */
@@ -156,4 +168,8 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
156int ocfs2_check_group_descriptor(struct super_block *sb, 168int ocfs2_check_group_descriptor(struct super_block *sb,
157 struct ocfs2_dinode *di, 169 struct ocfs2_dinode *di,
158 struct ocfs2_group_desc *gd); 170 struct ocfs2_group_desc *gd);
171int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
172 u32 clusters_to_add, u32 extents_to_split,
173 struct ocfs2_alloc_context **data_ac,
174 struct ocfs2_alloc_context **meta_ac);
159#endif /* _CHAINALLOC_H_ */ 175#endif /* _CHAINALLOC_H_ */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 70334d85aff1..304b63ac78cf 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -64,6 +64,7 @@
64#include "sysfile.h" 64#include "sysfile.h"
65#include "uptodate.h" 65#include "uptodate.h"
66#include "ver.h" 66#include "ver.h"
67#include "xattr.h"
67 68
68#include "buffer_head_io.h" 69#include "buffer_head_io.h"
69 70
@@ -154,6 +155,9 @@ enum {
154 Opt_localalloc, 155 Opt_localalloc,
155 Opt_localflocks, 156 Opt_localflocks,
156 Opt_stack, 157 Opt_stack,
158 Opt_user_xattr,
159 Opt_nouser_xattr,
160 Opt_inode64,
157 Opt_err, 161 Opt_err,
158}; 162};
159 163
@@ -173,6 +177,9 @@ static const match_table_t tokens = {
173 {Opt_localalloc, "localalloc=%d"}, 177 {Opt_localalloc, "localalloc=%d"},
174 {Opt_localflocks, "localflocks"}, 178 {Opt_localflocks, "localflocks"},
175 {Opt_stack, "cluster_stack=%s"}, 179 {Opt_stack, "cluster_stack=%s"},
180 {Opt_user_xattr, "user_xattr"},
181 {Opt_nouser_xattr, "nouser_xattr"},
182 {Opt_inode64, "inode64"},
176 {Opt_err, NULL} 183 {Opt_err, NULL}
177}; 184};
178 185
@@ -205,10 +212,11 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
205 ocfs2_schedule_truncate_log_flush(osb, 0); 212 ocfs2_schedule_truncate_log_flush(osb, 0);
206 } 213 }
207 214
208 if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) { 215 if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal,
216 &target)) {
209 if (wait) 217 if (wait)
210 log_wait_commit(OCFS2_SB(sb)->journal->j_journal, 218 jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal,
211 target); 219 target);
212 } 220 }
213 return 0; 221 return 0;
214} 222}
@@ -325,6 +333,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
325 if (!oi) 333 if (!oi)
326 return NULL; 334 return NULL;
327 335
336 jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
328 return &oi->vfs_inode; 337 return &oi->vfs_inode;
329} 338}
330 339
@@ -406,6 +415,15 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
406 goto out; 415 goto out;
407 } 416 }
408 417
418 /* Probably don't want this on remount; it might
419 * mess with other nodes */
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) &&
421 (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) {
422 ret = -EINVAL;
423 mlog(ML_ERROR, "Cannot enable inode64 on remount\n");
424 goto out;
425 }
426
409 /* We're going to/from readonly mode. */ 427 /* We're going to/from readonly mode. */
410 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 428 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
411 /* Lock here so the check of HARD_RO and the potential 429 /* Lock here so the check of HARD_RO and the potential
@@ -637,7 +655,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
637 osb->s_atime_quantum = parsed_options.atime_quantum; 655 osb->s_atime_quantum = parsed_options.atime_quantum;
638 osb->preferred_slot = parsed_options.slot; 656 osb->preferred_slot = parsed_options.slot;
639 osb->osb_commit_interval = parsed_options.commit_interval; 657 osb->osb_commit_interval = parsed_options.commit_interval;
640 osb->local_alloc_size = parsed_options.localalloc_opt; 658 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
659 osb->local_alloc_bits = osb->local_alloc_default_bits;
641 660
642 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 661 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
643 if (status) 662 if (status)
@@ -743,8 +762,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
743 return status; 762 return status;
744 763
745read_super_error: 764read_super_error:
746 if (bh != NULL) 765 brelse(bh);
747 brelse(bh);
748 766
749 if (inode) 767 if (inode)
750 iput(inode); 768 iput(inode);
@@ -847,6 +865,12 @@ static int ocfs2_parse_options(struct super_block *sb,
847 case Opt_data_writeback: 865 case Opt_data_writeback:
848 mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; 866 mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
849 break; 867 break;
868 case Opt_user_xattr:
869 mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
870 break;
871 case Opt_nouser_xattr:
872 mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR;
873 break;
850 case Opt_atime_quantum: 874 case Opt_atime_quantum:
851 if (match_int(&args[0], &option)) { 875 if (match_int(&args[0], &option)) {
852 status = 0; 876 status = 0;
@@ -873,7 +897,7 @@ static int ocfs2_parse_options(struct super_block *sb,
873 if (option < 0) 897 if (option < 0)
874 return 0; 898 return 0;
875 if (option == 0) 899 if (option == 0)
876 option = JBD_DEFAULT_MAX_COMMIT_AGE; 900 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
877 mopt->commit_interval = HZ * option; 901 mopt->commit_interval = HZ * option;
878 break; 902 break;
879 case Opt_localalloc: 903 case Opt_localalloc:
@@ -918,6 +942,9 @@ static int ocfs2_parse_options(struct super_block *sb,
918 OCFS2_STACK_LABEL_LEN); 942 OCFS2_STACK_LABEL_LEN);
919 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 943 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
920 break; 944 break;
945 case Opt_inode64:
946 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
947 break;
921 default: 948 default:
922 mlog(ML_ERROR, 949 mlog(ML_ERROR,
923 "Unrecognized mount option \"%s\" " 950 "Unrecognized mount option \"%s\" "
@@ -938,6 +965,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
938{ 965{
939 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); 966 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
940 unsigned long opts = osb->s_mount_opt; 967 unsigned long opts = osb->s_mount_opt;
968 unsigned int local_alloc_megs;
941 969
942 if (opts & OCFS2_MOUNT_HB_LOCAL) 970 if (opts & OCFS2_MOUNT_HB_LOCAL)
943 seq_printf(s, ",_netdev,heartbeat=local"); 971 seq_printf(s, ",_netdev,heartbeat=local");
@@ -970,8 +998,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
970 seq_printf(s, ",commit=%u", 998 seq_printf(s, ",commit=%u",
971 (unsigned) (osb->osb_commit_interval / HZ)); 999 (unsigned) (osb->osb_commit_interval / HZ));
972 1000
973 if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 1001 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
974 seq_printf(s, ",localalloc=%d", osb->local_alloc_size); 1002 if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
1003 seq_printf(s, ",localalloc=%d", local_alloc_megs);
975 1004
976 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 1005 if (opts & OCFS2_MOUNT_LOCALFLOCKS)
977 seq_printf(s, ",localflocks,"); 1006 seq_printf(s, ",localflocks,");
@@ -980,6 +1009,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
980 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, 1009 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
981 osb->osb_cluster_stack); 1010 osb->osb_cluster_stack);
982 1011
1012 if (opts & OCFS2_MOUNT_NOUSERXATTR)
1013 seq_printf(s, ",nouser_xattr");
1014 else
1015 seq_printf(s, ",user_xattr");
1016
1017 if (opts & OCFS2_MOUNT_INODE64)
1018 seq_printf(s, ",inode64");
1019
983 return 0; 1020 return 0;
984} 1021}
985 1022
@@ -1132,6 +1169,7 @@ static void ocfs2_inode_init_once(void *data)
1132 oi->ip_dir_start_lookup = 0; 1169 oi->ip_dir_start_lookup = 0;
1133 1170
1134 init_rwsem(&oi->ip_alloc_sem); 1171 init_rwsem(&oi->ip_alloc_sem);
1172 init_rwsem(&oi->ip_xattr_sem);
1135 mutex_init(&oi->ip_io_mutex); 1173 mutex_init(&oi->ip_io_mutex);
1136 1174
1137 oi->ip_blkno = 0ULL; 1175 oi->ip_blkno = 0ULL;
@@ -1375,6 +1413,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1375 sb->s_fs_info = osb; 1413 sb->s_fs_info = osb;
1376 sb->s_op = &ocfs2_sops; 1414 sb->s_op = &ocfs2_sops;
1377 sb->s_export_op = &ocfs2_export_ops; 1415 sb->s_export_op = &ocfs2_export_ops;
1416 sb->s_xattr = ocfs2_xattr_handlers;
1378 sb->s_time_gran = 1; 1417 sb->s_time_gran = 1;
1379 sb->s_flags |= MS_NOATIME; 1418 sb->s_flags |= MS_NOATIME;
1380 /* this is needed to support O_LARGEFILE */ 1419 /* this is needed to support O_LARGEFILE */
@@ -1421,8 +1460,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
1421 1460
1422 osb->slot_num = OCFS2_INVALID_SLOT; 1461 osb->slot_num = OCFS2_INVALID_SLOT;
1423 1462
1463 osb->s_xattr_inline_size = le16_to_cpu(
1464 di->id2.i_super.s_xattr_inline_size);
1465
1424 osb->local_alloc_state = OCFS2_LA_UNUSED; 1466 osb->local_alloc_state = OCFS2_LA_UNUSED;
1425 osb->local_alloc_bh = NULL; 1467 osb->local_alloc_bh = NULL;
1468 INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
1426 1469
1427 init_waitqueue_head(&osb->osb_mount_event); 1470 init_waitqueue_head(&osb->osb_mount_event);
1428 1471
@@ -1568,6 +1611,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1568 osb->first_cluster_group_blkno = 1611 osb->first_cluster_group_blkno =
1569 le64_to_cpu(di->id2.i_super.s_first_cluster_group); 1612 le64_to_cpu(di->id2.i_super.s_first_cluster_group);
1570 osb->fs_generation = le32_to_cpu(di->i_fs_generation); 1613 osb->fs_generation = le32_to_cpu(di->i_fs_generation);
1614 osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash);
1571 mlog(0, "vol_label: %s\n", osb->vol_label); 1615 mlog(0, "vol_label: %s\n", osb->vol_label);
1572 mlog(0, "uuid: %s\n", osb->uuid_str); 1616 mlog(0, "uuid: %s\n", osb->uuid_str);
1573 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", 1617 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n",
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index ba9dbb51d25b..cbd03dfdc7b9 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -50,6 +50,7 @@
50#include "inode.h" 50#include "inode.h"
51#include "journal.h" 51#include "journal.h"
52#include "symlink.h" 52#include "symlink.h"
53#include "xattr.h"
53 54
54#include "buffer_head_io.h" 55#include "buffer_head_io.h"
55 56
@@ -83,11 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
83 84
84 mlog_entry_void(); 85 mlog_entry_void();
85 86
86 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 87 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
87 OCFS2_I(inode)->ip_blkno,
88 bh,
89 OCFS2_BH_CACHED,
90 inode);
91 if (status < 0) { 88 if (status < 0) {
92 mlog_errno(status); 89 mlog_errno(status);
93 link = ERR_PTR(status); 90 link = ERR_PTR(status);
@@ -157,8 +154,7 @@ bail:
157 kunmap(page); 154 kunmap(page);
158 page_cache_release(page); 155 page_cache_release(page);
159 } 156 }
160 if (bh) 157 brelse(bh);
161 brelse(bh);
162 158
163 return ERR_PTR(status); 159 return ERR_PTR(status);
164} 160}
@@ -168,10 +164,18 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
168 .follow_link = ocfs2_follow_link, 164 .follow_link = ocfs2_follow_link,
169 .getattr = ocfs2_getattr, 165 .getattr = ocfs2_getattr,
170 .setattr = ocfs2_setattr, 166 .setattr = ocfs2_setattr,
167 .setxattr = generic_setxattr,
168 .getxattr = generic_getxattr,
169 .listxattr = ocfs2_listxattr,
170 .removexattr = generic_removexattr,
171}; 171};
172const struct inode_operations ocfs2_fast_symlink_inode_operations = { 172const struct inode_operations ocfs2_fast_symlink_inode_operations = {
173 .readlink = ocfs2_readlink, 173 .readlink = ocfs2_readlink,
174 .follow_link = ocfs2_follow_link, 174 .follow_link = ocfs2_follow_link,
175 .getattr = ocfs2_getattr, 175 .getattr = ocfs2_getattr,
176 .setattr = ocfs2_setattr, 176 .setattr = ocfs2_setattr,
177 .setxattr = generic_setxattr,
178 .getxattr = generic_getxattr,
179 .listxattr = ocfs2_listxattr,
180 .removexattr = generic_removexattr,
177}; 181};
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 4da8851f2b23..187b99ff0368 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -53,7 +53,11 @@
53#include <linux/highmem.h> 53#include <linux/highmem.h>
54#include <linux/buffer_head.h> 54#include <linux/buffer_head.h>
55#include <linux/rbtree.h> 55#include <linux/rbtree.h>
56#include <linux/jbd.h> 56#ifndef CONFIG_OCFS2_COMPAT_JBD
57# include <linux/jbd2.h>
58#else
59# include <linux/jbd.h>
60#endif
57 61
58#define MLOG_MASK_PREFIX ML_UPTODATE 62#define MLOG_MASK_PREFIX ML_UPTODATE
59 63
@@ -511,14 +515,10 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
511 ci->ci_num_cached--; 515 ci->ci_num_cached--;
512} 516}
513 517
514/* Called when we remove a chunk of metadata from an inode. We don't 518static void ocfs2_remove_block_from_cache(struct inode *inode,
515 * bother reverting things to an inlined array in the case of a remove 519 sector_t block)
516 * which moves us back under the limit. */
517void ocfs2_remove_from_cache(struct inode *inode,
518 struct buffer_head *bh)
519{ 520{
520 int index; 521 int index;
521 sector_t block = bh->b_blocknr;
522 struct ocfs2_meta_cache_item *item = NULL; 522 struct ocfs2_meta_cache_item *item = NULL;
523 struct ocfs2_inode_info *oi = OCFS2_I(inode); 523 struct ocfs2_inode_info *oi = OCFS2_I(inode);
524 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; 524 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
@@ -544,6 +544,30 @@ void ocfs2_remove_from_cache(struct inode *inode,
544 kmem_cache_free(ocfs2_uptodate_cachep, item); 544 kmem_cache_free(ocfs2_uptodate_cachep, item);
545} 545}
546 546
547/*
548 * Called when we remove a chunk of metadata from an inode. We don't
549 * bother reverting things to an inlined array in the case of a remove
550 * which moves us back under the limit.
551 */
552void ocfs2_remove_from_cache(struct inode *inode,
553 struct buffer_head *bh)
554{
555 sector_t block = bh->b_blocknr;
556
557 ocfs2_remove_block_from_cache(inode, block);
558}
559
560/* Called when we remove xattr clusters from an inode. */
561void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
562 sector_t block,
563 u32 c_len)
564{
565 unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len;
566
567 for (i = 0; i < b_len; i++, block++)
568 ocfs2_remove_block_from_cache(inode, block);
569}
570
547int __init init_ocfs2_uptodate_cache(void) 571int __init init_ocfs2_uptodate_cache(void)
548{ 572{
549 ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", 573 ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate",
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 2e73206059a8..531b4b3a0c47 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,6 +40,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
40 struct buffer_head *bh); 40 struct buffer_head *bh);
41void ocfs2_remove_from_cache(struct inode *inode, 41void ocfs2_remove_from_cache(struct inode *inode,
42 struct buffer_head *bh); 42 struct buffer_head *bh);
43void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
44 sector_t block,
45 u32 c_len);
43int ocfs2_buffer_read_ahead(struct inode *inode, 46int ocfs2_buffer_read_ahead(struct inode *inode,
44 struct buffer_head *bh); 47 struct buffer_head *bh);
45 48
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
new file mode 100644
index 000000000000..c25780a70dfd
--- /dev/null
+++ b/fs/ocfs2/xattr.c
@@ -0,0 +1,4834 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * xattr.c
5 *
6 * Copyright (C) 2008 Oracle. All rights reserved.
7 *
8 * CREDITS:
9 * Lots of code in this file is taken from ext3.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 021110-1307, USA.
25 */
26
27#include <linux/capability.h>
28#include <linux/fs.h>
29#include <linux/types.h>
30#include <linux/slab.h>
31#include <linux/highmem.h>
32#include <linux/pagemap.h>
33#include <linux/uio.h>
34#include <linux/sched.h>
35#include <linux/splice.h>
36#include <linux/mount.h>
37#include <linux/writeback.h>
38#include <linux/falloc.h>
39#include <linux/sort.h>
40#include <linux/init.h>
41#include <linux/module.h>
42#include <linux/string.h>
43
44#define MLOG_MASK_PREFIX ML_XATTR
45#include <cluster/masklog.h>
46
47#include "ocfs2.h"
48#include "alloc.h"
49#include "dlmglue.h"
50#include "file.h"
51#include "symlink.h"
52#include "sysfile.h"
53#include "inode.h"
54#include "journal.h"
55#include "ocfs2_fs.h"
56#include "suballoc.h"
57#include "uptodate.h"
58#include "buffer_head_io.h"
59#include "super.h"
60#include "xattr.h"
61
62
63struct ocfs2_xattr_def_value_root {
64 struct ocfs2_xattr_value_root xv;
65 struct ocfs2_extent_rec er;
66};
67
68struct ocfs2_xattr_bucket {
69 struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
70 struct ocfs2_xattr_header *xh;
71};
72
73#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
74#define OCFS2_XATTR_INLINE_SIZE 80
75
76static struct ocfs2_xattr_def_value_root def_xv = {
77 .xv.xr_list.l_count = cpu_to_le16(1),
78};
79
80struct xattr_handler *ocfs2_xattr_handlers[] = {
81 &ocfs2_xattr_user_handler,
82 &ocfs2_xattr_trusted_handler,
83 NULL
84};
85
86static struct xattr_handler *ocfs2_xattr_handler_map[] = {
87 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
88 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
89};
90
91struct ocfs2_xattr_info {
92 int name_index;
93 const char *name;
94 const void *value;
95 size_t value_len;
96};
97
98struct ocfs2_xattr_search {
99 struct buffer_head *inode_bh;
100 /*
101 * xattr_bh point to the block buffer head which has extended attribute
102 * when extended attribute in inode, xattr_bh is equal to inode_bh.
103 */
104 struct buffer_head *xattr_bh;
105 struct ocfs2_xattr_header *header;
106 struct ocfs2_xattr_bucket bucket;
107 void *base;
108 void *end;
109 struct ocfs2_xattr_entry *here;
110 int not_found;
111};
112
113static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
114 struct ocfs2_xattr_header *xh,
115 int index,
116 int *block_off,
117 int *new_offset);
118
119static int ocfs2_xattr_index_block_find(struct inode *inode,
120 struct buffer_head *root_bh,
121 int name_index,
122 const char *name,
123 struct ocfs2_xattr_search *xs);
124
125static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
126 struct ocfs2_xattr_tree_root *xt,
127 char *buffer,
128 size_t buffer_size);
129
130static int ocfs2_xattr_create_index_block(struct inode *inode,
131 struct ocfs2_xattr_search *xs);
132
133static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
134 struct ocfs2_xattr_info *xi,
135 struct ocfs2_xattr_search *xs);
136
137static int ocfs2_delete_xattr_index_block(struct inode *inode,
138 struct buffer_head *xb_bh);
139
140static inline const char *ocfs2_xattr_prefix(int name_index)
141{
142 struct xattr_handler *handler = NULL;
143
144 if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
145 handler = ocfs2_xattr_handler_map[name_index];
146
147 return handler ? handler->prefix : NULL;
148}
149
150static u32 ocfs2_xattr_name_hash(struct inode *inode,
151 const char *name,
152 int name_len)
153{
154 /* Get hash value of uuid from super block */
155 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
156 int i;
157
158 /* hash extended attribute name */
159 for (i = 0; i < name_len; i++) {
160 hash = (hash << OCFS2_HASH_SHIFT) ^
161 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
162 *name++;
163 }
164
165 return hash;
166}
167
168/*
169 * ocfs2_xattr_hash_entry()
170 *
171 * Compute the hash of an extended attribute.
172 */
173static void ocfs2_xattr_hash_entry(struct inode *inode,
174 struct ocfs2_xattr_header *header,
175 struct ocfs2_xattr_entry *entry)
176{
177 u32 hash = 0;
178 char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
179
180 hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
181 entry->xe_name_hash = cpu_to_le32(hash);
182
183 return;
184}
185
186static int ocfs2_xattr_extend_allocation(struct inode *inode,
187 u32 clusters_to_add,
188 struct buffer_head *xattr_bh,
189 struct ocfs2_xattr_value_root *xv)
190{
191 int status = 0;
192 int restart_func = 0;
193 int credits = 0;
194 handle_t *handle = NULL;
195 struct ocfs2_alloc_context *data_ac = NULL;
196 struct ocfs2_alloc_context *meta_ac = NULL;
197 enum ocfs2_alloc_restarted why;
198 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
199 u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
200 struct ocfs2_extent_tree et;
201
202 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
203
204 ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
205
206restart_all:
207
208 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
209 &data_ac, &meta_ac);
210 if (status) {
211 mlog_errno(status);
212 goto leave;
213 }
214
215 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
216 clusters_to_add);
217 handle = ocfs2_start_trans(osb, credits);
218 if (IS_ERR(handle)) {
219 status = PTR_ERR(handle);
220 handle = NULL;
221 mlog_errno(status);
222 goto leave;
223 }
224
225restarted_transaction:
226 status = ocfs2_journal_access(handle, inode, xattr_bh,
227 OCFS2_JOURNAL_ACCESS_WRITE);
228 if (status < 0) {
229 mlog_errno(status);
230 goto leave;
231 }
232
233 prev_clusters = le32_to_cpu(xv->xr_clusters);
234 status = ocfs2_add_clusters_in_btree(osb,
235 inode,
236 &logical_start,
237 clusters_to_add,
238 0,
239 &et,
240 handle,
241 data_ac,
242 meta_ac,
243 &why);
244 if ((status < 0) && (status != -EAGAIN)) {
245 if (status != -ENOSPC)
246 mlog_errno(status);
247 goto leave;
248 }
249
250 status = ocfs2_journal_dirty(handle, xattr_bh);
251 if (status < 0) {
252 mlog_errno(status);
253 goto leave;
254 }
255
256 clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
257
258 if (why != RESTART_NONE && clusters_to_add) {
259 if (why == RESTART_META) {
260 mlog(0, "restarting function.\n");
261 restart_func = 1;
262 } else {
263 BUG_ON(why != RESTART_TRANS);
264
265 mlog(0, "restarting transaction.\n");
266 /* TODO: This can be more intelligent. */
267 credits = ocfs2_calc_extend_credits(osb->sb,
268 et.et_root_el,
269 clusters_to_add);
270 status = ocfs2_extend_trans(handle, credits);
271 if (status < 0) {
272 /* handle still has to be committed at
273 * this point. */
274 status = -ENOMEM;
275 mlog_errno(status);
276 goto leave;
277 }
278 goto restarted_transaction;
279 }
280 }
281
282leave:
283 if (handle) {
284 ocfs2_commit_trans(osb, handle);
285 handle = NULL;
286 }
287 if (data_ac) {
288 ocfs2_free_alloc_context(data_ac);
289 data_ac = NULL;
290 }
291 if (meta_ac) {
292 ocfs2_free_alloc_context(meta_ac);
293 meta_ac = NULL;
294 }
295 if ((!status) && restart_func) {
296 restart_func = 0;
297 goto restart_all;
298 }
299
300 return status;
301}
302
303static int __ocfs2_remove_xattr_range(struct inode *inode,
304 struct buffer_head *root_bh,
305 struct ocfs2_xattr_value_root *xv,
306 u32 cpos, u32 phys_cpos, u32 len,
307 struct ocfs2_cached_dealloc_ctxt *dealloc)
308{
309 int ret;
310 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
311 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
312 struct inode *tl_inode = osb->osb_tl_inode;
313 handle_t *handle;
314 struct ocfs2_alloc_context *meta_ac = NULL;
315 struct ocfs2_extent_tree et;
316
317 ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
318
319 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
320 if (ret) {
321 mlog_errno(ret);
322 return ret;
323 }
324
325 mutex_lock(&tl_inode->i_mutex);
326
327 if (ocfs2_truncate_log_needs_flush(osb)) {
328 ret = __ocfs2_flush_truncate_log(osb);
329 if (ret < 0) {
330 mlog_errno(ret);
331 goto out;
332 }
333 }
334
335 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
336 if (IS_ERR(handle)) {
337 ret = PTR_ERR(handle);
338 mlog_errno(ret);
339 goto out;
340 }
341
342 ret = ocfs2_journal_access(handle, inode, root_bh,
343 OCFS2_JOURNAL_ACCESS_WRITE);
344 if (ret) {
345 mlog_errno(ret);
346 goto out_commit;
347 }
348
349 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
350 dealloc);
351 if (ret) {
352 mlog_errno(ret);
353 goto out_commit;
354 }
355
356 le32_add_cpu(&xv->xr_clusters, -len);
357
358 ret = ocfs2_journal_dirty(handle, root_bh);
359 if (ret) {
360 mlog_errno(ret);
361 goto out_commit;
362 }
363
364 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
365 if (ret)
366 mlog_errno(ret);
367
368out_commit:
369 ocfs2_commit_trans(osb, handle);
370out:
371 mutex_unlock(&tl_inode->i_mutex);
372
373 if (meta_ac)
374 ocfs2_free_alloc_context(meta_ac);
375
376 return ret;
377}
378
379static int ocfs2_xattr_shrink_size(struct inode *inode,
380 u32 old_clusters,
381 u32 new_clusters,
382 struct buffer_head *root_bh,
383 struct ocfs2_xattr_value_root *xv)
384{
385 int ret = 0;
386 u32 trunc_len, cpos, phys_cpos, alloc_size;
387 u64 block;
388 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
389 struct ocfs2_cached_dealloc_ctxt dealloc;
390
391 ocfs2_init_dealloc_ctxt(&dealloc);
392
393 if (old_clusters <= new_clusters)
394 return 0;
395
396 cpos = new_clusters;
397 trunc_len = old_clusters - new_clusters;
398 while (trunc_len) {
399 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
400 &alloc_size, &xv->xr_list);
401 if (ret) {
402 mlog_errno(ret);
403 goto out;
404 }
405
406 if (alloc_size > trunc_len)
407 alloc_size = trunc_len;
408
409 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
410 phys_cpos, alloc_size,
411 &dealloc);
412 if (ret) {
413 mlog_errno(ret);
414 goto out;
415 }
416
417 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
418 ocfs2_remove_xattr_clusters_from_cache(inode, block,
419 alloc_size);
420 cpos += alloc_size;
421 trunc_len -= alloc_size;
422 }
423
424out:
425 ocfs2_schedule_truncate_log_flush(osb, 1);
426 ocfs2_run_deallocs(osb, &dealloc);
427
428 return ret;
429}
430
431static int ocfs2_xattr_value_truncate(struct inode *inode,
432 struct buffer_head *root_bh,
433 struct ocfs2_xattr_value_root *xv,
434 int len)
435{
436 int ret;
437 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
438 u32 old_clusters = le32_to_cpu(xv->xr_clusters);
439
440 if (new_clusters == old_clusters)
441 return 0;
442
443 if (new_clusters > old_clusters)
444 ret = ocfs2_xattr_extend_allocation(inode,
445 new_clusters - old_clusters,
446 root_bh, xv);
447 else
448 ret = ocfs2_xattr_shrink_size(inode,
449 old_clusters, new_clusters,
450 root_bh, xv);
451
452 return ret;
453}
454
455static int ocfs2_xattr_list_entry(char *buffer, size_t size,
456 size_t *result, const char *prefix,
457 const char *name, int name_len)
458{
459 char *p = buffer + *result;
460 int prefix_len = strlen(prefix);
461 int total_len = prefix_len + name_len + 1;
462
463 *result += total_len;
464
465 /* we are just looking for how big our buffer needs to be */
466 if (!size)
467 return 0;
468
469 if (*result > size)
470 return -ERANGE;
471
472 memcpy(p, prefix, prefix_len);
473 memcpy(p + prefix_len, name, name_len);
474 p[prefix_len + name_len] = '\0';
475
476 return 0;
477}
478
479static int ocfs2_xattr_list_entries(struct inode *inode,
480 struct ocfs2_xattr_header *header,
481 char *buffer, size_t buffer_size)
482{
483 size_t result = 0;
484 int i, type, ret;
485 const char *prefix, *name;
486
487 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
488 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
489 type = ocfs2_xattr_get_type(entry);
490 prefix = ocfs2_xattr_prefix(type);
491
492 if (prefix) {
493 name = (const char *)header +
494 le16_to_cpu(entry->xe_name_offset);
495
496 ret = ocfs2_xattr_list_entry(buffer, buffer_size,
497 &result, prefix, name,
498 entry->xe_name_len);
499 if (ret)
500 return ret;
501 }
502 }
503
504 return result;
505}
506
507static int ocfs2_xattr_ibody_list(struct inode *inode,
508 struct ocfs2_dinode *di,
509 char *buffer,
510 size_t buffer_size)
511{
512 struct ocfs2_xattr_header *header = NULL;
513 struct ocfs2_inode_info *oi = OCFS2_I(inode);
514 int ret = 0;
515
516 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
517 return ret;
518
519 header = (struct ocfs2_xattr_header *)
520 ((void *)di + inode->i_sb->s_blocksize -
521 le16_to_cpu(di->i_xattr_inline_size));
522
523 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
524
525 return ret;
526}
527
528static int ocfs2_xattr_block_list(struct inode *inode,
529 struct ocfs2_dinode *di,
530 char *buffer,
531 size_t buffer_size)
532{
533 struct buffer_head *blk_bh = NULL;
534 struct ocfs2_xattr_block *xb;
535 int ret = 0;
536
537 if (!di->i_xattr_loc)
538 return ret;
539
540 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
541 if (ret < 0) {
542 mlog_errno(ret);
543 return ret;
544 }
545 /*Verify the signature of xattr block*/
546 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
547 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
548 ret = -EFAULT;
549 goto cleanup;
550 }
551
552 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
553
554 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
555 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
556 ret = ocfs2_xattr_list_entries(inode, header,
557 buffer, buffer_size);
558 } else {
559 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
560 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
561 buffer, buffer_size);
562 }
563cleanup:
564 brelse(blk_bh);
565
566 return ret;
567}
568
569ssize_t ocfs2_listxattr(struct dentry *dentry,
570 char *buffer,
571 size_t size)
572{
573 int ret = 0, i_ret = 0, b_ret = 0;
574 struct buffer_head *di_bh = NULL;
575 struct ocfs2_dinode *di = NULL;
576 struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
577
578 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
579 return -EOPNOTSUPP;
580
581 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
582 return ret;
583
584 ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
585 if (ret < 0) {
586 mlog_errno(ret);
587 return ret;
588 }
589
590 di = (struct ocfs2_dinode *)di_bh->b_data;
591
592 down_read(&oi->ip_xattr_sem);
593 i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
594 if (i_ret < 0)
595 b_ret = 0;
596 else {
597 if (buffer) {
598 buffer += i_ret;
599 size -= i_ret;
600 }
601 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
602 buffer, size);
603 if (b_ret < 0)
604 i_ret = 0;
605 }
606 up_read(&oi->ip_xattr_sem);
607 ocfs2_inode_unlock(dentry->d_inode, 0);
608
609 brelse(di_bh);
610
611 return i_ret + b_ret;
612}
613
614static int ocfs2_xattr_find_entry(int name_index,
615 const char *name,
616 struct ocfs2_xattr_search *xs)
617{
618 struct ocfs2_xattr_entry *entry;
619 size_t name_len;
620 int i, cmp = 1;
621
622 if (name == NULL)
623 return -EINVAL;
624
625 name_len = strlen(name);
626 entry = xs->here;
627 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
628 cmp = name_index - ocfs2_xattr_get_type(entry);
629 if (!cmp)
630 cmp = name_len - entry->xe_name_len;
631 if (!cmp)
632 cmp = memcmp(name, (xs->base +
633 le16_to_cpu(entry->xe_name_offset)),
634 name_len);
635 if (cmp == 0)
636 break;
637 entry += 1;
638 }
639 xs->here = entry;
640
641 return cmp ? -ENODATA : 0;
642}
643
644static int ocfs2_xattr_get_value_outside(struct inode *inode,
645 struct ocfs2_xattr_value_root *xv,
646 void *buffer,
647 size_t len)
648{
649 u32 cpos, p_cluster, num_clusters, bpc, clusters;
650 u64 blkno;
651 int i, ret = 0;
652 size_t cplen, blocksize;
653 struct buffer_head *bh = NULL;
654 struct ocfs2_extent_list *el;
655
656 el = &xv->xr_list;
657 clusters = le32_to_cpu(xv->xr_clusters);
658 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
659 blocksize = inode->i_sb->s_blocksize;
660
661 cpos = 0;
662 while (cpos < clusters) {
663 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
664 &num_clusters, el);
665 if (ret) {
666 mlog_errno(ret);
667 goto out;
668 }
669
670 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
671 /* Copy ocfs2_xattr_value */
672 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
673 ret = ocfs2_read_block(inode, blkno, &bh);
674 if (ret) {
675 mlog_errno(ret);
676 goto out;
677 }
678
679 cplen = len >= blocksize ? blocksize : len;
680 memcpy(buffer, bh->b_data, cplen);
681 len -= cplen;
682 buffer += cplen;
683
684 brelse(bh);
685 bh = NULL;
686 if (len == 0)
687 break;
688 }
689 cpos += num_clusters;
690 }
691out:
692 return ret;
693}
694
695static int ocfs2_xattr_ibody_get(struct inode *inode,
696 int name_index,
697 const char *name,
698 void *buffer,
699 size_t buffer_size,
700 struct ocfs2_xattr_search *xs)
701{
702 struct ocfs2_inode_info *oi = OCFS2_I(inode);
703 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
704 struct ocfs2_xattr_value_root *xv;
705 size_t size;
706 int ret = 0;
707
708 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
709 return -ENODATA;
710
711 xs->end = (void *)di + inode->i_sb->s_blocksize;
712 xs->header = (struct ocfs2_xattr_header *)
713 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
714 xs->base = (void *)xs->header;
715 xs->here = xs->header->xh_entries;
716
717 ret = ocfs2_xattr_find_entry(name_index, name, xs);
718 if (ret)
719 return ret;
720 size = le64_to_cpu(xs->here->xe_value_size);
721 if (buffer) {
722 if (size > buffer_size)
723 return -ERANGE;
724 if (ocfs2_xattr_is_local(xs->here)) {
725 memcpy(buffer, (void *)xs->base +
726 le16_to_cpu(xs->here->xe_name_offset) +
727 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
728 } else {
729 xv = (struct ocfs2_xattr_value_root *)
730 (xs->base + le16_to_cpu(
731 xs->here->xe_name_offset) +
732 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
733 ret = ocfs2_xattr_get_value_outside(inode, xv,
734 buffer, size);
735 if (ret < 0) {
736 mlog_errno(ret);
737 return ret;
738 }
739 }
740 }
741
742 return size;
743}
744
745static int ocfs2_xattr_block_get(struct inode *inode,
746 int name_index,
747 const char *name,
748 void *buffer,
749 size_t buffer_size,
750 struct ocfs2_xattr_search *xs)
751{
752 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
753 struct buffer_head *blk_bh = NULL;
754 struct ocfs2_xattr_block *xb;
755 struct ocfs2_xattr_value_root *xv;
756 size_t size;
757 int ret = -ENODATA, name_offset, name_len, block_off, i;
758
759 if (!di->i_xattr_loc)
760 return ret;
761
762 memset(&xs->bucket, 0, sizeof(xs->bucket));
763
764 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
765 if (ret < 0) {
766 mlog_errno(ret);
767 return ret;
768 }
769 /*Verify the signature of xattr block*/
770 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
771 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
772 ret = -EFAULT;
773 goto cleanup;
774 }
775
776 xs->xattr_bh = blk_bh;
777 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
778
779 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
780 xs->header = &xb->xb_attrs.xb_header;
781 xs->base = (void *)xs->header;
782 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
783 xs->here = xs->header->xh_entries;
784
785 ret = ocfs2_xattr_find_entry(name_index, name, xs);
786 } else
787 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
788 name_index,
789 name, xs);
790
791 if (ret)
792 goto cleanup;
793 size = le64_to_cpu(xs->here->xe_value_size);
794 if (buffer) {
795 ret = -ERANGE;
796 if (size > buffer_size)
797 goto cleanup;
798
799 name_offset = le16_to_cpu(xs->here->xe_name_offset);
800 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
801 i = xs->here - xs->header->xh_entries;
802
803 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
804 ret = ocfs2_xattr_bucket_get_name_value(inode,
805 xs->bucket.xh,
806 i,
807 &block_off,
808 &name_offset);
809 xs->base = xs->bucket.bhs[block_off]->b_data;
810 }
811 if (ocfs2_xattr_is_local(xs->here)) {
812 memcpy(buffer, (void *)xs->base +
813 name_offset + name_len, size);
814 } else {
815 xv = (struct ocfs2_xattr_value_root *)
816 (xs->base + name_offset + name_len);
817 ret = ocfs2_xattr_get_value_outside(inode, xv,
818 buffer, size);
819 if (ret < 0) {
820 mlog_errno(ret);
821 goto cleanup;
822 }
823 }
824 }
825 ret = size;
826cleanup:
827 for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
828 brelse(xs->bucket.bhs[i]);
829 memset(&xs->bucket, 0, sizeof(xs->bucket));
830
831 brelse(blk_bh);
832 return ret;
833}
834
835/* ocfs2_xattr_get()
836 *
837 * Copy an extended attribute into the buffer provided.
838 * Buffer is NULL to compute the size of buffer required.
839 */
840int ocfs2_xattr_get(struct inode *inode,
841 int name_index,
842 const char *name,
843 void *buffer,
844 size_t buffer_size)
845{
846 int ret;
847 struct ocfs2_dinode *di = NULL;
848 struct buffer_head *di_bh = NULL;
849 struct ocfs2_inode_info *oi = OCFS2_I(inode);
850 struct ocfs2_xattr_search xis = {
851 .not_found = -ENODATA,
852 };
853 struct ocfs2_xattr_search xbs = {
854 .not_found = -ENODATA,
855 };
856
857 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
858 return -EOPNOTSUPP;
859
860 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
861 ret = -ENODATA;
862
863 ret = ocfs2_inode_lock(inode, &di_bh, 0);
864 if (ret < 0) {
865 mlog_errno(ret);
866 return ret;
867 }
868 xis.inode_bh = xbs.inode_bh = di_bh;
869 di = (struct ocfs2_dinode *)di_bh->b_data;
870
871 down_read(&oi->ip_xattr_sem);
872 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
873 buffer_size, &xis);
874 if (ret == -ENODATA)
875 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
876 buffer_size, &xbs);
877 up_read(&oi->ip_xattr_sem);
878 ocfs2_inode_unlock(inode, 0);
879
880 brelse(di_bh);
881
882 return ret;
883}
884
885static int __ocfs2_xattr_set_value_outside(struct inode *inode,
886 struct ocfs2_xattr_value_root *xv,
887 const void *value,
888 int value_len)
889{
890 int ret = 0, i, cp_len, credits;
891 u16 blocksize = inode->i_sb->s_blocksize;
892 u32 p_cluster, num_clusters;
893 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
894 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
895 u64 blkno;
896 struct buffer_head *bh = NULL;
897 handle_t *handle;
898
899 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
900
901 credits = clusters * bpc;
902 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
903 if (IS_ERR(handle)) {
904 ret = PTR_ERR(handle);
905 mlog_errno(ret);
906 goto out;
907 }
908
909 while (cpos < clusters) {
910 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
911 &num_clusters, &xv->xr_list);
912 if (ret) {
913 mlog_errno(ret);
914 goto out_commit;
915 }
916
917 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
918
919 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
920 ret = ocfs2_read_block(inode, blkno, &bh);
921 if (ret) {
922 mlog_errno(ret);
923 goto out_commit;
924 }
925
926 ret = ocfs2_journal_access(handle,
927 inode,
928 bh,
929 OCFS2_JOURNAL_ACCESS_WRITE);
930 if (ret < 0) {
931 mlog_errno(ret);
932 goto out_commit;
933 }
934
935 cp_len = value_len > blocksize ? blocksize : value_len;
936 memcpy(bh->b_data, value, cp_len);
937 value_len -= cp_len;
938 value += cp_len;
939 if (cp_len < blocksize)
940 memset(bh->b_data + cp_len, 0,
941 blocksize - cp_len);
942
943 ret = ocfs2_journal_dirty(handle, bh);
944 if (ret < 0) {
945 mlog_errno(ret);
946 goto out_commit;
947 }
948 brelse(bh);
949 bh = NULL;
950
951 /*
952 * XXX: do we need to empty all the following
953 * blocks in this cluster?
954 */
955 if (!value_len)
956 break;
957 }
958 cpos += num_clusters;
959 }
960out_commit:
961 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
962out:
963 brelse(bh);
964
965 return ret;
966}
967
968static int ocfs2_xattr_cleanup(struct inode *inode,
969 struct ocfs2_xattr_info *xi,
970 struct ocfs2_xattr_search *xs,
971 size_t offs)
972{
973 handle_t *handle = NULL;
974 int ret = 0;
975 size_t name_len = strlen(xi->name);
976 void *val = xs->base + offs;
977 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
978
979 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
980 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
981 if (IS_ERR(handle)) {
982 ret = PTR_ERR(handle);
983 mlog_errno(ret);
984 goto out;
985 }
986 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
987 OCFS2_JOURNAL_ACCESS_WRITE);
988 if (ret) {
989 mlog_errno(ret);
990 goto out_commit;
991 }
992 /* Decrease xattr count */
993 le16_add_cpu(&xs->header->xh_count, -1);
994 /* Remove the xattr entry and tree root which has already be set*/
995 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
996 memset(val, 0, size);
997
998 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
999 if (ret < 0)
1000 mlog_errno(ret);
1001out_commit:
1002 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1003out:
1004 return ret;
1005}
1006
1007static int ocfs2_xattr_update_entry(struct inode *inode,
1008 struct ocfs2_xattr_info *xi,
1009 struct ocfs2_xattr_search *xs,
1010 size_t offs)
1011{
1012 handle_t *handle = NULL;
1013 int ret = 0;
1014
1015 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1016 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1017 if (IS_ERR(handle)) {
1018 ret = PTR_ERR(handle);
1019 mlog_errno(ret);
1020 goto out;
1021 }
1022 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1023 OCFS2_JOURNAL_ACCESS_WRITE);
1024 if (ret) {
1025 mlog_errno(ret);
1026 goto out_commit;
1027 }
1028
1029 xs->here->xe_name_offset = cpu_to_le16(offs);
1030 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1031 if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1032 ocfs2_xattr_set_local(xs->here, 1);
1033 else
1034 ocfs2_xattr_set_local(xs->here, 0);
1035 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1036
1037 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1038 if (ret < 0)
1039 mlog_errno(ret);
1040out_commit:
1041 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1042out:
1043 return ret;
1044}
1045
1046/*
1047 * ocfs2_xattr_set_value_outside()
1048 *
1049 * Set large size value in B tree.
1050 */
1051static int ocfs2_xattr_set_value_outside(struct inode *inode,
1052 struct ocfs2_xattr_info *xi,
1053 struct ocfs2_xattr_search *xs,
1054 size_t offs)
1055{
1056 size_t name_len = strlen(xi->name);
1057 void *val = xs->base + offs;
1058 struct ocfs2_xattr_value_root *xv = NULL;
1059 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1060 int ret = 0;
1061
1062 memset(val, 0, size);
1063 memcpy(val, xi->name, name_len);
1064 xv = (struct ocfs2_xattr_value_root *)
1065 (val + OCFS2_XATTR_SIZE(name_len));
1066 xv->xr_clusters = 0;
1067 xv->xr_last_eb_blk = 0;
1068 xv->xr_list.l_tree_depth = 0;
1069 xv->xr_list.l_count = cpu_to_le16(1);
1070 xv->xr_list.l_next_free_rec = 0;
1071
1072 ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1073 xi->value_len);
1074 if (ret < 0) {
1075 mlog_errno(ret);
1076 return ret;
1077 }
1078 ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
1079 xi->value_len);
1080 if (ret < 0) {
1081 mlog_errno(ret);
1082 return ret;
1083 }
1084 ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
1085 if (ret < 0)
1086 mlog_errno(ret);
1087
1088 return ret;
1089}
1090
1091/*
1092 * ocfs2_xattr_set_entry_local()
1093 *
1094 * Set, replace or remove extended attribute in local.
1095 */
1096static void ocfs2_xattr_set_entry_local(struct inode *inode,
1097 struct ocfs2_xattr_info *xi,
1098 struct ocfs2_xattr_search *xs,
1099 struct ocfs2_xattr_entry *last,
1100 size_t min_offs)
1101{
1102 size_t name_len = strlen(xi->name);
1103 int i;
1104
1105 if (xi->value && xs->not_found) {
1106 /* Insert the new xattr entry. */
1107 le16_add_cpu(&xs->header->xh_count, 1);
1108 ocfs2_xattr_set_type(last, xi->name_index);
1109 ocfs2_xattr_set_local(last, 1);
1110 last->xe_name_len = name_len;
1111 } else {
1112 void *first_val;
1113 void *val;
1114 size_t offs, size;
1115
1116 first_val = xs->base + min_offs;
1117 offs = le16_to_cpu(xs->here->xe_name_offset);
1118 val = xs->base + offs;
1119
1120 if (le64_to_cpu(xs->here->xe_value_size) >
1121 OCFS2_XATTR_INLINE_SIZE)
1122 size = OCFS2_XATTR_SIZE(name_len) +
1123 OCFS2_XATTR_ROOT_SIZE;
1124 else
1125 size = OCFS2_XATTR_SIZE(name_len) +
1126 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1127
1128 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1129 OCFS2_XATTR_SIZE(xi->value_len)) {
1130 /* The old and the new value have the
1131 same size. Just replace the value. */
1132 ocfs2_xattr_set_local(xs->here, 1);
1133 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1134 /* Clear value bytes. */
1135 memset(val + OCFS2_XATTR_SIZE(name_len),
1136 0,
1137 OCFS2_XATTR_SIZE(xi->value_len));
1138 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1139 xi->value,
1140 xi->value_len);
1141 return;
1142 }
1143 /* Remove the old name+value. */
1144 memmove(first_val + size, first_val, val - first_val);
1145 memset(first_val, 0, size);
1146 xs->here->xe_name_hash = 0;
1147 xs->here->xe_name_offset = 0;
1148 ocfs2_xattr_set_local(xs->here, 1);
1149 xs->here->xe_value_size = 0;
1150
1151 min_offs += size;
1152
1153 /* Adjust all value offsets. */
1154 last = xs->header->xh_entries;
1155 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1156 size_t o = le16_to_cpu(last->xe_name_offset);
1157
1158 if (o < offs)
1159 last->xe_name_offset = cpu_to_le16(o + size);
1160 last += 1;
1161 }
1162
1163 if (!xi->value) {
1164 /* Remove the old entry. */
1165 last -= 1;
1166 memmove(xs->here, xs->here + 1,
1167 (void *)last - (void *)xs->here);
1168 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1169 le16_add_cpu(&xs->header->xh_count, -1);
1170 }
1171 }
1172 if (xi->value) {
1173 /* Insert the new name+value. */
1174 size_t size = OCFS2_XATTR_SIZE(name_len) +
1175 OCFS2_XATTR_SIZE(xi->value_len);
1176 void *val = xs->base + min_offs - size;
1177
1178 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1179 memset(val, 0, size);
1180 memcpy(val, xi->name, name_len);
1181 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1182 xi->value,
1183 xi->value_len);
1184 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1185 ocfs2_xattr_set_local(xs->here, 1);
1186 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1187 }
1188
1189 return;
1190}
1191
1192/*
1193 * ocfs2_xattr_set_entry()
1194 *
1195 * Set extended attribute entry into inode or block.
1196 *
1197 * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1198 * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1199 * then set value in B tree with set_value_outside().
1200 */
1201static int ocfs2_xattr_set_entry(struct inode *inode,
1202 struct ocfs2_xattr_info *xi,
1203 struct ocfs2_xattr_search *xs,
1204 int flag)
1205{
1206 struct ocfs2_xattr_entry *last;
1207 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1208 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1209 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1210 size_t size_l = 0;
1211 handle_t *handle = NULL;
1212 int free, i, ret;
1213 struct ocfs2_xattr_info xi_l = {
1214 .name_index = xi->name_index,
1215 .name = xi->name,
1216 .value = xi->value,
1217 .value_len = xi->value_len,
1218 };
1219
1220 /* Compute min_offs, last and free space. */
1221 last = xs->header->xh_entries;
1222
1223 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1224 size_t offs = le16_to_cpu(last->xe_name_offset);
1225 if (offs < min_offs)
1226 min_offs = offs;
1227 last += 1;
1228 }
1229
1230 free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1231 if (free < 0)
1232 return -EFAULT;
1233
1234 if (!xs->not_found) {
1235 size_t size = 0;
1236 if (ocfs2_xattr_is_local(xs->here))
1237 size = OCFS2_XATTR_SIZE(name_len) +
1238 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1239 else
1240 size = OCFS2_XATTR_SIZE(name_len) +
1241 OCFS2_XATTR_ROOT_SIZE;
1242 free += (size + sizeof(struct ocfs2_xattr_entry));
1243 }
1244 /* Check free space in inode or block */
1245 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1246 if (free < sizeof(struct ocfs2_xattr_entry) +
1247 OCFS2_XATTR_SIZE(name_len) +
1248 OCFS2_XATTR_ROOT_SIZE) {
1249 ret = -ENOSPC;
1250 goto out;
1251 }
1252 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1253 xi_l.value = (void *)&def_xv;
1254 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1255 } else if (xi->value) {
1256 if (free < sizeof(struct ocfs2_xattr_entry) +
1257 OCFS2_XATTR_SIZE(name_len) +
1258 OCFS2_XATTR_SIZE(xi->value_len)) {
1259 ret = -ENOSPC;
1260 goto out;
1261 }
1262 }
1263
1264 if (!xs->not_found) {
1265 /* For existing extended attribute */
1266 size_t size = OCFS2_XATTR_SIZE(name_len) +
1267 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1268 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1269 void *val = xs->base + offs;
1270
1271 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1272 /* Replace existing local xattr with tree root */
1273 ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1274 offs);
1275 if (ret < 0)
1276 mlog_errno(ret);
1277 goto out;
1278 } else if (!ocfs2_xattr_is_local(xs->here)) {
1279 /* For existing xattr which has value outside */
1280 struct ocfs2_xattr_value_root *xv = NULL;
1281 xv = (struct ocfs2_xattr_value_root *)(val +
1282 OCFS2_XATTR_SIZE(name_len));
1283
1284 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1285 /*
1286 * If new value need set outside also,
1287 * first truncate old value to new value,
1288 * then set new value with set_value_outside().
1289 */
1290 ret = ocfs2_xattr_value_truncate(inode,
1291 xs->xattr_bh,
1292 xv,
1293 xi->value_len);
1294 if (ret < 0) {
1295 mlog_errno(ret);
1296 goto out;
1297 }
1298
1299 ret = __ocfs2_xattr_set_value_outside(inode,
1300 xv,
1301 xi->value,
1302 xi->value_len);
1303 if (ret < 0) {
1304 mlog_errno(ret);
1305 goto out;
1306 }
1307
1308 ret = ocfs2_xattr_update_entry(inode,
1309 xi,
1310 xs,
1311 offs);
1312 if (ret < 0)
1313 mlog_errno(ret);
1314 goto out;
1315 } else {
1316 /*
1317 * If new value need set in local,
1318 * just trucate old value to zero.
1319 */
1320 ret = ocfs2_xattr_value_truncate(inode,
1321 xs->xattr_bh,
1322 xv,
1323 0);
1324 if (ret < 0)
1325 mlog_errno(ret);
1326 }
1327 }
1328 }
1329
1330 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1331 OCFS2_INODE_UPDATE_CREDITS);
1332 if (IS_ERR(handle)) {
1333 ret = PTR_ERR(handle);
1334 mlog_errno(ret);
1335 goto out;
1336 }
1337
1338 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1339 OCFS2_JOURNAL_ACCESS_WRITE);
1340 if (ret) {
1341 mlog_errno(ret);
1342 goto out_commit;
1343 }
1344
1345 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1346 /* set extended attribute in external block. */
1347 ret = ocfs2_extend_trans(handle,
1348 OCFS2_INODE_UPDATE_CREDITS +
1349 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1350 if (ret) {
1351 mlog_errno(ret);
1352 goto out_commit;
1353 }
1354 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1355 OCFS2_JOURNAL_ACCESS_WRITE);
1356 if (ret) {
1357 mlog_errno(ret);
1358 goto out_commit;
1359 }
1360 }
1361
1362 /*
1363 * Set value in local, include set tree root in local.
1364 * This is the first step for value size >INLINE_SIZE.
1365 */
1366 ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1367
1368 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1369 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1370 if (ret < 0) {
1371 mlog_errno(ret);
1372 goto out_commit;
1373 }
1374 }
1375
1376 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1377 (flag & OCFS2_INLINE_XATTR_FL)) {
1378 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1379 unsigned int xattrsize = osb->s_xattr_inline_size;
1380
1381 /*
1382 * Adjust extent record count or inline data size
1383 * to reserve space for extended attribute.
1384 */
1385 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1386 struct ocfs2_inline_data *idata = &di->id2.i_data;
1387 le16_add_cpu(&idata->id_count, -xattrsize);
1388 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1389 struct ocfs2_extent_list *el = &di->id2.i_list;
1390 le16_add_cpu(&el->l_count, -(xattrsize /
1391 sizeof(struct ocfs2_extent_rec)));
1392 }
1393 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1394 }
1395 /* Update xattr flag */
1396 spin_lock(&oi->ip_lock);
1397 oi->ip_dyn_features |= flag;
1398 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1399 spin_unlock(&oi->ip_lock);
1400 /* Update inode ctime */
1401 inode->i_ctime = CURRENT_TIME;
1402 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1403 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1404
1405 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1406 if (ret < 0)
1407 mlog_errno(ret);
1408
1409out_commit:
1410 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1411
1412 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1413 /*
1414 * Set value outside in B tree.
1415 * This is the second step for value size > INLINE_SIZE.
1416 */
1417 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1418 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
1419 if (ret < 0) {
1420 int ret2;
1421
1422 mlog_errno(ret);
1423 /*
1424 * If set value outside failed, we have to clean
1425 * the junk tree root we have already set in local.
1426 */
1427 ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
1428 if (ret2 < 0)
1429 mlog_errno(ret2);
1430 }
1431 }
1432out:
1433 return ret;
1434
1435}
1436
1437static int ocfs2_remove_value_outside(struct inode*inode,
1438 struct buffer_head *bh,
1439 struct ocfs2_xattr_header *header)
1440{
1441 int ret = 0, i;
1442
1443 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1444 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1445
1446 if (!ocfs2_xattr_is_local(entry)) {
1447 struct ocfs2_xattr_value_root *xv;
1448 void *val;
1449
1450 val = (void *)header +
1451 le16_to_cpu(entry->xe_name_offset);
1452 xv = (struct ocfs2_xattr_value_root *)
1453 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1454 ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
1455 if (ret < 0) {
1456 mlog_errno(ret);
1457 return ret;
1458 }
1459 }
1460 }
1461
1462 return ret;
1463}
1464
1465static int ocfs2_xattr_ibody_remove(struct inode *inode,
1466 struct buffer_head *di_bh)
1467{
1468
1469 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1470 struct ocfs2_xattr_header *header;
1471 int ret;
1472
1473 header = (struct ocfs2_xattr_header *)
1474 ((void *)di + inode->i_sb->s_blocksize -
1475 le16_to_cpu(di->i_xattr_inline_size));
1476
1477 ret = ocfs2_remove_value_outside(inode, di_bh, header);
1478
1479 return ret;
1480}
1481
1482static int ocfs2_xattr_block_remove(struct inode *inode,
1483 struct buffer_head *blk_bh)
1484{
1485 struct ocfs2_xattr_block *xb;
1486 int ret = 0;
1487
1488 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1489 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1490 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1491 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1492 } else
1493 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1494
1495 return ret;
1496}
1497
1498static int ocfs2_xattr_free_block(struct inode *inode,
1499 u64 block)
1500{
1501 struct inode *xb_alloc_inode;
1502 struct buffer_head *xb_alloc_bh = NULL;
1503 struct buffer_head *blk_bh = NULL;
1504 struct ocfs2_xattr_block *xb;
1505 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1506 handle_t *handle;
1507 int ret = 0;
1508 u64 blk, bg_blkno;
1509 u16 bit;
1510
1511 ret = ocfs2_read_block(inode, block, &blk_bh);
1512 if (ret < 0) {
1513 mlog_errno(ret);
1514 goto out;
1515 }
1516
1517 /*Verify the signature of xattr block*/
1518 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1519 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1520 ret = -EFAULT;
1521 goto out;
1522 }
1523
1524 ret = ocfs2_xattr_block_remove(inode, blk_bh);
1525 if (ret < 0) {
1526 mlog_errno(ret);
1527 goto out;
1528 }
1529
1530 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1531 blk = le64_to_cpu(xb->xb_blkno);
1532 bit = le16_to_cpu(xb->xb_suballoc_bit);
1533 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1534
1535 xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1536 EXTENT_ALLOC_SYSTEM_INODE,
1537 le16_to_cpu(xb->xb_suballoc_slot));
1538 if (!xb_alloc_inode) {
1539 ret = -ENOMEM;
1540 mlog_errno(ret);
1541 goto out;
1542 }
1543 mutex_lock(&xb_alloc_inode->i_mutex);
1544
1545 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1546 if (ret < 0) {
1547 mlog_errno(ret);
1548 goto out_mutex;
1549 }
1550
1551 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1552 if (IS_ERR(handle)) {
1553 ret = PTR_ERR(handle);
1554 mlog_errno(ret);
1555 goto out_unlock;
1556 }
1557
1558 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1559 bit, bg_blkno, 1);
1560 if (ret < 0)
1561 mlog_errno(ret);
1562
1563 ocfs2_commit_trans(osb, handle);
1564out_unlock:
1565 ocfs2_inode_unlock(xb_alloc_inode, 1);
1566 brelse(xb_alloc_bh);
1567out_mutex:
1568 mutex_unlock(&xb_alloc_inode->i_mutex);
1569 iput(xb_alloc_inode);
1570out:
1571 brelse(blk_bh);
1572 return ret;
1573}
1574
1575/*
1576 * ocfs2_xattr_remove()
1577 *
1578 * Free extended attribute resources associated with this inode.
1579 */
1580int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1581{
1582 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1583 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1584 handle_t *handle;
1585 int ret;
1586
1587 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1588 return 0;
1589
1590 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1591 return 0;
1592
1593 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1594 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1595 if (ret < 0) {
1596 mlog_errno(ret);
1597 goto out;
1598 }
1599 }
1600
1601 if (di->i_xattr_loc) {
1602 ret = ocfs2_xattr_free_block(inode,
1603 le64_to_cpu(di->i_xattr_loc));
1604 if (ret < 0) {
1605 mlog_errno(ret);
1606 goto out;
1607 }
1608 }
1609
1610 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1611 OCFS2_INODE_UPDATE_CREDITS);
1612 if (IS_ERR(handle)) {
1613 ret = PTR_ERR(handle);
1614 mlog_errno(ret);
1615 goto out;
1616 }
1617 ret = ocfs2_journal_access(handle, inode, di_bh,
1618 OCFS2_JOURNAL_ACCESS_WRITE);
1619 if (ret) {
1620 mlog_errno(ret);
1621 goto out_commit;
1622 }
1623
1624 di->i_xattr_loc = 0;
1625
1626 spin_lock(&oi->ip_lock);
1627 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1628 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1629 spin_unlock(&oi->ip_lock);
1630
1631 ret = ocfs2_journal_dirty(handle, di_bh);
1632 if (ret < 0)
1633 mlog_errno(ret);
1634out_commit:
1635 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1636out:
1637 return ret;
1638}
1639
1640static int ocfs2_xattr_has_space_inline(struct inode *inode,
1641 struct ocfs2_dinode *di)
1642{
1643 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1644 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1645 int free;
1646
1647 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1648 return 0;
1649
1650 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1651 struct ocfs2_inline_data *idata = &di->id2.i_data;
1652 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1653 } else if (ocfs2_inode_is_fast_symlink(inode)) {
1654 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1655 le64_to_cpu(di->i_size);
1656 } else {
1657 struct ocfs2_extent_list *el = &di->id2.i_list;
1658 free = (le16_to_cpu(el->l_count) -
1659 le16_to_cpu(el->l_next_free_rec)) *
1660 sizeof(struct ocfs2_extent_rec);
1661 }
1662 if (free >= xattrsize)
1663 return 1;
1664
1665 return 0;
1666}
1667
1668/*
1669 * ocfs2_xattr_ibody_find()
1670 *
1671 * Find extended attribute in inode block and
1672 * fill search info into struct ocfs2_xattr_search.
1673 */
1674static int ocfs2_xattr_ibody_find(struct inode *inode,
1675 int name_index,
1676 const char *name,
1677 struct ocfs2_xattr_search *xs)
1678{
1679 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1680 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1681 int ret;
1682 int has_space = 0;
1683
1684 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1685 return 0;
1686
1687 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1688 down_read(&oi->ip_alloc_sem);
1689 has_space = ocfs2_xattr_has_space_inline(inode, di);
1690 up_read(&oi->ip_alloc_sem);
1691 if (!has_space)
1692 return 0;
1693 }
1694
1695 xs->xattr_bh = xs->inode_bh;
1696 xs->end = (void *)di + inode->i_sb->s_blocksize;
1697 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1698 xs->header = (struct ocfs2_xattr_header *)
1699 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1700 else
1701 xs->header = (struct ocfs2_xattr_header *)
1702 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1703 xs->base = (void *)xs->header;
1704 xs->here = xs->header->xh_entries;
1705
1706 /* Find the named attribute. */
1707 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1708 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1709 if (ret && ret != -ENODATA)
1710 return ret;
1711 xs->not_found = ret;
1712 }
1713
1714 return 0;
1715}
1716
1717/*
1718 * ocfs2_xattr_ibody_set()
1719 *
1720 * Set, replace or remove an extended attribute into inode block.
1721 *
1722 */
1723static int ocfs2_xattr_ibody_set(struct inode *inode,
1724 struct ocfs2_xattr_info *xi,
1725 struct ocfs2_xattr_search *xs)
1726{
1727 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1728 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1729 int ret;
1730
1731 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1732 return -ENOSPC;
1733
1734 down_write(&oi->ip_alloc_sem);
1735 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1736 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1737 ret = -ENOSPC;
1738 goto out;
1739 }
1740 }
1741
1742 ret = ocfs2_xattr_set_entry(inode, xi, xs,
1743 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1744out:
1745 up_write(&oi->ip_alloc_sem);
1746
1747 return ret;
1748}
1749
1750/*
1751 * ocfs2_xattr_block_find()
1752 *
1753 * Find extended attribute in external block and
1754 * fill search info into struct ocfs2_xattr_search.
1755 */
1756static int ocfs2_xattr_block_find(struct inode *inode,
1757 int name_index,
1758 const char *name,
1759 struct ocfs2_xattr_search *xs)
1760{
1761 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1762 struct buffer_head *blk_bh = NULL;
1763 struct ocfs2_xattr_block *xb;
1764 int ret = 0;
1765
1766 if (!di->i_xattr_loc)
1767 return ret;
1768
1769 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
1770 if (ret < 0) {
1771 mlog_errno(ret);
1772 return ret;
1773 }
1774 /*Verify the signature of xattr block*/
1775 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1776 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1777 ret = -EFAULT;
1778 goto cleanup;
1779 }
1780
1781 xs->xattr_bh = blk_bh;
1782 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1783
1784 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1785 xs->header = &xb->xb_attrs.xb_header;
1786 xs->base = (void *)xs->header;
1787 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1788 xs->here = xs->header->xh_entries;
1789
1790 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1791 } else
1792 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1793 name_index,
1794 name, xs);
1795
1796 if (ret && ret != -ENODATA) {
1797 xs->xattr_bh = NULL;
1798 goto cleanup;
1799 }
1800 xs->not_found = ret;
1801 return 0;
1802cleanup:
1803 brelse(blk_bh);
1804
1805 return ret;
1806}
1807
1808/*
1809 * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1810 * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1811 * re-initialized.
1812 */
1813static int ocfs2_restore_xattr_block(struct inode *inode,
1814 struct ocfs2_xattr_search *xs)
1815{
1816 int ret;
1817 handle_t *handle;
1818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1819 struct ocfs2_xattr_block *xb =
1820 (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1821 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1822 u16 xb_flags = le16_to_cpu(xb->xb_flags);
1823
1824 BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1825 le16_to_cpu(el->l_next_free_rec) != 0);
1826
1827 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1828 if (IS_ERR(handle)) {
1829 ret = PTR_ERR(handle);
1830 handle = NULL;
1831 goto out;
1832 }
1833
1834 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1835 OCFS2_JOURNAL_ACCESS_WRITE);
1836 if (ret < 0) {
1837 mlog_errno(ret);
1838 goto out_commit;
1839 }
1840
1841 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1842 offsetof(struct ocfs2_xattr_block, xb_attrs));
1843
1844 xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1845
1846 ocfs2_journal_dirty(handle, xs->xattr_bh);
1847
1848out_commit:
1849 ocfs2_commit_trans(osb, handle);
1850out:
1851 return ret;
1852}
1853
1854/*
1855 * ocfs2_xattr_block_set()
1856 *
1857 * Set, replace or remove an extended attribute into external block.
1858 *
1859 */
1860static int ocfs2_xattr_block_set(struct inode *inode,
1861 struct ocfs2_xattr_info *xi,
1862 struct ocfs2_xattr_search *xs)
1863{
1864 struct buffer_head *new_bh = NULL;
1865 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1866 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1867 struct ocfs2_alloc_context *meta_ac = NULL;
1868 handle_t *handle = NULL;
1869 struct ocfs2_xattr_block *xblk = NULL;
1870 u16 suballoc_bit_start;
1871 u32 num_got;
1872 u64 first_blkno;
1873 int ret;
1874
1875 if (!xs->xattr_bh) {
1876 /*
1877 * Alloc one external block for extended attribute
1878 * outside of inode.
1879 */
1880 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1881 if (ret < 0) {
1882 mlog_errno(ret);
1883 goto out;
1884 }
1885 handle = ocfs2_start_trans(osb,
1886 OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1887 if (IS_ERR(handle)) {
1888 ret = PTR_ERR(handle);
1889 mlog_errno(ret);
1890 goto out;
1891 }
1892 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1893 OCFS2_JOURNAL_ACCESS_CREATE);
1894 if (ret < 0) {
1895 mlog_errno(ret);
1896 goto out_commit;
1897 }
1898
1899 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
1900 &suballoc_bit_start, &num_got,
1901 &first_blkno);
1902 if (ret < 0) {
1903 mlog_errno(ret);
1904 goto out_commit;
1905 }
1906
1907 new_bh = sb_getblk(inode->i_sb, first_blkno);
1908 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1909
1910 ret = ocfs2_journal_access(handle, inode, new_bh,
1911 OCFS2_JOURNAL_ACCESS_CREATE);
1912 if (ret < 0) {
1913 mlog_errno(ret);
1914 goto out_commit;
1915 }
1916
1917 /* Initialize ocfs2_xattr_block */
1918 xs->xattr_bh = new_bh;
1919 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1920 memset(xblk, 0, inode->i_sb->s_blocksize);
1921 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1922 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1923 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1924 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1925 xblk->xb_blkno = cpu_to_le64(first_blkno);
1926
1927 xs->header = &xblk->xb_attrs.xb_header;
1928 xs->base = (void *)xs->header;
1929 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1930 xs->here = xs->header->xh_entries;
1931
1932
1933 ret = ocfs2_journal_dirty(handle, new_bh);
1934 if (ret < 0) {
1935 mlog_errno(ret);
1936 goto out_commit;
1937 }
1938 di->i_xattr_loc = cpu_to_le64(first_blkno);
1939 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1940 if (ret < 0)
1941 mlog_errno(ret);
1942out_commit:
1943 ocfs2_commit_trans(osb, handle);
1944out:
1945 if (meta_ac)
1946 ocfs2_free_alloc_context(meta_ac);
1947 if (ret < 0)
1948 return ret;
1949 } else
1950 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1951
1952 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1953 /* Set extended attribute into external block */
1954 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1955 if (!ret || ret != -ENOSPC)
1956 goto end;
1957
1958 ret = ocfs2_xattr_create_index_block(inode, xs);
1959 if (ret)
1960 goto end;
1961 }
1962
1963 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1964 if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1965 ret = ocfs2_restore_xattr_block(inode, xs);
1966
1967end:
1968
1969 return ret;
1970}
1971
1972/*
1973 * ocfs2_xattr_set()
1974 *
1975 * Set, replace or remove an extended attribute for this inode.
1976 * value is NULL to remove an existing extended attribute, else either
1977 * create or replace an extended attribute.
1978 */
1979int ocfs2_xattr_set(struct inode *inode,
1980 int name_index,
1981 const char *name,
1982 const void *value,
1983 size_t value_len,
1984 int flags)
1985{
1986 struct buffer_head *di_bh = NULL;
1987 struct ocfs2_dinode *di;
1988 int ret;
1989 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
1990
1991 struct ocfs2_xattr_info xi = {
1992 .name_index = name_index,
1993 .name = name,
1994 .value = value,
1995 .value_len = value_len,
1996 };
1997
1998 struct ocfs2_xattr_search xis = {
1999 .not_found = -ENODATA,
2000 };
2001
2002 struct ocfs2_xattr_search xbs = {
2003 .not_found = -ENODATA,
2004 };
2005
2006 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2007 return -EOPNOTSUPP;
2008
2009 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2010 if (ret < 0) {
2011 mlog_errno(ret);
2012 return ret;
2013 }
2014 xis.inode_bh = xbs.inode_bh = di_bh;
2015 di = (struct ocfs2_dinode *)di_bh->b_data;
2016
2017 down_write(&OCFS2_I(inode)->ip_xattr_sem);
2018 /*
2019 * Scan inode and external block to find the same name
2020 * extended attribute and collect search infomation.
2021 */
2022 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2023 if (ret)
2024 goto cleanup;
2025 if (xis.not_found) {
2026 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2027 if (ret)
2028 goto cleanup;
2029 }
2030
2031 if (xis.not_found && xbs.not_found) {
2032 ret = -ENODATA;
2033 if (flags & XATTR_REPLACE)
2034 goto cleanup;
2035 ret = 0;
2036 if (!value)
2037 goto cleanup;
2038 } else {
2039 ret = -EEXIST;
2040 if (flags & XATTR_CREATE)
2041 goto cleanup;
2042 }
2043
2044 if (!value) {
2045 /* Remove existing extended attribute */
2046 if (!xis.not_found)
2047 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2048 else if (!xbs.not_found)
2049 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2050 } else {
2051 /* We always try to set extended attribute into inode first*/
2052 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2053 if (!ret && !xbs.not_found) {
2054 /*
2055 * If succeed and that extended attribute existing in
2056 * external block, then we will remove it.
2057 */
2058 xi.value = NULL;
2059 xi.value_len = 0;
2060 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2061 } else if (ret == -ENOSPC) {
2062 if (di->i_xattr_loc && !xbs.xattr_bh) {
2063 ret = ocfs2_xattr_block_find(inode, name_index,
2064 name, &xbs);
2065 if (ret)
2066 goto cleanup;
2067 }
2068 /*
2069 * If no space in inode, we will set extended attribute
2070 * into external block.
2071 */
2072 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2073 if (ret)
2074 goto cleanup;
2075 if (!xis.not_found) {
2076 /*
2077 * If succeed and that extended attribute
2078 * existing in inode, we will remove it.
2079 */
2080 xi.value = NULL;
2081 xi.value_len = 0;
2082 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2083 }
2084 }
2085 }
2086cleanup:
2087 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2088 ocfs2_inode_unlock(inode, 1);
2089 brelse(di_bh);
2090 brelse(xbs.xattr_bh);
2091 for (i = 0; i < blk_per_bucket; i++)
2092 brelse(xbs.bucket.bhs[i]);
2093
2094 return ret;
2095}
2096
2097/*
2098 * Find the xattr extent rec which may contains name_hash.
2099 * e_cpos will be the first name hash of the xattr rec.
2100 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2101 */
2102static int ocfs2_xattr_get_rec(struct inode *inode,
2103 u32 name_hash,
2104 u64 *p_blkno,
2105 u32 *e_cpos,
2106 u32 *num_clusters,
2107 struct ocfs2_extent_list *el)
2108{
2109 int ret = 0, i;
2110 struct buffer_head *eb_bh = NULL;
2111 struct ocfs2_extent_block *eb;
2112 struct ocfs2_extent_rec *rec = NULL;
2113 u64 e_blkno = 0;
2114
2115 if (el->l_tree_depth) {
2116 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2117 if (ret) {
2118 mlog_errno(ret);
2119 goto out;
2120 }
2121
2122 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2123 el = &eb->h_list;
2124
2125 if (el->l_tree_depth) {
2126 ocfs2_error(inode->i_sb,
2127 "Inode %lu has non zero tree depth in "
2128 "xattr tree block %llu\n", inode->i_ino,
2129 (unsigned long long)eb_bh->b_blocknr);
2130 ret = -EROFS;
2131 goto out;
2132 }
2133 }
2134
2135 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2136 rec = &el->l_recs[i];
2137
2138 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2139 e_blkno = le64_to_cpu(rec->e_blkno);
2140 break;
2141 }
2142 }
2143
2144 if (!e_blkno) {
2145 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2146 "record (%u, %u, 0) in xattr", inode->i_ino,
2147 le32_to_cpu(rec->e_cpos),
2148 ocfs2_rec_clusters(el, rec));
2149 ret = -EROFS;
2150 goto out;
2151 }
2152
2153 *p_blkno = le64_to_cpu(rec->e_blkno);
2154 *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2155 if (e_cpos)
2156 *e_cpos = le32_to_cpu(rec->e_cpos);
2157out:
2158 brelse(eb_bh);
2159 return ret;
2160}
2161
2162typedef int (xattr_bucket_func)(struct inode *inode,
2163 struct ocfs2_xattr_bucket *bucket,
2164 void *para);
2165
2166static int ocfs2_find_xe_in_bucket(struct inode *inode,
2167 struct buffer_head *header_bh,
2168 int name_index,
2169 const char *name,
2170 u32 name_hash,
2171 u16 *xe_index,
2172 int *found)
2173{
2174 int i, ret = 0, cmp = 1, block_off, new_offset;
2175 struct ocfs2_xattr_header *xh =
2176 (struct ocfs2_xattr_header *)header_bh->b_data;
2177 size_t name_len = strlen(name);
2178 struct ocfs2_xattr_entry *xe = NULL;
2179 struct buffer_head *name_bh = NULL;
2180 char *xe_name;
2181
2182 /*
2183 * We don't use binary search in the bucket because there
2184 * may be multiple entries with the same name hash.
2185 */
2186 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2187 xe = &xh->xh_entries[i];
2188
2189 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2190 continue;
2191 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2192 break;
2193
2194 cmp = name_index - ocfs2_xattr_get_type(xe);
2195 if (!cmp)
2196 cmp = name_len - xe->xe_name_len;
2197 if (cmp)
2198 continue;
2199
2200 ret = ocfs2_xattr_bucket_get_name_value(inode,
2201 xh,
2202 i,
2203 &block_off,
2204 &new_offset);
2205 if (ret) {
2206 mlog_errno(ret);
2207 break;
2208 }
2209
2210 ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
2211 &name_bh);
2212 if (ret) {
2213 mlog_errno(ret);
2214 break;
2215 }
2216 xe_name = name_bh->b_data + new_offset;
2217
2218 cmp = memcmp(name, xe_name, name_len);
2219 brelse(name_bh);
2220 name_bh = NULL;
2221
2222 if (cmp == 0) {
2223 *xe_index = i;
2224 *found = 1;
2225 ret = 0;
2226 break;
2227 }
2228 }
2229
2230 return ret;
2231}
2232
2233/*
2234 * Find the specified xattr entry in a series of buckets.
2235 * This series start from p_blkno and last for num_clusters.
2236 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2237 * the num of the valid buckets.
2238 *
2239 * Return the buffer_head this xattr should reside in. And if the xattr's
2240 * hash is in the gap of 2 buckets, return the lower bucket.
2241 */
2242static int ocfs2_xattr_bucket_find(struct inode *inode,
2243 int name_index,
2244 const char *name,
2245 u32 name_hash,
2246 u64 p_blkno,
2247 u32 first_hash,
2248 u32 num_clusters,
2249 struct ocfs2_xattr_search *xs)
2250{
2251 int ret, found = 0;
2252 struct buffer_head *bh = NULL;
2253 struct buffer_head *lower_bh = NULL;
2254 struct ocfs2_xattr_header *xh = NULL;
2255 struct ocfs2_xattr_entry *xe = NULL;
2256 u16 index = 0;
2257 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2258 int low_bucket = 0, bucket, high_bucket;
2259 u32 last_hash;
2260 u64 blkno;
2261
2262 ret = ocfs2_read_block(inode, p_blkno, &bh);
2263 if (ret) {
2264 mlog_errno(ret);
2265 goto out;
2266 }
2267
2268 xh = (struct ocfs2_xattr_header *)bh->b_data;
2269 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2270
2271 while (low_bucket <= high_bucket) {
2272 brelse(bh);
2273 bh = NULL;
2274 bucket = (low_bucket + high_bucket) / 2;
2275
2276 blkno = p_blkno + bucket * blk_per_bucket;
2277
2278 ret = ocfs2_read_block(inode, blkno, &bh);
2279 if (ret) {
2280 mlog_errno(ret);
2281 goto out;
2282 }
2283
2284 xh = (struct ocfs2_xattr_header *)bh->b_data;
2285 xe = &xh->xh_entries[0];
2286 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2287 high_bucket = bucket - 1;
2288 continue;
2289 }
2290
2291 /*
2292 * Check whether the hash of the last entry in our
2293 * bucket is larger than the search one. for an empty
2294 * bucket, the last one is also the first one.
2295 */
2296 if (xh->xh_count)
2297 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2298
2299 last_hash = le32_to_cpu(xe->xe_name_hash);
2300
2301 /* record lower_bh which may be the insert place. */
2302 brelse(lower_bh);
2303 lower_bh = bh;
2304 bh = NULL;
2305
2306 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2307 low_bucket = bucket + 1;
2308 continue;
2309 }
2310
2311 /* the searched xattr should reside in this bucket if exists. */
2312 ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
2313 name_index, name, name_hash,
2314 &index, &found);
2315 if (ret) {
2316 mlog_errno(ret);
2317 goto out;
2318 }
2319 break;
2320 }
2321
2322 /*
2323 * Record the bucket we have found.
2324 * When the xattr's hash value is in the gap of 2 buckets, we will
2325 * always set it to the previous bucket.
2326 */
2327 if (!lower_bh) {
2328 /*
2329 * We can't find any bucket whose first name_hash is less
2330 * than the find name_hash.
2331 */
2332 BUG_ON(bh->b_blocknr != p_blkno);
2333 lower_bh = bh;
2334 bh = NULL;
2335 }
2336 xs->bucket.bhs[0] = lower_bh;
2337 xs->bucket.xh = (struct ocfs2_xattr_header *)
2338 xs->bucket.bhs[0]->b_data;
2339 lower_bh = NULL;
2340
2341 xs->header = xs->bucket.xh;
2342 xs->base = xs->bucket.bhs[0]->b_data;
2343 xs->end = xs->base + inode->i_sb->s_blocksize;
2344
2345 if (found) {
2346 /*
2347 * If we have found the xattr enty, read all the blocks in
2348 * this bucket.
2349 */
2350 ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
2351 blk_per_bucket - 1, &xs->bucket.bhs[1],
2352 OCFS2_BH_CACHED);
2353 if (ret) {
2354 mlog_errno(ret);
2355 goto out;
2356 }
2357
2358 xs->here = &xs->header->xh_entries[index];
2359 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2360 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
2361 } else
2362 ret = -ENODATA;
2363
2364out:
2365 brelse(bh);
2366 brelse(lower_bh);
2367 return ret;
2368}
2369
2370static int ocfs2_xattr_index_block_find(struct inode *inode,
2371 struct buffer_head *root_bh,
2372 int name_index,
2373 const char *name,
2374 struct ocfs2_xattr_search *xs)
2375{
2376 int ret;
2377 struct ocfs2_xattr_block *xb =
2378 (struct ocfs2_xattr_block *)root_bh->b_data;
2379 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2380 struct ocfs2_extent_list *el = &xb_root->xt_list;
2381 u64 p_blkno = 0;
2382 u32 first_hash, num_clusters = 0;
2383 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
2384
2385 if (le16_to_cpu(el->l_next_free_rec) == 0)
2386 return -ENODATA;
2387
2388 mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2389 name, name_hash, name_index);
2390
2391 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2392 &num_clusters, el);
2393 if (ret) {
2394 mlog_errno(ret);
2395 goto out;
2396 }
2397
2398 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2399
2400 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2401 "in the rec is %u\n", num_clusters, p_blkno, first_hash);
2402
2403 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2404 p_blkno, first_hash, num_clusters, xs);
2405
2406out:
2407 return ret;
2408}
2409
2410static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2411 u64 blkno,
2412 u32 clusters,
2413 xattr_bucket_func *func,
2414 void *para)
2415{
2416 int i, j, ret = 0;
2417 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2418 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2419 u32 num_buckets = clusters * bpc;
2420 struct ocfs2_xattr_bucket bucket;
2421
2422 memset(&bucket, 0, sizeof(bucket));
2423
2424 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2425 clusters, blkno);
2426
2427 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2428 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
2429 bucket.bhs, OCFS2_BH_CACHED);
2430 if (ret) {
2431 mlog_errno(ret);
2432 goto out;
2433 }
2434
2435 bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2436 /*
2437 * The real bucket num in this series of blocks is stored
2438 * in the 1st bucket.
2439 */
2440 if (i == 0)
2441 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2442
2443 mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno,
2444 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
2445 if (func) {
2446 ret = func(inode, &bucket, para);
2447 if (ret) {
2448 mlog_errno(ret);
2449 break;
2450 }
2451 }
2452
2453 for (j = 0; j < blk_per_bucket; j++)
2454 brelse(bucket.bhs[j]);
2455 memset(&bucket, 0, sizeof(bucket));
2456 }
2457
2458out:
2459 for (j = 0; j < blk_per_bucket; j++)
2460 brelse(bucket.bhs[j]);
2461
2462 return ret;
2463}
2464
2465struct ocfs2_xattr_tree_list {
2466 char *buffer;
2467 size_t buffer_size;
2468 size_t result;
2469};
2470
2471static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2472 struct ocfs2_xattr_header *xh,
2473 int index,
2474 int *block_off,
2475 int *new_offset)
2476{
2477 u16 name_offset;
2478
2479 if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2480 return -EINVAL;
2481
2482 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2483
2484 *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2485 *new_offset = name_offset % inode->i_sb->s_blocksize;
2486
2487 return 0;
2488}
2489
2490static int ocfs2_list_xattr_bucket(struct inode *inode,
2491 struct ocfs2_xattr_bucket *bucket,
2492 void *para)
2493{
2494 int ret = 0, type;
2495 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2496 int i, block_off, new_offset;
2497 const char *prefix, *name;
2498
2499 for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
2500 struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
2501 type = ocfs2_xattr_get_type(entry);
2502 prefix = ocfs2_xattr_prefix(type);
2503
2504 if (prefix) {
2505 ret = ocfs2_xattr_bucket_get_name_value(inode,
2506 bucket->xh,
2507 i,
2508 &block_off,
2509 &new_offset);
2510 if (ret)
2511 break;
2512
2513 name = (const char *)bucket->bhs[block_off]->b_data +
2514 new_offset;
2515 ret = ocfs2_xattr_list_entry(xl->buffer,
2516 xl->buffer_size,
2517 &xl->result,
2518 prefix, name,
2519 entry->xe_name_len);
2520 if (ret)
2521 break;
2522 }
2523 }
2524
2525 return ret;
2526}
2527
2528static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2529 struct ocfs2_xattr_tree_root *xt,
2530 char *buffer,
2531 size_t buffer_size)
2532{
2533 struct ocfs2_extent_list *el = &xt->xt_list;
2534 int ret = 0;
2535 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2536 u64 p_blkno = 0;
2537 struct ocfs2_xattr_tree_list xl = {
2538 .buffer = buffer,
2539 .buffer_size = buffer_size,
2540 .result = 0,
2541 };
2542
2543 if (le16_to_cpu(el->l_next_free_rec) == 0)
2544 return 0;
2545
2546 while (name_hash > 0) {
2547 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2548 &e_cpos, &num_clusters, el);
2549 if (ret) {
2550 mlog_errno(ret);
2551 goto out;
2552 }
2553
2554 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2555 ocfs2_list_xattr_bucket,
2556 &xl);
2557 if (ret) {
2558 mlog_errno(ret);
2559 goto out;
2560 }
2561
2562 if (e_cpos == 0)
2563 break;
2564
2565 name_hash = e_cpos - 1;
2566 }
2567
2568 ret = xl.result;
2569out:
2570 return ret;
2571}
2572
2573static int cmp_xe(const void *a, const void *b)
2574{
2575 const struct ocfs2_xattr_entry *l = a, *r = b;
2576 u32 l_hash = le32_to_cpu(l->xe_name_hash);
2577 u32 r_hash = le32_to_cpu(r->xe_name_hash);
2578
2579 if (l_hash > r_hash)
2580 return 1;
2581 if (l_hash < r_hash)
2582 return -1;
2583 return 0;
2584}
2585
2586static void swap_xe(void *a, void *b, int size)
2587{
2588 struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2589
2590 tmp = *l;
2591 memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2592 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2593}
2594
2595/*
2596 * When the ocfs2_xattr_block is filled up, new bucket will be created
2597 * and all the xattr entries will be moved to the new bucket.
2598 * Note: we need to sort the entries since they are not saved in order
2599 * in the ocfs2_xattr_block.
2600 */
2601static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2602 struct buffer_head *xb_bh,
2603 struct buffer_head *xh_bh,
2604 struct buffer_head *data_bh)
2605{
2606 int i, blocksize = inode->i_sb->s_blocksize;
2607 u16 offset, size, off_change;
2608 struct ocfs2_xattr_entry *xe;
2609 struct ocfs2_xattr_block *xb =
2610 (struct ocfs2_xattr_block *)xb_bh->b_data;
2611 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2612 struct ocfs2_xattr_header *xh =
2613 (struct ocfs2_xattr_header *)xh_bh->b_data;
2614 u16 count = le16_to_cpu(xb_xh->xh_count);
2615 char *target = xh_bh->b_data, *src = xb_bh->b_data;
2616
2617 mlog(0, "cp xattr from block %llu to bucket %llu\n",
2618 (unsigned long long)xb_bh->b_blocknr,
2619 (unsigned long long)xh_bh->b_blocknr);
2620
2621 memset(xh_bh->b_data, 0, blocksize);
2622 if (data_bh)
2623 memset(data_bh->b_data, 0, blocksize);
2624 /*
2625 * Since the xe_name_offset is based on ocfs2_xattr_header,
2626 * there is a offset change corresponding to the change of
2627 * ocfs2_xattr_header's position.
2628 */
2629 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2630 xe = &xb_xh->xh_entries[count - 1];
2631 offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2632 size = blocksize - offset;
2633
2634 /* copy all the names and values. */
2635 if (data_bh)
2636 target = data_bh->b_data;
2637 memcpy(target + offset, src + offset, size);
2638
2639 /* Init new header now. */
2640 xh->xh_count = xb_xh->xh_count;
2641 xh->xh_num_buckets = cpu_to_le16(1);
2642 xh->xh_name_value_len = cpu_to_le16(size);
2643 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2644
2645 /* copy all the entries. */
2646 target = xh_bh->b_data;
2647 offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2648 size = count * sizeof(struct ocfs2_xattr_entry);
2649 memcpy(target + offset, (char *)xb_xh + offset, size);
2650
2651 /* Change the xe offset for all the xe because of the move. */
2652 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2653 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2654 for (i = 0; i < count; i++)
2655 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2656
2657 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2658 offset, size, off_change);
2659
2660 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2661 cmp_xe, swap_xe);
2662}
2663
2664/*
2665 * After we move xattr from block to index btree, we have to
2666 * update ocfs2_xattr_search to the new xe and base.
2667 *
2668 * When the entry is in xattr block, xattr_bh indicates the storage place.
2669 * While if the entry is in index b-tree, "bucket" indicates the
2670 * real place of the xattr.
2671 */
2672static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2673 struct ocfs2_xattr_search *xs,
2674 struct buffer_head *old_bh,
2675 struct buffer_head *new_bh)
2676{
2677 int ret = 0;
2678 char *buf = old_bh->b_data;
2679 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2680 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2681 int i, blocksize = inode->i_sb->s_blocksize;
2682 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2683
2684 xs->bucket.bhs[0] = new_bh;
2685 get_bh(new_bh);
2686 xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2687 xs->header = xs->bucket.xh;
2688
2689 xs->base = new_bh->b_data;
2690 xs->end = xs->base + inode->i_sb->s_blocksize;
2691
2692 if (!xs->not_found) {
2693 if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
2694 ret = ocfs2_read_blocks(inode,
2695 xs->bucket.bhs[0]->b_blocknr + 1,
2696 blk_per_bucket - 1, &xs->bucket.bhs[1],
2697 OCFS2_BH_CACHED);
2698 if (ret) {
2699 mlog_errno(ret);
2700 return ret;
2701 }
2702
2703 i = xs->here - old_xh->xh_entries;
2704 xs->here = &xs->header->xh_entries[i];
2705 }
2706 }
2707
2708 return ret;
2709}
2710
2711static int ocfs2_xattr_create_index_block(struct inode *inode,
2712 struct ocfs2_xattr_search *xs)
2713{
2714 int ret, credits = OCFS2_SUBALLOC_ALLOC;
2715 u32 bit_off, len;
2716 u64 blkno;
2717 handle_t *handle;
2718 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2719 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2720 struct ocfs2_alloc_context *data_ac;
2721 struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2722 struct buffer_head *xb_bh = xs->xattr_bh;
2723 struct ocfs2_xattr_block *xb =
2724 (struct ocfs2_xattr_block *)xb_bh->b_data;
2725 struct ocfs2_xattr_tree_root *xr;
2726 u16 xb_flags = le16_to_cpu(xb->xb_flags);
2727 u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2728
2729 mlog(0, "create xattr index block for %llu\n",
2730 (unsigned long long)xb_bh->b_blocknr);
2731
2732 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2733
2734 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2735 if (ret) {
2736 mlog_errno(ret);
2737 goto out;
2738 }
2739
2740 /*
2741 * XXX:
2742 * We can use this lock for now, and maybe move to a dedicated mutex
2743 * if performance becomes a problem later.
2744 */
2745 down_write(&oi->ip_alloc_sem);
2746
2747 /*
2748 * 3 more credits, one for xattr block update, one for the 1st block
2749 * of the new xattr bucket and one for the value/data.
2750 */
2751 credits += 3;
2752 handle = ocfs2_start_trans(osb, credits);
2753 if (IS_ERR(handle)) {
2754 ret = PTR_ERR(handle);
2755 mlog_errno(ret);
2756 goto out_sem;
2757 }
2758
2759 ret = ocfs2_journal_access(handle, inode, xb_bh,
2760 OCFS2_JOURNAL_ACCESS_WRITE);
2761 if (ret) {
2762 mlog_errno(ret);
2763 goto out_commit;
2764 }
2765
2766 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2767 if (ret) {
2768 mlog_errno(ret);
2769 goto out_commit;
2770 }
2771
2772 /*
2773 * The bucket may spread in many blocks, and
2774 * we will only touch the 1st block and the last block
2775 * in the whole bucket(one for entry and one for data).
2776 */
2777 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2778
2779 mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno);
2780
2781 xh_bh = sb_getblk(inode->i_sb, blkno);
2782 if (!xh_bh) {
2783 ret = -EIO;
2784 mlog_errno(ret);
2785 goto out_commit;
2786 }
2787
2788 ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2789
2790 ret = ocfs2_journal_access(handle, inode, xh_bh,
2791 OCFS2_JOURNAL_ACCESS_CREATE);
2792 if (ret) {
2793 mlog_errno(ret);
2794 goto out_commit;
2795 }
2796
2797 if (bpb > 1) {
2798 data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2799 if (!data_bh) {
2800 ret = -EIO;
2801 mlog_errno(ret);
2802 goto out_commit;
2803 }
2804
2805 ocfs2_set_new_buffer_uptodate(inode, data_bh);
2806
2807 ret = ocfs2_journal_access(handle, inode, data_bh,
2808 OCFS2_JOURNAL_ACCESS_CREATE);
2809 if (ret) {
2810 mlog_errno(ret);
2811 goto out_commit;
2812 }
2813 }
2814
2815 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2816
2817 ocfs2_journal_dirty(handle, xh_bh);
2818 if (data_bh)
2819 ocfs2_journal_dirty(handle, data_bh);
2820
2821 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2822
2823 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2824 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2825 offsetof(struct ocfs2_xattr_block, xb_attrs));
2826
2827 xr = &xb->xb_attrs.xb_root;
2828 xr->xt_clusters = cpu_to_le32(1);
2829 xr->xt_last_eb_blk = 0;
2830 xr->xt_list.l_tree_depth = 0;
2831 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2832 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2833
2834 xr->xt_list.l_recs[0].e_cpos = 0;
2835 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2836 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2837
2838 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2839
2840 ret = ocfs2_journal_dirty(handle, xb_bh);
2841 if (ret) {
2842 mlog_errno(ret);
2843 goto out_commit;
2844 }
2845
2846out_commit:
2847 ocfs2_commit_trans(osb, handle);
2848
2849out_sem:
2850 up_write(&oi->ip_alloc_sem);
2851
2852out:
2853 if (data_ac)
2854 ocfs2_free_alloc_context(data_ac);
2855
2856 brelse(xh_bh);
2857 brelse(data_bh);
2858
2859 return ret;
2860}
2861
2862static int cmp_xe_offset(const void *a, const void *b)
2863{
2864 const struct ocfs2_xattr_entry *l = a, *r = b;
2865 u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2866 u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2867
2868 if (l_name_offset < r_name_offset)
2869 return 1;
2870 if (l_name_offset > r_name_offset)
2871 return -1;
2872 return 0;
2873}
2874
2875/*
2876 * defrag a xattr bucket if we find that the bucket has some
2877 * holes beteen name/value pairs.
2878 * We will move all the name/value pairs to the end of the bucket
2879 * so that we can spare some space for insertion.
2880 */
2881static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2882 struct ocfs2_xattr_bucket *bucket)
2883{
2884 int ret, i;
2885 size_t end, offset, len, value_len;
2886 struct ocfs2_xattr_header *xh;
2887 char *entries, *buf, *bucket_buf = NULL;
2888 u64 blkno = bucket->bhs[0]->b_blocknr;
2889 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2890 u16 xh_free_start;
2891 size_t blocksize = inode->i_sb->s_blocksize;
2892 handle_t *handle;
2893 struct buffer_head **bhs;
2894 struct ocfs2_xattr_entry *xe;
2895
2896 bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2897 GFP_NOFS);
2898 if (!bhs)
2899 return -ENOMEM;
2900
2901 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs,
2902 OCFS2_BH_CACHED);
2903 if (ret)
2904 goto out;
2905
2906 /*
2907 * In order to make the operation more efficient and generic,
2908 * we copy all the blocks into a contiguous memory and do the
2909 * defragment there, so if anything is error, we will not touch
2910 * the real block.
2911 */
2912 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2913 if (!bucket_buf) {
2914 ret = -EIO;
2915 goto out;
2916 }
2917
2918 buf = bucket_buf;
2919 for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2920 memcpy(buf, bhs[i]->b_data, blocksize);
2921
2922 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2923 if (IS_ERR(handle)) {
2924 ret = PTR_ERR(handle);
2925 handle = NULL;
2926 mlog_errno(ret);
2927 goto out;
2928 }
2929
2930 for (i = 0; i < blk_per_bucket; i++) {
2931 ret = ocfs2_journal_access(handle, inode, bhs[i],
2932 OCFS2_JOURNAL_ACCESS_WRITE);
2933 if (ret < 0) {
2934 mlog_errno(ret);
2935 goto commit;
2936 }
2937 }
2938
2939 xh = (struct ocfs2_xattr_header *)bucket_buf;
2940 entries = (char *)xh->xh_entries;
2941 xh_free_start = le16_to_cpu(xh->xh_free_start);
2942
2943 mlog(0, "adjust xattr bucket in %llu, count = %u, "
2944 "xh_free_start = %u, xh_name_value_len = %u.\n",
2945 blkno, le16_to_cpu(xh->xh_count), xh_free_start,
2946 le16_to_cpu(xh->xh_name_value_len));
2947
2948 /*
2949 * sort all the entries by their offset.
2950 * the largest will be the first, so that we can
2951 * move them to the end one by one.
2952 */
2953 sort(entries, le16_to_cpu(xh->xh_count),
2954 sizeof(struct ocfs2_xattr_entry),
2955 cmp_xe_offset, swap_xe);
2956
2957 /* Move all name/values to the end of the bucket. */
2958 xe = xh->xh_entries;
2959 end = OCFS2_XATTR_BUCKET_SIZE;
2960 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2961 offset = le16_to_cpu(xe->xe_name_offset);
2962 if (ocfs2_xattr_is_local(xe))
2963 value_len = OCFS2_XATTR_SIZE(
2964 le64_to_cpu(xe->xe_value_size));
2965 else
2966 value_len = OCFS2_XATTR_ROOT_SIZE;
2967 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2968
2969 /*
2970 * We must make sure that the name/value pair
2971 * exist in the same block. So adjust end to
2972 * the previous block end if needed.
2973 */
2974 if (((end - len) / blocksize !=
2975 (end - 1) / blocksize))
2976 end = end - end % blocksize;
2977
2978 if (end > offset + len) {
2979 memmove(bucket_buf + end - len,
2980 bucket_buf + offset, len);
2981 xe->xe_name_offset = cpu_to_le16(end - len);
2982 }
2983
2984 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2985 "bucket %llu\n", (unsigned long long)blkno);
2986
2987 end -= len;
2988 }
2989
2990 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2991 "bucket %llu\n", (unsigned long long)blkno);
2992
2993 if (xh_free_start == end)
2994 goto commit;
2995
2996 memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2997 xh->xh_free_start = cpu_to_le16(end);
2998
2999 /* sort the entries by their name_hash. */
3000 sort(entries, le16_to_cpu(xh->xh_count),
3001 sizeof(struct ocfs2_xattr_entry),
3002 cmp_xe, swap_xe);
3003
3004 buf = bucket_buf;
3005 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
3006 memcpy(bhs[i]->b_data, buf, blocksize);
3007 ocfs2_journal_dirty(handle, bhs[i]);
3008 }
3009
3010commit:
3011 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
3012out:
3013
3014 if (bhs) {
3015 for (i = 0; i < blk_per_bucket; i++)
3016 brelse(bhs[i]);
3017 }
3018 kfree(bhs);
3019
3020 kfree(bucket_buf);
3021 return ret;
3022}
3023
3024/*
3025 * Move half nums of the xattr bucket in the previous cluster to this new
3026 * cluster. We only touch the last cluster of the previous extend record.
3027 *
3028 * first_bh is the first buffer_head of a series of bucket in the same
3029 * extent rec and header_bh is the header of one bucket in this cluster.
3030 * They will be updated if we move the data header_bh contains to the new
3031 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3032 */
3033static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3034 handle_t *handle,
3035 struct buffer_head **first_bh,
3036 struct buffer_head **header_bh,
3037 u64 new_blkno,
3038 u64 prev_blkno,
3039 u32 num_clusters,
3040 u32 *first_hash)
3041{
3042 int i, ret, credits;
3043 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3044 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3045 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3046 int blocksize = inode->i_sb->s_blocksize;
3047 struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3048 struct ocfs2_xattr_header *new_xh;
3049 struct ocfs2_xattr_header *xh =
3050 (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3051
3052 BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3053 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3054
3055 prev_bh = *first_bh;
3056 get_bh(prev_bh);
3057 xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3058
3059 prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3060
3061 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3062 prev_blkno, new_blkno);
3063
3064 /*
3065 * We need to update the 1st half of the new cluster and
3066 * 1 more for the update of the 1st bucket of the previous
3067 * extent record.
3068 */
3069 credits = bpc / 2 + 1;
3070 ret = ocfs2_extend_trans(handle, credits);
3071 if (ret) {
3072 mlog_errno(ret);
3073 goto out;
3074 }
3075
3076 ret = ocfs2_journal_access(handle, inode, prev_bh,
3077 OCFS2_JOURNAL_ACCESS_WRITE);
3078 if (ret) {
3079 mlog_errno(ret);
3080 goto out;
3081 }
3082
3083 for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3084 old_bh = new_bh = NULL;
3085 new_bh = sb_getblk(inode->i_sb, new_blkno);
3086 if (!new_bh) {
3087 ret = -EIO;
3088 mlog_errno(ret);
3089 goto out;
3090 }
3091
3092 ocfs2_set_new_buffer_uptodate(inode, new_bh);
3093
3094 ret = ocfs2_journal_access(handle, inode, new_bh,
3095 OCFS2_JOURNAL_ACCESS_CREATE);
3096 if (ret < 0) {
3097 mlog_errno(ret);
3098 brelse(new_bh);
3099 goto out;
3100 }
3101
3102 ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
3103 if (ret < 0) {
3104 mlog_errno(ret);
3105 brelse(new_bh);
3106 goto out;
3107 }
3108
3109 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3110
3111 if (i == 0) {
3112 new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3113 new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3114
3115 if (first_hash)
3116 *first_hash = le32_to_cpu(
3117 new_xh->xh_entries[0].xe_name_hash);
3118 new_first_bh = new_bh;
3119 get_bh(new_first_bh);
3120 }
3121
3122 ocfs2_journal_dirty(handle, new_bh);
3123
3124 if (*header_bh == old_bh) {
3125 brelse(*header_bh);
3126 *header_bh = new_bh;
3127 get_bh(*header_bh);
3128
3129 brelse(*first_bh);
3130 *first_bh = new_first_bh;
3131 get_bh(*first_bh);
3132 }
3133 brelse(new_bh);
3134 brelse(old_bh);
3135 }
3136
3137 le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3138
3139 ocfs2_journal_dirty(handle, prev_bh);
3140out:
3141 brelse(prev_bh);
3142 brelse(new_first_bh);
3143 return ret;
3144}
3145
3146static int ocfs2_read_xattr_bucket(struct inode *inode,
3147 u64 blkno,
3148 struct buffer_head **bhs,
3149 int new)
3150{
3151 int ret = 0;
3152 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3153
3154 if (!new)
3155 return ocfs2_read_blocks(inode, blkno,
3156 blk_per_bucket, bhs,
3157 OCFS2_BH_CACHED);
3158
3159 for (i = 0; i < blk_per_bucket; i++) {
3160 bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3161 if (bhs[i] == NULL) {
3162 ret = -EIO;
3163 mlog_errno(ret);
3164 break;
3165 }
3166 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3167 }
3168
3169 return ret;
3170}
3171
3172/*
3173 * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
3174 * first_hash will record the 1st hash of the new bucket.
3175 */
3176static int ocfs2_half_xattr_bucket(struct inode *inode,
3177 handle_t *handle,
3178 u64 blk,
3179 u64 new_blk,
3180 u32 *first_hash,
3181 int new_bucket_head)
3182{
3183 int ret, i;
3184 u16 count, start, len, name_value_len, xe_len, name_offset;
3185 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3186 struct buffer_head **s_bhs, **t_bhs = NULL;
3187 struct ocfs2_xattr_header *xh;
3188 struct ocfs2_xattr_entry *xe;
3189 int blocksize = inode->i_sb->s_blocksize;
3190
3191 mlog(0, "move half of xattrs from bucket %llu to %llu\n",
3192 blk, new_blk);
3193
3194 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3195 if (!s_bhs)
3196 return -ENOMEM;
3197
3198 ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3199 if (ret) {
3200 mlog_errno(ret);
3201 goto out;
3202 }
3203
3204 ret = ocfs2_journal_access(handle, inode, s_bhs[0],
3205 OCFS2_JOURNAL_ACCESS_WRITE);
3206 if (ret) {
3207 mlog_errno(ret);
3208 goto out;
3209 }
3210
3211 t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3212 if (!t_bhs) {
3213 ret = -ENOMEM;
3214 goto out;
3215 }
3216
3217 ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
3218 if (ret) {
3219 mlog_errno(ret);
3220 goto out;
3221 }
3222
3223 for (i = 0; i < blk_per_bucket; i++) {
3224 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3225 OCFS2_JOURNAL_ACCESS_CREATE);
3226 if (ret) {
3227 mlog_errno(ret);
3228 goto out;
3229 }
3230 }
3231
3232 /* copy the whole bucket to the new first. */
3233 for (i = 0; i < blk_per_bucket; i++)
3234 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3235
3236 /* update the new bucket. */
3237 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3238 count = le16_to_cpu(xh->xh_count);
3239 start = count / 2;
3240
3241 /*
3242 * Calculate the total name/value len and xh_free_start for
3243 * the old bucket first.
3244 */
3245 name_offset = OCFS2_XATTR_BUCKET_SIZE;
3246 name_value_len = 0;
3247 for (i = 0; i < start; i++) {
3248 xe = &xh->xh_entries[i];
3249 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3250 if (ocfs2_xattr_is_local(xe))
3251 xe_len +=
3252 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3253 else
3254 xe_len += OCFS2_XATTR_ROOT_SIZE;
3255 name_value_len += xe_len;
3256 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3257 name_offset = le16_to_cpu(xe->xe_name_offset);
3258 }
3259
3260 /*
3261 * Now begin the modification to the new bucket.
3262 *
3263 * In the new bucket, We just move the xattr entry to the beginning
3264 * and don't touch the name/value. So there will be some holes in the
3265 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3266 * called.
3267 */
3268 xe = &xh->xh_entries[start];
3269 len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3270 mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3271 (int)((char *)xe - (char *)xh),
3272 (int)((char *)xh->xh_entries - (char *)xh));
3273 memmove((char *)xh->xh_entries, (char *)xe, len);
3274 xe = &xh->xh_entries[count - start];
3275 len = sizeof(struct ocfs2_xattr_entry) * start;
3276 memset((char *)xe, 0, len);
3277
3278 le16_add_cpu(&xh->xh_count, -start);
3279 le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3280
3281 /* Calculate xh_free_start for the new bucket. */
3282 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3283 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3284 xe = &xh->xh_entries[i];
3285 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3286 if (ocfs2_xattr_is_local(xe))
3287 xe_len +=
3288 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3289 else
3290 xe_len += OCFS2_XATTR_ROOT_SIZE;
3291 if (le16_to_cpu(xe->xe_name_offset) <
3292 le16_to_cpu(xh->xh_free_start))
3293 xh->xh_free_start = xe->xe_name_offset;
3294 }
3295
3296 /* set xh->xh_num_buckets for the new xh. */
3297 if (new_bucket_head)
3298 xh->xh_num_buckets = cpu_to_le16(1);
3299 else
3300 xh->xh_num_buckets = 0;
3301
3302 for (i = 0; i < blk_per_bucket; i++) {
3303 ocfs2_journal_dirty(handle, t_bhs[i]);
3304 if (ret)
3305 mlog_errno(ret);
3306 }
3307
3308 /* store the first_hash of the new bucket. */
3309 if (first_hash)
3310 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3311
3312 /*
3313 * Now only update the 1st block of the old bucket.
3314 * Please note that the entry has been sorted already above.
3315 */
3316 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3317 memset(&xh->xh_entries[start], 0,
3318 sizeof(struct ocfs2_xattr_entry) * (count - start));
3319 xh->xh_count = cpu_to_le16(start);
3320 xh->xh_free_start = cpu_to_le16(name_offset);
3321 xh->xh_name_value_len = cpu_to_le16(name_value_len);
3322
3323 ocfs2_journal_dirty(handle, s_bhs[0]);
3324 if (ret)
3325 mlog_errno(ret);
3326
3327out:
3328 if (s_bhs) {
3329 for (i = 0; i < blk_per_bucket; i++)
3330 brelse(s_bhs[i]);
3331 }
3332 kfree(s_bhs);
3333
3334 if (t_bhs) {
3335 for (i = 0; i < blk_per_bucket; i++)
3336 brelse(t_bhs[i]);
3337 }
3338 kfree(t_bhs);
3339
3340 return ret;
3341}
3342
3343/*
3344 * Copy xattr from one bucket to another bucket.
3345 *
3346 * The caller must make sure that the journal transaction
3347 * has enough space for journaling.
3348 */
3349static int ocfs2_cp_xattr_bucket(struct inode *inode,
3350 handle_t *handle,
3351 u64 s_blkno,
3352 u64 t_blkno,
3353 int t_is_new)
3354{
3355 int ret, i;
3356 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3357 int blocksize = inode->i_sb->s_blocksize;
3358 struct buffer_head **s_bhs, **t_bhs = NULL;
3359
3360 BUG_ON(s_blkno == t_blkno);
3361
3362 mlog(0, "cp bucket %llu to %llu, target is %d\n",
3363 s_blkno, t_blkno, t_is_new);
3364
3365 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3366 GFP_NOFS);
3367 if (!s_bhs)
3368 return -ENOMEM;
3369
3370 ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
3371 if (ret)
3372 goto out;
3373
3374 t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3375 GFP_NOFS);
3376 if (!t_bhs) {
3377 ret = -ENOMEM;
3378 goto out;
3379 }
3380
3381 ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
3382 if (ret)
3383 goto out;
3384
3385 for (i = 0; i < blk_per_bucket; i++) {
3386 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3387 OCFS2_JOURNAL_ACCESS_WRITE);
3388 if (ret)
3389 goto out;
3390 }
3391
3392 for (i = 0; i < blk_per_bucket; i++) {
3393 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3394 ocfs2_journal_dirty(handle, t_bhs[i]);
3395 }
3396
3397out:
3398 if (s_bhs) {
3399 for (i = 0; i < blk_per_bucket; i++)
3400 brelse(s_bhs[i]);
3401 }
3402 kfree(s_bhs);
3403
3404 if (t_bhs) {
3405 for (i = 0; i < blk_per_bucket; i++)
3406 brelse(t_bhs[i]);
3407 }
3408 kfree(t_bhs);
3409
3410 return ret;
3411}
3412
3413/*
3414 * Copy one xattr cluster from src_blk to to_blk.
3415 * The to_blk will become the first bucket header of the cluster, so its
3416 * xh_num_buckets will be initialized as the bucket num in the cluster.
3417 */
3418static int ocfs2_cp_xattr_cluster(struct inode *inode,
3419 handle_t *handle,
3420 struct buffer_head *first_bh,
3421 u64 src_blk,
3422 u64 to_blk,
3423 u32 *first_hash)
3424{
3425 int i, ret, credits;
3426 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3427 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3428 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3429 struct buffer_head *bh = NULL;
3430 struct ocfs2_xattr_header *xh;
3431 u64 to_blk_start = to_blk;
3432
3433 mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);
3434
3435 /*
3436 * We need to update the new cluster and 1 more for the update of
3437 * the 1st bucket of the previous extent rec.
3438 */
3439 credits = bpc + 1;
3440 ret = ocfs2_extend_trans(handle, credits);
3441 if (ret) {
3442 mlog_errno(ret);
3443 goto out;
3444 }
3445
3446 ret = ocfs2_journal_access(handle, inode, first_bh,
3447 OCFS2_JOURNAL_ACCESS_WRITE);
3448 if (ret) {
3449 mlog_errno(ret);
3450 goto out;
3451 }
3452
3453 for (i = 0; i < num_buckets; i++) {
3454 ret = ocfs2_cp_xattr_bucket(inode, handle,
3455 src_blk, to_blk, 1);
3456 if (ret) {
3457 mlog_errno(ret);
3458 goto out;
3459 }
3460
3461 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3462 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3463 }
3464
3465 /* update the old bucket header. */
3466 xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3467 le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3468
3469 ocfs2_journal_dirty(handle, first_bh);
3470
3471 /* update the new bucket header. */
3472 ret = ocfs2_read_block(inode, to_blk_start, &bh);
3473 if (ret < 0) {
3474 mlog_errno(ret);
3475 goto out;
3476 }
3477
3478 ret = ocfs2_journal_access(handle, inode, bh,
3479 OCFS2_JOURNAL_ACCESS_WRITE);
3480 if (ret) {
3481 mlog_errno(ret);
3482 goto out;
3483 }
3484
3485 xh = (struct ocfs2_xattr_header *)bh->b_data;
3486 xh->xh_num_buckets = cpu_to_le16(num_buckets);
3487
3488 ocfs2_journal_dirty(handle, bh);
3489
3490 if (first_hash)
3491 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3492out:
3493 brelse(bh);
3494 return ret;
3495}
3496
3497/*
3498 * Move half of the xattrs in this cluster to the new cluster.
3499 * This function should only be called when bucket size == cluster size.
3500 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3501 */
3502static int ocfs2_half_xattr_cluster(struct inode *inode,
3503 handle_t *handle,
3504 u64 prev_blk,
3505 u64 new_blk,
3506 u32 *first_hash)
3507{
3508 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3509 int ret, credits = 2 * blk_per_bucket;
3510
3511 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3512
3513 ret = ocfs2_extend_trans(handle, credits);
3514 if (ret) {
3515 mlog_errno(ret);
3516 return ret;
3517 }
3518
3519 /* Move half of the xattr in start_blk to the next bucket. */
3520 return ocfs2_half_xattr_bucket(inode, handle, prev_blk,
3521 new_blk, first_hash, 1);
3522}
3523
3524/*
3525 * Move some xattrs from the old cluster to the new one since they are not
3526 * contiguous in ocfs2 xattr tree.
3527 *
3528 * new_blk starts a new separate cluster, and we will move some xattrs from
3529 * prev_blk to it. v_start will be set as the first name hash value in this
3530 * new cluster so that it can be used as e_cpos during tree insertion and
3531 * don't collide with our original b-tree operations. first_bh and header_bh
3532 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3533 * to extend the insert bucket.
3534 *
3535 * The problem is how much xattr should we move to the new one and when should
3536 * we update first_bh and header_bh?
3537 * 1. If cluster size > bucket size, that means the previous cluster has more
3538 * than 1 bucket, so just move half nums of bucket into the new cluster and
3539 * update the first_bh and header_bh if the insert bucket has been moved
3540 * to the new cluster.
3541 * 2. If cluster_size == bucket_size:
3542 * a) If the previous extent rec has more than one cluster and the insert
3543 * place isn't in the last cluster, copy the entire last cluster to the
3544 * new one. This time, we don't need to upate the first_bh and header_bh
3545 * since they will not be moved into the new cluster.
3546 * b) Otherwise, move the bottom half of the xattrs in the last cluster into
3547 * the new one. And we set the extend flag to zero if the insert place is
3548 * moved into the new allocated cluster since no extend is needed.
3549 */
3550static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3551 handle_t *handle,
3552 struct buffer_head **first_bh,
3553 struct buffer_head **header_bh,
3554 u64 new_blk,
3555 u64 prev_blk,
3556 u32 prev_clusters,
3557 u32 *v_start,
3558 int *extend)
3559{
3560 int ret = 0;
3561 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3562
3563 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3564 prev_blk, prev_clusters, new_blk);
3565
3566 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3567 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3568 handle,
3569 first_bh,
3570 header_bh,
3571 new_blk,
3572 prev_blk,
3573 prev_clusters,
3574 v_start);
3575 else {
3576 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3577
3578 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3579 ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3580 last_blk, new_blk,
3581 v_start);
3582 else {
3583 ret = ocfs2_half_xattr_cluster(inode, handle,
3584 last_blk, new_blk,
3585 v_start);
3586
3587 if ((*header_bh)->b_blocknr == last_blk && extend)
3588 *extend = 0;
3589 }
3590 }
3591
3592 return ret;
3593}
3594
3595/*
3596 * Add a new cluster for xattr storage.
3597 *
3598 * If the new cluster is contiguous with the previous one, it will be
3599 * appended to the same extent record, and num_clusters will be updated.
3600 * If not, we will insert a new extent for it and move some xattrs in
3601 * the last cluster into the new allocated one.
3602 * We also need to limit the maximum size of a btree leaf, otherwise we'll
3603 * lose the benefits of hashing because we'll have to search large leaves.
3604 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3605 * if it's bigger).
3606 *
3607 * first_bh is the first block of the previous extent rec and header_bh
3608 * indicates the bucket we will insert the new xattrs. They will be updated
3609 * when the header_bh is moved into the new cluster.
3610 */
3611static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3612 struct buffer_head *root_bh,
3613 struct buffer_head **first_bh,
3614 struct buffer_head **header_bh,
3615 u32 *num_clusters,
3616 u32 prev_cpos,
3617 u64 prev_blkno,
3618 int *extend)
3619{
3620 int ret, credits;
3621 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3622 u32 prev_clusters = *num_clusters;
3623 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3624 u64 block;
3625 handle_t *handle = NULL;
3626 struct ocfs2_alloc_context *data_ac = NULL;
3627 struct ocfs2_alloc_context *meta_ac = NULL;
3628 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3629 struct ocfs2_extent_tree et;
3630
3631 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3632 "previous xattr blkno = %llu\n",
3633 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3634 prev_cpos, prev_blkno);
3635
3636 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
3637
3638 ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
3639 &data_ac, &meta_ac);
3640 if (ret) {
3641 mlog_errno(ret);
3642 goto leave;
3643 }
3644
3645 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
3646 clusters_to_add);
3647 handle = ocfs2_start_trans(osb, credits);
3648 if (IS_ERR(handle)) {
3649 ret = PTR_ERR(handle);
3650 handle = NULL;
3651 mlog_errno(ret);
3652 goto leave;
3653 }
3654
3655 ret = ocfs2_journal_access(handle, inode, root_bh,
3656 OCFS2_JOURNAL_ACCESS_WRITE);
3657 if (ret < 0) {
3658 mlog_errno(ret);
3659 goto leave;
3660 }
3661
3662 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
3663 clusters_to_add, &bit_off, &num_bits);
3664 if (ret < 0) {
3665 if (ret != -ENOSPC)
3666 mlog_errno(ret);
3667 goto leave;
3668 }
3669
3670 BUG_ON(num_bits > clusters_to_add);
3671
3672 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
3673 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3674 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3675
3676 if (prev_blkno + prev_clusters * bpc == block &&
3677 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3678 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3679 /*
3680 * If this cluster is contiguous with the old one and
3681 * adding this new cluster, we don't surpass the limit of
3682 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
3683 * initialized and used like other buckets in the previous
3684 * cluster.
3685 * So add it as a contiguous one. The caller will handle
3686 * its init process.
3687 */
3688 v_start = prev_cpos + prev_clusters;
3689 *num_clusters = prev_clusters + num_bits;
3690 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
3691 num_bits);
3692 } else {
3693 ret = ocfs2_adjust_xattr_cross_cluster(inode,
3694 handle,
3695 first_bh,
3696 header_bh,
3697 block,
3698 prev_blkno,
3699 prev_clusters,
3700 &v_start,
3701 extend);
3702 if (ret) {
3703 mlog_errno(ret);
3704 goto leave;
3705 }
3706 }
3707
3708 if (handle->h_buffer_credits < credits) {
3709 /*
3710 * The journal has been restarted before, and don't
3711 * have enough space for the insertion, so extend it
3712 * here.
3713 */
3714 ret = ocfs2_extend_trans(handle, credits);
3715 if (ret) {
3716 mlog_errno(ret);
3717 goto leave;
3718 }
3719 }
3720 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3721 num_bits, block, v_start);
3722 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3723 num_bits, 0, meta_ac);
3724 if (ret < 0) {
3725 mlog_errno(ret);
3726 goto leave;
3727 }
3728
3729 ret = ocfs2_journal_dirty(handle, root_bh);
3730 if (ret < 0) {
3731 mlog_errno(ret);
3732 goto leave;
3733 }
3734
3735leave:
3736 if (handle)
3737 ocfs2_commit_trans(osb, handle);
3738 if (data_ac)
3739 ocfs2_free_alloc_context(data_ac);
3740 if (meta_ac)
3741 ocfs2_free_alloc_context(meta_ac);
3742
3743 return ret;
3744}
3745
3746/*
3747 * Extend a new xattr bucket and move xattrs to the end one by one until
3748 * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3749 */
3750static int ocfs2_extend_xattr_bucket(struct inode *inode,
3751 struct buffer_head *first_bh,
3752 struct buffer_head *start_bh,
3753 u32 num_clusters)
3754{
3755 int ret, credits;
3756 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3757 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3758 u64 start_blk = start_bh->b_blocknr, end_blk;
3759 u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3760 handle_t *handle;
3761 struct ocfs2_xattr_header *first_xh =
3762 (struct ocfs2_xattr_header *)first_bh->b_data;
3763 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3764
3765 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3766 "from %llu, len = %u\n", start_blk,
3767 (unsigned long long)first_bh->b_blocknr, num_clusters);
3768
3769 BUG_ON(bucket >= num_buckets);
3770
3771 end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3772
3773 /*
3774 * We will touch all the buckets after the start_bh(include it).
3775 * Add one more bucket and modify the first_bh.
3776 */
3777 credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3778 handle = ocfs2_start_trans(osb, credits);
3779 if (IS_ERR(handle)) {
3780 ret = PTR_ERR(handle);
3781 handle = NULL;
3782 mlog_errno(ret);
3783 goto out;
3784 }
3785
3786 ret = ocfs2_journal_access(handle, inode, first_bh,
3787 OCFS2_JOURNAL_ACCESS_WRITE);
3788 if (ret) {
3789 mlog_errno(ret);
3790 goto commit;
3791 }
3792
3793 while (end_blk != start_blk) {
3794 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3795 end_blk + blk_per_bucket, 0);
3796 if (ret)
3797 goto commit;
3798 end_blk -= blk_per_bucket;
3799 }
3800
3801 /* Move half of the xattr in start_blk to the next bucket. */
3802 ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
3803 start_blk + blk_per_bucket, NULL, 0);
3804
3805 le16_add_cpu(&first_xh->xh_num_buckets, 1);
3806 ocfs2_journal_dirty(handle, first_bh);
3807
3808commit:
3809 ocfs2_commit_trans(osb, handle);
3810out:
3811 return ret;
3812}
3813
3814/*
3815 * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3816 * xb_bh is the ocfs2_xattr_block.
3817 * We will move all the buckets starting from header_bh to the next place. As
3818 * for this one, half num of its xattrs will be moved to the next one.
3819 *
3820 * We will allocate a new cluster if current cluster is full and adjust
3821 * header_bh and first_bh if the insert place is moved to the new cluster.
3822 */
3823static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3824 struct buffer_head *xb_bh,
3825 struct buffer_head *header_bh)
3826{
3827 struct ocfs2_xattr_header *first_xh = NULL;
3828 struct buffer_head *first_bh = NULL;
3829 struct ocfs2_xattr_block *xb =
3830 (struct ocfs2_xattr_block *)xb_bh->b_data;
3831 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3832 struct ocfs2_extent_list *el = &xb_root->xt_list;
3833 struct ocfs2_xattr_header *xh =
3834 (struct ocfs2_xattr_header *)header_bh->b_data;
3835 u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3836 struct super_block *sb = inode->i_sb;
3837 struct ocfs2_super *osb = OCFS2_SB(sb);
3838 int ret, num_buckets, extend = 1;
3839 u64 p_blkno;
3840 u32 e_cpos, num_clusters;
3841
3842 mlog(0, "Add new xattr bucket starting form %llu\n",
3843 (unsigned long long)header_bh->b_blocknr);
3844
3845 /*
3846 * Add refrence for header_bh here because it may be
3847 * changed in ocfs2_add_new_xattr_cluster and we need
3848 * to free it in the end.
3849 */
3850 get_bh(header_bh);
3851
3852 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3853 &num_clusters, el);
3854 if (ret) {
3855 mlog_errno(ret);
3856 goto out;
3857 }
3858
3859 ret = ocfs2_read_block(inode, p_blkno, &first_bh);
3860 if (ret) {
3861 mlog_errno(ret);
3862 goto out;
3863 }
3864
3865 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3866 first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3867
3868 if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3869 ret = ocfs2_add_new_xattr_cluster(inode,
3870 xb_bh,
3871 &first_bh,
3872 &header_bh,
3873 &num_clusters,
3874 e_cpos,
3875 p_blkno,
3876 &extend);
3877 if (ret) {
3878 mlog_errno(ret);
3879 goto out;
3880 }
3881 }
3882
3883 if (extend)
3884 ret = ocfs2_extend_xattr_bucket(inode,
3885 first_bh,
3886 header_bh,
3887 num_clusters);
3888 if (ret)
3889 mlog_errno(ret);
3890out:
3891 brelse(first_bh);
3892 brelse(header_bh);
3893 return ret;
3894}
3895
3896static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3897 struct ocfs2_xattr_bucket *bucket,
3898 int offs)
3899{
3900 int block_off = offs >> inode->i_sb->s_blocksize_bits;
3901
3902 offs = offs % inode->i_sb->s_blocksize;
3903 return bucket->bhs[block_off]->b_data + offs;
3904}
3905
3906/*
3907 * Handle the normal xattr set, including replace, delete and new.
3908 *
3909 * Note: "local" indicates the real data's locality. So we can't
3910 * just its bucket locality by its length.
3911 */
3912static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3913 struct ocfs2_xattr_info *xi,
3914 struct ocfs2_xattr_search *xs,
3915 u32 name_hash,
3916 int local)
3917{
3918 struct ocfs2_xattr_entry *last, *xe;
3919 int name_len = strlen(xi->name);
3920 struct ocfs2_xattr_header *xh = xs->header;
3921 u16 count = le16_to_cpu(xh->xh_count), start;
3922 size_t blocksize = inode->i_sb->s_blocksize;
3923 char *val;
3924 size_t offs, size, new_size;
3925
3926 last = &xh->xh_entries[count];
3927 if (!xs->not_found) {
3928 xe = xs->here;
3929 offs = le16_to_cpu(xe->xe_name_offset);
3930 if (ocfs2_xattr_is_local(xe))
3931 size = OCFS2_XATTR_SIZE(name_len) +
3932 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3933 else
3934 size = OCFS2_XATTR_SIZE(name_len) +
3935 OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
3936
3937 /*
3938 * If the new value will be stored outside, xi->value has been
3939 * initalized as an empty ocfs2_xattr_value_root, and the same
3940 * goes with xi->value_len, so we can set new_size safely here.
3941 * See ocfs2_xattr_set_in_bucket.
3942 */
3943 new_size = OCFS2_XATTR_SIZE(name_len) +
3944 OCFS2_XATTR_SIZE(xi->value_len);
3945
3946 le16_add_cpu(&xh->xh_name_value_len, -size);
3947 if (xi->value) {
3948 if (new_size > size)
3949 goto set_new_name_value;
3950
3951 /* Now replace the old value with new one. */
3952 if (local)
3953 xe->xe_value_size = cpu_to_le64(xi->value_len);
3954 else
3955 xe->xe_value_size = 0;
3956
3957 val = ocfs2_xattr_bucket_get_val(inode,
3958 &xs->bucket, offs);
3959 memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3960 size - OCFS2_XATTR_SIZE(name_len));
3961 if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
3962 memcpy(val + OCFS2_XATTR_SIZE(name_len),
3963 xi->value, xi->value_len);
3964
3965 le16_add_cpu(&xh->xh_name_value_len, new_size);
3966 ocfs2_xattr_set_local(xe, local);
3967 return;
3968 } else {
3969 /*
3970 * Remove the old entry if there is more than one.
3971 * We don't remove the last entry so that we can
3972 * use it to indicate the hash value of the empty
3973 * bucket.
3974 */
3975 last -= 1;
3976 le16_add_cpu(&xh->xh_count, -1);
3977 if (xh->xh_count) {
3978 memmove(xe, xe + 1,
3979 (void *)last - (void *)xe);
3980 memset(last, 0,
3981 sizeof(struct ocfs2_xattr_entry));
3982 } else
3983 xh->xh_free_start =
3984 cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3985
3986 return;
3987 }
3988 } else {
3989 /* find a new entry for insert. */
3990 int low = 0, high = count - 1, tmp;
3991 struct ocfs2_xattr_entry *tmp_xe;
3992
3993 while (low <= high && count) {
3994 tmp = (low + high) / 2;
3995 tmp_xe = &xh->xh_entries[tmp];
3996
3997 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
3998 low = tmp + 1;
3999 else if (name_hash <
4000 le32_to_cpu(tmp_xe->xe_name_hash))
4001 high = tmp - 1;
4002 else {
4003 low = tmp;
4004 break;
4005 }
4006 }
4007
4008 xe = &xh->xh_entries[low];
4009 if (low != count)
4010 memmove(xe + 1, xe, (void *)last - (void *)xe);
4011
4012 le16_add_cpu(&xh->xh_count, 1);
4013 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4014 xe->xe_name_hash = cpu_to_le32(name_hash);
4015 xe->xe_name_len = name_len;
4016 ocfs2_xattr_set_type(xe, xi->name_index);
4017 }
4018
4019set_new_name_value:
4020 /* Insert the new name+value. */
4021 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4022
4023 /*
4024 * We must make sure that the name/value pair
4025 * exists in the same block.
4026 */
4027 offs = le16_to_cpu(xh->xh_free_start);
4028 start = offs - size;
4029
4030 if (start >> inode->i_sb->s_blocksize_bits !=
4031 (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4032 offs = offs - offs % blocksize;
4033 xh->xh_free_start = cpu_to_le16(offs);
4034 }
4035
4036 val = ocfs2_xattr_bucket_get_val(inode,
4037 &xs->bucket, offs - size);
4038 xe->xe_name_offset = cpu_to_le16(offs - size);
4039
4040 memset(val, 0, size);
4041 memcpy(val, xi->name, name_len);
4042 memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4043
4044 xe->xe_value_size = cpu_to_le64(xi->value_len);
4045 ocfs2_xattr_set_local(xe, local);
4046 xs->here = xe;
4047 le16_add_cpu(&xh->xh_free_start, -size);
4048 le16_add_cpu(&xh->xh_name_value_len, size);
4049
4050 return;
4051}
4052
4053static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4054 handle_t *handle,
4055 struct ocfs2_xattr_search *xs,
4056 struct buffer_head **bhs,
4057 u16 bh_num)
4058{
4059 int ret = 0, off, block_off;
4060 struct ocfs2_xattr_entry *xe = xs->here;
4061
4062 /*
4063 * First calculate all the blocks we should journal_access
4064 * and journal_dirty. The first block should always be touched.
4065 */
4066 ret = ocfs2_journal_dirty(handle, bhs[0]);
4067 if (ret)
4068 mlog_errno(ret);
4069
4070 /* calc the data. */
4071 off = le16_to_cpu(xe->xe_name_offset);
4072 block_off = off >> inode->i_sb->s_blocksize_bits;
4073 ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4074 if (ret)
4075 mlog_errno(ret);
4076
4077 return ret;
4078}
4079
4080/*
4081 * Set the xattr entry in the specified bucket.
4082 * The bucket is indicated by xs->bucket and it should have the enough
4083 * space for the xattr insertion.
4084 */
4085static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4086 struct ocfs2_xattr_info *xi,
4087 struct ocfs2_xattr_search *xs,
4088 u32 name_hash,
4089 int local)
4090{
4091 int i, ret;
4092 handle_t *handle = NULL;
4093 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4094 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4095
4096 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4097 (unsigned long)xi->value_len, xi->name_index,
4098 (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
4099
4100 if (!xs->bucket.bhs[1]) {
4101 ret = ocfs2_read_blocks(inode,
4102 xs->bucket.bhs[0]->b_blocknr + 1,
4103 blk_per_bucket - 1, &xs->bucket.bhs[1],
4104 OCFS2_BH_CACHED);
4105 if (ret) {
4106 mlog_errno(ret);
4107 goto out;
4108 }
4109 }
4110
4111 handle = ocfs2_start_trans(osb, blk_per_bucket);
4112 if (IS_ERR(handle)) {
4113 ret = PTR_ERR(handle);
4114 handle = NULL;
4115 mlog_errno(ret);
4116 goto out;
4117 }
4118
4119 for (i = 0; i < blk_per_bucket; i++) {
4120 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4121 OCFS2_JOURNAL_ACCESS_WRITE);
4122 if (ret < 0) {
4123 mlog_errno(ret);
4124 goto out;
4125 }
4126 }
4127
4128 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4129
4130 /*Only dirty the blocks we have touched in set xattr. */
4131 ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4132 xs->bucket.bhs, blk_per_bucket);
4133 if (ret)
4134 mlog_errno(ret);
4135out:
4136 ocfs2_commit_trans(osb, handle);
4137
4138 return ret;
4139}
4140
4141static int ocfs2_xattr_value_update_size(struct inode *inode,
4142 struct buffer_head *xe_bh,
4143 struct ocfs2_xattr_entry *xe,
4144 u64 new_size)
4145{
4146 int ret;
4147 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4148 handle_t *handle = NULL;
4149
4150 handle = ocfs2_start_trans(osb, 1);
4151 if (handle == NULL) {
4152 ret = -ENOMEM;
4153 mlog_errno(ret);
4154 goto out;
4155 }
4156
4157 ret = ocfs2_journal_access(handle, inode, xe_bh,
4158 OCFS2_JOURNAL_ACCESS_WRITE);
4159 if (ret < 0) {
4160 mlog_errno(ret);
4161 goto out_commit;
4162 }
4163
4164 xe->xe_value_size = cpu_to_le64(new_size);
4165
4166 ret = ocfs2_journal_dirty(handle, xe_bh);
4167 if (ret < 0)
4168 mlog_errno(ret);
4169
4170out_commit:
4171 ocfs2_commit_trans(osb, handle);
4172out:
4173 return ret;
4174}
4175
4176/*
4177 * Truncate the specified xe_off entry in xattr bucket.
4178 * bucket is indicated by header_bh and len is the new length.
4179 * Both the ocfs2_xattr_value_root and the entry will be updated here.
4180 *
4181 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4182 */
4183static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4184 struct buffer_head *header_bh,
4185 int xe_off,
4186 int len)
4187{
4188 int ret, offset;
4189 u64 value_blk;
4190 struct buffer_head *value_bh = NULL;
4191 struct ocfs2_xattr_value_root *xv;
4192 struct ocfs2_xattr_entry *xe;
4193 struct ocfs2_xattr_header *xh =
4194 (struct ocfs2_xattr_header *)header_bh->b_data;
4195 size_t blocksize = inode->i_sb->s_blocksize;
4196
4197 xe = &xh->xh_entries[xe_off];
4198
4199 BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4200
4201 offset = le16_to_cpu(xe->xe_name_offset) +
4202 OCFS2_XATTR_SIZE(xe->xe_name_len);
4203
4204 value_blk = offset / blocksize;
4205
4206 /* We don't allow ocfs2_xattr_value to be stored in different block. */
4207 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4208 value_blk += header_bh->b_blocknr;
4209
4210 ret = ocfs2_read_block(inode, value_blk, &value_bh);
4211 if (ret) {
4212 mlog_errno(ret);
4213 goto out;
4214 }
4215
4216 xv = (struct ocfs2_xattr_value_root *)
4217 (value_bh->b_data + offset % blocksize);
4218
4219 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4220 xe_off, (unsigned long long)header_bh->b_blocknr, len);
4221 ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4222 if (ret) {
4223 mlog_errno(ret);
4224 goto out;
4225 }
4226
4227 ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4228 if (ret) {
4229 mlog_errno(ret);
4230 goto out;
4231 }
4232
4233out:
4234 brelse(value_bh);
4235 return ret;
4236}
4237
4238static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4239 struct ocfs2_xattr_search *xs,
4240 int len)
4241{
4242 int ret, offset;
4243 struct ocfs2_xattr_entry *xe = xs->here;
4244 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4245
4246 BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4247
4248 offset = xe - xh->xh_entries;
4249 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
4250 offset, len);
4251 if (ret)
4252 mlog_errno(ret);
4253
4254 return ret;
4255}
4256
4257static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4258 struct ocfs2_xattr_search *xs,
4259 char *val,
4260 int value_len)
4261{
4262 int offset;
4263 struct ocfs2_xattr_value_root *xv;
4264 struct ocfs2_xattr_entry *xe = xs->here;
4265
4266 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4267
4268 offset = le16_to_cpu(xe->xe_name_offset) +
4269 OCFS2_XATTR_SIZE(xe->xe_name_len);
4270
4271 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4272
4273 return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4274}
4275
4276static int ocfs2_rm_xattr_cluster(struct inode *inode,
4277 struct buffer_head *root_bh,
4278 u64 blkno,
4279 u32 cpos,
4280 u32 len)
4281{
4282 int ret;
4283 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4284 struct inode *tl_inode = osb->osb_tl_inode;
4285 handle_t *handle;
4286 struct ocfs2_xattr_block *xb =
4287 (struct ocfs2_xattr_block *)root_bh->b_data;
4288 struct ocfs2_alloc_context *meta_ac = NULL;
4289 struct ocfs2_cached_dealloc_ctxt dealloc;
4290 struct ocfs2_extent_tree et;
4291
4292 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4293
4294 ocfs2_init_dealloc_ctxt(&dealloc);
4295
4296 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4297 cpos, len, (unsigned long long)blkno);
4298
4299 ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4300
4301 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4302 if (ret) {
4303 mlog_errno(ret);
4304 return ret;
4305 }
4306
4307 mutex_lock(&tl_inode->i_mutex);
4308
4309 if (ocfs2_truncate_log_needs_flush(osb)) {
4310 ret = __ocfs2_flush_truncate_log(osb);
4311 if (ret < 0) {
4312 mlog_errno(ret);
4313 goto out;
4314 }
4315 }
4316
4317 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4318 if (handle == NULL) {
4319 ret = -ENOMEM;
4320 mlog_errno(ret);
4321 goto out;
4322 }
4323
4324 ret = ocfs2_journal_access(handle, inode, root_bh,
4325 OCFS2_JOURNAL_ACCESS_WRITE);
4326 if (ret) {
4327 mlog_errno(ret);
4328 goto out_commit;
4329 }
4330
4331 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4332 &dealloc);
4333 if (ret) {
4334 mlog_errno(ret);
4335 goto out_commit;
4336 }
4337
4338 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4339
4340 ret = ocfs2_journal_dirty(handle, root_bh);
4341 if (ret) {
4342 mlog_errno(ret);
4343 goto out_commit;
4344 }
4345
4346 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4347 if (ret)
4348 mlog_errno(ret);
4349
4350out_commit:
4351 ocfs2_commit_trans(osb, handle);
4352out:
4353 ocfs2_schedule_truncate_log_flush(osb, 1);
4354
4355 mutex_unlock(&tl_inode->i_mutex);
4356
4357 if (meta_ac)
4358 ocfs2_free_alloc_context(meta_ac);
4359
4360 ocfs2_run_deallocs(osb, &dealloc);
4361
4362 return ret;
4363}
4364
4365static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4366 struct ocfs2_xattr_search *xs)
4367{
4368 handle_t *handle = NULL;
4369 struct ocfs2_xattr_header *xh = xs->bucket.xh;
4370 struct ocfs2_xattr_entry *last = &xh->xh_entries[
4371 le16_to_cpu(xh->xh_count) - 1];
4372 int ret = 0;
4373
4374 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4375 if (IS_ERR(handle)) {
4376 ret = PTR_ERR(handle);
4377 mlog_errno(ret);
4378 return;
4379 }
4380
4381 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4382 OCFS2_JOURNAL_ACCESS_WRITE);
4383 if (ret) {
4384 mlog_errno(ret);
4385 goto out_commit;
4386 }
4387
4388 /* Remove the old entry. */
4389 memmove(xs->here, xs->here + 1,
4390 (void *)last - (void *)xs->here);
4391 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4392 le16_add_cpu(&xh->xh_count, -1);
4393
4394 ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
4395 if (ret < 0)
4396 mlog_errno(ret);
4397out_commit:
4398 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4399}
4400
4401/*
4402 * Set the xattr name/value in the bucket specified in xs.
4403 *
4404 * As the new value in xi may be stored in the bucket or in an outside cluster,
4405 * we divide the whole process into 3 steps:
4406 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4407 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4408 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4409 * 4. If the clusters for the new outside value can't be allocated, we need
4410 * to free the xattr we allocated in set.
4411 */
4412static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4413 struct ocfs2_xattr_info *xi,
4414 struct ocfs2_xattr_search *xs)
4415{
4416 int ret, local = 1;
4417 size_t value_len;
4418 char *val = (char *)xi->value;
4419 struct ocfs2_xattr_entry *xe = xs->here;
4420 u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4421 strlen(xi->name));
4422
4423 if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4424 /*
4425 * We need to truncate the xattr storage first.
4426 *
4427 * If both the old and new value are stored to
4428 * outside block, we only need to truncate
4429 * the storage and then set the value outside.
4430 *
4431 * If the new value should be stored within block,
4432 * we should free all the outside block first and
4433 * the modification to the xattr block will be done
4434 * by following steps.
4435 */
4436 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4437 value_len = xi->value_len;
4438 else
4439 value_len = 0;
4440
4441 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4442 value_len);
4443 if (ret)
4444 goto out;
4445
4446 if (value_len)
4447 goto set_value_outside;
4448 }
4449
4450 value_len = xi->value_len;
4451 /* So we have to handle the inside block change now. */
4452 if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4453 /*
4454 * If the new value will be stored outside of block,
4455 * initalize a new empty value root and insert it first.
4456 */
4457 local = 0;
4458 xi->value = &def_xv;
4459 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4460 }
4461
4462 ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local);
4463 if (ret) {
4464 mlog_errno(ret);
4465 goto out;
4466 }
4467
4468 if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4469 goto out;
4470
4471 /* allocate the space now for the outside block storage. */
4472 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4473 value_len);
4474 if (ret) {
4475 mlog_errno(ret);
4476
4477 if (xs->not_found) {
4478 /*
4479 * We can't allocate enough clusters for outside
4480 * storage and we have allocated xattr already,
4481 * so need to remove it.
4482 */
4483 ocfs2_xattr_bucket_remove_xs(inode, xs);
4484 }
4485 goto out;
4486 }
4487
4488set_value_outside:
4489 ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
4490out:
4491 return ret;
4492}
4493
4494/* check whether the xattr bucket is filled up with the same hash value. */
4495static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4496 struct ocfs2_xattr_bucket *bucket)
4497{
4498 struct ocfs2_xattr_header *xh = bucket->xh;
4499
4500 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4501 xh->xh_entries[0].xe_name_hash) {
4502 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4503 "hash = %u\n",
4504 (unsigned long long)bucket->bhs[0]->b_blocknr,
4505 le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4506 return -ENOSPC;
4507 }
4508
4509 return 0;
4510}
4511
4512static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4513 struct ocfs2_xattr_info *xi,
4514 struct ocfs2_xattr_search *xs)
4515{
4516 struct ocfs2_xattr_header *xh;
4517 struct ocfs2_xattr_entry *xe;
4518 u16 count, header_size, xh_free_start;
4519 int i, free, max_free, need, old;
4520 size_t value_size = 0, name_len = strlen(xi->name);
4521 size_t blocksize = inode->i_sb->s_blocksize;
4522 int ret, allocation = 0;
4523 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4524
4525 mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4526
4527try_again:
4528 xh = xs->header;
4529 count = le16_to_cpu(xh->xh_count);
4530 xh_free_start = le16_to_cpu(xh->xh_free_start);
4531 header_size = sizeof(struct ocfs2_xattr_header) +
4532 count * sizeof(struct ocfs2_xattr_entry);
4533 max_free = OCFS2_XATTR_BUCKET_SIZE -
4534 le16_to_cpu(xh->xh_name_value_len) - header_size;
4535
4536 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4537 "of %u which exceed block size\n",
4538 (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4539 header_size);
4540
4541 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4542 value_size = OCFS2_XATTR_ROOT_SIZE;
4543 else if (xi->value)
4544 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4545
4546 if (xs->not_found)
4547 need = sizeof(struct ocfs2_xattr_entry) +
4548 OCFS2_XATTR_SIZE(name_len) + value_size;
4549 else {
4550 need = value_size + OCFS2_XATTR_SIZE(name_len);
4551
4552 /*
4553 * We only replace the old value if the new length is smaller
4554 * than the old one. Otherwise we will allocate new space in the
4555 * bucket to store it.
4556 */
4557 xe = xs->here;
4558 if (ocfs2_xattr_is_local(xe))
4559 old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4560 else
4561 old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4562
4563 if (old >= value_size)
4564 need = 0;
4565 }
4566
4567 free = xh_free_start - header_size;
4568 /*
4569 * We need to make sure the new name/value pair
4570 * can exist in the same block.
4571 */
4572 if (xh_free_start % blocksize < need)
4573 free -= xh_free_start % blocksize;
4574
4575 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4576 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4577 " %u\n", xs->not_found,
4578 (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4579 free, need, max_free, le16_to_cpu(xh->xh_free_start),
4580 le16_to_cpu(xh->xh_name_value_len));
4581
4582 if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4583 if (need <= max_free &&
4584 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4585 /*
4586 * We can create the space by defragment. Since only the
4587 * name/value will be moved, the xe shouldn't be changed
4588 * in xs.
4589 */
4590 ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
4591 if (ret) {
4592 mlog_errno(ret);
4593 goto out;
4594 }
4595
4596 xh_free_start = le16_to_cpu(xh->xh_free_start);
4597 free = xh_free_start - header_size;
4598 if (xh_free_start % blocksize < need)
4599 free -= xh_free_start % blocksize;
4600
4601 if (free >= need)
4602 goto xattr_set;
4603
4604 mlog(0, "Can't get enough space for xattr insert by "
4605 "defragment. Need %u bytes, but we have %d, so "
4606 "allocate new bucket for it.\n", need, free);
4607 }
4608
4609 /*
4610 * We have to add new buckets or clusters and one
4611 * allocation should leave us enough space for insert.
4612 */
4613 BUG_ON(allocation);
4614
4615 /*
4616 * We do not allow for overlapping ranges between buckets. And
4617 * the maximum number of collisions we will allow for then is
4618 * one bucket's worth, so check it here whether we need to
4619 * add a new bucket for the insert.
4620 */
4621 ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
4622 if (ret) {
4623 mlog_errno(ret);
4624 goto out;
4625 }
4626
4627 ret = ocfs2_add_new_xattr_bucket(inode,
4628 xs->xattr_bh,
4629 xs->bucket.bhs[0]);
4630 if (ret) {
4631 mlog_errno(ret);
4632 goto out;
4633 }
4634
4635 for (i = 0; i < blk_per_bucket; i++)
4636 brelse(xs->bucket.bhs[i]);
4637
4638 memset(&xs->bucket, 0, sizeof(xs->bucket));
4639
4640 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4641 xi->name_index,
4642 xi->name, xs);
4643 if (ret && ret != -ENODATA)
4644 goto out;
4645 xs->not_found = ret;
4646 allocation = 1;
4647 goto try_again;
4648 }
4649
4650xattr_set:
4651 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
4652out:
4653 mlog_exit(ret);
4654 return ret;
4655}
4656
4657static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4658 struct ocfs2_xattr_bucket *bucket,
4659 void *para)
4660{
4661 int ret = 0;
4662 struct ocfs2_xattr_header *xh = bucket->xh;
4663 u16 i;
4664 struct ocfs2_xattr_entry *xe;
4665
4666 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4667 xe = &xh->xh_entries[i];
4668 if (ocfs2_xattr_is_local(xe))
4669 continue;
4670
4671 ret = ocfs2_xattr_bucket_value_truncate(inode,
4672 bucket->bhs[0],
4673 i, 0);
4674 if (ret) {
4675 mlog_errno(ret);
4676 break;
4677 }
4678 }
4679
4680 return ret;
4681}
4682
4683static int ocfs2_delete_xattr_index_block(struct inode *inode,
4684 struct buffer_head *xb_bh)
4685{
4686 struct ocfs2_xattr_block *xb =
4687 (struct ocfs2_xattr_block *)xb_bh->b_data;
4688 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4689 int ret = 0;
4690 u32 name_hash = UINT_MAX, e_cpos, num_clusters;
4691 u64 p_blkno;
4692
4693 if (le16_to_cpu(el->l_next_free_rec) == 0)
4694 return 0;
4695
4696 while (name_hash > 0) {
4697 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4698 &e_cpos, &num_clusters, el);
4699 if (ret) {
4700 mlog_errno(ret);
4701 goto out;
4702 }
4703
4704 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
4705 ocfs2_delete_xattr_in_bucket,
4706 NULL);
4707 if (ret) {
4708 mlog_errno(ret);
4709 goto out;
4710 }
4711
4712 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
4713 p_blkno, e_cpos, num_clusters);
4714 if (ret) {
4715 mlog_errno(ret);
4716 break;
4717 }
4718
4719 if (e_cpos == 0)
4720 break;
4721
4722 name_hash = e_cpos - 1;
4723 }
4724
4725out:
4726 return ret;
4727}
4728
4729/*
4730 * 'trusted' attributes support
4731 */
4732
4733#define XATTR_TRUSTED_PREFIX "trusted."
4734
4735static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
4736 size_t list_size, const char *name,
4737 size_t name_len)
4738{
4739 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1;
4740 const size_t total_len = prefix_len + name_len + 1;
4741
4742 if (list && total_len <= list_size) {
4743 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
4744 memcpy(list + prefix_len, name, name_len);
4745 list[prefix_len + name_len] = '\0';
4746 }
4747 return total_len;
4748}
4749
4750static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
4751 void *buffer, size_t size)
4752{
4753 if (strcmp(name, "") == 0)
4754 return -EINVAL;
4755 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
4756 buffer, size);
4757}
4758
4759static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
4760 const void *value, size_t size, int flags)
4761{
4762 if (strcmp(name, "") == 0)
4763 return -EINVAL;
4764
4765 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
4766 size, flags);
4767}
4768
4769struct xattr_handler ocfs2_xattr_trusted_handler = {
4770 .prefix = XATTR_TRUSTED_PREFIX,
4771 .list = ocfs2_xattr_trusted_list,
4772 .get = ocfs2_xattr_trusted_get,
4773 .set = ocfs2_xattr_trusted_set,
4774};
4775
4776
4777/*
4778 * 'user' attributes support
4779 */
4780
4781#define XATTR_USER_PREFIX "user."
4782
4783static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
4784 size_t list_size, const char *name,
4785 size_t name_len)
4786{
4787 const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1;
4788 const size_t total_len = prefix_len + name_len + 1;
4789 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4790
4791 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4792 return 0;
4793
4794 if (list && total_len <= list_size) {
4795 memcpy(list, XATTR_USER_PREFIX, prefix_len);
4796 memcpy(list + prefix_len, name, name_len);
4797 list[prefix_len + name_len] = '\0';
4798 }
4799 return total_len;
4800}
4801
4802static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
4803 void *buffer, size_t size)
4804{
4805 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4806
4807 if (strcmp(name, "") == 0)
4808 return -EINVAL;
4809 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4810 return -EOPNOTSUPP;
4811 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
4812 buffer, size);
4813}
4814
4815static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
4816 const void *value, size_t size, int flags)
4817{
4818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4819
4820 if (strcmp(name, "") == 0)
4821 return -EINVAL;
4822 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4823 return -EOPNOTSUPP;
4824
4825 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
4826 size, flags);
4827}
4828
4829struct xattr_handler ocfs2_xattr_user_handler = {
4830 .prefix = XATTR_USER_PREFIX,
4831 .list = ocfs2_xattr_user_list,
4832 .get = ocfs2_xattr_user_get,
4833 .set = ocfs2_xattr_user_set,
4834};
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
new file mode 100644
index 000000000000..c25c7c62a059
--- /dev/null
+++ b/fs/ocfs2/xattr.h
@@ -0,0 +1,68 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * xattr.h
5 *
6 * Function prototypes
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA.
24 */
25
26#ifndef OCFS2_XATTR_H
27#define OCFS2_XATTR_H
28
29#include <linux/init.h>
30#include <linux/xattr.h>
31
32enum ocfs2_xattr_type {
33 OCFS2_XATTR_INDEX_USER = 1,
34 OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS,
35 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
36 OCFS2_XATTR_INDEX_TRUSTED,
37 OCFS2_XATTR_INDEX_SECURITY,
38 OCFS2_XATTR_MAX
39};
40
41extern struct xattr_handler ocfs2_xattr_user_handler;
42extern struct xattr_handler ocfs2_xattr_trusted_handler;
43
44extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
45extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t);
46extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
47 size_t, int);
48extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh);
49extern struct xattr_handler *ocfs2_xattr_handlers[];
50
51static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
52{
53 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
54}
55
56static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
57{
58 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
59}
60
61static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
62{
63 u16 len = sb->s_blocksize -
64 offsetof(struct ocfs2_xattr_header, xh_entries);
65
66 return len / sizeof(struct ocfs2_xattr_entry);
67}
68#endif /* OCFS2_XATTR_H */