aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig34
-rw-r--r--fs/ocfs2/Makefile3
-rw-r--r--fs/ocfs2/alloc.c913
-rw-r--r--fs/ocfs2/alloc.h86
-rw-r--r--fs/ocfs2/aops.c60
-rw-r--r--fs/ocfs2/buffer_head_io.c134
-rw-r--r--fs/ocfs2/buffer_head_io.h23
-rw-r--r--fs/ocfs2/cluster/masklog.c1
-rw-r--r--fs/ocfs2/cluster/masklog.h1
-rw-r--r--fs/ocfs2/dir.c109
-rw-r--r--fs/ocfs2/dlmglue.c9
-rw-r--r--fs/ocfs2/extent_map.c70
-rw-r--r--fs/ocfs2/extent_map.h4
-rw-r--r--fs/ocfs2/file.c333
-rw-r--r--fs/ocfs2/file.h32
-rw-r--r--fs/ocfs2/inode.c87
-rw-r--r--fs/ocfs2/inode.h6
-rw-r--r--fs/ocfs2/ioctl.c3
-rw-r--r--fs/ocfs2/journal.c89
-rw-r--r--fs/ocfs2/journal.h52
-rw-r--r--fs/ocfs2/localalloc.c384
-rw-r--r--fs/ocfs2/localalloc.h4
-rw-r--r--fs/ocfs2/locks.c15
-rw-r--r--fs/ocfs2/locks.h1
-rw-r--r--fs/ocfs2/namei.c101
-rw-r--r--fs/ocfs2/ocfs2.h56
-rw-r--r--fs/ocfs2/ocfs2_fs.h220
-rw-r--r--fs/ocfs2/ocfs2_jbd_compat.h82
-rw-r--r--fs/ocfs2/resize.c11
-rw-r--r--fs/ocfs2/slot_map.c7
-rw-r--r--fs/ocfs2/stack_user.c33
-rw-r--r--fs/ocfs2/stackglue.c20
-rw-r--r--fs/ocfs2/stackglue.h19
-rw-r--r--fs/ocfs2/suballoc.c248
-rw-r--r--fs/ocfs2/suballoc.h26
-rw-r--r--fs/ocfs2/super.c62
-rw-r--r--fs/ocfs2/symlink.c18
-rw-r--r--fs/ocfs2/uptodate.c38
-rw-r--r--fs/ocfs2/uptodate.h3
-rw-r--r--fs/ocfs2/xattr.c4834
-rw-r--r--fs/ocfs2/xattr.h68
41 files changed, 7353 insertions, 946 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 501f012e0c6f..9e9d70c02a07 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -220,17 +220,16 @@ config JBD
220 tristate 220 tristate
221 help 221 help
222 This is a generic journalling layer for block devices. It is 222 This is a generic journalling layer for block devices. It is
223 currently used by the ext3 and OCFS2 file systems, but it could 223 currently used by the ext3 file system, but it could also be
224 also be used to add journal support to other file systems or block 224 used to add journal support to other file systems or block
225 devices such as RAID or LVM. 225 devices such as RAID or LVM.
226 226
227 If you are using the ext3 or OCFS2 file systems, you need to 227 If you are using the ext3 file system, you need to say Y here.
228 say Y here. If you are not using ext3 OCFS2 then you will probably 228 If you are not using ext3 then you will probably want to say N.
229 want to say N.
230 229
231 To compile this device as a module, choose M here: the module will be 230 To compile this device as a module, choose M here: the module will be
232 called jbd. If you are compiling ext3 or OCFS2 into the kernel, 231 called jbd. If you are compiling ext3 into the kernel, you
233 you cannot compile this code as a module. 232 cannot compile this code as a module.
234 233
235config JBD_DEBUG 234config JBD_DEBUG
236 bool "JBD (ext3) debugging support" 235 bool "JBD (ext3) debugging support"
@@ -254,15 +253,16 @@ config JBD2
254 help 253 help
255 This is a generic journaling layer for block devices that support 254 This is a generic journaling layer for block devices that support
256 both 32-bit and 64-bit block numbers. It is currently used by 255 both 32-bit and 64-bit block numbers. It is currently used by
257 the ext4 filesystem, but it could also be used to add 256 the ext4 and OCFS2 filesystems, but it could also be used to add
258 journal support to other file systems or block devices such 257 journal support to other file systems or block devices such
259 as RAID or LVM. 258 as RAID or LVM.
260 259
261 If you are using ext4, you need to say Y here. If you are not 260 If you are using ext4 or OCFS2, you need to say Y here.
262 using ext4 then you will probably want to say N. 261 If you are not using ext4 or OCFS2 then you will
262 probably want to say N.
263 263
264 To compile this device as a module, choose M here. The module will be 264 To compile this device as a module, choose M here. The module will be
265 called jbd2. If you are compiling ext4 into the kernel, 265 called jbd2. If you are compiling ext4 or OCFS2 into the kernel,
266 you cannot compile this code as a module. 266 you cannot compile this code as a module.
267 267
268config JBD2_DEBUG 268config JBD2_DEBUG
@@ -448,7 +448,7 @@ config OCFS2_FS
448 tristate "OCFS2 file system support" 448 tristate "OCFS2 file system support"
449 depends on NET && SYSFS 449 depends on NET && SYSFS
450 select CONFIGFS_FS 450 select CONFIGFS_FS
451 select JBD 451 select JBD2
452 select CRC32 452 select CRC32
453 help 453 help
454 OCFS2 is a general purpose extent based shared disk cluster file 454 OCFS2 is a general purpose extent based shared disk cluster file
@@ -519,6 +519,16 @@ config OCFS2_DEBUG_FS
519 this option for debugging only as it is likely to decrease 519 this option for debugging only as it is likely to decrease
520 performance of the filesystem. 520 performance of the filesystem.
521 521
522config OCFS2_COMPAT_JBD
523 bool "Use JBD for compatibility"
524 depends on OCFS2_FS
525 default n
526 select JBD
527 help
528 The ocfs2 filesystem now uses JBD2 for its journalling. JBD2
529 is backwards compatible with JBD. It is safe to say N here.
530 However, if you really want to use the original JBD, say Y here.
531
522endif # BLOCK 532endif # BLOCK
523 533
524config DNOTIFY 534config DNOTIFY
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index f6956de56fdb..589dcdfdfe3c 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -34,7 +34,8 @@ ocfs2-objs := \
34 symlink.o \ 34 symlink.o \
35 sysfile.o \ 35 sysfile.o \
36 uptodate.o \ 36 uptodate.o \
37 ver.o 37 ver.o \
38 xattr.o
38 39
39ocfs2_stackglue-objs := stackglue.o 40ocfs2_stackglue-objs := stackglue.o
40ocfs2_stack_o2cb-objs := stack_o2cb.o 41ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 29ff57ec5d1f..0cc2deb9394c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,6 +49,340 @@
49 49
50#include "buffer_head_io.h" 50#include "buffer_head_io.h"
51 51
52
53/*
54 * Operations for a specific extent tree type.
55 *
56 * To implement an on-disk btree (extent tree) type in ocfs2, add
57 * an ocfs2_extent_tree_operations structure and the matching
58 * ocfs2_init_<thingy>_extent_tree() function. That's pretty much it
59 * for the allocation portion of the extent tree.
60 */
61struct ocfs2_extent_tree_operations {
62 /*
63 * last_eb_blk is the block number of the right most leaf extent
64 * block. Most on-disk structures containing an extent tree store
65 * this value for fast access. The ->eo_set_last_eb_blk() and
66 * ->eo_get_last_eb_blk() operations access this value. They are
67 * both required.
68 */
69 void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et,
70 u64 blkno);
71 u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et);
72
73 /*
74 * The on-disk structure usually keeps track of how many total
75 * clusters are stored in this extent tree. This function updates
76 * that value. new_clusters is the delta, and must be
77 * added to the total. Required.
78 */
79 void (*eo_update_clusters)(struct inode *inode,
80 struct ocfs2_extent_tree *et,
81 u32 new_clusters);
82
83 /*
84 * If ->eo_insert_check() exists, it is called before rec is
85 * inserted into the extent tree. It is optional.
86 */
87 int (*eo_insert_check)(struct inode *inode,
88 struct ocfs2_extent_tree *et,
89 struct ocfs2_extent_rec *rec);
90 int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et);
91
92 /*
93 * --------------------------------------------------------------
94 * The remaining are internal to ocfs2_extent_tree and don't have
95 * accessor functions
96 */
97
98 /*
99 * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el.
100 * It is required.
101 */
102 void (*eo_fill_root_el)(struct ocfs2_extent_tree *et);
103
104 /*
105 * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if
106 * it exists. If it does not, et->et_max_leaf_clusters is set
107 * to 0 (unlimited). Optional.
108 */
109 void (*eo_fill_max_leaf_clusters)(struct inode *inode,
110 struct ocfs2_extent_tree *et);
111};
112
113
114/*
115 * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check
116 * in the methods.
117 */
118static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
119static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
120 u64 blkno);
121static void ocfs2_dinode_update_clusters(struct inode *inode,
122 struct ocfs2_extent_tree *et,
123 u32 clusters);
124static int ocfs2_dinode_insert_check(struct inode *inode,
125 struct ocfs2_extent_tree *et,
126 struct ocfs2_extent_rec *rec);
127static int ocfs2_dinode_sanity_check(struct inode *inode,
128 struct ocfs2_extent_tree *et);
129static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
130static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
131 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
132 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
133 .eo_update_clusters = ocfs2_dinode_update_clusters,
134 .eo_insert_check = ocfs2_dinode_insert_check,
135 .eo_sanity_check = ocfs2_dinode_sanity_check,
136 .eo_fill_root_el = ocfs2_dinode_fill_root_el,
137};
138
139static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
140 u64 blkno)
141{
142 struct ocfs2_dinode *di = et->et_object;
143
144 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
145 di->i_last_eb_blk = cpu_to_le64(blkno);
146}
147
148static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
149{
150 struct ocfs2_dinode *di = et->et_object;
151
152 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
153 return le64_to_cpu(di->i_last_eb_blk);
154}
155
156static void ocfs2_dinode_update_clusters(struct inode *inode,
157 struct ocfs2_extent_tree *et,
158 u32 clusters)
159{
160 struct ocfs2_dinode *di = et->et_object;
161
162 le32_add_cpu(&di->i_clusters, clusters);
163 spin_lock(&OCFS2_I(inode)->ip_lock);
164 OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
165 spin_unlock(&OCFS2_I(inode)->ip_lock);
166}
167
168static int ocfs2_dinode_insert_check(struct inode *inode,
169 struct ocfs2_extent_tree *et,
170 struct ocfs2_extent_rec *rec)
171{
172 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
173
174 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
175 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
176 (OCFS2_I(inode)->ip_clusters != rec->e_cpos),
177 "Device %s, asking for sparse allocation: inode %llu, "
178 "cpos %u, clusters %u\n",
179 osb->dev_str,
180 (unsigned long long)OCFS2_I(inode)->ip_blkno,
181 rec->e_cpos,
182 OCFS2_I(inode)->ip_clusters);
183
184 return 0;
185}
186
187static int ocfs2_dinode_sanity_check(struct inode *inode,
188 struct ocfs2_extent_tree *et)
189{
190 int ret = 0;
191 struct ocfs2_dinode *di;
192
193 BUG_ON(et->et_ops != &ocfs2_dinode_et_ops);
194
195 di = et->et_object;
196 if (!OCFS2_IS_VALID_DINODE(di)) {
197 ret = -EIO;
198 ocfs2_error(inode->i_sb,
199 "Inode %llu has invalid path root",
200 (unsigned long long)OCFS2_I(inode)->ip_blkno);
201 }
202
203 return ret;
204}
205
206static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et)
207{
208 struct ocfs2_dinode *di = et->et_object;
209
210 et->et_root_el = &di->id2.i_list;
211}
212
213
214static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et)
215{
216 struct ocfs2_xattr_value_root *xv = et->et_object;
217
218 et->et_root_el = &xv->xr_list;
219}
220
221static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et,
222 u64 blkno)
223{
224 struct ocfs2_xattr_value_root *xv =
225 (struct ocfs2_xattr_value_root *)et->et_object;
226
227 xv->xr_last_eb_blk = cpu_to_le64(blkno);
228}
229
230static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
231{
232 struct ocfs2_xattr_value_root *xv =
233 (struct ocfs2_xattr_value_root *) et->et_object;
234
235 return le64_to_cpu(xv->xr_last_eb_blk);
236}
237
238static void ocfs2_xattr_value_update_clusters(struct inode *inode,
239 struct ocfs2_extent_tree *et,
240 u32 clusters)
241{
242 struct ocfs2_xattr_value_root *xv =
243 (struct ocfs2_xattr_value_root *)et->et_object;
244
245 le32_add_cpu(&xv->xr_clusters, clusters);
246}
247
248static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = {
249 .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk,
250 .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk,
251 .eo_update_clusters = ocfs2_xattr_value_update_clusters,
252 .eo_fill_root_el = ocfs2_xattr_value_fill_root_el,
253};
254
255static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
256{
257 struct ocfs2_xattr_block *xb = et->et_object;
258
259 et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
260}
261
262static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode,
263 struct ocfs2_extent_tree *et)
264{
265 et->et_max_leaf_clusters =
266 ocfs2_clusters_for_bytes(inode->i_sb,
267 OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
268}
269
270static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
271 u64 blkno)
272{
273 struct ocfs2_xattr_block *xb = et->et_object;
274 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
275
276 xt->xt_last_eb_blk = cpu_to_le64(blkno);
277}
278
279static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
280{
281 struct ocfs2_xattr_block *xb = et->et_object;
282 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
283
284 return le64_to_cpu(xt->xt_last_eb_blk);
285}
286
287static void ocfs2_xattr_tree_update_clusters(struct inode *inode,
288 struct ocfs2_extent_tree *et,
289 u32 clusters)
290{
291 struct ocfs2_xattr_block *xb = et->et_object;
292
293 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters);
294}
295
296static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = {
297 .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk,
298 .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk,
299 .eo_update_clusters = ocfs2_xattr_tree_update_clusters,
300 .eo_fill_root_el = ocfs2_xattr_tree_fill_root_el,
301 .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters,
302};
303
304static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
305 struct inode *inode,
306 struct buffer_head *bh,
307 void *obj,
308 struct ocfs2_extent_tree_operations *ops)
309{
310 et->et_ops = ops;
311 et->et_root_bh = bh;
312 if (!obj)
313 obj = (void *)bh->b_data;
314 et->et_object = obj;
315
316 et->et_ops->eo_fill_root_el(et);
317 if (!et->et_ops->eo_fill_max_leaf_clusters)
318 et->et_max_leaf_clusters = 0;
319 else
320 et->et_ops->eo_fill_max_leaf_clusters(inode, et);
321}
322
323void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
324 struct inode *inode,
325 struct buffer_head *bh)
326{
327 __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops);
328}
329
330void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
331 struct inode *inode,
332 struct buffer_head *bh)
333{
334 __ocfs2_init_extent_tree(et, inode, bh, NULL,
335 &ocfs2_xattr_tree_et_ops);
336}
337
338void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
339 struct inode *inode,
340 struct buffer_head *bh,
341 struct ocfs2_xattr_value_root *xv)
342{
343 __ocfs2_init_extent_tree(et, inode, bh, xv,
344 &ocfs2_xattr_value_et_ops);
345}
346
347static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
348 u64 new_last_eb_blk)
349{
350 et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk);
351}
352
353static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
354{
355 return et->et_ops->eo_get_last_eb_blk(et);
356}
357
358static inline void ocfs2_et_update_clusters(struct inode *inode,
359 struct ocfs2_extent_tree *et,
360 u32 clusters)
361{
362 et->et_ops->eo_update_clusters(inode, et, clusters);
363}
364
365static inline int ocfs2_et_insert_check(struct inode *inode,
366 struct ocfs2_extent_tree *et,
367 struct ocfs2_extent_rec *rec)
368{
369 int ret = 0;
370
371 if (et->et_ops->eo_insert_check)
372 ret = et->et_ops->eo_insert_check(inode, et, rec);
373 return ret;
374}
375
376static inline int ocfs2_et_sanity_check(struct inode *inode,
377 struct ocfs2_extent_tree *et)
378{
379 int ret = 0;
380
381 if (et->et_ops->eo_sanity_check)
382 ret = et->et_ops->eo_sanity_check(inode, et);
383 return ret;
384}
385
52static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 386static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
53static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 387static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
54 struct ocfs2_extent_block *eb); 388 struct ocfs2_extent_block *eb);
@@ -205,17 +539,6 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
205} 539}
206 540
207/* 541/*
208 * Allocate and initialize a new path based on a disk inode tree.
209 */
210static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh)
211{
212 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
213 struct ocfs2_extent_list *el = &di->id2.i_list;
214
215 return ocfs2_new_path(di_bh, el);
216}
217
218/*
219 * Convenience function to journal all components in a path. 542 * Convenience function to journal all components in a path.
220 */ 543 */
221static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, 544static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
@@ -368,39 +691,35 @@ struct ocfs2_merge_ctxt {
368 */ 691 */
369int ocfs2_num_free_extents(struct ocfs2_super *osb, 692int ocfs2_num_free_extents(struct ocfs2_super *osb,
370 struct inode *inode, 693 struct inode *inode,
371 struct ocfs2_dinode *fe) 694 struct ocfs2_extent_tree *et)
372{ 695{
373 int retval; 696 int retval;
374 struct ocfs2_extent_list *el; 697 struct ocfs2_extent_list *el = NULL;
375 struct ocfs2_extent_block *eb; 698 struct ocfs2_extent_block *eb;
376 struct buffer_head *eb_bh = NULL; 699 struct buffer_head *eb_bh = NULL;
700 u64 last_eb_blk = 0;
377 701
378 mlog_entry_void(); 702 mlog_entry_void();
379 703
380 if (!OCFS2_IS_VALID_DINODE(fe)) { 704 el = et->et_root_el;
381 OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); 705 last_eb_blk = ocfs2_et_get_last_eb_blk(et);
382 retval = -EIO;
383 goto bail;
384 }
385 706
386 if (fe->i_last_eb_blk) { 707 if (last_eb_blk) {
387 retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 708 retval = ocfs2_read_block(inode, last_eb_blk,
388 &eb_bh, OCFS2_BH_CACHED, inode); 709 &eb_bh);
389 if (retval < 0) { 710 if (retval < 0) {
390 mlog_errno(retval); 711 mlog_errno(retval);
391 goto bail; 712 goto bail;
392 } 713 }
393 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 714 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
394 el = &eb->h_list; 715 el = &eb->h_list;
395 } else 716 }
396 el = &fe->id2.i_list;
397 717
398 BUG_ON(el->l_tree_depth != 0); 718 BUG_ON(el->l_tree_depth != 0);
399 719
400 retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); 720 retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec);
401bail: 721bail:
402 if (eb_bh) 722 brelse(eb_bh);
403 brelse(eb_bh);
404 723
405 mlog_exit(retval); 724 mlog_exit(retval);
406 return retval; 725 return retval;
@@ -486,8 +805,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
486bail: 805bail:
487 if (status < 0) { 806 if (status < 0) {
488 for(i = 0; i < wanted; i++) { 807 for(i = 0; i < wanted; i++) {
489 if (bhs[i]) 808 brelse(bhs[i]);
490 brelse(bhs[i]);
491 bhs[i] = NULL; 809 bhs[i] = NULL;
492 } 810 }
493 } 811 }
@@ -531,7 +849,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
531static int ocfs2_add_branch(struct ocfs2_super *osb, 849static int ocfs2_add_branch(struct ocfs2_super *osb,
532 handle_t *handle, 850 handle_t *handle,
533 struct inode *inode, 851 struct inode *inode,
534 struct buffer_head *fe_bh, 852 struct ocfs2_extent_tree *et,
535 struct buffer_head *eb_bh, 853 struct buffer_head *eb_bh,
536 struct buffer_head **last_eb_bh, 854 struct buffer_head **last_eb_bh,
537 struct ocfs2_alloc_context *meta_ac) 855 struct ocfs2_alloc_context *meta_ac)
@@ -540,7 +858,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
540 u64 next_blkno, new_last_eb_blk; 858 u64 next_blkno, new_last_eb_blk;
541 struct buffer_head *bh; 859 struct buffer_head *bh;
542 struct buffer_head **new_eb_bhs = NULL; 860 struct buffer_head **new_eb_bhs = NULL;
543 struct ocfs2_dinode *fe;
544 struct ocfs2_extent_block *eb; 861 struct ocfs2_extent_block *eb;
545 struct ocfs2_extent_list *eb_el; 862 struct ocfs2_extent_list *eb_el;
546 struct ocfs2_extent_list *el; 863 struct ocfs2_extent_list *el;
@@ -550,13 +867,11 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
550 867
551 BUG_ON(!last_eb_bh || !*last_eb_bh); 868 BUG_ON(!last_eb_bh || !*last_eb_bh);
552 869
553 fe = (struct ocfs2_dinode *) fe_bh->b_data;
554
555 if (eb_bh) { 870 if (eb_bh) {
556 eb = (struct ocfs2_extent_block *) eb_bh->b_data; 871 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
557 el = &eb->h_list; 872 el = &eb->h_list;
558 } else 873 } else
559 el = &fe->id2.i_list; 874 el = et->et_root_el;
560 875
561 /* we never add a branch to a leaf. */ 876 /* we never add a branch to a leaf. */
562 BUG_ON(!el->l_tree_depth); 877 BUG_ON(!el->l_tree_depth);
@@ -646,7 +961,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
646 mlog_errno(status); 961 mlog_errno(status);
647 goto bail; 962 goto bail;
648 } 963 }
649 status = ocfs2_journal_access(handle, inode, fe_bh, 964 status = ocfs2_journal_access(handle, inode, et->et_root_bh,
650 OCFS2_JOURNAL_ACCESS_WRITE); 965 OCFS2_JOURNAL_ACCESS_WRITE);
651 if (status < 0) { 966 if (status < 0) {
652 mlog_errno(status); 967 mlog_errno(status);
@@ -662,7 +977,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
662 } 977 }
663 978
664 /* Link the new branch into the rest of the tree (el will 979 /* Link the new branch into the rest of the tree (el will
665 * either be on the fe, or the extent block passed in. */ 980 * either be on the root_bh, or the extent block passed in. */
666 i = le16_to_cpu(el->l_next_free_rec); 981 i = le16_to_cpu(el->l_next_free_rec);
667 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); 982 el->l_recs[i].e_blkno = cpu_to_le64(next_blkno);
668 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); 983 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
@@ -671,7 +986,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
671 986
672 /* fe needs a new last extent block pointer, as does the 987 /* fe needs a new last extent block pointer, as does the
673 * next_leaf on the previously last-extent-block. */ 988 * next_leaf on the previously last-extent-block. */
674 fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); 989 ocfs2_et_set_last_eb_blk(et, new_last_eb_blk);
675 990
676 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; 991 eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data;
677 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); 992 eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk);
@@ -679,7 +994,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
679 status = ocfs2_journal_dirty(handle, *last_eb_bh); 994 status = ocfs2_journal_dirty(handle, *last_eb_bh);
680 if (status < 0) 995 if (status < 0)
681 mlog_errno(status); 996 mlog_errno(status);
682 status = ocfs2_journal_dirty(handle, fe_bh); 997 status = ocfs2_journal_dirty(handle, et->et_root_bh);
683 if (status < 0) 998 if (status < 0)
684 mlog_errno(status); 999 mlog_errno(status);
685 if (eb_bh) { 1000 if (eb_bh) {
@@ -700,8 +1015,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
700bail: 1015bail:
701 if (new_eb_bhs) { 1016 if (new_eb_bhs) {
702 for (i = 0; i < new_blocks; i++) 1017 for (i = 0; i < new_blocks; i++)
703 if (new_eb_bhs[i]) 1018 brelse(new_eb_bhs[i]);
704 brelse(new_eb_bhs[i]);
705 kfree(new_eb_bhs); 1019 kfree(new_eb_bhs);
706 } 1020 }
707 1021
@@ -717,16 +1031,15 @@ bail:
717static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 1031static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
718 handle_t *handle, 1032 handle_t *handle,
719 struct inode *inode, 1033 struct inode *inode,
720 struct buffer_head *fe_bh, 1034 struct ocfs2_extent_tree *et,
721 struct ocfs2_alloc_context *meta_ac, 1035 struct ocfs2_alloc_context *meta_ac,
722 struct buffer_head **ret_new_eb_bh) 1036 struct buffer_head **ret_new_eb_bh)
723{ 1037{
724 int status, i; 1038 int status, i;
725 u32 new_clusters; 1039 u32 new_clusters;
726 struct buffer_head *new_eb_bh = NULL; 1040 struct buffer_head *new_eb_bh = NULL;
727 struct ocfs2_dinode *fe;
728 struct ocfs2_extent_block *eb; 1041 struct ocfs2_extent_block *eb;
729 struct ocfs2_extent_list *fe_el; 1042 struct ocfs2_extent_list *root_el;
730 struct ocfs2_extent_list *eb_el; 1043 struct ocfs2_extent_list *eb_el;
731 1044
732 mlog_entry_void(); 1045 mlog_entry_void();
@@ -746,8 +1059,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
746 } 1059 }
747 1060
748 eb_el = &eb->h_list; 1061 eb_el = &eb->h_list;
749 fe = (struct ocfs2_dinode *) fe_bh->b_data; 1062 root_el = et->et_root_el;
750 fe_el = &fe->id2.i_list;
751 1063
752 status = ocfs2_journal_access(handle, inode, new_eb_bh, 1064 status = ocfs2_journal_access(handle, inode, new_eb_bh,
753 OCFS2_JOURNAL_ACCESS_CREATE); 1065 OCFS2_JOURNAL_ACCESS_CREATE);
@@ -756,11 +1068,11 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
756 goto bail; 1068 goto bail;
757 } 1069 }
758 1070
759 /* copy the fe data into the new extent block */ 1071 /* copy the root extent list data into the new extent block */
760 eb_el->l_tree_depth = fe_el->l_tree_depth; 1072 eb_el->l_tree_depth = root_el->l_tree_depth;
761 eb_el->l_next_free_rec = fe_el->l_next_free_rec; 1073 eb_el->l_next_free_rec = root_el->l_next_free_rec;
762 for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) 1074 for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++)
763 eb_el->l_recs[i] = fe_el->l_recs[i]; 1075 eb_el->l_recs[i] = root_el->l_recs[i];
764 1076
765 status = ocfs2_journal_dirty(handle, new_eb_bh); 1077 status = ocfs2_journal_dirty(handle, new_eb_bh);
766 if (status < 0) { 1078 if (status < 0) {
@@ -768,7 +1080,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
768 goto bail; 1080 goto bail;
769 } 1081 }
770 1082
771 status = ocfs2_journal_access(handle, inode, fe_bh, 1083 status = ocfs2_journal_access(handle, inode, et->et_root_bh,
772 OCFS2_JOURNAL_ACCESS_WRITE); 1084 OCFS2_JOURNAL_ACCESS_WRITE);
773 if (status < 0) { 1085 if (status < 0) {
774 mlog_errno(status); 1086 mlog_errno(status);
@@ -777,21 +1089,21 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
777 1089
778 new_clusters = ocfs2_sum_rightmost_rec(eb_el); 1090 new_clusters = ocfs2_sum_rightmost_rec(eb_el);
779 1091
780 /* update fe now */ 1092 /* update root_bh now */
781 le16_add_cpu(&fe_el->l_tree_depth, 1); 1093 le16_add_cpu(&root_el->l_tree_depth, 1);
782 fe_el->l_recs[0].e_cpos = 0; 1094 root_el->l_recs[0].e_cpos = 0;
783 fe_el->l_recs[0].e_blkno = eb->h_blkno; 1095 root_el->l_recs[0].e_blkno = eb->h_blkno;
784 fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); 1096 root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters);
785 for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) 1097 for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++)
786 memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); 1098 memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec));
787 fe_el->l_next_free_rec = cpu_to_le16(1); 1099 root_el->l_next_free_rec = cpu_to_le16(1);
788 1100
789 /* If this is our 1st tree depth shift, then last_eb_blk 1101 /* If this is our 1st tree depth shift, then last_eb_blk
790 * becomes the allocated extent block */ 1102 * becomes the allocated extent block */
791 if (fe_el->l_tree_depth == cpu_to_le16(1)) 1103 if (root_el->l_tree_depth == cpu_to_le16(1))
792 fe->i_last_eb_blk = eb->h_blkno; 1104 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
793 1105
794 status = ocfs2_journal_dirty(handle, fe_bh); 1106 status = ocfs2_journal_dirty(handle, et->et_root_bh);
795 if (status < 0) { 1107 if (status < 0) {
796 mlog_errno(status); 1108 mlog_errno(status);
797 goto bail; 1109 goto bail;
@@ -801,8 +1113,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
801 new_eb_bh = NULL; 1113 new_eb_bh = NULL;
802 status = 0; 1114 status = 0;
803bail: 1115bail:
804 if (new_eb_bh) 1116 brelse(new_eb_bh);
805 brelse(new_eb_bh);
806 1117
807 mlog_exit(status); 1118 mlog_exit(status);
808 return status; 1119 return status;
@@ -817,22 +1128,21 @@ bail:
817 * 1) a lowest extent block is found, then we pass it back in 1128 * 1) a lowest extent block is found, then we pass it back in
818 * *lowest_eb_bh and return '0' 1129 * *lowest_eb_bh and return '0'
819 * 1130 *
820 * 2) the search fails to find anything, but the dinode has room. We 1131 * 2) the search fails to find anything, but the root_el has room. We
821 * pass NULL back in *lowest_eb_bh, but still return '0' 1132 * pass NULL back in *lowest_eb_bh, but still return '0'
822 * 1133 *
823 * 3) the search fails to find anything AND the dinode is full, in 1134 * 3) the search fails to find anything AND the root_el is full, in
824 * which case we return > 0 1135 * which case we return > 0
825 * 1136 *
826 * return status < 0 indicates an error. 1137 * return status < 0 indicates an error.
827 */ 1138 */
828static int ocfs2_find_branch_target(struct ocfs2_super *osb, 1139static int ocfs2_find_branch_target(struct ocfs2_super *osb,
829 struct inode *inode, 1140 struct inode *inode,
830 struct buffer_head *fe_bh, 1141 struct ocfs2_extent_tree *et,
831 struct buffer_head **target_bh) 1142 struct buffer_head **target_bh)
832{ 1143{
833 int status = 0, i; 1144 int status = 0, i;
834 u64 blkno; 1145 u64 blkno;
835 struct ocfs2_dinode *fe;
836 struct ocfs2_extent_block *eb; 1146 struct ocfs2_extent_block *eb;
837 struct ocfs2_extent_list *el; 1147 struct ocfs2_extent_list *el;
838 struct buffer_head *bh = NULL; 1148 struct buffer_head *bh = NULL;
@@ -842,8 +1152,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
842 1152
843 *target_bh = NULL; 1153 *target_bh = NULL;
844 1154
845 fe = (struct ocfs2_dinode *) fe_bh->b_data; 1155 el = et->et_root_el;
846 el = &fe->id2.i_list;
847 1156
848 while(le16_to_cpu(el->l_tree_depth) > 1) { 1157 while(le16_to_cpu(el->l_tree_depth) > 1) {
849 if (le16_to_cpu(el->l_next_free_rec) == 0) { 1158 if (le16_to_cpu(el->l_next_free_rec) == 0) {
@@ -864,13 +1173,10 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
864 goto bail; 1173 goto bail;
865 } 1174 }
866 1175
867 if (bh) { 1176 brelse(bh);
868 brelse(bh); 1177 bh = NULL;
869 bh = NULL;
870 }
871 1178
872 status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, 1179 status = ocfs2_read_block(inode, blkno, &bh);
873 inode);
874 if (status < 0) { 1180 if (status < 0) {
875 mlog_errno(status); 1181 mlog_errno(status);
876 goto bail; 1182 goto bail;
@@ -886,8 +1192,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
886 1192
887 if (le16_to_cpu(el->l_next_free_rec) < 1193 if (le16_to_cpu(el->l_next_free_rec) <
888 le16_to_cpu(el->l_count)) { 1194 le16_to_cpu(el->l_count)) {
889 if (lowest_bh) 1195 brelse(lowest_bh);
890 brelse(lowest_bh);
891 lowest_bh = bh; 1196 lowest_bh = bh;
892 get_bh(lowest_bh); 1197 get_bh(lowest_bh);
893 } 1198 }
@@ -895,14 +1200,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
895 1200
896 /* If we didn't find one and the fe doesn't have any room, 1201 /* If we didn't find one and the fe doesn't have any room,
897 * then return '1' */ 1202 * then return '1' */
898 if (!lowest_bh 1203 el = et->et_root_el;
899 && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) 1204 if (!lowest_bh && (el->l_next_free_rec == el->l_count))
900 status = 1; 1205 status = 1;
901 1206
902 *target_bh = lowest_bh; 1207 *target_bh = lowest_bh;
903bail: 1208bail:
904 if (bh) 1209 brelse(bh);
905 brelse(bh);
906 1210
907 mlog_exit(status); 1211 mlog_exit(status);
908 return status; 1212 return status;
@@ -919,19 +1223,19 @@ bail:
919 * *last_eb_bh will be updated by ocfs2_add_branch(). 1223 * *last_eb_bh will be updated by ocfs2_add_branch().
920 */ 1224 */
921static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, 1225static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
922 struct buffer_head *di_bh, int *final_depth, 1226 struct ocfs2_extent_tree *et, int *final_depth,
923 struct buffer_head **last_eb_bh, 1227 struct buffer_head **last_eb_bh,
924 struct ocfs2_alloc_context *meta_ac) 1228 struct ocfs2_alloc_context *meta_ac)
925{ 1229{
926 int ret, shift; 1230 int ret, shift;
927 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1231 struct ocfs2_extent_list *el = et->et_root_el;
928 int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); 1232 int depth = le16_to_cpu(el->l_tree_depth);
929 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1233 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
930 struct buffer_head *bh = NULL; 1234 struct buffer_head *bh = NULL;
931 1235
932 BUG_ON(meta_ac == NULL); 1236 BUG_ON(meta_ac == NULL);
933 1237
934 shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); 1238 shift = ocfs2_find_branch_target(osb, inode, et, &bh);
935 if (shift < 0) { 1239 if (shift < 0) {
936 ret = shift; 1240 ret = shift;
937 mlog_errno(ret); 1241 mlog_errno(ret);
@@ -948,7 +1252,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
948 /* ocfs2_shift_tree_depth will return us a buffer with 1252 /* ocfs2_shift_tree_depth will return us a buffer with
949 * the new extent block (so we can pass that to 1253 * the new extent block (so we can pass that to
950 * ocfs2_add_branch). */ 1254 * ocfs2_add_branch). */
951 ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, 1255 ret = ocfs2_shift_tree_depth(osb, handle, inode, et,
952 meta_ac, &bh); 1256 meta_ac, &bh);
953 if (ret < 0) { 1257 if (ret < 0) {
954 mlog_errno(ret); 1258 mlog_errno(ret);
@@ -975,7 +1279,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
975 /* call ocfs2_add_branch to add the final part of the tree with 1279 /* call ocfs2_add_branch to add the final part of the tree with
976 * the new data. */ 1280 * the new data. */
977 mlog(0, "add branch. bh = %p\n", bh); 1281 mlog(0, "add branch. bh = %p\n", bh);
978 ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, 1282 ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh,
979 meta_ac); 1283 meta_ac);
980 if (ret < 0) { 1284 if (ret < 0) {
981 mlog_errno(ret); 1285 mlog_errno(ret);
@@ -1236,8 +1540,7 @@ static int __ocfs2_find_path(struct inode *inode,
1236 1540
1237 brelse(bh); 1541 brelse(bh);
1238 bh = NULL; 1542 bh = NULL;
1239 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, 1543 ret = ocfs2_read_block(inode, blkno, &bh);
1240 &bh, OCFS2_BH_CACHED, inode);
1241 if (ret) { 1544 if (ret) {
1242 mlog_errno(ret); 1545 mlog_errno(ret);
1243 goto out; 1546 goto out;
@@ -2058,11 +2361,11 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2058 struct ocfs2_path *right_path, 2361 struct ocfs2_path *right_path,
2059 int subtree_index, 2362 int subtree_index,
2060 struct ocfs2_cached_dealloc_ctxt *dealloc, 2363 struct ocfs2_cached_dealloc_ctxt *dealloc,
2061 int *deleted) 2364 int *deleted,
2365 struct ocfs2_extent_tree *et)
2062{ 2366{
2063 int ret, i, del_right_subtree = 0, right_has_empty = 0; 2367 int ret, i, del_right_subtree = 0, right_has_empty = 0;
2064 struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); 2368 struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
2065 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2066 struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; 2369 struct ocfs2_extent_list *right_leaf_el, *left_leaf_el;
2067 struct ocfs2_extent_block *eb; 2370 struct ocfs2_extent_block *eb;
2068 2371
@@ -2114,7 +2417,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2114 * We have to update i_last_eb_blk during the meta 2417 * We have to update i_last_eb_blk during the meta
2115 * data delete. 2418 * data delete.
2116 */ 2419 */
2117 ret = ocfs2_journal_access(handle, inode, di_bh, 2420 ret = ocfs2_journal_access(handle, inode, et_root_bh,
2118 OCFS2_JOURNAL_ACCESS_WRITE); 2421 OCFS2_JOURNAL_ACCESS_WRITE);
2119 if (ret) { 2422 if (ret) {
2120 mlog_errno(ret); 2423 mlog_errno(ret);
@@ -2189,7 +2492,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2189 ocfs2_update_edge_lengths(inode, handle, left_path); 2492 ocfs2_update_edge_lengths(inode, handle, left_path);
2190 2493
2191 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2494 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2192 di->i_last_eb_blk = eb->h_blkno; 2495 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
2193 2496
2194 /* 2497 /*
2195 * Removal of the extent in the left leaf was skipped 2498 * Removal of the extent in the left leaf was skipped
@@ -2199,7 +2502,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2199 if (right_has_empty) 2502 if (right_has_empty)
2200 ocfs2_remove_empty_extent(left_leaf_el); 2503 ocfs2_remove_empty_extent(left_leaf_el);
2201 2504
2202 ret = ocfs2_journal_dirty(handle, di_bh); 2505 ret = ocfs2_journal_dirty(handle, et_root_bh);
2203 if (ret) 2506 if (ret)
2204 mlog_errno(ret); 2507 mlog_errno(ret);
2205 2508
@@ -2322,7 +2625,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2322 handle_t *handle, int orig_credits, 2625 handle_t *handle, int orig_credits,
2323 struct ocfs2_path *path, 2626 struct ocfs2_path *path,
2324 struct ocfs2_cached_dealloc_ctxt *dealloc, 2627 struct ocfs2_cached_dealloc_ctxt *dealloc,
2325 struct ocfs2_path **empty_extent_path) 2628 struct ocfs2_path **empty_extent_path,
2629 struct ocfs2_extent_tree *et)
2326{ 2630{
2327 int ret, subtree_root, deleted; 2631 int ret, subtree_root, deleted;
2328 u32 right_cpos; 2632 u32 right_cpos;
@@ -2395,7 +2699,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2395 2699
2396 ret = ocfs2_rotate_subtree_left(inode, handle, left_path, 2700 ret = ocfs2_rotate_subtree_left(inode, handle, left_path,
2397 right_path, subtree_root, 2701 right_path, subtree_root,
2398 dealloc, &deleted); 2702 dealloc, &deleted, et);
2399 if (ret == -EAGAIN) { 2703 if (ret == -EAGAIN) {
2400 /* 2704 /*
2401 * The rotation has to temporarily stop due to 2705 * The rotation has to temporarily stop due to
@@ -2438,29 +2742,20 @@ out:
2438} 2742}
2439 2743
2440static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, 2744static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2441 struct ocfs2_path *path, 2745 struct ocfs2_path *path,
2442 struct ocfs2_cached_dealloc_ctxt *dealloc) 2746 struct ocfs2_cached_dealloc_ctxt *dealloc,
2747 struct ocfs2_extent_tree *et)
2443{ 2748{
2444 int ret, subtree_index; 2749 int ret, subtree_index;
2445 u32 cpos; 2750 u32 cpos;
2446 struct ocfs2_path *left_path = NULL; 2751 struct ocfs2_path *left_path = NULL;
2447 struct ocfs2_dinode *di;
2448 struct ocfs2_extent_block *eb; 2752 struct ocfs2_extent_block *eb;
2449 struct ocfs2_extent_list *el; 2753 struct ocfs2_extent_list *el;
2450 2754
2451 /*
2452 * XXX: This code assumes that the root is an inode, which is
2453 * true for now but may change as tree code gets generic.
2454 */
2455 di = (struct ocfs2_dinode *)path_root_bh(path)->b_data;
2456 if (!OCFS2_IS_VALID_DINODE(di)) {
2457 ret = -EIO;
2458 ocfs2_error(inode->i_sb,
2459 "Inode %llu has invalid path root",
2460 (unsigned long long)OCFS2_I(inode)->ip_blkno);
2461 goto out;
2462 }
2463 2755
2756 ret = ocfs2_et_sanity_check(inode, et);
2757 if (ret)
2758 goto out;
2464 /* 2759 /*
2465 * There's two ways we handle this depending on 2760 * There's two ways we handle this depending on
2466 * whether path is the only existing one. 2761 * whether path is the only existing one.
@@ -2517,7 +2812,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2517 ocfs2_update_edge_lengths(inode, handle, left_path); 2812 ocfs2_update_edge_lengths(inode, handle, left_path);
2518 2813
2519 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; 2814 eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data;
2520 di->i_last_eb_blk = eb->h_blkno; 2815 ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno));
2521 } else { 2816 } else {
2522 /* 2817 /*
2523 * 'path' is also the leftmost path which 2818 * 'path' is also the leftmost path which
@@ -2528,12 +2823,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
2528 */ 2823 */
2529 ocfs2_unlink_path(inode, handle, dealloc, path, 1); 2824 ocfs2_unlink_path(inode, handle, dealloc, path, 1);
2530 2825
2531 el = &di->id2.i_list; 2826 el = et->et_root_el;
2532 el->l_tree_depth = 0; 2827 el->l_tree_depth = 0;
2533 el->l_next_free_rec = 0; 2828 el->l_next_free_rec = 0;
2534 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); 2829 memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec));
2535 2830
2536 di->i_last_eb_blk = 0; 2831 ocfs2_et_set_last_eb_blk(et, 0);
2537 } 2832 }
2538 2833
2539 ocfs2_journal_dirty(handle, path_root_bh(path)); 2834 ocfs2_journal_dirty(handle, path_root_bh(path));
@@ -2561,7 +2856,8 @@ out:
2561 */ 2856 */
2562static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, 2857static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
2563 struct ocfs2_path *path, 2858 struct ocfs2_path *path,
2564 struct ocfs2_cached_dealloc_ctxt *dealloc) 2859 struct ocfs2_cached_dealloc_ctxt *dealloc,
2860 struct ocfs2_extent_tree *et)
2565{ 2861{
2566 int ret, orig_credits = handle->h_buffer_credits; 2862 int ret, orig_credits = handle->h_buffer_credits;
2567 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; 2863 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
@@ -2575,7 +2871,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle,
2575 if (path->p_tree_depth == 0) { 2871 if (path->p_tree_depth == 0) {
2576rightmost_no_delete: 2872rightmost_no_delete:
2577 /* 2873 /*
2578 * In-inode extents. This is trivially handled, so do 2874 * Inline extents. This is trivially handled, so do
2579 * it up front. 2875 * it up front.
2580 */ 2876 */
2581 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, 2877 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle,
@@ -2629,7 +2925,7 @@ rightmost_no_delete:
2629 */ 2925 */
2630 2926
2631 ret = ocfs2_remove_rightmost_path(inode, handle, path, 2927 ret = ocfs2_remove_rightmost_path(inode, handle, path,
2632 dealloc); 2928 dealloc, et);
2633 if (ret) 2929 if (ret)
2634 mlog_errno(ret); 2930 mlog_errno(ret);
2635 goto out; 2931 goto out;
@@ -2641,7 +2937,7 @@ rightmost_no_delete:
2641 */ 2937 */
2642try_rotate: 2938try_rotate:
2643 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, 2939 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path,
2644 dealloc, &restart_path); 2940 dealloc, &restart_path, et);
2645 if (ret && ret != -EAGAIN) { 2941 if (ret && ret != -EAGAIN) {
2646 mlog_errno(ret); 2942 mlog_errno(ret);
2647 goto out; 2943 goto out;
@@ -2653,7 +2949,7 @@ try_rotate:
2653 2949
2654 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, 2950 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits,
2655 tmp_path, dealloc, 2951 tmp_path, dealloc,
2656 &restart_path); 2952 &restart_path, et);
2657 if (ret && ret != -EAGAIN) { 2953 if (ret && ret != -EAGAIN) {
2658 mlog_errno(ret); 2954 mlog_errno(ret);
2659 goto out; 2955 goto out;
@@ -2939,6 +3235,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
2939 handle_t *handle, 3235 handle_t *handle,
2940 struct ocfs2_extent_rec *split_rec, 3236 struct ocfs2_extent_rec *split_rec,
2941 struct ocfs2_cached_dealloc_ctxt *dealloc, 3237 struct ocfs2_cached_dealloc_ctxt *dealloc,
3238 struct ocfs2_extent_tree *et,
2942 int index) 3239 int index)
2943{ 3240{
2944 int ret, i, subtree_index = 0, has_empty_extent = 0; 3241 int ret, i, subtree_index = 0, has_empty_extent = 0;
@@ -3059,7 +3356,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3059 le16_to_cpu(el->l_next_free_rec) == 1) { 3356 le16_to_cpu(el->l_next_free_rec) == 1) {
3060 3357
3061 ret = ocfs2_remove_rightmost_path(inode, handle, 3358 ret = ocfs2_remove_rightmost_path(inode, handle,
3062 right_path, dealloc); 3359 right_path,
3360 dealloc, et);
3063 if (ret) { 3361 if (ret) {
3064 mlog_errno(ret); 3362 mlog_errno(ret);
3065 goto out; 3363 goto out;
@@ -3086,7 +3384,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3086 int split_index, 3384 int split_index,
3087 struct ocfs2_extent_rec *split_rec, 3385 struct ocfs2_extent_rec *split_rec,
3088 struct ocfs2_cached_dealloc_ctxt *dealloc, 3386 struct ocfs2_cached_dealloc_ctxt *dealloc,
3089 struct ocfs2_merge_ctxt *ctxt) 3387 struct ocfs2_merge_ctxt *ctxt,
3388 struct ocfs2_extent_tree *et)
3090 3389
3091{ 3390{
3092 int ret = 0; 3391 int ret = 0;
@@ -3104,7 +3403,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3104 * illegal. 3403 * illegal.
3105 */ 3404 */
3106 ret = ocfs2_rotate_tree_left(inode, handle, path, 3405 ret = ocfs2_rotate_tree_left(inode, handle, path,
3107 dealloc); 3406 dealloc, et);
3108 if (ret) { 3407 if (ret) {
3109 mlog_errno(ret); 3408 mlog_errno(ret);
3110 goto out; 3409 goto out;
@@ -3147,7 +3446,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3147 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); 3446 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
3148 3447
3149 /* The merge left us with an empty extent, remove it. */ 3448 /* The merge left us with an empty extent, remove it. */
3150 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); 3449 ret = ocfs2_rotate_tree_left(inode, handle, path,
3450 dealloc, et);
3151 if (ret) { 3451 if (ret) {
3152 mlog_errno(ret); 3452 mlog_errno(ret);
3153 goto out; 3453 goto out;
@@ -3161,7 +3461,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3161 */ 3461 */
3162 ret = ocfs2_merge_rec_left(inode, path, 3462 ret = ocfs2_merge_rec_left(inode, path,
3163 handle, rec, 3463 handle, rec,
3164 dealloc, 3464 dealloc, et,
3165 split_index); 3465 split_index);
3166 3466
3167 if (ret) { 3467 if (ret) {
@@ -3170,7 +3470,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3170 } 3470 }
3171 3471
3172 ret = ocfs2_rotate_tree_left(inode, handle, path, 3472 ret = ocfs2_rotate_tree_left(inode, handle, path,
3173 dealloc); 3473 dealloc, et);
3174 /* 3474 /*
3175 * Error from this last rotate is not critical, so 3475 * Error from this last rotate is not critical, so
3176 * print but don't bubble it up. 3476 * print but don't bubble it up.
@@ -3190,7 +3490,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3190 ret = ocfs2_merge_rec_left(inode, 3490 ret = ocfs2_merge_rec_left(inode,
3191 path, 3491 path,
3192 handle, split_rec, 3492 handle, split_rec,
3193 dealloc, 3493 dealloc, et,
3194 split_index); 3494 split_index);
3195 if (ret) { 3495 if (ret) {
3196 mlog_errno(ret); 3496 mlog_errno(ret);
@@ -3213,7 +3513,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3213 * our leaf. Try to rotate it away. 3513 * our leaf. Try to rotate it away.
3214 */ 3514 */
3215 ret = ocfs2_rotate_tree_left(inode, handle, path, 3515 ret = ocfs2_rotate_tree_left(inode, handle, path,
3216 dealloc); 3516 dealloc, et);
3217 if (ret) 3517 if (ret)
3218 mlog_errno(ret); 3518 mlog_errno(ret);
3219 ret = 0; 3519 ret = 0;
@@ -3347,16 +3647,6 @@ rotate:
3347 ocfs2_rotate_leaf(el, insert_rec); 3647 ocfs2_rotate_leaf(el, insert_rec);
3348} 3648}
3349 3649
3350static inline void ocfs2_update_dinode_clusters(struct inode *inode,
3351 struct ocfs2_dinode *di,
3352 u32 clusters)
3353{
3354 le32_add_cpu(&di->i_clusters, clusters);
3355 spin_lock(&OCFS2_I(inode)->ip_lock);
3356 OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters);
3357 spin_unlock(&OCFS2_I(inode)->ip_lock);
3358}
3359
3360static void ocfs2_adjust_rightmost_records(struct inode *inode, 3650static void ocfs2_adjust_rightmost_records(struct inode *inode,
3361 handle_t *handle, 3651 handle_t *handle,
3362 struct ocfs2_path *path, 3652 struct ocfs2_path *path,
@@ -3558,8 +3848,8 @@ static void ocfs2_split_record(struct inode *inode,
3558} 3848}
3559 3849
3560/* 3850/*
3561 * This function only does inserts on an allocation b-tree. For dinode 3851 * This function only does inserts on an allocation b-tree. For tree
3562 * lists, ocfs2_insert_at_leaf() is called directly. 3852 * depth = 0, ocfs2_insert_at_leaf() is called directly.
3563 * 3853 *
3564 * right_path is the path we want to do the actual insert 3854 * right_path is the path we want to do the actual insert
3565 * in. left_path should only be passed in if we need to update that 3855 * in. left_path should only be passed in if we need to update that
@@ -3656,7 +3946,7 @@ out:
3656 3946
3657static int ocfs2_do_insert_extent(struct inode *inode, 3947static int ocfs2_do_insert_extent(struct inode *inode,
3658 handle_t *handle, 3948 handle_t *handle,
3659 struct buffer_head *di_bh, 3949 struct ocfs2_extent_tree *et,
3660 struct ocfs2_extent_rec *insert_rec, 3950 struct ocfs2_extent_rec *insert_rec,
3661 struct ocfs2_insert_type *type) 3951 struct ocfs2_insert_type *type)
3662{ 3952{
@@ -3664,13 +3954,11 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3664 u32 cpos; 3954 u32 cpos;
3665 struct ocfs2_path *right_path = NULL; 3955 struct ocfs2_path *right_path = NULL;
3666 struct ocfs2_path *left_path = NULL; 3956 struct ocfs2_path *left_path = NULL;
3667 struct ocfs2_dinode *di;
3668 struct ocfs2_extent_list *el; 3957 struct ocfs2_extent_list *el;
3669 3958
3670 di = (struct ocfs2_dinode *) di_bh->b_data; 3959 el = et->et_root_el;
3671 el = &di->id2.i_list;
3672 3960
3673 ret = ocfs2_journal_access(handle, inode, di_bh, 3961 ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
3674 OCFS2_JOURNAL_ACCESS_WRITE); 3962 OCFS2_JOURNAL_ACCESS_WRITE);
3675 if (ret) { 3963 if (ret) {
3676 mlog_errno(ret); 3964 mlog_errno(ret);
@@ -3682,7 +3970,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3682 goto out_update_clusters; 3970 goto out_update_clusters;
3683 } 3971 }
3684 3972
3685 right_path = ocfs2_new_inode_path(di_bh); 3973 right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
3686 if (!right_path) { 3974 if (!right_path) {
3687 ret = -ENOMEM; 3975 ret = -ENOMEM;
3688 mlog_errno(ret); 3976 mlog_errno(ret);
@@ -3732,7 +4020,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3732 * ocfs2_rotate_tree_right() might have extended the 4020 * ocfs2_rotate_tree_right() might have extended the
3733 * transaction without re-journaling our tree root. 4021 * transaction without re-journaling our tree root.
3734 */ 4022 */
3735 ret = ocfs2_journal_access(handle, inode, di_bh, 4023 ret = ocfs2_journal_access(handle, inode, et->et_root_bh,
3736 OCFS2_JOURNAL_ACCESS_WRITE); 4024 OCFS2_JOURNAL_ACCESS_WRITE);
3737 if (ret) { 4025 if (ret) {
3738 mlog_errno(ret); 4026 mlog_errno(ret);
@@ -3757,10 +4045,10 @@ static int ocfs2_do_insert_extent(struct inode *inode,
3757 4045
3758out_update_clusters: 4046out_update_clusters:
3759 if (type->ins_split == SPLIT_NONE) 4047 if (type->ins_split == SPLIT_NONE)
3760 ocfs2_update_dinode_clusters(inode, di, 4048 ocfs2_et_update_clusters(inode, et,
3761 le16_to_cpu(insert_rec->e_leaf_clusters)); 4049 le16_to_cpu(insert_rec->e_leaf_clusters));
3762 4050
3763 ret = ocfs2_journal_dirty(handle, di_bh); 4051 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
3764 if (ret) 4052 if (ret)
3765 mlog_errno(ret); 4053 mlog_errno(ret);
3766 4054
@@ -3890,7 +4178,8 @@ out:
3890static void ocfs2_figure_contig_type(struct inode *inode, 4178static void ocfs2_figure_contig_type(struct inode *inode,
3891 struct ocfs2_insert_type *insert, 4179 struct ocfs2_insert_type *insert,
3892 struct ocfs2_extent_list *el, 4180 struct ocfs2_extent_list *el,
3893 struct ocfs2_extent_rec *insert_rec) 4181 struct ocfs2_extent_rec *insert_rec,
4182 struct ocfs2_extent_tree *et)
3894{ 4183{
3895 int i; 4184 int i;
3896 enum ocfs2_contig_type contig_type = CONTIG_NONE; 4185 enum ocfs2_contig_type contig_type = CONTIG_NONE;
@@ -3906,6 +4195,21 @@ static void ocfs2_figure_contig_type(struct inode *inode,
3906 } 4195 }
3907 } 4196 }
3908 insert->ins_contig = contig_type; 4197 insert->ins_contig = contig_type;
4198
4199 if (insert->ins_contig != CONTIG_NONE) {
4200 struct ocfs2_extent_rec *rec =
4201 &el->l_recs[insert->ins_contig_index];
4202 unsigned int len = le16_to_cpu(rec->e_leaf_clusters) +
4203 le16_to_cpu(insert_rec->e_leaf_clusters);
4204
4205 /*
4206 * Caller might want us to limit the size of extents, don't
4207 * calculate contiguousness if we might exceed that limit.
4208 */
4209 if (et->et_max_leaf_clusters &&
4210 (len > et->et_max_leaf_clusters))
4211 insert->ins_contig = CONTIG_NONE;
4212 }
3909} 4213}
3910 4214
3911/* 4215/*
@@ -3914,8 +4218,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
3914 * ocfs2_figure_appending_type() will figure out whether we'll have to 4218 * ocfs2_figure_appending_type() will figure out whether we'll have to
3915 * insert at the tail of the rightmost leaf. 4219 * insert at the tail of the rightmost leaf.
3916 * 4220 *
3917 * This should also work against the dinode list for tree's with 0 4221 * This should also work against the root extent list for tree's with 0
3918 * depth. If we consider the dinode list to be the rightmost leaf node 4222 * depth. If we consider the root extent list to be the rightmost leaf node
3919 * then the logic here makes sense. 4223 * then the logic here makes sense.
3920 */ 4224 */
3921static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, 4225static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert,
@@ -3966,14 +4270,13 @@ set_tail_append:
3966 * structure. 4270 * structure.
3967 */ 4271 */
3968static int ocfs2_figure_insert_type(struct inode *inode, 4272static int ocfs2_figure_insert_type(struct inode *inode,
3969 struct buffer_head *di_bh, 4273 struct ocfs2_extent_tree *et,
3970 struct buffer_head **last_eb_bh, 4274 struct buffer_head **last_eb_bh,
3971 struct ocfs2_extent_rec *insert_rec, 4275 struct ocfs2_extent_rec *insert_rec,
3972 int *free_records, 4276 int *free_records,
3973 struct ocfs2_insert_type *insert) 4277 struct ocfs2_insert_type *insert)
3974{ 4278{
3975 int ret; 4279 int ret;
3976 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3977 struct ocfs2_extent_block *eb; 4280 struct ocfs2_extent_block *eb;
3978 struct ocfs2_extent_list *el; 4281 struct ocfs2_extent_list *el;
3979 struct ocfs2_path *path = NULL; 4282 struct ocfs2_path *path = NULL;
@@ -3981,7 +4284,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3981 4284
3982 insert->ins_split = SPLIT_NONE; 4285 insert->ins_split = SPLIT_NONE;
3983 4286
3984 el = &di->id2.i_list; 4287 el = et->et_root_el;
3985 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); 4288 insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth);
3986 4289
3987 if (el->l_tree_depth) { 4290 if (el->l_tree_depth) {
@@ -3991,9 +4294,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
3991 * ocfs2_figure_insert_type() and ocfs2_add_branch() 4294 * ocfs2_figure_insert_type() and ocfs2_add_branch()
3992 * may want it later. 4295 * may want it later.
3993 */ 4296 */
3994 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4297 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh);
3995 le64_to_cpu(di->i_last_eb_blk), &bh,
3996 OCFS2_BH_CACHED, inode);
3997 if (ret) { 4298 if (ret) {
3998 mlog_exit(ret); 4299 mlog_exit(ret);
3999 goto out; 4300 goto out;
@@ -4014,12 +4315,12 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4014 le16_to_cpu(el->l_next_free_rec); 4315 le16_to_cpu(el->l_next_free_rec);
4015 4316
4016 if (!insert->ins_tree_depth) { 4317 if (!insert->ins_tree_depth) {
4017 ocfs2_figure_contig_type(inode, insert, el, insert_rec); 4318 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
4018 ocfs2_figure_appending_type(insert, el, insert_rec); 4319 ocfs2_figure_appending_type(insert, el, insert_rec);
4019 return 0; 4320 return 0;
4020 } 4321 }
4021 4322
4022 path = ocfs2_new_inode_path(di_bh); 4323 path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
4023 if (!path) { 4324 if (!path) {
4024 ret = -ENOMEM; 4325 ret = -ENOMEM;
4025 mlog_errno(ret); 4326 mlog_errno(ret);
@@ -4048,7 +4349,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4048 * into two types of appends: simple record append, or a 4349 * into two types of appends: simple record append, or a
4049 * rotate inside the tail leaf. 4350 * rotate inside the tail leaf.
4050 */ 4351 */
4051 ocfs2_figure_contig_type(inode, insert, el, insert_rec); 4352 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et);
4052 4353
4053 /* 4354 /*
4054 * The insert code isn't quite ready to deal with all cases of 4355 * The insert code isn't quite ready to deal with all cases of
@@ -4069,7 +4370,8 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4069 * the case that we're doing a tail append, so maybe we can 4370 * the case that we're doing a tail append, so maybe we can
4070 * take advantage of that information somehow. 4371 * take advantage of that information somehow.
4071 */ 4372 */
4072 if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) { 4373 if (ocfs2_et_get_last_eb_blk(et) ==
4374 path_leaf_bh(path)->b_blocknr) {
4073 /* 4375 /*
4074 * Ok, ocfs2_find_path() returned us the rightmost 4376 * Ok, ocfs2_find_path() returned us the rightmost
4075 * tree path. This might be an appending insert. There are 4377 * tree path. This might be an appending insert. There are
@@ -4099,7 +4401,7 @@ out:
4099int ocfs2_insert_extent(struct ocfs2_super *osb, 4401int ocfs2_insert_extent(struct ocfs2_super *osb,
4100 handle_t *handle, 4402 handle_t *handle,
4101 struct inode *inode, 4403 struct inode *inode,
4102 struct buffer_head *fe_bh, 4404 struct ocfs2_extent_tree *et,
4103 u32 cpos, 4405 u32 cpos,
4104 u64 start_blk, 4406 u64 start_blk,
4105 u32 new_clusters, 4407 u32 new_clusters,
@@ -4112,26 +4414,21 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4112 struct ocfs2_insert_type insert = {0, }; 4414 struct ocfs2_insert_type insert = {0, };
4113 struct ocfs2_extent_rec rec; 4415 struct ocfs2_extent_rec rec;
4114 4416
4115 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL);
4116
4117 mlog(0, "add %u clusters at position %u to inode %llu\n", 4417 mlog(0, "add %u clusters at position %u to inode %llu\n",
4118 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4418 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4119 4419
4120 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
4121 (OCFS2_I(inode)->ip_clusters != cpos),
4122 "Device %s, asking for sparse allocation: inode %llu, "
4123 "cpos %u, clusters %u\n",
4124 osb->dev_str,
4125 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos,
4126 OCFS2_I(inode)->ip_clusters);
4127
4128 memset(&rec, 0, sizeof(rec)); 4420 memset(&rec, 0, sizeof(rec));
4129 rec.e_cpos = cpu_to_le32(cpos); 4421 rec.e_cpos = cpu_to_le32(cpos);
4130 rec.e_blkno = cpu_to_le64(start_blk); 4422 rec.e_blkno = cpu_to_le64(start_blk);
4131 rec.e_leaf_clusters = cpu_to_le16(new_clusters); 4423 rec.e_leaf_clusters = cpu_to_le16(new_clusters);
4132 rec.e_flags = flags; 4424 rec.e_flags = flags;
4425 status = ocfs2_et_insert_check(inode, et, &rec);
4426 if (status) {
4427 mlog_errno(status);
4428 goto bail;
4429 }
4133 4430
4134 status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, 4431 status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec,
4135 &free_records, &insert); 4432 &free_records, &insert);
4136 if (status < 0) { 4433 if (status < 0) {
4137 mlog_errno(status); 4434 mlog_errno(status);
@@ -4145,7 +4442,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4145 free_records, insert.ins_tree_depth); 4442 free_records, insert.ins_tree_depth);
4146 4443
4147 if (insert.ins_contig == CONTIG_NONE && free_records == 0) { 4444 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
4148 status = ocfs2_grow_tree(inode, handle, fe_bh, 4445 status = ocfs2_grow_tree(inode, handle, et,
4149 &insert.ins_tree_depth, &last_eb_bh, 4446 &insert.ins_tree_depth, &last_eb_bh,
4150 meta_ac); 4447 meta_ac);
4151 if (status) { 4448 if (status) {
@@ -4155,17 +4452,124 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4155 } 4452 }
4156 4453
4157 /* Finally, we can add clusters. This might rotate the tree for us. */ 4454 /* Finally, we can add clusters. This might rotate the tree for us. */
4158 status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); 4455 status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert);
4159 if (status < 0) 4456 if (status < 0)
4160 mlog_errno(status); 4457 mlog_errno(status);
4161 else 4458 else if (et->et_ops == &ocfs2_dinode_et_ops)
4162 ocfs2_extent_map_insert_rec(inode, &rec); 4459 ocfs2_extent_map_insert_rec(inode, &rec);
4163 4460
4164bail: 4461bail:
4165 if (last_eb_bh) 4462 brelse(last_eb_bh);
4166 brelse(last_eb_bh); 4463
4464 mlog_exit(status);
4465 return status;
4466}
4467
4468/*
4469 * Allcate and add clusters into the extent b-tree.
4470 * The new clusters(clusters_to_add) will be inserted at logical_offset.
4471 * The extent b-tree's root is specified by et, and
4472 * it is not limited to the file storage. Any extent tree can use this
4473 * function if it implements the proper ocfs2_extent_tree.
4474 */
4475int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4476 struct inode *inode,
4477 u32 *logical_offset,
4478 u32 clusters_to_add,
4479 int mark_unwritten,
4480 struct ocfs2_extent_tree *et,
4481 handle_t *handle,
4482 struct ocfs2_alloc_context *data_ac,
4483 struct ocfs2_alloc_context *meta_ac,
4484 enum ocfs2_alloc_restarted *reason_ret)
4485{
4486 int status = 0;
4487 int free_extents;
4488 enum ocfs2_alloc_restarted reason = RESTART_NONE;
4489 u32 bit_off, num_bits;
4490 u64 block;
4491 u8 flags = 0;
4492
4493 BUG_ON(!clusters_to_add);
4494
4495 if (mark_unwritten)
4496 flags = OCFS2_EXT_UNWRITTEN;
4497
4498 free_extents = ocfs2_num_free_extents(osb, inode, et);
4499 if (free_extents < 0) {
4500 status = free_extents;
4501 mlog_errno(status);
4502 goto leave;
4503 }
4504
4505 /* there are two cases which could cause us to EAGAIN in the
4506 * we-need-more-metadata case:
4507 * 1) we haven't reserved *any*
4508 * 2) we are so fragmented, we've needed to add metadata too
4509 * many times. */
4510 if (!free_extents && !meta_ac) {
4511 mlog(0, "we haven't reserved any metadata!\n");
4512 status = -EAGAIN;
4513 reason = RESTART_META;
4514 goto leave;
4515 } else if ((!free_extents)
4516 && (ocfs2_alloc_context_bits_left(meta_ac)
4517 < ocfs2_extend_meta_needed(et->et_root_el))) {
4518 mlog(0, "filesystem is really fragmented...\n");
4519 status = -EAGAIN;
4520 reason = RESTART_META;
4521 goto leave;
4522 }
4523
4524 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
4525 clusters_to_add, &bit_off, &num_bits);
4526 if (status < 0) {
4527 if (status != -ENOSPC)
4528 mlog_errno(status);
4529 goto leave;
4530 }
4531
4532 BUG_ON(num_bits > clusters_to_add);
4167 4533
4534 /* reserve our write early -- insert_extent may update the inode */
4535 status = ocfs2_journal_access(handle, inode, et->et_root_bh,
4536 OCFS2_JOURNAL_ACCESS_WRITE);
4537 if (status < 0) {
4538 mlog_errno(status);
4539 goto leave;
4540 }
4541
4542 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4543 mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
4544 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4545 status = ocfs2_insert_extent(osb, handle, inode, et,
4546 *logical_offset, block,
4547 num_bits, flags, meta_ac);
4548 if (status < 0) {
4549 mlog_errno(status);
4550 goto leave;
4551 }
4552
4553 status = ocfs2_journal_dirty(handle, et->et_root_bh);
4554 if (status < 0) {
4555 mlog_errno(status);
4556 goto leave;
4557 }
4558
4559 clusters_to_add -= num_bits;
4560 *logical_offset += num_bits;
4561
4562 if (clusters_to_add) {
4563 mlog(0, "need to alloc once more, wanted = %u\n",
4564 clusters_to_add);
4565 status = -EAGAIN;
4566 reason = RESTART_TRANS;
4567 }
4568
4569leave:
4168 mlog_exit(status); 4570 mlog_exit(status);
4571 if (reason_ret)
4572 *reason_ret = reason;
4169 return status; 4573 return status;
4170} 4574}
4171 4575
@@ -4192,7 +4596,7 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
4192static int ocfs2_split_and_insert(struct inode *inode, 4596static int ocfs2_split_and_insert(struct inode *inode,
4193 handle_t *handle, 4597 handle_t *handle,
4194 struct ocfs2_path *path, 4598 struct ocfs2_path *path,
4195 struct buffer_head *di_bh, 4599 struct ocfs2_extent_tree *et,
4196 struct buffer_head **last_eb_bh, 4600 struct buffer_head **last_eb_bh,
4197 int split_index, 4601 int split_index,
4198 struct ocfs2_extent_rec *orig_split_rec, 4602 struct ocfs2_extent_rec *orig_split_rec,
@@ -4206,7 +4610,6 @@ static int ocfs2_split_and_insert(struct inode *inode,
4206 struct ocfs2_extent_rec split_rec = *orig_split_rec; 4610 struct ocfs2_extent_rec split_rec = *orig_split_rec;
4207 struct ocfs2_insert_type insert; 4611 struct ocfs2_insert_type insert;
4208 struct ocfs2_extent_block *eb; 4612 struct ocfs2_extent_block *eb;
4209 struct ocfs2_dinode *di;
4210 4613
4211leftright: 4614leftright:
4212 /* 4615 /*
@@ -4215,8 +4618,7 @@ leftright:
4215 */ 4618 */
4216 rec = path_leaf_el(path)->l_recs[split_index]; 4619 rec = path_leaf_el(path)->l_recs[split_index];
4217 4620
4218 di = (struct ocfs2_dinode *)di_bh->b_data; 4621 rightmost_el = et->et_root_el;
4219 rightmost_el = &di->id2.i_list;
4220 4622
4221 depth = le16_to_cpu(rightmost_el->l_tree_depth); 4623 depth = le16_to_cpu(rightmost_el->l_tree_depth);
4222 if (depth) { 4624 if (depth) {
@@ -4227,8 +4629,8 @@ leftright:
4227 4629
4228 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4630 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4229 le16_to_cpu(rightmost_el->l_count)) { 4631 le16_to_cpu(rightmost_el->l_count)) {
4230 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, 4632 ret = ocfs2_grow_tree(inode, handle, et,
4231 meta_ac); 4633 &depth, last_eb_bh, meta_ac);
4232 if (ret) { 4634 if (ret) {
4233 mlog_errno(ret); 4635 mlog_errno(ret);
4234 goto out; 4636 goto out;
@@ -4265,8 +4667,7 @@ leftright:
4265 do_leftright = 1; 4667 do_leftright = 1;
4266 } 4668 }
4267 4669
4268 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, 4670 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
4269 &insert);
4270 if (ret) { 4671 if (ret) {
4271 mlog_errno(ret); 4672 mlog_errno(ret);
4272 goto out; 4673 goto out;
@@ -4308,8 +4709,9 @@ out:
4308 * of the tree is required. All other cases will degrade into a less 4709 * of the tree is required. All other cases will degrade into a less
4309 * optimal tree layout. 4710 * optimal tree layout.
4310 * 4711 *
4311 * last_eb_bh should be the rightmost leaf block for any inode with a 4712 * last_eb_bh should be the rightmost leaf block for any extent
4312 * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. 4713 * btree. Since a split may grow the tree or a merge might shrink it,
4714 * the caller cannot trust the contents of that buffer after this call.
4313 * 4715 *
4314 * This code is optimized for readability - several passes might be 4716 * This code is optimized for readability - several passes might be
4315 * made over certain portions of the tree. All of those blocks will 4717 * made over certain portions of the tree. All of those blocks will
@@ -4317,7 +4719,7 @@ out:
4317 * extra overhead is not expressed in terms of disk reads. 4719 * extra overhead is not expressed in terms of disk reads.
4318 */ 4720 */
4319static int __ocfs2_mark_extent_written(struct inode *inode, 4721static int __ocfs2_mark_extent_written(struct inode *inode,
4320 struct buffer_head *di_bh, 4722 struct ocfs2_extent_tree *et,
4321 handle_t *handle, 4723 handle_t *handle,
4322 struct ocfs2_path *path, 4724 struct ocfs2_path *path,
4323 int split_index, 4725 int split_index,
@@ -4357,11 +4759,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4357 */ 4759 */
4358 if (path->p_tree_depth) { 4760 if (path->p_tree_depth) {
4359 struct ocfs2_extent_block *eb; 4761 struct ocfs2_extent_block *eb;
4360 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4361 4762
4362 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4763 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
4363 le64_to_cpu(di->i_last_eb_blk), 4764 &last_eb_bh);
4364 &last_eb_bh, OCFS2_BH_CACHED, inode);
4365 if (ret) { 4765 if (ret) {
4366 mlog_exit(ret); 4766 mlog_exit(ret);
4367 goto out; 4767 goto out;
@@ -4394,7 +4794,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4394 if (ctxt.c_split_covers_rec) 4794 if (ctxt.c_split_covers_rec)
4395 el->l_recs[split_index] = *split_rec; 4795 el->l_recs[split_index] = *split_rec;
4396 else 4796 else
4397 ret = ocfs2_split_and_insert(inode, handle, path, di_bh, 4797 ret = ocfs2_split_and_insert(inode, handle, path, et,
4398 &last_eb_bh, split_index, 4798 &last_eb_bh, split_index,
4399 split_rec, meta_ac); 4799 split_rec, meta_ac);
4400 if (ret) 4800 if (ret)
@@ -4402,7 +4802,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
4402 } else { 4802 } else {
4403 ret = ocfs2_try_to_merge_extent(inode, handle, path, 4803 ret = ocfs2_try_to_merge_extent(inode, handle, path,
4404 split_index, split_rec, 4804 split_index, split_rec,
4405 dealloc, &ctxt); 4805 dealloc, &ctxt, et);
4406 if (ret) 4806 if (ret)
4407 mlog_errno(ret); 4807 mlog_errno(ret);
4408 } 4808 }
@@ -4420,7 +4820,8 @@ out:
4420 * 4820 *
4421 * The caller is responsible for passing down meta_ac if we'll need it. 4821 * The caller is responsible for passing down meta_ac if we'll need it.
4422 */ 4822 */
4423int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, 4823int ocfs2_mark_extent_written(struct inode *inode,
4824 struct ocfs2_extent_tree *et,
4424 handle_t *handle, u32 cpos, u32 len, u32 phys, 4825 handle_t *handle, u32 cpos, u32 len, u32 phys,
4425 struct ocfs2_alloc_context *meta_ac, 4826 struct ocfs2_alloc_context *meta_ac,
4426 struct ocfs2_cached_dealloc_ctxt *dealloc) 4827 struct ocfs2_cached_dealloc_ctxt *dealloc)
@@ -4446,10 +4847,14 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
4446 /* 4847 /*
4447 * XXX: This should be fixed up so that we just re-insert the 4848 * XXX: This should be fixed up so that we just re-insert the
4448 * next extent records. 4849 * next extent records.
4850 *
4851 * XXX: This is a hack on the extent tree, maybe it should be
4852 * an op?
4449 */ 4853 */
4450 ocfs2_extent_map_trunc(inode, 0); 4854 if (et->et_ops == &ocfs2_dinode_et_ops)
4855 ocfs2_extent_map_trunc(inode, 0);
4451 4856
4452 left_path = ocfs2_new_inode_path(di_bh); 4857 left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
4453 if (!left_path) { 4858 if (!left_path) {
4454 ret = -ENOMEM; 4859 ret = -ENOMEM;
4455 mlog_errno(ret); 4860 mlog_errno(ret);
@@ -4480,8 +4885,9 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh,
4480 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; 4885 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags;
4481 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; 4886 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN;
4482 4887
4483 ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, 4888 ret = __ocfs2_mark_extent_written(inode, et, handle, left_path,
4484 index, &split_rec, meta_ac, dealloc); 4889 index, &split_rec, meta_ac,
4890 dealloc);
4485 if (ret) 4891 if (ret)
4486 mlog_errno(ret); 4892 mlog_errno(ret);
4487 4893
@@ -4490,13 +4896,12 @@ out:
4490 return ret; 4896 return ret;
4491} 4897}
4492 4898
4493static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, 4899static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
4494 handle_t *handle, struct ocfs2_path *path, 4900 handle_t *handle, struct ocfs2_path *path,
4495 int index, u32 new_range, 4901 int index, u32 new_range,
4496 struct ocfs2_alloc_context *meta_ac) 4902 struct ocfs2_alloc_context *meta_ac)
4497{ 4903{
4498 int ret, depth, credits = handle->h_buffer_credits; 4904 int ret, depth, credits = handle->h_buffer_credits;
4499 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4500 struct buffer_head *last_eb_bh = NULL; 4905 struct buffer_head *last_eb_bh = NULL;
4501 struct ocfs2_extent_block *eb; 4906 struct ocfs2_extent_block *eb;
4502 struct ocfs2_extent_list *rightmost_el, *el; 4907 struct ocfs2_extent_list *rightmost_el, *el;
@@ -4513,9 +4918,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4513 4918
4514 depth = path->p_tree_depth; 4919 depth = path->p_tree_depth;
4515 if (depth > 0) { 4920 if (depth > 0) {
4516 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 4921 ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et),
4517 le64_to_cpu(di->i_last_eb_blk), 4922 &last_eb_bh);
4518 &last_eb_bh, OCFS2_BH_CACHED, inode);
4519 if (ret < 0) { 4923 if (ret < 0) {
4520 mlog_errno(ret); 4924 mlog_errno(ret);
4521 goto out; 4925 goto out;
@@ -4526,7 +4930,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4526 } else 4930 } else
4527 rightmost_el = path_leaf_el(path); 4931 rightmost_el = path_leaf_el(path);
4528 4932
4529 credits += path->p_tree_depth + ocfs2_extend_meta_needed(di); 4933 credits += path->p_tree_depth +
4934 ocfs2_extend_meta_needed(et->et_root_el);
4530 ret = ocfs2_extend_trans(handle, credits); 4935 ret = ocfs2_extend_trans(handle, credits);
4531 if (ret) { 4936 if (ret) {
4532 mlog_errno(ret); 4937 mlog_errno(ret);
@@ -4535,7 +4940,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4535 4940
4536 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4941 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4537 le16_to_cpu(rightmost_el->l_count)) { 4942 le16_to_cpu(rightmost_el->l_count)) {
4538 ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, 4943 ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh,
4539 meta_ac); 4944 meta_ac);
4540 if (ret) { 4945 if (ret) {
4541 mlog_errno(ret); 4946 mlog_errno(ret);
@@ -4549,7 +4954,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh,
4549 insert.ins_split = SPLIT_RIGHT; 4954 insert.ins_split = SPLIT_RIGHT;
4550 insert.ins_tree_depth = depth; 4955 insert.ins_tree_depth = depth;
4551 4956
4552 ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); 4957 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert);
4553 if (ret) 4958 if (ret)
4554 mlog_errno(ret); 4959 mlog_errno(ret);
4555 4960
@@ -4561,7 +4966,8 @@ out:
4561static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, 4966static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
4562 struct ocfs2_path *path, int index, 4967 struct ocfs2_path *path, int index,
4563 struct ocfs2_cached_dealloc_ctxt *dealloc, 4968 struct ocfs2_cached_dealloc_ctxt *dealloc,
4564 u32 cpos, u32 len) 4969 u32 cpos, u32 len,
4970 struct ocfs2_extent_tree *et)
4565{ 4971{
4566 int ret; 4972 int ret;
4567 u32 left_cpos, rec_range, trunc_range; 4973 u32 left_cpos, rec_range, trunc_range;
@@ -4573,7 +4979,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
4573 struct ocfs2_extent_block *eb; 4979 struct ocfs2_extent_block *eb;
4574 4980
4575 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { 4981 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
4576 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); 4982 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
4577 if (ret) { 4983 if (ret) {
4578 mlog_errno(ret); 4984 mlog_errno(ret);
4579 goto out; 4985 goto out;
@@ -4704,7 +5110,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
4704 5110
4705 ocfs2_journal_dirty(handle, path_leaf_bh(path)); 5111 ocfs2_journal_dirty(handle, path_leaf_bh(path));
4706 5112
4707 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); 5113 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et);
4708 if (ret) { 5114 if (ret) {
4709 mlog_errno(ret); 5115 mlog_errno(ret);
4710 goto out; 5116 goto out;
@@ -4715,7 +5121,8 @@ out:
4715 return ret; 5121 return ret;
4716} 5122}
4717 5123
4718int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, 5124int ocfs2_remove_extent(struct inode *inode,
5125 struct ocfs2_extent_tree *et,
4719 u32 cpos, u32 len, handle_t *handle, 5126 u32 cpos, u32 len, handle_t *handle,
4720 struct ocfs2_alloc_context *meta_ac, 5127 struct ocfs2_alloc_context *meta_ac,
4721 struct ocfs2_cached_dealloc_ctxt *dealloc) 5128 struct ocfs2_cached_dealloc_ctxt *dealloc)
@@ -4724,11 +5131,11 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
4724 u32 rec_range, trunc_range; 5131 u32 rec_range, trunc_range;
4725 struct ocfs2_extent_rec *rec; 5132 struct ocfs2_extent_rec *rec;
4726 struct ocfs2_extent_list *el; 5133 struct ocfs2_extent_list *el;
4727 struct ocfs2_path *path; 5134 struct ocfs2_path *path = NULL;
4728 5135
4729 ocfs2_extent_map_trunc(inode, 0); 5136 ocfs2_extent_map_trunc(inode, 0);
4730 5137
4731 path = ocfs2_new_inode_path(di_bh); 5138 path = ocfs2_new_path(et->et_root_bh, et->et_root_el);
4732 if (!path) { 5139 if (!path) {
4733 ret = -ENOMEM; 5140 ret = -ENOMEM;
4734 mlog_errno(ret); 5141 mlog_errno(ret);
@@ -4781,13 +5188,13 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
4781 5188
4782 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { 5189 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
4783 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5190 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
4784 cpos, len); 5191 cpos, len, et);
4785 if (ret) { 5192 if (ret) {
4786 mlog_errno(ret); 5193 mlog_errno(ret);
4787 goto out; 5194 goto out;
4788 } 5195 }
4789 } else { 5196 } else {
4790 ret = ocfs2_split_tree(inode, di_bh, handle, path, index, 5197 ret = ocfs2_split_tree(inode, et, handle, path, index,
4791 trunc_range, meta_ac); 5198 trunc_range, meta_ac);
4792 if (ret) { 5199 if (ret) {
4793 mlog_errno(ret); 5200 mlog_errno(ret);
@@ -4836,7 +5243,7 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh,
4836 } 5243 }
4837 5244
4838 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5245 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc,
4839 cpos, len); 5246 cpos, len, et);
4840 if (ret) { 5247 if (ret) {
4841 mlog_errno(ret); 5248 mlog_errno(ret);
4842 goto out; 5249 goto out;
@@ -5179,8 +5586,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb,
5179 goto bail; 5586 goto bail;
5180 } 5587 }
5181 5588
5182 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 5589 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
5183 OCFS2_BH_CACHED, inode);
5184 if (status < 0) { 5590 if (status < 0) {
5185 iput(inode); 5591 iput(inode);
5186 mlog_errno(status); 5592 mlog_errno(status);
@@ -5255,8 +5661,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
5255bail: 5661bail:
5256 if (tl_inode) 5662 if (tl_inode)
5257 iput(tl_inode); 5663 iput(tl_inode);
5258 if (tl_bh) 5664 brelse(tl_bh);
5259 brelse(tl_bh);
5260 5665
5261 if (status < 0 && (*tl_copy)) { 5666 if (status < 0 && (*tl_copy)) {
5262 kfree(*tl_copy); 5667 kfree(*tl_copy);
@@ -5999,20 +6404,13 @@ bail:
5999 return status; 6404 return status;
6000} 6405}
6001 6406
6002static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) 6407static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
6003{ 6408{
6004 set_buffer_uptodate(bh); 6409 set_buffer_uptodate(bh);
6005 mark_buffer_dirty(bh); 6410 mark_buffer_dirty(bh);
6006 return 0; 6411 return 0;
6007} 6412}
6008 6413
6009static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh)
6010{
6011 set_buffer_uptodate(bh);
6012 mark_buffer_dirty(bh);
6013 return ocfs2_journal_dirty_data(handle, bh);
6014}
6015
6016static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, 6414static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6017 unsigned int from, unsigned int to, 6415 unsigned int from, unsigned int to,
6018 struct page *page, int zero, u64 *phys) 6416 struct page *page, int zero, u64 *phys)
@@ -6031,17 +6429,18 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6031 * here if they aren't - ocfs2_map_page_blocks() 6429 * here if they aren't - ocfs2_map_page_blocks()
6032 * might've skipped some 6430 * might've skipped some
6033 */ 6431 */
6034 if (ocfs2_should_order_data(inode)) { 6432 ret = walk_page_buffers(handle, page_buffers(page),
6035 ret = walk_page_buffers(handle, 6433 from, to, &partial,
6036 page_buffers(page), 6434 ocfs2_zero_func);
6037 from, to, &partial, 6435 if (ret < 0)
6038 ocfs2_ordered_zero_func); 6436 mlog_errno(ret);
6039 if (ret < 0) 6437 else if (ocfs2_should_order_data(inode)) {
6040 mlog_errno(ret); 6438 ret = ocfs2_jbd2_file_inode(handle, inode);
6041 } else { 6439#ifdef CONFIG_OCFS2_COMPAT_JBD
6042 ret = walk_page_buffers(handle, page_buffers(page), 6440 ret = walk_page_buffers(handle, page_buffers(page),
6043 from, to, &partial, 6441 from, to, &partial,
6044 ocfs2_writeback_zero_func); 6442 ocfs2_journal_dirty_data);
6443#endif
6045 if (ret < 0) 6444 if (ret < 0)
6046 mlog_errno(ret); 6445 mlog_errno(ret);
6047 } 6446 }
@@ -6206,20 +6605,29 @@ out:
6206 return ret; 6605 return ret;
6207} 6606}
6208 6607
6209static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di) 6608static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode,
6609 struct ocfs2_dinode *di)
6210{ 6610{
6211 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; 6611 unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits;
6612 unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
6212 6613
6213 memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2)); 6614 if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
6615 memset(&di->id2, 0, blocksize -
6616 offsetof(struct ocfs2_dinode, id2) -
6617 xattrsize);
6618 else
6619 memset(&di->id2, 0, blocksize -
6620 offsetof(struct ocfs2_dinode, id2));
6214} 6621}
6215 6622
6216void ocfs2_dinode_new_extent_list(struct inode *inode, 6623void ocfs2_dinode_new_extent_list(struct inode *inode,
6217 struct ocfs2_dinode *di) 6624 struct ocfs2_dinode *di)
6218{ 6625{
6219 ocfs2_zero_dinode_id2(inode, di); 6626 ocfs2_zero_dinode_id2_with_xattr(inode, di);
6220 di->id2.i_list.l_tree_depth = 0; 6627 di->id2.i_list.l_tree_depth = 0;
6221 di->id2.i_list.l_next_free_rec = 0; 6628 di->id2.i_list.l_next_free_rec = 0;
6222 di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb)); 6629 di->id2.i_list.l_count = cpu_to_le16(
6630 ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di));
6223} 6631}
6224 6632
6225void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) 6633void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
@@ -6236,9 +6644,10 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di)
6236 * We clear the entire i_data structure here so that all 6644 * We clear the entire i_data structure here so that all
6237 * fields can be properly initialized. 6645 * fields can be properly initialized.
6238 */ 6646 */
6239 ocfs2_zero_dinode_id2(inode, di); 6647 ocfs2_zero_dinode_id2_with_xattr(inode, di);
6240 6648
6241 idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb)); 6649 idata->id_count = cpu_to_le16(
6650 ocfs2_max_inline_data_with_xattr(inode->i_sb, di));
6242} 6651}
6243 6652
6244int ocfs2_convert_inline_data_to_extents(struct inode *inode, 6653int ocfs2_convert_inline_data_to_extents(struct inode *inode,
@@ -6253,6 +6662,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6253 struct ocfs2_alloc_context *data_ac = NULL; 6662 struct ocfs2_alloc_context *data_ac = NULL;
6254 struct page **pages = NULL; 6663 struct page **pages = NULL;
6255 loff_t end = osb->s_clustersize; 6664 loff_t end = osb->s_clustersize;
6665 struct ocfs2_extent_tree et;
6256 6666
6257 has_data = i_size_read(inode) ? 1 : 0; 6667 has_data = i_size_read(inode) ? 1 : 0;
6258 6668
@@ -6352,7 +6762,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
6352 * this proves to be false, we could always re-build 6762 * this proves to be false, we could always re-build
6353 * the in-inode data from our pages. 6763 * the in-inode data from our pages.
6354 */ 6764 */
6355 ret = ocfs2_insert_extent(osb, handle, inode, di_bh, 6765 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
6766 ret = ocfs2_insert_extent(osb, handle, inode, &et,
6356 0, block, 1, 0, NULL); 6767 0, block, 1, 0, NULL);
6357 if (ret) { 6768 if (ret) {
6358 mlog_errno(ret); 6769 mlog_errno(ret);
@@ -6395,13 +6806,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
6395 handle_t *handle = NULL; 6806 handle_t *handle = NULL;
6396 struct inode *tl_inode = osb->osb_tl_inode; 6807 struct inode *tl_inode = osb->osb_tl_inode;
6397 struct ocfs2_path *path = NULL; 6808 struct ocfs2_path *path = NULL;
6809 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6398 6810
6399 mlog_entry_void(); 6811 mlog_entry_void();
6400 6812
6401 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, 6813 new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb,
6402 i_size_read(inode)); 6814 i_size_read(inode));
6403 6815
6404 path = ocfs2_new_inode_path(fe_bh); 6816 path = ocfs2_new_path(fe_bh, &di->id2.i_list);
6405 if (!path) { 6817 if (!path) {
6406 status = -ENOMEM; 6818 status = -ENOMEM;
6407 mlog_errno(status); 6819 mlog_errno(status);
@@ -6572,8 +6984,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
6572 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 6984 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
6573 6985
6574 if (fe->id2.i_list.l_tree_depth) { 6986 if (fe->id2.i_list.l_tree_depth) {
6575 status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), 6987 status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk),
6576 &last_eb_bh, OCFS2_BH_CACHED, inode); 6988 &last_eb_bh);
6577 if (status < 0) { 6989 if (status < 0) {
6578 mlog_errno(status); 6990 mlog_errno(status);
6579 goto bail; 6991 goto bail;
@@ -6686,8 +7098,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc)
6686 mlog(ML_NOTICE, 7098 mlog(ML_NOTICE,
6687 "Truncate completion has non-empty dealloc context\n"); 7099 "Truncate completion has non-empty dealloc context\n");
6688 7100
6689 if (tc->tc_last_eb_bh) 7101 brelse(tc->tc_last_eb_bh);
6690 brelse(tc->tc_last_eb_bh);
6691 7102
6692 kfree(tc); 7103 kfree(tc);
6693} 7104}
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 60cd3d59230c..70257c84cfbe 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -26,30 +26,102 @@
26#ifndef OCFS2_ALLOC_H 26#ifndef OCFS2_ALLOC_H
27#define OCFS2_ALLOC_H 27#define OCFS2_ALLOC_H
28 28
29
30/*
31 * For xattr tree leaf, we limit the leaf byte size to be 64K.
32 */
33#define OCFS2_MAX_XATTR_TREE_LEAF_SIZE 65536
34
35/*
36 * ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract
37 * the b-tree operations in ocfs2. Now all the b-tree operations are not
38 * limited to ocfs2_dinode only. Any data which need to allocate clusters
39 * to store can use b-tree. And it only needs to implement its ocfs2_extent_tree
40 * and operation.
41 *
42 * ocfs2_extent_tree becomes the first-class object for extent tree
43 * manipulation. Callers of the alloc.c code need to fill it via one of
44 * the ocfs2_init_*_extent_tree() operations below.
45 *
46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
48 * functions.
49 * ocfs2_extent_tree_operations abstract the normal operations we do for
50 * the root of extent b-tree.
51 */
52struct ocfs2_extent_tree_operations;
53struct ocfs2_extent_tree {
54 struct ocfs2_extent_tree_operations *et_ops;
55 struct buffer_head *et_root_bh;
56 struct ocfs2_extent_list *et_root_el;
57 void *et_object;
58 unsigned int et_max_leaf_clusters;
59};
60
61/*
62 * ocfs2_init_*_extent_tree() will fill an ocfs2_extent_tree from the
63 * specified object buffer.
64 */
65void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
66 struct inode *inode,
67 struct buffer_head *bh);
68void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
69 struct inode *inode,
70 struct buffer_head *bh);
71void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
72 struct inode *inode,
73 struct buffer_head *bh,
74 struct ocfs2_xattr_value_root *xv);
75
29struct ocfs2_alloc_context; 76struct ocfs2_alloc_context;
30int ocfs2_insert_extent(struct ocfs2_super *osb, 77int ocfs2_insert_extent(struct ocfs2_super *osb,
31 handle_t *handle, 78 handle_t *handle,
32 struct inode *inode, 79 struct inode *inode,
33 struct buffer_head *fe_bh, 80 struct ocfs2_extent_tree *et,
34 u32 cpos, 81 u32 cpos,
35 u64 start_blk, 82 u64 start_blk,
36 u32 new_clusters, 83 u32 new_clusters,
37 u8 flags, 84 u8 flags,
38 struct ocfs2_alloc_context *meta_ac); 85 struct ocfs2_alloc_context *meta_ac);
86
87enum ocfs2_alloc_restarted {
88 RESTART_NONE = 0,
89 RESTART_TRANS,
90 RESTART_META
91};
92int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
93 struct inode *inode,
94 u32 *logical_offset,
95 u32 clusters_to_add,
96 int mark_unwritten,
97 struct ocfs2_extent_tree *et,
98 handle_t *handle,
99 struct ocfs2_alloc_context *data_ac,
100 struct ocfs2_alloc_context *meta_ac,
101 enum ocfs2_alloc_restarted *reason_ret);
39struct ocfs2_cached_dealloc_ctxt; 102struct ocfs2_cached_dealloc_ctxt;
40int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, 103int ocfs2_mark_extent_written(struct inode *inode,
104 struct ocfs2_extent_tree *et,
41 handle_t *handle, u32 cpos, u32 len, u32 phys, 105 handle_t *handle, u32 cpos, u32 len, u32 phys,
42 struct ocfs2_alloc_context *meta_ac, 106 struct ocfs2_alloc_context *meta_ac,
43 struct ocfs2_cached_dealloc_ctxt *dealloc); 107 struct ocfs2_cached_dealloc_ctxt *dealloc);
44int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, 108int ocfs2_remove_extent(struct inode *inode,
109 struct ocfs2_extent_tree *et,
45 u32 cpos, u32 len, handle_t *handle, 110 u32 cpos, u32 len, handle_t *handle,
46 struct ocfs2_alloc_context *meta_ac, 111 struct ocfs2_alloc_context *meta_ac,
47 struct ocfs2_cached_dealloc_ctxt *dealloc); 112 struct ocfs2_cached_dealloc_ctxt *dealloc);
48int ocfs2_num_free_extents(struct ocfs2_super *osb, 113int ocfs2_num_free_extents(struct ocfs2_super *osb,
49 struct inode *inode, 114 struct inode *inode,
50 struct ocfs2_dinode *fe); 115 struct ocfs2_extent_tree *et);
51/* how many new metadata chunks would an allocation need at maximum? */ 116
52static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) 117/*
118 * how many new metadata chunks would an allocation need at maximum?
119 *
120 * Please note that the caller must make sure that root_el is the root
121 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
122 * the result may be wrong.
123 */
124static inline int ocfs2_extend_meta_needed(struct ocfs2_extent_list *root_el)
53{ 125{
54 /* 126 /*
55 * Rather than do all the work of determining how much we need 127 * Rather than do all the work of determining how much we need
@@ -59,7 +131,7 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe)
59 * new tree_depth==0 extent_block, and one block at the new 131 * new tree_depth==0 extent_block, and one block at the new
60 * top-of-the tree. 132 * top-of-the tree.
61 */ 133 */
62 return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; 134 return le16_to_cpu(root_el->l_tree_depth) + 2;
63} 135}
64 136
65void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); 137void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index a53da1466277..c22543b33420 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -68,9 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
68 goto bail; 68 goto bail;
69 } 69 }
70 70
71 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 71 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
72 OCFS2_I(inode)->ip_blkno,
73 &bh, OCFS2_BH_CACHED, inode);
74 if (status < 0) { 72 if (status < 0) {
75 mlog_errno(status); 73 mlog_errno(status);
76 goto bail; 74 goto bail;
@@ -128,8 +126,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock,
128 err = 0; 126 err = 0;
129 127
130bail: 128bail:
131 if (bh) 129 brelse(bh);
132 brelse(bh);
133 130
134 mlog_exit(err); 131 mlog_exit(err);
135 return err; 132 return err;
@@ -261,13 +258,11 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page)
261{ 258{
262 int ret; 259 int ret;
263 struct buffer_head *di_bh = NULL; 260 struct buffer_head *di_bh = NULL;
264 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
265 261
266 BUG_ON(!PageLocked(page)); 262 BUG_ON(!PageLocked(page));
267 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); 263 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL));
268 264
269 ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, 265 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
270 OCFS2_BH_CACHED, inode);
271 if (ret) { 266 if (ret) {
272 mlog_errno(ret); 267 mlog_errno(ret);
273 goto out; 268 goto out;
@@ -485,11 +480,14 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
485 } 480 }
486 481
487 if (ocfs2_should_order_data(inode)) { 482 if (ocfs2_should_order_data(inode)) {
483 ret = ocfs2_jbd2_file_inode(handle, inode);
484#ifdef CONFIG_OCFS2_COMPAT_JBD
488 ret = walk_page_buffers(handle, 485 ret = walk_page_buffers(handle,
489 page_buffers(page), 486 page_buffers(page),
490 from, to, NULL, 487 from, to, NULL,
491 ocfs2_journal_dirty_data); 488 ocfs2_journal_dirty_data);
492 if (ret < 0) 489#endif
490 if (ret < 0)
493 mlog_errno(ret); 491 mlog_errno(ret);
494 } 492 }
495out: 493out:
@@ -669,7 +667,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset)
669{ 667{
670 journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; 668 journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal;
671 669
672 journal_invalidatepage(journal, page, offset); 670 jbd2_journal_invalidatepage(journal, page, offset);
673} 671}
674 672
675static int ocfs2_releasepage(struct page *page, gfp_t wait) 673static int ocfs2_releasepage(struct page *page, gfp_t wait)
@@ -678,7 +676,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
678 676
679 if (!page_has_buffers(page)) 677 if (!page_has_buffers(page))
680 return 0; 678 return 0;
681 return journal_try_to_free_buffers(journal, page, wait); 679 return jbd2_journal_try_to_free_buffers(journal, page, wait);
682} 680}
683 681
684static ssize_t ocfs2_direct_IO(int rw, 682static ssize_t ocfs2_direct_IO(int rw,
@@ -1074,11 +1072,15 @@ static void ocfs2_write_failure(struct inode *inode,
1074 tmppage = wc->w_pages[i]; 1072 tmppage = wc->w_pages[i];
1075 1073
1076 if (page_has_buffers(tmppage)) { 1074 if (page_has_buffers(tmppage)) {
1077 if (ocfs2_should_order_data(inode)) 1075 if (ocfs2_should_order_data(inode)) {
1076 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1077#ifdef CONFIG_OCFS2_COMPAT_JBD
1078 walk_page_buffers(wc->w_handle, 1078 walk_page_buffers(wc->w_handle,
1079 page_buffers(tmppage), 1079 page_buffers(tmppage),
1080 from, to, NULL, 1080 from, to, NULL,
1081 ocfs2_journal_dirty_data); 1081 ocfs2_journal_dirty_data);
1082#endif
1083 }
1082 1084
1083 block_commit_write(tmppage, from, to); 1085 block_commit_write(tmppage, from, to);
1084 } 1086 }
@@ -1242,6 +1244,7 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1242 int ret, i, new, should_zero = 0; 1244 int ret, i, new, should_zero = 0;
1243 u64 v_blkno, p_blkno; 1245 u64 v_blkno, p_blkno;
1244 struct inode *inode = mapping->host; 1246 struct inode *inode = mapping->host;
1247 struct ocfs2_extent_tree et;
1245 1248
1246 new = phys == 0 ? 1 : 0; 1249 new = phys == 0 ? 1 : 0;
1247 if (new || unwritten) 1250 if (new || unwritten)
@@ -1255,10 +1258,10 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1255 * any additional semaphores or cluster locks. 1258 * any additional semaphores or cluster locks.
1256 */ 1259 */
1257 tmp_pos = cpos; 1260 tmp_pos = cpos;
1258 ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, 1261 ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
1259 &tmp_pos, 1, 0, wc->w_di_bh, 1262 &tmp_pos, 1, 0, wc->w_di_bh,
1260 wc->w_handle, data_ac, 1263 wc->w_handle, data_ac,
1261 meta_ac, NULL); 1264 meta_ac, NULL);
1262 /* 1265 /*
1263 * This shouldn't happen because we must have already 1266 * This shouldn't happen because we must have already
1264 * calculated the correct meta data allocation required. The 1267 * calculated the correct meta data allocation required. The
@@ -1276,7 +1279,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1276 goto out; 1279 goto out;
1277 } 1280 }
1278 } else if (unwritten) { 1281 } else if (unwritten) {
1279 ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, 1282 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
1283 ret = ocfs2_mark_extent_written(inode, &et,
1280 wc->w_handle, cpos, 1, phys, 1284 wc->w_handle, cpos, 1, phys,
1281 meta_ac, &wc->w_dealloc); 1285 meta_ac, &wc->w_dealloc);
1282 if (ret < 0) { 1286 if (ret < 0) {
@@ -1665,6 +1669,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1665 struct ocfs2_alloc_context *data_ac = NULL; 1669 struct ocfs2_alloc_context *data_ac = NULL;
1666 struct ocfs2_alloc_context *meta_ac = NULL; 1670 struct ocfs2_alloc_context *meta_ac = NULL;
1667 handle_t *handle; 1671 handle_t *handle;
1672 struct ocfs2_extent_tree et;
1668 1673
1669 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); 1674 ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
1670 if (ret) { 1675 if (ret) {
@@ -1712,14 +1717,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1712 * ocfs2_lock_allocators(). It greatly over-estimates 1717 * ocfs2_lock_allocators(). It greatly over-estimates
1713 * the work to be done. 1718 * the work to be done.
1714 */ 1719 */
1715 ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, 1720 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u,"
1716 extents_to_split, &data_ac, &meta_ac); 1721 " clusters_to_add = %u, extents_to_split = %u\n",
1722 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1723 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
1724 clusters_to_alloc, extents_to_split);
1725
1726 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh);
1727 ret = ocfs2_lock_allocators(inode, &et,
1728 clusters_to_alloc, extents_to_split,
1729 &data_ac, &meta_ac);
1717 if (ret) { 1730 if (ret) {
1718 mlog_errno(ret); 1731 mlog_errno(ret);
1719 goto out; 1732 goto out;
1720 } 1733 }
1721 1734
1722 credits = ocfs2_calc_extend_credits(inode->i_sb, di, 1735 credits = ocfs2_calc_extend_credits(inode->i_sb,
1736 &di->id2.i_list,
1723 clusters_to_alloc); 1737 clusters_to_alloc);
1724 1738
1725 } 1739 }
@@ -1905,11 +1919,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
1905 } 1919 }
1906 1920
1907 if (page_has_buffers(tmppage)) { 1921 if (page_has_buffers(tmppage)) {
1908 if (ocfs2_should_order_data(inode)) 1922 if (ocfs2_should_order_data(inode)) {
1923 ocfs2_jbd2_file_inode(wc->w_handle, inode);
1924#ifdef CONFIG_OCFS2_COMPAT_JBD
1909 walk_page_buffers(wc->w_handle, 1925 walk_page_buffers(wc->w_handle,
1910 page_buffers(tmppage), 1926 page_buffers(tmppage),
1911 from, to, NULL, 1927 from, to, NULL,
1912 ocfs2_journal_dirty_data); 1928 ocfs2_journal_dirty_data);
1929#endif
1930 }
1913 block_commit_write(tmppage, from, to); 1931 block_commit_write(tmppage, from, to);
1914 } 1932 }
1915 } 1933 }
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index f136639f5b41..7e947c672469 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -66,7 +66,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
66 /* remove from dirty list before I/O. */ 66 /* remove from dirty list before I/O. */
67 clear_buffer_dirty(bh); 67 clear_buffer_dirty(bh);
68 68
69 get_bh(bh); /* for end_buffer_write_sync() */ 69 get_bh(bh); /* for end_buffer_write_sync() */
70 bh->b_end_io = end_buffer_write_sync; 70 bh->b_end_io = end_buffer_write_sync;
71 submit_bh(WRITE, bh); 71 submit_bh(WRITE, bh);
72 72
@@ -88,22 +88,103 @@ out:
88 return ret; 88 return ret;
89} 89}
90 90
91int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, 91int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
92 struct buffer_head *bhs[], int flags, 92 unsigned int nr, struct buffer_head *bhs[])
93 struct inode *inode) 93{
94 int status = 0;
95 unsigned int i;
96 struct buffer_head *bh;
97
98 if (!nr) {
99 mlog(ML_BH_IO, "No buffers will be read!\n");
100 goto bail;
101 }
102
103 for (i = 0 ; i < nr ; i++) {
104 if (bhs[i] == NULL) {
105 bhs[i] = sb_getblk(osb->sb, block++);
106 if (bhs[i] == NULL) {
107 status = -EIO;
108 mlog_errno(status);
109 goto bail;
110 }
111 }
112 bh = bhs[i];
113
114 if (buffer_jbd(bh)) {
115 mlog(ML_ERROR,
116 "trying to sync read a jbd "
117 "managed bh (blocknr = %llu), skipping\n",
118 (unsigned long long)bh->b_blocknr);
119 continue;
120 }
121
122 if (buffer_dirty(bh)) {
123 /* This should probably be a BUG, or
124 * at least return an error. */
125 mlog(ML_ERROR,
126 "trying to sync read a dirty "
127 "buffer! (blocknr = %llu), skipping\n",
128 (unsigned long long)bh->b_blocknr);
129 continue;
130 }
131
132 lock_buffer(bh);
133 if (buffer_jbd(bh)) {
134 mlog(ML_ERROR,
135 "block %llu had the JBD bit set "
136 "while I was in lock_buffer!",
137 (unsigned long long)bh->b_blocknr);
138 BUG();
139 }
140
141 clear_buffer_uptodate(bh);
142 get_bh(bh); /* for end_buffer_read_sync() */
143 bh->b_end_io = end_buffer_read_sync;
144 submit_bh(READ, bh);
145 }
146
147 for (i = nr; i > 0; i--) {
148 bh = bhs[i - 1];
149
150 if (buffer_jbd(bh)) {
151 mlog(ML_ERROR,
152 "the journal got the buffer while it was "
153 "locked for io! (blocknr = %llu)\n",
154 (unsigned long long)bh->b_blocknr);
155 BUG();
156 }
157
158 wait_on_buffer(bh);
159 if (!buffer_uptodate(bh)) {
160 /* Status won't be cleared from here on out,
161 * so we can safely record this and loop back
162 * to cleanup the other buffers. */
163 status = -EIO;
164 put_bh(bh);
165 bhs[i - 1] = NULL;
166 }
167 }
168
169bail:
170 return status;
171}
172
173int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
174 struct buffer_head *bhs[], int flags)
94{ 175{
95 int status = 0; 176 int status = 0;
96 struct super_block *sb;
97 int i, ignore_cache = 0; 177 int i, ignore_cache = 0;
98 struct buffer_head *bh; 178 struct buffer_head *bh;
99 179
100 mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", 180 mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
101 (unsigned long long)block, nr, flags, inode); 181 inode, (unsigned long long)block, nr, flags);
102 182
183 BUG_ON(!inode);
103 BUG_ON((flags & OCFS2_BH_READAHEAD) && 184 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
104 (!inode || !(flags & OCFS2_BH_CACHED))); 185 (flags & OCFS2_BH_IGNORE_CACHE));
105 186
106 if (osb == NULL || osb->sb == NULL || bhs == NULL) { 187 if (bhs == NULL) {
107 status = -EINVAL; 188 status = -EINVAL;
108 mlog_errno(status); 189 mlog_errno(status);
109 goto bail; 190 goto bail;
@@ -122,26 +203,19 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
122 goto bail; 203 goto bail;
123 } 204 }
124 205
125 sb = osb->sb; 206 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
126
127 if (flags & OCFS2_BH_CACHED && !inode)
128 flags &= ~OCFS2_BH_CACHED;
129
130 if (inode)
131 mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
132 for (i = 0 ; i < nr ; i++) { 207 for (i = 0 ; i < nr ; i++) {
133 if (bhs[i] == NULL) { 208 if (bhs[i] == NULL) {
134 bhs[i] = sb_getblk(sb, block++); 209 bhs[i] = sb_getblk(inode->i_sb, block++);
135 if (bhs[i] == NULL) { 210 if (bhs[i] == NULL) {
136 if (inode) 211 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
137 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
138 status = -EIO; 212 status = -EIO;
139 mlog_errno(status); 213 mlog_errno(status);
140 goto bail; 214 goto bail;
141 } 215 }
142 } 216 }
143 bh = bhs[i]; 217 bh = bhs[i];
144 ignore_cache = 0; 218 ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE);
145 219
146 /* There are three read-ahead cases here which we need to 220 /* There are three read-ahead cases here which we need to
147 * be concerned with. All three assume a buffer has 221 * be concerned with. All three assume a buffer has
@@ -167,26 +241,27 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
167 * before our is-it-in-flight check. 241 * before our is-it-in-flight check.
168 */ 242 */
169 243
170 if (flags & OCFS2_BH_CACHED && 244 if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) {
171 !ocfs2_buffer_uptodate(inode, bh)) {
172 mlog(ML_UPTODATE, 245 mlog(ML_UPTODATE,
173 "bh (%llu), inode %llu not uptodate\n", 246 "bh (%llu), inode %llu not uptodate\n",
174 (unsigned long long)bh->b_blocknr, 247 (unsigned long long)bh->b_blocknr,
175 (unsigned long long)OCFS2_I(inode)->ip_blkno); 248 (unsigned long long)OCFS2_I(inode)->ip_blkno);
249 /* We're using ignore_cache here to say
250 * "go to disk" */
176 ignore_cache = 1; 251 ignore_cache = 1;
177 } 252 }
178 253
179 /* XXX: Can we ever get this and *not* have the cached 254 /* XXX: Can we ever get this and *not* have the cached
180 * flag set? */ 255 * flag set? */
181 if (buffer_jbd(bh)) { 256 if (buffer_jbd(bh)) {
182 if (!(flags & OCFS2_BH_CACHED) || ignore_cache) 257 if (ignore_cache)
183 mlog(ML_BH_IO, "trying to sync read a jbd " 258 mlog(ML_BH_IO, "trying to sync read a jbd "
184 "managed bh (blocknr = %llu)\n", 259 "managed bh (blocknr = %llu)\n",
185 (unsigned long long)bh->b_blocknr); 260 (unsigned long long)bh->b_blocknr);
186 continue; 261 continue;
187 } 262 }
188 263
189 if (!(flags & OCFS2_BH_CACHED) || ignore_cache) { 264 if (ignore_cache) {
190 if (buffer_dirty(bh)) { 265 if (buffer_dirty(bh)) {
191 /* This should probably be a BUG, or 266 /* This should probably be a BUG, or
192 * at least return an error. */ 267 * at least return an error. */
@@ -221,7 +296,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
221 * previously read-ahead buffer may have 296 * previously read-ahead buffer may have
222 * completed I/O while we were waiting for the 297 * completed I/O while we were waiting for the
223 * buffer lock. */ 298 * buffer lock. */
224 if ((flags & OCFS2_BH_CACHED) 299 if (!(flags & OCFS2_BH_IGNORE_CACHE)
225 && !(flags & OCFS2_BH_READAHEAD) 300 && !(flags & OCFS2_BH_READAHEAD)
226 && ocfs2_buffer_uptodate(inode, bh)) { 301 && ocfs2_buffer_uptodate(inode, bh)) {
227 unlock_buffer(bh); 302 unlock_buffer(bh);
@@ -265,15 +340,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr,
265 /* Always set the buffer in the cache, even if it was 340 /* Always set the buffer in the cache, even if it was
266 * a forced read, or read-ahead which hasn't yet 341 * a forced read, or read-ahead which hasn't yet
267 * completed. */ 342 * completed. */
268 if (inode) 343 ocfs2_set_buffer_uptodate(inode, bh);
269 ocfs2_set_buffer_uptodate(inode, bh);
270 } 344 }
271 if (inode) 345 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
272 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
273 346
274 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 347 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
275 (unsigned long long)block, nr, 348 (unsigned long long)block, nr,
276 (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); 349 ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
350 flags);
277 351
278bail: 352bail:
279 353
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index c2e78614c3e5..75e1dcb1ade7 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -31,31 +31,29 @@
31void ocfs2_end_buffer_io_sync(struct buffer_head *bh, 31void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
32 int uptodate); 32 int uptodate);
33 33
34static inline int ocfs2_read_block(struct ocfs2_super *osb, 34static inline int ocfs2_read_block(struct inode *inode,
35 u64 off, 35 u64 off,
36 struct buffer_head **bh, 36 struct buffer_head **bh);
37 int flags,
38 struct inode *inode);
39 37
40int ocfs2_write_block(struct ocfs2_super *osb, 38int ocfs2_write_block(struct ocfs2_super *osb,
41 struct buffer_head *bh, 39 struct buffer_head *bh,
42 struct inode *inode); 40 struct inode *inode);
43int ocfs2_read_blocks(struct ocfs2_super *osb, 41int ocfs2_read_blocks(struct inode *inode,
44 u64 block, 42 u64 block,
45 int nr, 43 int nr,
46 struct buffer_head *bhs[], 44 struct buffer_head *bhs[],
47 int flags, 45 int flags);
48 struct inode *inode); 46int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
47 unsigned int nr, struct buffer_head *bhs[]);
49 48
50int ocfs2_write_super_or_backup(struct ocfs2_super *osb, 49int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
51 struct buffer_head *bh); 50 struct buffer_head *bh);
52 51
53#define OCFS2_BH_CACHED 1 52#define OCFS2_BH_IGNORE_CACHE 1
54#define OCFS2_BH_READAHEAD 8 53#define OCFS2_BH_READAHEAD 8
55 54
56static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, 55static inline int ocfs2_read_block(struct inode *inode, u64 off,
57 struct buffer_head **bh, int flags, 56 struct buffer_head **bh)
58 struct inode *inode)
59{ 57{
60 int status = 0; 58 int status = 0;
61 59
@@ -65,8 +63,7 @@ static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off,
65 goto bail; 63 goto bail;
66 } 64 }
67 65
68 status = ocfs2_read_blocks(osb, off, 1, bh, 66 status = ocfs2_read_blocks(inode, off, 1, bh, 0);
69 flags, inode);
70 67
71bail: 68bail:
72 return status; 69 return status;
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 23c732f27529..d8a0cb92cef6 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -109,6 +109,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
109 define_mask(CONN), 109 define_mask(CONN),
110 define_mask(QUORUM), 110 define_mask(QUORUM),
111 define_mask(EXPORT), 111 define_mask(EXPORT),
112 define_mask(XATTR),
112 define_mask(ERROR), 113 define_mask(ERROR),
113 define_mask(NOTICE), 114 define_mask(NOTICE),
114 define_mask(KTHREAD), 115 define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 597e064bb94f..57670c680471 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -112,6 +112,7 @@
112#define ML_CONN 0x0000000004000000ULL /* net connection management */ 112#define ML_CONN 0x0000000004000000ULL /* net connection management */
113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ 113#define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */
114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ 114#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
115#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115/* bits that are infrequently given and frequently matched in the high word */ 116/* bits that are infrequently given and frequently matched in the high word */
116#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 117#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
117#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 118#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 9cce563fd627..026e6eb85187 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -82,6 +82,49 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
82 struct ocfs2_alloc_context *meta_ac, 82 struct ocfs2_alloc_context *meta_ac,
83 struct buffer_head **new_bh); 83 struct buffer_head **new_bh);
84 84
85static struct buffer_head *ocfs2_bread(struct inode *inode,
86 int block, int *err, int reada)
87{
88 struct buffer_head *bh = NULL;
89 int tmperr;
90 u64 p_blkno;
91 int readflags = 0;
92
93 if (reada)
94 readflags |= OCFS2_BH_READAHEAD;
95
96 if (((u64)block << inode->i_sb->s_blocksize_bits) >=
97 i_size_read(inode)) {
98 BUG_ON(!reada);
99 return NULL;
100 }
101
102 down_read(&OCFS2_I(inode)->ip_alloc_sem);
103 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
104 NULL);
105 up_read(&OCFS2_I(inode)->ip_alloc_sem);
106 if (tmperr < 0) {
107 mlog_errno(tmperr);
108 goto fail;
109 }
110
111 tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags);
112 if (tmperr < 0)
113 goto fail;
114
115 tmperr = 0;
116
117 *err = 0;
118 return bh;
119
120fail:
121 brelse(bh);
122 bh = NULL;
123
124 *err = -EIO;
125 return NULL;
126}
127
85/* 128/*
86 * bh passed here can be an inode block or a dir data block, depending 129 * bh passed here can be an inode block or a dir data block, depending
87 * on the inode inline data flag. 130 * on the inode inline data flag.
@@ -188,8 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name,
188 struct ocfs2_dinode *di; 231 struct ocfs2_dinode *di;
189 struct ocfs2_inline_data *data; 232 struct ocfs2_inline_data *data;
190 233
191 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, 234 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
192 &di_bh, OCFS2_BH_CACHED, dir);
193 if (ret) { 235 if (ret) {
194 mlog_errno(ret); 236 mlog_errno(ret);
195 goto out; 237 goto out;
@@ -260,14 +302,13 @@ restart:
260 } 302 }
261 if ((bh = bh_use[ra_ptr++]) == NULL) 303 if ((bh = bh_use[ra_ptr++]) == NULL)
262 goto next; 304 goto next;
263 wait_on_buffer(bh); 305 if (ocfs2_read_block(dir, block, &bh)) {
264 if (!buffer_uptodate(bh)) { 306 /* read error, skip block & hope for the best.
265 /* read error, skip block & hope for the best */ 307 * ocfs2_read_block() has released the bh. */
266 ocfs2_error(dir->i_sb, "reading directory %llu, " 308 ocfs2_error(dir->i_sb, "reading directory %llu, "
267 "offset %lu\n", 309 "offset %lu\n",
268 (unsigned long long)OCFS2_I(dir)->ip_blkno, 310 (unsigned long long)OCFS2_I(dir)->ip_blkno,
269 block); 311 block);
270 brelse(bh);
271 goto next; 312 goto next;
272 } 313 }
273 i = ocfs2_search_dirblock(bh, dir, name, namelen, 314 i = ocfs2_search_dirblock(bh, dir, name, namelen,
@@ -417,8 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle,
417 struct ocfs2_dinode *di; 458 struct ocfs2_dinode *di;
418 struct ocfs2_inline_data *data; 459 struct ocfs2_inline_data *data;
419 460
420 ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, 461 ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh);
421 &di_bh, OCFS2_BH_CACHED, dir);
422 if (ret) { 462 if (ret) {
423 mlog_errno(ret); 463 mlog_errno(ret);
424 goto out; 464 goto out;
@@ -596,8 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode,
596 struct ocfs2_inline_data *data; 636 struct ocfs2_inline_data *data;
597 struct ocfs2_dir_entry *de; 637 struct ocfs2_dir_entry *de;
598 638
599 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, 639 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
600 &di_bh, OCFS2_BH_CACHED, inode);
601 if (ret) { 640 if (ret) {
602 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", 641 mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
603 (unsigned long long)OCFS2_I(inode)->ip_blkno); 642 (unsigned long long)OCFS2_I(inode)->ip_blkno);
@@ -716,8 +755,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
716 for (i = ra_sectors >> (sb->s_blocksize_bits - 9); 755 for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
717 i > 0; i--) { 756 i > 0; i--) {
718 tmp = ocfs2_bread(inode, ++blk, &err, 1); 757 tmp = ocfs2_bread(inode, ++blk, &err, 1);
719 if (tmp) 758 brelse(tmp);
720 brelse(tmp);
721 } 759 }
722 last_ra_blk = blk; 760 last_ra_blk = blk;
723 ra_sectors = 8; 761 ra_sectors = 8;
@@ -899,10 +937,8 @@ int ocfs2_find_files_on_disk(const char *name,
899leave: 937leave:
900 if (status < 0) { 938 if (status < 0) {
901 *dirent = NULL; 939 *dirent = NULL;
902 if (*dirent_bh) { 940 brelse(*dirent_bh);
903 brelse(*dirent_bh); 941 *dirent_bh = NULL;
904 *dirent_bh = NULL;
905 }
906 } 942 }
907 943
908 mlog_exit(status); 944 mlog_exit(status);
@@ -951,8 +987,7 @@ int ocfs2_check_dir_for_entry(struct inode *dir,
951 987
952 ret = 0; 988 ret = 0;
953bail: 989bail:
954 if (dirent_bh) 990 brelse(dirent_bh);
955 brelse(dirent_bh);
956 991
957 mlog_exit(ret); 992 mlog_exit(ret);
958 return ret; 993 return ret;
@@ -1127,8 +1162,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
1127 1162
1128 status = 0; 1163 status = 0;
1129bail: 1164bail:
1130 if (new_bh) 1165 brelse(new_bh);
1131 brelse(new_bh);
1132 1166
1133 mlog_exit(status); 1167 mlog_exit(status);
1134 return status; 1168 return status;
@@ -1192,6 +1226,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1192 struct buffer_head *dirdata_bh = NULL; 1226 struct buffer_head *dirdata_bh = NULL;
1193 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1227 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1194 handle_t *handle; 1228 handle_t *handle;
1229 struct ocfs2_extent_tree et;
1230
1231 ocfs2_init_dinode_extent_tree(&et, dir, di_bh);
1195 1232
1196 alloc = ocfs2_clusters_for_bytes(sb, bytes); 1233 alloc = ocfs2_clusters_for_bytes(sb, bytes);
1197 1234
@@ -1305,8 +1342,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1305 * This should never fail as our extent list is empty and all 1342 * This should never fail as our extent list is empty and all
1306 * related blocks have been journaled already. 1343 * related blocks have been journaled already.
1307 */ 1344 */
1308 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0, 1345 ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len,
1309 NULL); 1346 0, NULL);
1310 if (ret) { 1347 if (ret) {
1311 mlog_errno(ret); 1348 mlog_errno(ret);
1312 goto out_commit; 1349 goto out_commit;
@@ -1337,8 +1374,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
1337 } 1374 }
1338 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); 1375 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
1339 1376
1340 ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno, 1377 ret = ocfs2_insert_extent(osb, handle, dir, &et, 1,
1341 len, 0, NULL); 1378 blkno, len, 0, NULL);
1342 if (ret) { 1379 if (ret) {
1343 mlog_errno(ret); 1380 mlog_errno(ret);
1344 goto out_commit; 1381 goto out_commit;
@@ -1383,9 +1420,9 @@ static int ocfs2_do_extend_dir(struct super_block *sb,
1383 if (extend) { 1420 if (extend) {
1384 u32 offset = OCFS2_I(dir)->ip_clusters; 1421 u32 offset = OCFS2_I(dir)->ip_clusters;
1385 1422
1386 status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, 1423 status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
1387 1, 0, parent_fe_bh, handle, 1424 1, 0, parent_fe_bh, handle,
1388 data_ac, meta_ac, NULL); 1425 data_ac, meta_ac, NULL);
1389 BUG_ON(status == -EAGAIN); 1426 BUG_ON(status == -EAGAIN);
1390 if (status < 0) { 1427 if (status < 0) {
1391 mlog_errno(status); 1428 mlog_errno(status);
@@ -1430,12 +1467,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1430 int credits, num_free_extents, drop_alloc_sem = 0; 1467 int credits, num_free_extents, drop_alloc_sem = 0;
1431 loff_t dir_i_size; 1468 loff_t dir_i_size;
1432 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; 1469 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1470 struct ocfs2_extent_list *el = &fe->id2.i_list;
1433 struct ocfs2_alloc_context *data_ac = NULL; 1471 struct ocfs2_alloc_context *data_ac = NULL;
1434 struct ocfs2_alloc_context *meta_ac = NULL; 1472 struct ocfs2_alloc_context *meta_ac = NULL;
1435 handle_t *handle = NULL; 1473 handle_t *handle = NULL;
1436 struct buffer_head *new_bh = NULL; 1474 struct buffer_head *new_bh = NULL;
1437 struct ocfs2_dir_entry * de; 1475 struct ocfs2_dir_entry * de;
1438 struct super_block *sb = osb->sb; 1476 struct super_block *sb = osb->sb;
1477 struct ocfs2_extent_tree et;
1439 1478
1440 mlog_entry_void(); 1479 mlog_entry_void();
1441 1480
@@ -1479,7 +1518,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1479 spin_lock(&OCFS2_I(dir)->ip_lock); 1518 spin_lock(&OCFS2_I(dir)->ip_lock);
1480 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { 1519 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
1481 spin_unlock(&OCFS2_I(dir)->ip_lock); 1520 spin_unlock(&OCFS2_I(dir)->ip_lock);
1482 num_free_extents = ocfs2_num_free_extents(osb, dir, fe); 1521 ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh);
1522 num_free_extents = ocfs2_num_free_extents(osb, dir, &et);
1483 if (num_free_extents < 0) { 1523 if (num_free_extents < 0) {
1484 status = num_free_extents; 1524 status = num_free_extents;
1485 mlog_errno(status); 1525 mlog_errno(status);
@@ -1487,7 +1527,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1487 } 1527 }
1488 1528
1489 if (!num_free_extents) { 1529 if (!num_free_extents) {
1490 status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); 1530 status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
1491 if (status < 0) { 1531 if (status < 0) {
1492 if (status != -ENOSPC) 1532 if (status != -ENOSPC)
1493 mlog_errno(status); 1533 mlog_errno(status);
@@ -1502,7 +1542,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
1502 goto bail; 1542 goto bail;
1503 } 1543 }
1504 1544
1505 credits = ocfs2_calc_extend_credits(sb, fe, 1); 1545 credits = ocfs2_calc_extend_credits(sb, el, 1);
1506 } else { 1546 } else {
1507 spin_unlock(&OCFS2_I(dir)->ip_lock); 1547 spin_unlock(&OCFS2_I(dir)->ip_lock);
1508 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; 1548 credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
@@ -1568,8 +1608,7 @@ bail:
1568 if (meta_ac) 1608 if (meta_ac)
1569 ocfs2_free_alloc_context(meta_ac); 1609 ocfs2_free_alloc_context(meta_ac);
1570 1610
1571 if (new_bh) 1611 brelse(new_bh);
1572 brelse(new_bh);
1573 1612
1574 mlog_exit(status); 1613 mlog_exit(status);
1575 return status; 1614 return status;
@@ -1696,8 +1735,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
1696 1735
1697 status = 0; 1736 status = 0;
1698bail: 1737bail:
1699 if (bh) 1738 brelse(bh);
1700 brelse(bh);
1701 1739
1702 mlog_exit(status); 1740 mlog_exit(status);
1703 return status; 1741 return status;
@@ -1756,7 +1794,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
1756 *ret_de_bh = bh; 1794 *ret_de_bh = bh;
1757 bh = NULL; 1795 bh = NULL;
1758out: 1796out:
1759 if (bh) 1797 brelse(bh);
1760 brelse(bh);
1761 return ret; 1798 return ret;
1762} 1799}
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index eae3d643a5e4..ec684426034b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2024,8 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2024 } else { 2024 } else {
2025 /* Boo, we have to go to disk. */ 2025 /* Boo, we have to go to disk. */
2026 /* read bh, cast, ocfs2_refresh_inode */ 2026 /* read bh, cast, ocfs2_refresh_inode */
2027 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, 2027 status = ocfs2_read_block(inode, oi->ip_blkno, bh);
2028 bh, OCFS2_BH_CACHED, inode);
2029 if (status < 0) { 2028 if (status < 0) {
2030 mlog_errno(status); 2029 mlog_errno(status);
2031 goto bail_refresh; 2030 goto bail_refresh;
@@ -2086,11 +2085,7 @@ static int ocfs2_assign_bh(struct inode *inode,
2086 return 0; 2085 return 0;
2087 } 2086 }
2088 2087
2089 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 2088 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh);
2090 OCFS2_I(inode)->ip_blkno,
2091 ret_bh,
2092 OCFS2_BH_CACHED,
2093 inode);
2094 if (status < 0) 2089 if (status < 0)
2095 mlog_errno(status); 2090 mlog_errno(status);
2096 2091
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index aed268e80b49..2baedac58234 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,8 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
293 struct ocfs2_extent_block *eb; 293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el; 294 struct ocfs2_extent_list *el;
295 295
296 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), last_eb_blk, 296 ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh);
297 &eb_bh, OCFS2_BH_CACHED, inode);
298 if (ret) { 297 if (ret) {
299 mlog_errno(ret); 298 mlog_errno(ret);
300 goto out; 299 goto out;
@@ -382,9 +381,9 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
382 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 381 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
383 goto no_more_extents; 382 goto no_more_extents;
384 383
385 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 384 ret = ocfs2_read_block(inode,
386 le64_to_cpu(eb->h_next_leaf_blk), 385 le64_to_cpu(eb->h_next_leaf_blk),
387 &next_eb_bh, OCFS2_BH_CACHED, inode); 386 &next_eb_bh);
388 if (ret) { 387 if (ret) {
389 mlog_errno(ret); 388 mlog_errno(ret);
390 goto out; 389 goto out;
@@ -551,6 +550,66 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
551 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; 550 *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff;
552} 551}
553 552
553int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
554 u32 *p_cluster, u32 *num_clusters,
555 struct ocfs2_extent_list *el)
556{
557 int ret = 0, i;
558 struct buffer_head *eb_bh = NULL;
559 struct ocfs2_extent_block *eb;
560 struct ocfs2_extent_rec *rec;
561 u32 coff;
562
563 if (el->l_tree_depth) {
564 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh);
565 if (ret) {
566 mlog_errno(ret);
567 goto out;
568 }
569
570 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
571 el = &eb->h_list;
572
573 if (el->l_tree_depth) {
574 ocfs2_error(inode->i_sb,
575 "Inode %lu has non zero tree depth in "
576 "xattr leaf block %llu\n", inode->i_ino,
577 (unsigned long long)eb_bh->b_blocknr);
578 ret = -EROFS;
579 goto out;
580 }
581 }
582
583 i = ocfs2_search_extent_list(el, v_cluster);
584 if (i == -1) {
585 ret = -EROFS;
586 mlog_errno(ret);
587 goto out;
588 } else {
589 rec = &el->l_recs[i];
590 BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos));
591
592 if (!rec->e_blkno) {
593 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
594 "record (%u, %u, 0) in xattr", inode->i_ino,
595 le32_to_cpu(rec->e_cpos),
596 ocfs2_rec_clusters(el, rec));
597 ret = -EROFS;
598 goto out;
599 }
600 coff = v_cluster - le32_to_cpu(rec->e_cpos);
601 *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb,
602 le64_to_cpu(rec->e_blkno));
603 *p_cluster = *p_cluster + coff;
604 if (num_clusters)
605 *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
606 }
607out:
608 if (eb_bh)
609 brelse(eb_bh);
610 return ret;
611}
612
554int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, 613int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
555 u32 *p_cluster, u32 *num_clusters, 614 u32 *p_cluster, u32 *num_clusters,
556 unsigned int *extent_flags) 615 unsigned int *extent_flags)
@@ -571,8 +630,7 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster,
571 if (ret == 0) 630 if (ret == 0)
572 goto out; 631 goto out;
573 632
574 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, 633 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh);
575 &di_bh, OCFS2_BH_CACHED, inode);
576 if (ret) { 634 if (ret) {
577 mlog_errno(ret); 635 mlog_errno(ret);
578 goto out; 636 goto out;
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index 1b97490e1ea8..1c4aa8b06f34 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -53,4 +53,8 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno,
53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 53int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
54 u64 map_start, u64 map_len); 54 u64 map_start, u64 map_len);
55 55
56int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
57 u32 *p_cluster, u32 *num_clusters,
58 struct ocfs2_extent_list *el);
59
56#endif /* _EXTENT_MAP_H */ 60#endif /* _EXTENT_MAP_H */
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ed38796052d2..8d3225a78073 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -55,6 +55,7 @@
55#include "mmap.h" 55#include "mmap.h"
56#include "suballoc.h" 56#include "suballoc.h"
57#include "super.h" 57#include "super.h"
58#include "xattr.h"
58 59
59#include "buffer_head_io.h" 60#include "buffer_head_io.h"
60 61
@@ -184,7 +185,7 @@ static int ocfs2_sync_file(struct file *file,
184 goto bail; 185 goto bail;
185 186
186 journal = osb->journal->j_journal; 187 journal = osb->journal->j_journal;
187 err = journal_force_commit(journal); 188 err = jbd2_journal_force_commit(journal);
188 189
189bail: 190bail:
190 mlog_exit(err); 191 mlog_exit(err);
@@ -488,7 +489,7 @@ bail:
488} 489}
489 490
490/* 491/*
491 * extend allocation only here. 492 * extend file allocation only here.
492 * we'll update all the disk stuff, and oip->alloc_size 493 * we'll update all the disk stuff, and oip->alloc_size
493 * 494 *
494 * expect stuff to be locked, a transaction started and enough data / 495 * expect stuff to be locked, a transaction started and enough data /
@@ -497,189 +498,25 @@ bail:
497 * Will return -EAGAIN, and a reason if a restart is needed. 498 * Will return -EAGAIN, and a reason if a restart is needed.
498 * If passed in, *reason will always be set, even in error. 499 * If passed in, *reason will always be set, even in error.
499 */ 500 */
500int ocfs2_do_extend_allocation(struct ocfs2_super *osb, 501int ocfs2_add_inode_data(struct ocfs2_super *osb,
501 struct inode *inode, 502 struct inode *inode,
502 u32 *logical_offset, 503 u32 *logical_offset,
503 u32 clusters_to_add, 504 u32 clusters_to_add,
504 int mark_unwritten, 505 int mark_unwritten,
505 struct buffer_head *fe_bh, 506 struct buffer_head *fe_bh,
506 handle_t *handle, 507 handle_t *handle,
507 struct ocfs2_alloc_context *data_ac, 508 struct ocfs2_alloc_context *data_ac,
508 struct ocfs2_alloc_context *meta_ac, 509 struct ocfs2_alloc_context *meta_ac,
509 enum ocfs2_alloc_restarted *reason_ret) 510 enum ocfs2_alloc_restarted *reason_ret)
510{ 511{
511 int status = 0; 512 int ret;
512 int free_extents; 513 struct ocfs2_extent_tree et;
513 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
514 enum ocfs2_alloc_restarted reason = RESTART_NONE;
515 u32 bit_off, num_bits;
516 u64 block;
517 u8 flags = 0;
518
519 BUG_ON(!clusters_to_add);
520
521 if (mark_unwritten)
522 flags = OCFS2_EXT_UNWRITTEN;
523
524 free_extents = ocfs2_num_free_extents(osb, inode, fe);
525 if (free_extents < 0) {
526 status = free_extents;
527 mlog_errno(status);
528 goto leave;
529 }
530
531 /* there are two cases which could cause us to EAGAIN in the
532 * we-need-more-metadata case:
533 * 1) we haven't reserved *any*
534 * 2) we are so fragmented, we've needed to add metadata too
535 * many times. */
536 if (!free_extents && !meta_ac) {
537 mlog(0, "we haven't reserved any metadata!\n");
538 status = -EAGAIN;
539 reason = RESTART_META;
540 goto leave;
541 } else if ((!free_extents)
542 && (ocfs2_alloc_context_bits_left(meta_ac)
543 < ocfs2_extend_meta_needed(fe))) {
544 mlog(0, "filesystem is really fragmented...\n");
545 status = -EAGAIN;
546 reason = RESTART_META;
547 goto leave;
548 }
549
550 status = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
551 clusters_to_add, &bit_off, &num_bits);
552 if (status < 0) {
553 if (status != -ENOSPC)
554 mlog_errno(status);
555 goto leave;
556 }
557
558 BUG_ON(num_bits > clusters_to_add);
559
560 /* reserve our write early -- insert_extent may update the inode */
561 status = ocfs2_journal_access(handle, inode, fe_bh,
562 OCFS2_JOURNAL_ACCESS_WRITE);
563 if (status < 0) {
564 mlog_errno(status);
565 goto leave;
566 }
567
568 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
569 mlog(0, "Allocating %u clusters at block %u for inode %llu\n",
570 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
571 status = ocfs2_insert_extent(osb, handle, inode, fe_bh,
572 *logical_offset, block, num_bits,
573 flags, meta_ac);
574 if (status < 0) {
575 mlog_errno(status);
576 goto leave;
577 }
578
579 status = ocfs2_journal_dirty(handle, fe_bh);
580 if (status < 0) {
581 mlog_errno(status);
582 goto leave;
583 }
584
585 clusters_to_add -= num_bits;
586 *logical_offset += num_bits;
587
588 if (clusters_to_add) {
589 mlog(0, "need to alloc once more, clusters = %u, wanted = "
590 "%u\n", fe->i_clusters, clusters_to_add);
591 status = -EAGAIN;
592 reason = RESTART_TRANS;
593 }
594
595leave:
596 mlog_exit(status);
597 if (reason_ret)
598 *reason_ret = reason;
599 return status;
600}
601
602/*
603 * For a given allocation, determine which allocators will need to be
604 * accessed, and lock them, reserving the appropriate number of bits.
605 *
606 * Sparse file systems call this from ocfs2_write_begin_nolock()
607 * and ocfs2_allocate_unwritten_extents().
608 *
609 * File systems which don't support holes call this from
610 * ocfs2_extend_allocation().
611 */
612int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
613 u32 clusters_to_add, u32 extents_to_split,
614 struct ocfs2_alloc_context **data_ac,
615 struct ocfs2_alloc_context **meta_ac)
616{
617 int ret = 0, num_free_extents;
618 unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
619 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
620
621 *meta_ac = NULL;
622 if (data_ac)
623 *data_ac = NULL;
624
625 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
626
627 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
628 "clusters_to_add = %u, extents_to_split = %u\n",
629 (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode),
630 le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split);
631
632 num_free_extents = ocfs2_num_free_extents(osb, inode, di);
633 if (num_free_extents < 0) {
634 ret = num_free_extents;
635 mlog_errno(ret);
636 goto out;
637 }
638
639 /*
640 * Sparse allocation file systems need to be more conservative
641 * with reserving room for expansion - the actual allocation
642 * happens while we've got a journal handle open so re-taking
643 * a cluster lock (because we ran out of room for another
644 * extent) will violate ordering rules.
645 *
646 * Most of the time we'll only be seeing this 1 cluster at a time
647 * anyway.
648 *
649 * Always lock for any unwritten extents - we might want to
650 * add blocks during a split.
651 */
652 if (!num_free_extents ||
653 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
654 ret = ocfs2_reserve_new_metadata(osb, di, meta_ac);
655 if (ret < 0) {
656 if (ret != -ENOSPC)
657 mlog_errno(ret);
658 goto out;
659 }
660 }
661
662 if (clusters_to_add == 0)
663 goto out;
664
665 ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
666 if (ret < 0) {
667 if (ret != -ENOSPC)
668 mlog_errno(ret);
669 goto out;
670 }
671
672out:
673 if (ret) {
674 if (*meta_ac) {
675 ocfs2_free_alloc_context(*meta_ac);
676 *meta_ac = NULL;
677 }
678 514
679 /* 515 ocfs2_init_dinode_extent_tree(&et, inode, fe_bh);
680 * We cannot have an error and a non null *data_ac. 516 ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset,
681 */ 517 clusters_to_add, mark_unwritten,
682 } 518 &et, handle,
519 data_ac, meta_ac, reason_ret);
683 520
684 return ret; 521 return ret;
685} 522}
@@ -698,6 +535,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
698 struct ocfs2_alloc_context *meta_ac = NULL; 535 struct ocfs2_alloc_context *meta_ac = NULL;
699 enum ocfs2_alloc_restarted why; 536 enum ocfs2_alloc_restarted why;
700 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 537 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
538 struct ocfs2_extent_tree et;
701 539
702 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); 540 mlog_entry("(clusters_to_add = %u)\n", clusters_to_add);
703 541
@@ -707,8 +545,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
707 */ 545 */
708 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); 546 BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb));
709 547
710 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, 548 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);
711 OCFS2_BH_CACHED, inode);
712 if (status < 0) { 549 if (status < 0) {
713 mlog_errno(status); 550 mlog_errno(status);
714 goto leave; 551 goto leave;
@@ -724,14 +561,21 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start,
724restart_all: 561restart_all:
725 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); 562 BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters);
726 563
727 status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, 564 mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, "
728 &meta_ac); 565 "clusters_to_add = %u\n",
566 (unsigned long long)OCFS2_I(inode)->ip_blkno,
567 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
568 clusters_to_add);
569 ocfs2_init_dinode_extent_tree(&et, inode, bh);
570 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
571 &data_ac, &meta_ac);
729 if (status) { 572 if (status) {
730 mlog_errno(status); 573 mlog_errno(status);
731 goto leave; 574 goto leave;
732 } 575 }
733 576
734 credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); 577 credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list,
578 clusters_to_add);
735 handle = ocfs2_start_trans(osb, credits); 579 handle = ocfs2_start_trans(osb, credits);
736 if (IS_ERR(handle)) { 580 if (IS_ERR(handle)) {
737 status = PTR_ERR(handle); 581 status = PTR_ERR(handle);
@@ -753,16 +597,16 @@ restarted_transaction:
753 597
754 prev_clusters = OCFS2_I(inode)->ip_clusters; 598 prev_clusters = OCFS2_I(inode)->ip_clusters;
755 599
756 status = ocfs2_do_extend_allocation(osb, 600 status = ocfs2_add_inode_data(osb,
757 inode, 601 inode,
758 &logical_start, 602 &logical_start,
759 clusters_to_add, 603 clusters_to_add,
760 mark_unwritten, 604 mark_unwritten,
761 bh, 605 bh,
762 handle, 606 handle,
763 data_ac, 607 data_ac,
764 meta_ac, 608 meta_ac,
765 &why); 609 &why);
766 if ((status < 0) && (status != -EAGAIN)) { 610 if ((status < 0) && (status != -EAGAIN)) {
767 if (status != -ENOSPC) 611 if (status != -ENOSPC)
768 mlog_errno(status); 612 mlog_errno(status);
@@ -789,7 +633,7 @@ restarted_transaction:
789 mlog(0, "restarting transaction.\n"); 633 mlog(0, "restarting transaction.\n");
790 /* TODO: This can be more intelligent. */ 634 /* TODO: This can be more intelligent. */
791 credits = ocfs2_calc_extend_credits(osb->sb, 635 credits = ocfs2_calc_extend_credits(osb->sb,
792 fe, 636 &fe->id2.i_list,
793 clusters_to_add); 637 clusters_to_add);
794 status = ocfs2_extend_trans(handle, credits); 638 status = ocfs2_extend_trans(handle, credits);
795 if (status < 0) { 639 if (status < 0) {
@@ -826,10 +670,8 @@ leave:
826 restart_func = 0; 670 restart_func = 0;
827 goto restart_all; 671 goto restart_all;
828 } 672 }
829 if (bh) { 673 brelse(bh);
830 brelse(bh); 674 bh = NULL;
831 bh = NULL;
832 }
833 675
834 mlog_exit(status); 676 mlog_exit(status);
835 return status; 677 return status;
@@ -1096,9 +938,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1096 goto bail_unlock; 938 goto bail_unlock;
1097 } 939 }
1098 940
1099 if (i_size_read(inode) > attr->ia_size) 941 if (i_size_read(inode) > attr->ia_size) {
942 if (ocfs2_should_order_data(inode)) {
943 status = ocfs2_begin_ordered_truncate(inode,
944 attr->ia_size);
945 if (status)
946 goto bail_unlock;
947 }
1100 status = ocfs2_truncate_file(inode, bh, attr->ia_size); 948 status = ocfs2_truncate_file(inode, bh, attr->ia_size);
1101 else 949 } else
1102 status = ocfs2_extend_file(inode, bh, attr->ia_size); 950 status = ocfs2_extend_file(inode, bh, attr->ia_size);
1103 if (status < 0) { 951 if (status < 0) {
1104 if (status != -ENOSPC) 952 if (status != -ENOSPC)
@@ -1140,8 +988,7 @@ bail_unlock_rw:
1140 if (size_change) 988 if (size_change)
1141 ocfs2_rw_unlock(inode, 1); 989 ocfs2_rw_unlock(inode, 1);
1142bail: 990bail:
1143 if (bh) 991 brelse(bh);
1144 brelse(bh);
1145 992
1146 mlog_exit(status); 993 mlog_exit(status);
1147 return status; 994 return status;
@@ -1284,8 +1131,7 @@ static int ocfs2_write_remove_suid(struct inode *inode)
1284 struct buffer_head *bh = NULL; 1131 struct buffer_head *bh = NULL;
1285 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1132 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1286 1133
1287 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 1134 ret = ocfs2_read_block(inode, oi->ip_blkno, &bh);
1288 oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode);
1289 if (ret < 0) { 1135 if (ret < 0) {
1290 mlog_errno(ret); 1136 mlog_errno(ret);
1291 goto out; 1137 goto out;
@@ -1311,9 +1157,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode,
1311 struct buffer_head *di_bh = NULL; 1157 struct buffer_head *di_bh = NULL;
1312 1158
1313 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { 1159 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1314 ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), 1160 ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno,
1315 OCFS2_I(inode)->ip_blkno, &di_bh, 1161 &di_bh);
1316 OCFS2_BH_CACHED, inode);
1317 if (ret) { 1162 if (ret) {
1318 mlog_errno(ret); 1163 mlog_errno(ret);
1319 goto out; 1164 goto out;
@@ -1394,8 +1239,11 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
1394 handle_t *handle; 1239 handle_t *handle;
1395 struct ocfs2_alloc_context *meta_ac = NULL; 1240 struct ocfs2_alloc_context *meta_ac = NULL;
1396 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1241 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1242 struct ocfs2_extent_tree et;
1397 1243
1398 ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); 1244 ocfs2_init_dinode_extent_tree(&et, inode, di_bh);
1245
1246 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
1399 if (ret) { 1247 if (ret) {
1400 mlog_errno(ret); 1248 mlog_errno(ret);
1401 return ret; 1249 return ret;
@@ -1425,7 +1273,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode,
1425 goto out; 1273 goto out;
1426 } 1274 }
1427 1275
1428 ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, 1276 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
1429 dealloc); 1277 dealloc);
1430 if (ret) { 1278 if (ret) {
1431 mlog_errno(ret); 1279 mlog_errno(ret);
@@ -2040,7 +1888,7 @@ out_dio:
2040 */ 1888 */
2041 if (old_size != i_size_read(inode) || 1889 if (old_size != i_size_read(inode) ||
2042 old_clusters != OCFS2_I(inode)->ip_clusters) { 1890 old_clusters != OCFS2_I(inode)->ip_clusters) {
2043 ret = journal_force_commit(osb->journal->j_journal); 1891 ret = jbd2_journal_force_commit(osb->journal->j_journal);
2044 if (ret < 0) 1892 if (ret < 0)
2045 written = ret; 1893 written = ret;
2046 } 1894 }
@@ -2227,6 +2075,10 @@ const struct inode_operations ocfs2_file_iops = {
2227 .setattr = ocfs2_setattr, 2075 .setattr = ocfs2_setattr,
2228 .getattr = ocfs2_getattr, 2076 .getattr = ocfs2_getattr,
2229 .permission = ocfs2_permission, 2077 .permission = ocfs2_permission,
2078 .setxattr = generic_setxattr,
2079 .getxattr = generic_getxattr,
2080 .listxattr = ocfs2_listxattr,
2081 .removexattr = generic_removexattr,
2230 .fallocate = ocfs2_fallocate, 2082 .fallocate = ocfs2_fallocate,
2231 .fiemap = ocfs2_fiemap, 2083 .fiemap = ocfs2_fiemap,
2232}; 2084};
@@ -2237,6 +2089,10 @@ const struct inode_operations ocfs2_special_file_iops = {
2237 .permission = ocfs2_permission, 2089 .permission = ocfs2_permission,
2238}; 2090};
2239 2091
2092/*
2093 * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with
2094 * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks!
2095 */
2240const struct file_operations ocfs2_fops = { 2096const struct file_operations ocfs2_fops = {
2241 .llseek = generic_file_llseek, 2097 .llseek = generic_file_llseek,
2242 .read = do_sync_read, 2098 .read = do_sync_read,
@@ -2251,6 +2107,7 @@ const struct file_operations ocfs2_fops = {
2251#ifdef CONFIG_COMPAT 2107#ifdef CONFIG_COMPAT
2252 .compat_ioctl = ocfs2_compat_ioctl, 2108 .compat_ioctl = ocfs2_compat_ioctl,
2253#endif 2109#endif
2110 .lock = ocfs2_lock,
2254 .flock = ocfs2_flock, 2111 .flock = ocfs2_flock,
2255 .splice_read = ocfs2_file_splice_read, 2112 .splice_read = ocfs2_file_splice_read,
2256 .splice_write = ocfs2_file_splice_write, 2113 .splice_write = ocfs2_file_splice_write,
@@ -2267,5 +2124,51 @@ const struct file_operations ocfs2_dops = {
2267#ifdef CONFIG_COMPAT 2124#ifdef CONFIG_COMPAT
2268 .compat_ioctl = ocfs2_compat_ioctl, 2125 .compat_ioctl = ocfs2_compat_ioctl,
2269#endif 2126#endif
2127 .lock = ocfs2_lock,
2128 .flock = ocfs2_flock,
2129};
2130
2131/*
2132 * POSIX-lockless variants of our file_operations.
2133 *
2134 * These will be used if the underlying cluster stack does not support
2135 * posix file locking, if the user passes the "localflocks" mount
2136 * option, or if we have a local-only fs.
2137 *
2138 * ocfs2_flock is in here because all stacks handle UNIX file locks,
2139 * so we still want it in the case of no stack support for
2140 * plocks. Internally, it will do the right thing when asked to ignore
2141 * the cluster.
2142 */
2143const struct file_operations ocfs2_fops_no_plocks = {
2144 .llseek = generic_file_llseek,
2145 .read = do_sync_read,
2146 .write = do_sync_write,
2147 .mmap = ocfs2_mmap,
2148 .fsync = ocfs2_sync_file,
2149 .release = ocfs2_file_release,
2150 .open = ocfs2_file_open,
2151 .aio_read = ocfs2_file_aio_read,
2152 .aio_write = ocfs2_file_aio_write,
2153 .unlocked_ioctl = ocfs2_ioctl,
2154#ifdef CONFIG_COMPAT
2155 .compat_ioctl = ocfs2_compat_ioctl,
2156#endif
2157 .flock = ocfs2_flock,
2158 .splice_read = ocfs2_file_splice_read,
2159 .splice_write = ocfs2_file_splice_write,
2160};
2161
2162const struct file_operations ocfs2_dops_no_plocks = {
2163 .llseek = generic_file_llseek,
2164 .read = generic_read_dir,
2165 .readdir = ocfs2_readdir,
2166 .fsync = ocfs2_sync_file,
2167 .release = ocfs2_dir_release,
2168 .open = ocfs2_dir_open,
2169 .unlocked_ioctl = ocfs2_ioctl,
2170#ifdef CONFIG_COMPAT
2171 .compat_ioctl = ocfs2_compat_ioctl,
2172#endif
2270 .flock = ocfs2_flock, 2173 .flock = ocfs2_flock,
2271}; 2174};
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 1e27b4d017ea..e92382cbca5f 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -28,9 +28,12 @@
28 28
29extern const struct file_operations ocfs2_fops; 29extern const struct file_operations ocfs2_fops;
30extern const struct file_operations ocfs2_dops; 30extern const struct file_operations ocfs2_dops;
31extern const struct file_operations ocfs2_fops_no_plocks;
32extern const struct file_operations ocfs2_dops_no_plocks;
31extern const struct inode_operations ocfs2_file_iops; 33extern const struct inode_operations ocfs2_file_iops;
32extern const struct inode_operations ocfs2_special_file_iops; 34extern const struct inode_operations ocfs2_special_file_iops;
33struct ocfs2_alloc_context; 35struct ocfs2_alloc_context;
36enum ocfs2_alloc_restarted;
34 37
35struct ocfs2_file_private { 38struct ocfs2_file_private {
36 struct file *fp_file; 39 struct file *fp_file;
@@ -38,27 +41,18 @@ struct ocfs2_file_private {
38 struct ocfs2_lock_res fp_flock; 41 struct ocfs2_lock_res fp_flock;
39}; 42};
40 43
41enum ocfs2_alloc_restarted { 44int ocfs2_add_inode_data(struct ocfs2_super *osb,
42 RESTART_NONE = 0, 45 struct inode *inode,
43 RESTART_TRANS, 46 u32 *logical_offset,
44 RESTART_META 47 u32 clusters_to_add,
45}; 48 int mark_unwritten,
46int ocfs2_do_extend_allocation(struct ocfs2_super *osb, 49 struct buffer_head *fe_bh,
47 struct inode *inode, 50 handle_t *handle,
48 u32 *logical_offset, 51 struct ocfs2_alloc_context *data_ac,
49 u32 clusters_to_add, 52 struct ocfs2_alloc_context *meta_ac,
50 int mark_unwritten, 53 enum ocfs2_alloc_restarted *reason_ret);
51 struct buffer_head *fe_bh,
52 handle_t *handle,
53 struct ocfs2_alloc_context *data_ac,
54 struct ocfs2_alloc_context *meta_ac,
55 enum ocfs2_alloc_restarted *reason_ret);
56int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, 54int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size,
57 u64 zero_to); 55 u64 zero_to);
58int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di,
59 u32 clusters_to_add, u32 extents_to_split,
60 struct ocfs2_alloc_context **data_ac,
61 struct ocfs2_alloc_context **meta_ac);
62int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); 56int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
63int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, 57int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry,
64 struct kstat *stat); 58 struct kstat *stat);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 7e9e4c79aec7..4903688f72a9 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -49,6 +49,7 @@
49#include "symlink.h" 49#include "symlink.h"
50#include "sysfile.h" 50#include "sysfile.h"
51#include "uptodate.h" 51#include "uptodate.h"
52#include "xattr.h"
52 53
53#include "buffer_head_io.h" 54#include "buffer_head_io.h"
54 55
@@ -219,6 +220,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
219 struct super_block *sb; 220 struct super_block *sb;
220 struct ocfs2_super *osb; 221 struct ocfs2_super *osb;
221 int status = -EINVAL; 222 int status = -EINVAL;
223 int use_plocks = 1;
222 224
223 mlog_entry("(0x%p, size:%llu)\n", inode, 225 mlog_entry("(0x%p, size:%llu)\n", inode,
224 (unsigned long long)le64_to_cpu(fe->i_size)); 226 (unsigned long long)le64_to_cpu(fe->i_size));
@@ -226,6 +228,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
226 sb = inode->i_sb; 228 sb = inode->i_sb;
227 osb = OCFS2_SB(sb); 229 osb = OCFS2_SB(sb);
228 230
231 if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) ||
232 ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks())
233 use_plocks = 0;
234
229 /* this means that read_inode cannot create a superblock inode 235 /* this means that read_inode cannot create a superblock inode
230 * today. change if needed. */ 236 * today. change if needed. */
231 if (!OCFS2_IS_VALID_DINODE(fe) || 237 if (!OCFS2_IS_VALID_DINODE(fe) ||
@@ -295,13 +301,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe,
295 301
296 switch (inode->i_mode & S_IFMT) { 302 switch (inode->i_mode & S_IFMT) {
297 case S_IFREG: 303 case S_IFREG:
298 inode->i_fop = &ocfs2_fops; 304 if (use_plocks)
305 inode->i_fop = &ocfs2_fops;
306 else
307 inode->i_fop = &ocfs2_fops_no_plocks;
299 inode->i_op = &ocfs2_file_iops; 308 inode->i_op = &ocfs2_file_iops;
300 i_size_write(inode, le64_to_cpu(fe->i_size)); 309 i_size_write(inode, le64_to_cpu(fe->i_size));
301 break; 310 break;
302 case S_IFDIR: 311 case S_IFDIR:
303 inode->i_op = &ocfs2_dir_iops; 312 inode->i_op = &ocfs2_dir_iops;
304 inode->i_fop = &ocfs2_dops; 313 if (use_plocks)
314 inode->i_fop = &ocfs2_dops;
315 else
316 inode->i_fop = &ocfs2_dops_no_plocks;
305 i_size_write(inode, le64_to_cpu(fe->i_size)); 317 i_size_write(inode, le64_to_cpu(fe->i_size));
306 break; 318 break;
307 case S_IFLNK: 319 case S_IFLNK:
@@ -448,8 +460,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
448 } 460 }
449 } 461 }
450 462
451 status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, 463 if (can_lock)
452 can_lock ? inode : NULL); 464 status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh,
465 OCFS2_BH_IGNORE_CACHE);
466 else
467 status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
453 if (status < 0) { 468 if (status < 0) {
454 mlog_errno(status); 469 mlog_errno(status);
455 goto bail; 470 goto bail;
@@ -522,6 +537,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
522 * data and fast symlinks. 537 * data and fast symlinks.
523 */ 538 */
524 if (fe->i_clusters) { 539 if (fe->i_clusters) {
540 if (ocfs2_should_order_data(inode))
541 ocfs2_begin_ordered_truncate(inode, 0);
542
525 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); 543 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
526 if (IS_ERR(handle)) { 544 if (IS_ERR(handle)) {
527 status = PTR_ERR(handle); 545 status = PTR_ERR(handle);
@@ -730,6 +748,13 @@ static int ocfs2_wipe_inode(struct inode *inode,
730 goto bail_unlock_dir; 748 goto bail_unlock_dir;
731 } 749 }
732 750
751 /*Free extended attribute resources associated with this inode.*/
752 status = ocfs2_xattr_remove(inode, di_bh);
753 if (status < 0) {
754 mlog_errno(status);
755 goto bail_unlock_dir;
756 }
757
733 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, 758 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
734 orphan_dir_bh); 759 orphan_dir_bh);
735 if (status < 0) 760 if (status < 0)
@@ -1081,6 +1106,8 @@ void ocfs2_clear_inode(struct inode *inode)
1081 oi->ip_last_trans = 0; 1106 oi->ip_last_trans = 0;
1082 oi->ip_dir_start_lookup = 0; 1107 oi->ip_dir_start_lookup = 0;
1083 oi->ip_blkno = 0ULL; 1108 oi->ip_blkno = 0ULL;
1109 jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal,
1110 &oi->ip_jinode);
1084 1111
1085bail: 1112bail:
1086 mlog_exit_void(); 1113 mlog_exit_void();
@@ -1107,58 +1134,6 @@ void ocfs2_drop_inode(struct inode *inode)
1107} 1134}
1108 1135
1109/* 1136/*
1110 * TODO: this should probably be merged into ocfs2_get_block
1111 *
1112 * However, you now need to pay attention to the cont_prepare_write()
1113 * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much
1114 * expects never to extend).
1115 */
1116struct buffer_head *ocfs2_bread(struct inode *inode,
1117 int block, int *err, int reada)
1118{
1119 struct buffer_head *bh = NULL;
1120 int tmperr;
1121 u64 p_blkno;
1122 int readflags = OCFS2_BH_CACHED;
1123
1124 if (reada)
1125 readflags |= OCFS2_BH_READAHEAD;
1126
1127 if (((u64)block << inode->i_sb->s_blocksize_bits) >=
1128 i_size_read(inode)) {
1129 BUG_ON(!reada);
1130 return NULL;
1131 }
1132
1133 down_read(&OCFS2_I(inode)->ip_alloc_sem);
1134 tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL,
1135 NULL);
1136 up_read(&OCFS2_I(inode)->ip_alloc_sem);
1137 if (tmperr < 0) {
1138 mlog_errno(tmperr);
1139 goto fail;
1140 }
1141
1142 tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh,
1143 readflags, inode);
1144 if (tmperr < 0)
1145 goto fail;
1146
1147 tmperr = 0;
1148
1149 *err = 0;
1150 return bh;
1151
1152fail:
1153 if (bh) {
1154 brelse(bh);
1155 bh = NULL;
1156 }
1157 *err = -EIO;
1158 return NULL;
1159}
1160
1161/*
1162 * This is called from our getattr. 1137 * This is called from our getattr.
1163 */ 1138 */
1164int ocfs2_inode_revalidate(struct dentry *dentry) 1139int ocfs2_inode_revalidate(struct dentry *dentry)
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 390a85596aa0..2f37af9bcc4a 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -40,6 +40,9 @@ struct ocfs2_inode_info
40 /* protects allocation changes on this inode. */ 40 /* protects allocation changes on this inode. */
41 struct rw_semaphore ip_alloc_sem; 41 struct rw_semaphore ip_alloc_sem;
42 42
43 /* protects extended attribute changes on this inode */
44 struct rw_semaphore ip_xattr_sem;
45
43 /* These fields are protected by ip_lock */ 46 /* These fields are protected by ip_lock */
44 spinlock_t ip_lock; 47 spinlock_t ip_lock;
45 u32 ip_open_count; 48 u32 ip_open_count;
@@ -68,6 +71,7 @@ struct ocfs2_inode_info
68 struct ocfs2_extent_map ip_extent_map; 71 struct ocfs2_extent_map ip_extent_map;
69 72
70 struct inode vfs_inode; 73 struct inode vfs_inode;
74 struct jbd2_inode ip_jinode;
71}; 75};
72 76
73/* 77/*
@@ -113,8 +117,6 @@ extern struct kmem_cache *ocfs2_inode_cache;
113 117
114extern const struct address_space_operations ocfs2_aops; 118extern const struct address_space_operations ocfs2_aops;
115 119
116struct buffer_head *ocfs2_bread(struct inode *inode, int block,
117 int *err, int reada);
118void ocfs2_clear_inode(struct inode *inode); 120void ocfs2_clear_inode(struct inode *inode);
119void ocfs2_delete_inode(struct inode *inode); 121void ocfs2_delete_inode(struct inode *inode);
120void ocfs2_drop_inode(struct inode *inode); 122void ocfs2_drop_inode(struct inode *inode);
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 7b142f0ce995..9fcd36dcc9a0 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -102,8 +102,7 @@ bail_unlock:
102bail: 102bail:
103 mutex_unlock(&inode->i_mutex); 103 mutex_unlock(&inode->i_mutex);
104 104
105 if (bh) 105 brelse(bh);
106 brelse(bh);
107 106
108 mlog_exit(status); 107 mlog_exit(status);
109 return status; 108 return status;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c47bc2a809c2..81e40677eecb 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -215,9 +215,9 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb)
215 goto finally; 215 goto finally;
216 } 216 }
217 217
218 journal_lock_updates(journal->j_journal); 218 jbd2_journal_lock_updates(journal->j_journal);
219 status = journal_flush(journal->j_journal); 219 status = jbd2_journal_flush(journal->j_journal);
220 journal_unlock_updates(journal->j_journal); 220 jbd2_journal_unlock_updates(journal->j_journal);
221 if (status < 0) { 221 if (status < 0) {
222 up_write(&journal->j_trans_barrier); 222 up_write(&journal->j_trans_barrier);
223 mlog_errno(status); 223 mlog_errno(status);
@@ -264,7 +264,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
264 264
265 down_read(&osb->journal->j_trans_barrier); 265 down_read(&osb->journal->j_trans_barrier);
266 266
267 handle = journal_start(journal, max_buffs); 267 handle = jbd2_journal_start(journal, max_buffs);
268 if (IS_ERR(handle)) { 268 if (IS_ERR(handle)) {
269 up_read(&osb->journal->j_trans_barrier); 269 up_read(&osb->journal->j_trans_barrier);
270 270
@@ -290,7 +290,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
290 290
291 BUG_ON(!handle); 291 BUG_ON(!handle);
292 292
293 ret = journal_stop(handle); 293 ret = jbd2_journal_stop(handle);
294 if (ret < 0) 294 if (ret < 0)
295 mlog_errno(ret); 295 mlog_errno(ret);
296 296
@@ -304,7 +304,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
304 * transaction. extend_trans will either extend the current handle by 304 * transaction. extend_trans will either extend the current handle by
305 * nblocks, or commit it and start a new one with nblocks credits. 305 * nblocks, or commit it and start a new one with nblocks credits.
306 * 306 *
307 * This might call journal_restart() which will commit dirty buffers 307 * This might call jbd2_journal_restart() which will commit dirty buffers
308 * and then restart the transaction. Before calling 308 * and then restart the transaction. Before calling
309 * ocfs2_extend_trans(), any changed blocks should have been 309 * ocfs2_extend_trans(), any changed blocks should have been
310 * dirtied. After calling it, all blocks which need to be changed must 310 * dirtied. After calling it, all blocks which need to be changed must
@@ -332,7 +332,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
332#ifdef CONFIG_OCFS2_DEBUG_FS 332#ifdef CONFIG_OCFS2_DEBUG_FS
333 status = 1; 333 status = 1;
334#else 334#else
335 status = journal_extend(handle, nblocks); 335 status = jbd2_journal_extend(handle, nblocks);
336 if (status < 0) { 336 if (status < 0) {
337 mlog_errno(status); 337 mlog_errno(status);
338 goto bail; 338 goto bail;
@@ -340,8 +340,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks)
340#endif 340#endif
341 341
342 if (status > 0) { 342 if (status > 0) {
343 mlog(0, "journal_extend failed, trying journal_restart\n"); 343 mlog(0,
344 status = journal_restart(handle, nblocks); 344 "jbd2_journal_extend failed, trying "
345 "jbd2_journal_restart\n");
346 status = jbd2_journal_restart(handle, nblocks);
345 if (status < 0) { 347 if (status < 0) {
346 mlog_errno(status); 348 mlog_errno(status);
347 goto bail; 349 goto bail;
@@ -393,11 +395,11 @@ int ocfs2_journal_access(handle_t *handle,
393 switch (type) { 395 switch (type) {
394 case OCFS2_JOURNAL_ACCESS_CREATE: 396 case OCFS2_JOURNAL_ACCESS_CREATE:
395 case OCFS2_JOURNAL_ACCESS_WRITE: 397 case OCFS2_JOURNAL_ACCESS_WRITE:
396 status = journal_get_write_access(handle, bh); 398 status = jbd2_journal_get_write_access(handle, bh);
397 break; 399 break;
398 400
399 case OCFS2_JOURNAL_ACCESS_UNDO: 401 case OCFS2_JOURNAL_ACCESS_UNDO:
400 status = journal_get_undo_access(handle, bh); 402 status = jbd2_journal_get_undo_access(handle, bh);
401 break; 403 break;
402 404
403 default: 405 default:
@@ -422,7 +424,7 @@ int ocfs2_journal_dirty(handle_t *handle,
422 mlog_entry("(bh->b_blocknr=%llu)\n", 424 mlog_entry("(bh->b_blocknr=%llu)\n",
423 (unsigned long long)bh->b_blocknr); 425 (unsigned long long)bh->b_blocknr);
424 426
425 status = journal_dirty_metadata(handle, bh); 427 status = jbd2_journal_dirty_metadata(handle, bh);
426 if (status < 0) 428 if (status < 0)
427 mlog(ML_ERROR, "Could not dirty metadata buffer. " 429 mlog(ML_ERROR, "Could not dirty metadata buffer. "
428 "(bh->b_blocknr=%llu)\n", 430 "(bh->b_blocknr=%llu)\n",
@@ -432,6 +434,7 @@ int ocfs2_journal_dirty(handle_t *handle,
432 return status; 434 return status;
433} 435}
434 436
437#ifdef CONFIG_OCFS2_COMPAT_JBD
435int ocfs2_journal_dirty_data(handle_t *handle, 438int ocfs2_journal_dirty_data(handle_t *handle,
436 struct buffer_head *bh) 439 struct buffer_head *bh)
437{ 440{
@@ -443,8 +446,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
443 446
444 return err; 447 return err;
445} 448}
449#endif
446 450
447#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) 451#define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE)
448 452
449void ocfs2_set_journal_params(struct ocfs2_super *osb) 453void ocfs2_set_journal_params(struct ocfs2_super *osb)
450{ 454{
@@ -457,9 +461,9 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb)
457 spin_lock(&journal->j_state_lock); 461 spin_lock(&journal->j_state_lock);
458 journal->j_commit_interval = commit_interval; 462 journal->j_commit_interval = commit_interval;
459 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) 463 if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER)
460 journal->j_flags |= JFS_BARRIER; 464 journal->j_flags |= JBD2_BARRIER;
461 else 465 else
462 journal->j_flags &= ~JFS_BARRIER; 466 journal->j_flags &= ~JBD2_BARRIER;
463 spin_unlock(&journal->j_state_lock); 467 spin_unlock(&journal->j_state_lock);
464} 468}
465 469
@@ -524,14 +528,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
524 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); 528 mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters);
525 529
526 /* call the kernels journal init function now */ 530 /* call the kernels journal init function now */
527 j_journal = journal_init_inode(inode); 531 j_journal = jbd2_journal_init_inode(inode);
528 if (j_journal == NULL) { 532 if (j_journal == NULL) {
529 mlog(ML_ERROR, "Linux journal layer error\n"); 533 mlog(ML_ERROR, "Linux journal layer error\n");
530 status = -EINVAL; 534 status = -EINVAL;
531 goto done; 535 goto done;
532 } 536 }
533 537
534 mlog(0, "Returned from journal_init_inode\n"); 538 mlog(0, "Returned from jbd2_journal_init_inode\n");
535 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); 539 mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen);
536 540
537 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & 541 *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) &
@@ -550,8 +554,7 @@ done:
550 if (status < 0) { 554 if (status < 0) {
551 if (inode_lock) 555 if (inode_lock)
552 ocfs2_inode_unlock(inode, 1); 556 ocfs2_inode_unlock(inode, 1);
553 if (bh != NULL) 557 brelse(bh);
554 brelse(bh);
555 if (inode) { 558 if (inode) {
556 OCFS2_I(inode)->ip_open_count--; 559 OCFS2_I(inode)->ip_open_count--;
557 iput(inode); 560 iput(inode);
@@ -639,7 +642,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
639 if (journal->j_state != OCFS2_JOURNAL_LOADED) 642 if (journal->j_state != OCFS2_JOURNAL_LOADED)
640 goto done; 643 goto done;
641 644
642 /* need to inc inode use count as journal_destroy will iput. */ 645 /* need to inc inode use count - jbd2_journal_destroy will iput. */
643 if (!igrab(inode)) 646 if (!igrab(inode))
644 BUG(); 647 BUG();
645 648
@@ -668,9 +671,9 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
668 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); 671 BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0);
669 672
670 if (ocfs2_mount_local(osb)) { 673 if (ocfs2_mount_local(osb)) {
671 journal_lock_updates(journal->j_journal); 674 jbd2_journal_lock_updates(journal->j_journal);
672 status = journal_flush(journal->j_journal); 675 status = jbd2_journal_flush(journal->j_journal);
673 journal_unlock_updates(journal->j_journal); 676 jbd2_journal_unlock_updates(journal->j_journal);
674 if (status < 0) 677 if (status < 0)
675 mlog_errno(status); 678 mlog_errno(status);
676 } 679 }
@@ -686,7 +689,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb)
686 } 689 }
687 690
688 /* Shutdown the kernel journal system */ 691 /* Shutdown the kernel journal system */
689 journal_destroy(journal->j_journal); 692 jbd2_journal_destroy(journal->j_journal);
690 693
691 OCFS2_I(inode)->ip_open_count--; 694 OCFS2_I(inode)->ip_open_count--;
692 695
@@ -711,15 +714,15 @@ static void ocfs2_clear_journal_error(struct super_block *sb,
711{ 714{
712 int olderr; 715 int olderr;
713 716
714 olderr = journal_errno(journal); 717 olderr = jbd2_journal_errno(journal);
715 if (olderr) { 718 if (olderr) {
716 mlog(ML_ERROR, "File system error %d recorded in " 719 mlog(ML_ERROR, "File system error %d recorded in "
717 "journal %u.\n", olderr, slot); 720 "journal %u.\n", olderr, slot);
718 mlog(ML_ERROR, "File system on device %s needs checking.\n", 721 mlog(ML_ERROR, "File system on device %s needs checking.\n",
719 sb->s_id); 722 sb->s_id);
720 723
721 journal_ack_err(journal); 724 jbd2_journal_ack_err(journal);
722 journal_clear_err(journal); 725 jbd2_journal_clear_err(journal);
723 } 726 }
724} 727}
725 728
@@ -734,7 +737,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed)
734 737
735 osb = journal->j_osb; 738 osb = journal->j_osb;
736 739
737 status = journal_load(journal->j_journal); 740 status = jbd2_journal_load(journal->j_journal);
738 if (status < 0) { 741 if (status < 0) {
739 mlog(ML_ERROR, "Failed to load journal!\n"); 742 mlog(ML_ERROR, "Failed to load journal!\n");
740 goto done; 743 goto done;
@@ -778,7 +781,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full)
778 781
779 BUG_ON(!journal); 782 BUG_ON(!journal);
780 783
781 status = journal_wipe(journal->j_journal, full); 784 status = jbd2_journal_wipe(journal->j_journal, full);
782 if (status < 0) { 785 if (status < 0) {
783 mlog_errno(status); 786 mlog_errno(status);
784 goto bail; 787 goto bail;
@@ -847,9 +850,8 @@ static int ocfs2_force_read_journal(struct inode *inode)
847 850
848 /* We are reading journal data which should not 851 /* We are reading journal data which should not
849 * be put in the uptodate cache */ 852 * be put in the uptodate cache */
850 status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), 853 status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb),
851 p_blkno, p_blocks, bhs, 0, 854 p_blkno, p_blocks, bhs);
852 NULL);
853 if (status < 0) { 855 if (status < 0) {
854 mlog_errno(status); 856 mlog_errno(status);
855 goto bail; 857 goto bail;
@@ -865,8 +867,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
865 867
866bail: 868bail:
867 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) 869 for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++)
868 if (bhs[i]) 870 brelse(bhs[i]);
869 brelse(bhs[i]);
870 mlog_exit(status); 871 mlog_exit(status);
871 return status; 872 return status;
872} 873}
@@ -1133,7 +1134,8 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb,
1133 } 1134 }
1134 SET_INODE_JOURNAL(inode); 1135 SET_INODE_JOURNAL(inode);
1135 1136
1136 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); 1137 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh,
1138 OCFS2_BH_IGNORE_CACHE);
1137 if (status < 0) { 1139 if (status < 0) {
1138 mlog_errno(status); 1140 mlog_errno(status);
1139 goto bail; 1141 goto bail;
@@ -1229,19 +1231,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1229 } 1231 }
1230 1232
1231 mlog(0, "calling journal_init_inode\n"); 1233 mlog(0, "calling journal_init_inode\n");
1232 journal = journal_init_inode(inode); 1234 journal = jbd2_journal_init_inode(inode);
1233 if (journal == NULL) { 1235 if (journal == NULL) {
1234 mlog(ML_ERROR, "Linux journal layer error\n"); 1236 mlog(ML_ERROR, "Linux journal layer error\n");
1235 status = -EIO; 1237 status = -EIO;
1236 goto done; 1238 goto done;
1237 } 1239 }
1238 1240
1239 status = journal_load(journal); 1241 status = jbd2_journal_load(journal);
1240 if (status < 0) { 1242 if (status < 0) {
1241 mlog_errno(status); 1243 mlog_errno(status);
1242 if (!igrab(inode)) 1244 if (!igrab(inode))
1243 BUG(); 1245 BUG();
1244 journal_destroy(journal); 1246 jbd2_journal_destroy(journal);
1245 goto done; 1247 goto done;
1246 } 1248 }
1247 1249
@@ -1249,9 +1251,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1249 1251
1250 /* wipe the journal */ 1252 /* wipe the journal */
1251 mlog(0, "flushing the journal.\n"); 1253 mlog(0, "flushing the journal.\n");
1252 journal_lock_updates(journal); 1254 jbd2_journal_lock_updates(journal);
1253 status = journal_flush(journal); 1255 status = jbd2_journal_flush(journal);
1254 journal_unlock_updates(journal); 1256 jbd2_journal_unlock_updates(journal);
1255 if (status < 0) 1257 if (status < 0)
1256 mlog_errno(status); 1258 mlog_errno(status);
1257 1259
@@ -1272,7 +1274,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1272 if (!igrab(inode)) 1274 if (!igrab(inode))
1273 BUG(); 1275 BUG();
1274 1276
1275 journal_destroy(journal); 1277 jbd2_journal_destroy(journal);
1276 1278
1277done: 1279done:
1278 /* drop the lock on this nodes journal */ 1280 /* drop the lock on this nodes journal */
@@ -1282,8 +1284,7 @@ done:
1282 if (inode) 1284 if (inode)
1283 iput(inode); 1285 iput(inode);
1284 1286
1285 if (bh) 1287 brelse(bh);
1286 brelse(bh);
1287 1288
1288 mlog_exit(status); 1289 mlog_exit(status);
1289 return status; 1290 return status;
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2178ebffa05f..d4d14e9a3cea 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -27,7 +27,12 @@
27#define OCFS2_JOURNAL_H 27#define OCFS2_JOURNAL_H
28 28
29#include <linux/fs.h> 29#include <linux/fs.h>
30#include <linux/jbd.h> 30#ifndef CONFIG_OCFS2_COMPAT_JBD
31# include <linux/jbd2.h>
32#else
33# include <linux/jbd.h>
34# include "ocfs2_jbd_compat.h"
35#endif
31 36
32enum ocfs2_journal_state { 37enum ocfs2_journal_state {
33 OCFS2_JOURNAL_FREE = 0, 38 OCFS2_JOURNAL_FREE = 0,
@@ -215,8 +220,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
215 * buffer. Will have to call ocfs2_journal_dirty once 220 * buffer. Will have to call ocfs2_journal_dirty once
216 * we've actually dirtied it. Type is one of . or . 221 * we've actually dirtied it. Type is one of . or .
217 * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. 222 * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data.
218 * ocfs2_journal_dirty_data - Indicate that a data buffer should go out before 223 * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before
219 * the current handle commits. 224 * the current handle commits.
220 */ 225 */
221 226
222/* You must always start_trans with a number of buffs > 0, but it's 227/* You must always start_trans with a number of buffs > 0, but it's
@@ -268,8 +273,10 @@ int ocfs2_journal_access(handle_t *handle,
268 */ 273 */
269int ocfs2_journal_dirty(handle_t *handle, 274int ocfs2_journal_dirty(handle_t *handle,
270 struct buffer_head *bh); 275 struct buffer_head *bh);
276#ifdef CONFIG_OCFS2_COMPAT_JBD
271int ocfs2_journal_dirty_data(handle_t *handle, 277int ocfs2_journal_dirty_data(handle_t *handle,
272 struct buffer_head *bh); 278 struct buffer_head *bh);
279#endif
273 280
274/* 281/*
275 * Credit Macros: 282 * Credit Macros:
@@ -283,6 +290,9 @@ int ocfs2_journal_dirty_data(handle_t *handle,
283/* simple file updates like chmod, etc. */ 290/* simple file updates like chmod, etc. */
284#define OCFS2_INODE_UPDATE_CREDITS 1 291#define OCFS2_INODE_UPDATE_CREDITS 1
285 292
293/* extended attribute block update */
294#define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1
295
286/* group extend. inode update and last group update. */ 296/* group extend. inode update and last group update. */
287#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) 297#define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
288 298
@@ -340,11 +350,23 @@ int ocfs2_journal_dirty_data(handle_t *handle,
340#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ 350#define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \
341 + OCFS2_UNLINK_CREDITS) 351 + OCFS2_UNLINK_CREDITS)
342 352
353/* global bitmap dinode, group desc., relinked group,
354 * suballocator dinode, group desc., relinked group,
355 * dinode, xattr block */
356#define OCFS2_XATTR_BLOCK_CREATE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + \
357 + OCFS2_INODE_UPDATE_CREDITS \
358 + OCFS2_XATTR_BLOCK_UPDATE_CREDITS)
359
360/*
361 * Please note that the caller must make sure that root_el is the root
362 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
363 * the result may be wrong.
364 */
343static inline int ocfs2_calc_extend_credits(struct super_block *sb, 365static inline int ocfs2_calc_extend_credits(struct super_block *sb,
344 struct ocfs2_dinode *fe, 366 struct ocfs2_extent_list *root_el,
345 u32 bits_wanted) 367 u32 bits_wanted)
346{ 368{
347 int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks; 369 int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks;
348 370
349 /* bitmap dinode, group desc. + relinked group. */ 371 /* bitmap dinode, group desc. + relinked group. */
350 bitmap_blocks = OCFS2_SUBALLOC_ALLOC; 372 bitmap_blocks = OCFS2_SUBALLOC_ALLOC;
@@ -355,16 +377,16 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb,
355 * however many metadata chunks needed * a remaining suballoc 377 * however many metadata chunks needed * a remaining suballoc
356 * alloc. */ 378 * alloc. */
357 sysfile_bitmap_blocks = 1 + 379 sysfile_bitmap_blocks = 1 +
358 (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe); 380 (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(root_el);
359 381
360 /* this does not include *new* metadata blocks, which are 382 /* this does not include *new* metadata blocks, which are
361 * accounted for in sysfile_bitmap_blocks. fe + 383 * accounted for in sysfile_bitmap_blocks. root_el +
362 * prev. last_eb_blk + blocks along edge of tree. 384 * prev. last_eb_blk + blocks along edge of tree.
363 * calc_symlink_credits passes because we just need 1 385 * calc_symlink_credits passes because we just need 1
364 * credit for the dinode there. */ 386 * credit for the dinode there. */
365 dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth); 387 extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth);
366 388
367 return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks; 389 return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
368} 390}
369 391
370static inline int ocfs2_calc_symlink_credits(struct super_block *sb) 392static inline int ocfs2_calc_symlink_credits(struct super_block *sb)
@@ -415,4 +437,16 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb,
415 return credits; 437 return credits;
416} 438}
417 439
440static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode)
441{
442 return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode);
443}
444
445static inline int ocfs2_begin_ordered_truncate(struct inode *inode,
446 loff_t new_size)
447{
448 return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode,
449 new_size);
450}
451
418#endif /* OCFS2_JOURNAL_H */ 452#endif /* OCFS2_JOURNAL_H */
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 28e492e4ec88..687b28713c32 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -28,6 +28,7 @@
28#include <linux/slab.h> 28#include <linux/slab.h>
29#include <linux/highmem.h> 29#include <linux/highmem.h>
30#include <linux/bitops.h> 30#include <linux/bitops.h>
31#include <linux/debugfs.h>
31 32
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC 33#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h> 34#include <cluster/masklog.h>
@@ -47,8 +48,6 @@
47 48
48#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) 49#define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab))
49 50
50static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb);
51
52static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); 51static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc);
53 52
54static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, 53static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb,
@@ -75,24 +74,129 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
75static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, 74static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
76 struct inode *local_alloc_inode); 75 struct inode *local_alloc_inode);
77 76
78static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) 77#ifdef CONFIG_OCFS2_FS_STATS
78
79static int ocfs2_la_debug_open(struct inode *inode, struct file *file)
80{
81 file->private_data = inode->i_private;
82 return 0;
83}
84
85#define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE
86#define LA_DEBUG_VER 1
87static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf,
88 size_t count, loff_t *ppos)
89{
90 static DEFINE_MUTEX(la_debug_mutex);
91 struct ocfs2_super *osb = file->private_data;
92 int written, ret;
93 char *buf = osb->local_alloc_debug_buf;
94
95 mutex_lock(&la_debug_mutex);
96 memset(buf, 0, LA_DEBUG_BUF_SZ);
97
98 written = snprintf(buf, LA_DEBUG_BUF_SZ,
99 "0x%x\t0x%llx\t%u\t%u\t0x%x\n",
100 LA_DEBUG_VER,
101 (unsigned long long)osb->la_last_gd,
102 osb->local_alloc_default_bits,
103 osb->local_alloc_bits, osb->local_alloc_state);
104
105 ret = simple_read_from_buffer(userbuf, count, ppos, buf, written);
106
107 mutex_unlock(&la_debug_mutex);
108 return ret;
109}
110
111static const struct file_operations ocfs2_la_debug_fops = {
112 .open = ocfs2_la_debug_open,
113 .read = ocfs2_la_debug_read,
114};
115
116static void ocfs2_init_la_debug(struct ocfs2_super *osb)
117{
118 osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS);
119 if (!osb->local_alloc_debug_buf)
120 return;
121
122 osb->local_alloc_debug = debugfs_create_file("local_alloc_stats",
123 S_IFREG|S_IRUSR,
124 osb->osb_debug_root,
125 osb,
126 &ocfs2_la_debug_fops);
127 if (!osb->local_alloc_debug) {
128 kfree(osb->local_alloc_debug_buf);
129 osb->local_alloc_debug_buf = NULL;
130 }
131}
132
133static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
134{
135 if (osb->local_alloc_debug)
136 debugfs_remove(osb->local_alloc_debug);
137
138 if (osb->local_alloc_debug_buf)
139 kfree(osb->local_alloc_debug_buf);
140
141 osb->local_alloc_debug_buf = NULL;
142 osb->local_alloc_debug = NULL;
143}
144#else /* CONFIG_OCFS2_FS_STATS */
145static void ocfs2_init_la_debug(struct ocfs2_super *osb)
146{
147 return;
148}
149static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb)
150{
151 return;
152}
153#endif
154
155static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb)
79{ 156{
80 BUG_ON(osb->s_clustersize_bits > 20); 157 return (osb->local_alloc_state == OCFS2_LA_THROTTLED ||
158 osb->local_alloc_state == OCFS2_LA_ENABLED);
159}
81 160
82 /* Size local alloc windows by the megabyte */ 161void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
83 return osb->local_alloc_size << (20 - osb->s_clustersize_bits); 162 unsigned int num_clusters)
163{
164 spin_lock(&osb->osb_lock);
165 if (osb->local_alloc_state == OCFS2_LA_DISABLED ||
166 osb->local_alloc_state == OCFS2_LA_THROTTLED)
167 if (num_clusters >= osb->local_alloc_default_bits) {
168 cancel_delayed_work(&osb->la_enable_wq);
169 osb->local_alloc_state = OCFS2_LA_ENABLED;
170 }
171 spin_unlock(&osb->osb_lock);
172}
173
174void ocfs2_la_enable_worker(struct work_struct *work)
175{
176 struct ocfs2_super *osb =
177 container_of(work, struct ocfs2_super,
178 la_enable_wq.work);
179 spin_lock(&osb->osb_lock);
180 osb->local_alloc_state = OCFS2_LA_ENABLED;
181 spin_unlock(&osb->osb_lock);
84} 182}
85 183
86/* 184/*
87 * Tell us whether a given allocation should use the local alloc 185 * Tell us whether a given allocation should use the local alloc
88 * file. Otherwise, it has to go to the main bitmap. 186 * file. Otherwise, it has to go to the main bitmap.
187 *
188 * This function does semi-dirty reads of local alloc size and state!
189 * This is ok however, as the values are re-checked once under mutex.
89 */ 190 */
90int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) 191int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
91{ 192{
92 int la_bits = ocfs2_local_alloc_window_bits(osb);
93 int ret = 0; 193 int ret = 0;
194 int la_bits;
195
196 spin_lock(&osb->osb_lock);
197 la_bits = osb->local_alloc_bits;
94 198
95 if (osb->local_alloc_state != OCFS2_LA_ENABLED) 199 if (!ocfs2_la_state_enabled(osb))
96 goto bail; 200 goto bail;
97 201
98 /* la_bits should be at least twice the size (in clusters) of 202 /* la_bits should be at least twice the size (in clusters) of
@@ -106,6 +210,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits)
106bail: 210bail:
107 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", 211 mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n",
108 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); 212 osb->local_alloc_state, (unsigned long long)bits, la_bits, ret);
213 spin_unlock(&osb->osb_lock);
109 return ret; 214 return ret;
110} 215}
111 216
@@ -120,14 +225,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
120 225
121 mlog_entry_void(); 226 mlog_entry_void();
122 227
123 if (osb->local_alloc_size == 0) 228 ocfs2_init_la_debug(osb);
229
230 if (osb->local_alloc_bits == 0)
124 goto bail; 231 goto bail;
125 232
126 if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { 233 if (osb->local_alloc_bits >= osb->bitmap_cpg) {
127 mlog(ML_NOTICE, "Requested local alloc window %d is larger " 234 mlog(ML_NOTICE, "Requested local alloc window %d is larger "
128 "than max possible %u. Using defaults.\n", 235 "than max possible %u. Using defaults.\n",
129 ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); 236 osb->local_alloc_bits, (osb->bitmap_cpg - 1));
130 osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; 237 osb->local_alloc_bits =
238 ocfs2_megabytes_to_clusters(osb->sb,
239 OCFS2_DEFAULT_LOCAL_ALLOC_SIZE);
131 } 240 }
132 241
133 /* read the alloc off disk */ 242 /* read the alloc off disk */
@@ -139,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
139 goto bail; 248 goto bail;
140 } 249 }
141 250
142 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, 251 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
143 &alloc_bh, 0, inode); 252 &alloc_bh, OCFS2_BH_IGNORE_CACHE);
144 if (status < 0) { 253 if (status < 0) {
145 mlog_errno(status); 254 mlog_errno(status);
146 goto bail; 255 goto bail;
@@ -185,13 +294,14 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb)
185 294
186bail: 295bail:
187 if (status < 0) 296 if (status < 0)
188 if (alloc_bh) 297 brelse(alloc_bh);
189 brelse(alloc_bh);
190 if (inode) 298 if (inode)
191 iput(inode); 299 iput(inode);
192 300
193 mlog(0, "Local alloc window bits = %d\n", 301 if (status < 0)
194 ocfs2_local_alloc_window_bits(osb)); 302 ocfs2_shutdown_la_debug(osb);
303
304 mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits);
195 305
196 mlog_exit(status); 306 mlog_exit(status);
197 return status; 307 return status;
@@ -217,6 +327,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
217 327
218 mlog_entry_void(); 328 mlog_entry_void();
219 329
330 cancel_delayed_work(&osb->la_enable_wq);
331 flush_workqueue(ocfs2_wq);
332
333 ocfs2_shutdown_la_debug(osb);
334
220 if (osb->local_alloc_state == OCFS2_LA_UNUSED) 335 if (osb->local_alloc_state == OCFS2_LA_UNUSED)
221 goto out; 336 goto out;
222 337
@@ -295,8 +410,7 @@ out_commit:
295 ocfs2_commit_trans(osb, handle); 410 ocfs2_commit_trans(osb, handle);
296 411
297out_unlock: 412out_unlock:
298 if (main_bm_bh) 413 brelse(main_bm_bh);
299 brelse(main_bm_bh);
300 414
301 ocfs2_inode_unlock(main_bm_inode, 1); 415 ocfs2_inode_unlock(main_bm_inode, 1);
302 416
@@ -345,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
345 459
346 mutex_lock(&inode->i_mutex); 460 mutex_lock(&inode->i_mutex);
347 461
348 status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, 462 status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1,
349 &alloc_bh, 0, inode); 463 &alloc_bh, OCFS2_BH_IGNORE_CACHE);
350 if (status < 0) { 464 if (status < 0) {
351 mlog_errno(status); 465 mlog_errno(status);
352 goto bail; 466 goto bail;
@@ -372,8 +486,7 @@ bail:
372 *alloc_copy = NULL; 486 *alloc_copy = NULL;
373 } 487 }
374 488
375 if (alloc_bh) 489 brelse(alloc_bh);
376 brelse(alloc_bh);
377 490
378 if (inode) { 491 if (inode) {
379 mutex_unlock(&inode->i_mutex); 492 mutex_unlock(&inode->i_mutex);
@@ -441,8 +554,7 @@ out_unlock:
441out_mutex: 554out_mutex:
442 mutex_unlock(&main_bm_inode->i_mutex); 555 mutex_unlock(&main_bm_inode->i_mutex);
443 556
444 if (main_bm_bh) 557 brelse(main_bm_bh);
445 brelse(main_bm_bh);
446 558
447 iput(main_bm_inode); 559 iput(main_bm_inode);
448 560
@@ -453,8 +565,48 @@ out:
453 return status; 565 return status;
454} 566}
455 567
568/* Check to see if the local alloc window is within ac->ac_max_block */
569static int ocfs2_local_alloc_in_range(struct inode *inode,
570 struct ocfs2_alloc_context *ac,
571 u32 bits_wanted)
572{
573 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
574 struct ocfs2_dinode *alloc;
575 struct ocfs2_local_alloc *la;
576 int start;
577 u64 block_off;
578
579 if (!ac->ac_max_block)
580 return 1;
581
582 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
583 la = OCFS2_LOCAL_ALLOC(alloc);
584
585 start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted);
586 if (start == -1) {
587 mlog_errno(-ENOSPC);
588 return 0;
589 }
590
591 /*
592 * Converting (bm_off + start + bits_wanted) to blocks gives us
593 * the blkno just past our actual allocation. This is perfect
594 * to compare with ac_max_block.
595 */
596 block_off = ocfs2_clusters_to_blocks(inode->i_sb,
597 le32_to_cpu(la->la_bm_off) +
598 start + bits_wanted);
599 mlog(0, "Checking %llu against %llu\n",
600 (unsigned long long)block_off,
601 (unsigned long long)ac->ac_max_block);
602 if (block_off > ac->ac_max_block)
603 return 0;
604
605 return 1;
606}
607
456/* 608/*
457 * make sure we've got at least bitswanted contiguous bits in the 609 * make sure we've got at least bits_wanted contiguous bits in the
458 * local alloc. You lose them when you drop i_mutex. 610 * local alloc. You lose them when you drop i_mutex.
459 * 611 *
460 * We will add ourselves to the transaction passed in, but may start 612 * We will add ourselves to the transaction passed in, but may start
@@ -485,16 +637,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
485 637
486 mutex_lock(&local_alloc_inode->i_mutex); 638 mutex_lock(&local_alloc_inode->i_mutex);
487 639
488 if (osb->local_alloc_state != OCFS2_LA_ENABLED) { 640 /*
489 status = -ENOSPC; 641 * We must double check state and allocator bits because
490 goto bail; 642 * another process may have changed them while holding i_mutex.
491 } 643 */
492 644 spin_lock(&osb->osb_lock);
493 if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) { 645 if (!ocfs2_la_state_enabled(osb) ||
494 mlog(0, "Asking for more than my max window size!\n"); 646 (bits_wanted > osb->local_alloc_bits)) {
647 spin_unlock(&osb->osb_lock);
495 status = -ENOSPC; 648 status = -ENOSPC;
496 goto bail; 649 goto bail;
497 } 650 }
651 spin_unlock(&osb->osb_lock);
498 652
499 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; 653 alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data;
500 654
@@ -522,6 +676,36 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb,
522 mlog_errno(status); 676 mlog_errno(status);
523 goto bail; 677 goto bail;
524 } 678 }
679
680 /*
681 * Under certain conditions, the window slide code
682 * might have reduced the number of bits available or
683 * disabled the the local alloc entirely. Re-check
684 * here and return -ENOSPC if necessary.
685 */
686 status = -ENOSPC;
687 if (!ocfs2_la_state_enabled(osb))
688 goto bail;
689
690 free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) -
691 le32_to_cpu(alloc->id1.bitmap1.i_used);
692 if (bits_wanted > free_bits)
693 goto bail;
694 }
695
696 if (ac->ac_max_block)
697 mlog(0, "Calling in_range for max block %llu\n",
698 (unsigned long long)ac->ac_max_block);
699
700 if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac,
701 bits_wanted)) {
702 /*
703 * The window is outside ac->ac_max_block.
704 * This errno tells the caller to keep localalloc enabled
705 * but to get the allocation from the main bitmap.
706 */
707 status = -EFBIG;
708 goto bail;
525 } 709 }
526 710
527 ac->ac_inode = local_alloc_inode; 711 ac->ac_inode = local_alloc_inode;
@@ -789,6 +973,85 @@ bail:
789 return status; 973 return status;
790} 974}
791 975
976enum ocfs2_la_event {
977 OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */
978 OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has
979 * enough bits theoretically
980 * free, but a contiguous
981 * allocation could not be
982 * found. */
983 OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have
984 * enough bits free to satisfy
985 * our request. */
986};
987#define OCFS2_LA_ENABLE_INTERVAL (30 * HZ)
988/*
989 * Given an event, calculate the size of our next local alloc window.
990 *
991 * This should always be called under i_mutex of the local alloc inode
992 * so that local alloc disabling doesn't race with processes trying to
993 * use the allocator.
994 *
995 * Returns the state which the local alloc was left in. This value can
996 * be ignored by some paths.
997 */
998static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
999 enum ocfs2_la_event event)
1000{
1001 unsigned int bits;
1002 int state;
1003
1004 spin_lock(&osb->osb_lock);
1005 if (osb->local_alloc_state == OCFS2_LA_DISABLED) {
1006 WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED);
1007 goto out_unlock;
1008 }
1009
1010 /*
1011 * ENOSPC and fragmentation are treated similarly for now.
1012 */
1013 if (event == OCFS2_LA_EVENT_ENOSPC ||
1014 event == OCFS2_LA_EVENT_FRAGMENTED) {
1015 /*
1016 * We ran out of contiguous space in the primary
1017 * bitmap. Drastically reduce the number of bits used
1018 * by local alloc until we have to disable it.
1019 */
1020 bits = osb->local_alloc_bits >> 1;
1021 if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) {
1022 /*
1023 * By setting state to THROTTLED, we'll keep
1024 * the number of local alloc bits used down
1025 * until an event occurs which would give us
1026 * reason to assume the bitmap situation might
1027 * have changed.
1028 */
1029 osb->local_alloc_state = OCFS2_LA_THROTTLED;
1030 osb->local_alloc_bits = bits;
1031 } else {
1032 osb->local_alloc_state = OCFS2_LA_DISABLED;
1033 }
1034 queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
1035 OCFS2_LA_ENABLE_INTERVAL);
1036 goto out_unlock;
1037 }
1038
1039 /*
1040 * Don't increase the size of the local alloc window until we
1041 * know we might be able to fulfill the request. Otherwise, we
1042 * risk bouncing around the global bitmap during periods of
1043 * low space.
1044 */
1045 if (osb->local_alloc_state != OCFS2_LA_THROTTLED)
1046 osb->local_alloc_bits = osb->local_alloc_default_bits;
1047
1048out_unlock:
1049 state = osb->local_alloc_state;
1050 spin_unlock(&osb->osb_lock);
1051
1052 return state;
1053}
1054
792static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, 1055static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
793 struct ocfs2_alloc_context **ac, 1056 struct ocfs2_alloc_context **ac,
794 struct inode **bitmap_inode, 1057 struct inode **bitmap_inode,
@@ -803,12 +1066,21 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
803 goto bail; 1066 goto bail;
804 } 1067 }
805 1068
806 (*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb); 1069retry_enospc:
1070 (*ac)->ac_bits_wanted = osb->local_alloc_bits;
807 1071
808 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); 1072 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
1073 if (status == -ENOSPC) {
1074 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
1075 OCFS2_LA_DISABLED)
1076 goto bail;
1077
1078 ocfs2_free_ac_resource(*ac);
1079 memset(*ac, 0, sizeof(struct ocfs2_alloc_context));
1080 goto retry_enospc;
1081 }
809 if (status < 0) { 1082 if (status < 0) {
810 if (status != -ENOSPC) 1083 mlog_errno(status);
811 mlog_errno(status);
812 goto bail; 1084 goto bail;
813 } 1085 }
814 1086
@@ -849,7 +1121,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
849 "one\n"); 1121 "one\n");
850 1122
851 mlog(0, "Allocating %u clusters for a new window.\n", 1123 mlog(0, "Allocating %u clusters for a new window.\n",
852 ocfs2_local_alloc_window_bits(osb)); 1124 osb->local_alloc_bits);
853 1125
854 /* Instruct the allocation code to try the most recently used 1126 /* Instruct the allocation code to try the most recently used
855 * cluster group. We'll re-record the group used this pass 1127 * cluster group. We'll re-record the group used this pass
@@ -859,9 +1131,36 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb,
859 /* we used the generic suballoc reserve function, but we set 1131 /* we used the generic suballoc reserve function, but we set
860 * everything up nicely, so there's no reason why we can't use 1132 * everything up nicely, so there's no reason why we can't use
861 * the more specific cluster api to claim bits. */ 1133 * the more specific cluster api to claim bits. */
862 status = ocfs2_claim_clusters(osb, handle, ac, 1134 status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits,
863 ocfs2_local_alloc_window_bits(osb),
864 &cluster_off, &cluster_count); 1135 &cluster_off, &cluster_count);
1136 if (status == -ENOSPC) {
1137retry_enospc:
1138 /*
1139 * Note: We could also try syncing the journal here to
1140 * allow use of any free bits which the current
1141 * transaction can't give us access to. --Mark
1142 */
1143 if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) ==
1144 OCFS2_LA_DISABLED)
1145 goto bail;
1146
1147 status = ocfs2_claim_clusters(osb, handle, ac,
1148 osb->local_alloc_bits,
1149 &cluster_off,
1150 &cluster_count);
1151 if (status == -ENOSPC)
1152 goto retry_enospc;
1153 /*
1154 * We only shrunk the *minimum* number of in our
1155 * request - it's entirely possible that the allocator
1156 * might give us more than we asked for.
1157 */
1158 if (status == 0) {
1159 spin_lock(&osb->osb_lock);
1160 osb->local_alloc_bits = cluster_count;
1161 spin_unlock(&osb->osb_lock);
1162 }
1163 }
865 if (status < 0) { 1164 if (status < 0) {
866 if (status != -ENOSPC) 1165 if (status != -ENOSPC)
867 mlog_errno(status); 1166 mlog_errno(status);
@@ -905,6 +1204,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
905 1204
906 mlog_entry_void(); 1205 mlog_entry_void();
907 1206
1207 ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE);
1208
908 /* This will lock the main bitmap for us. */ 1209 /* This will lock the main bitmap for us. */
909 status = ocfs2_local_alloc_reserve_for_window(osb, 1210 status = ocfs2_local_alloc_reserve_for_window(osb,
910 &ac, 1211 &ac,
@@ -976,8 +1277,7 @@ bail:
976 if (handle) 1277 if (handle)
977 ocfs2_commit_trans(osb, handle); 1278 ocfs2_commit_trans(osb, handle);
978 1279
979 if (main_bm_bh) 1280 brelse(main_bm_bh);
980 brelse(main_bm_bh);
981 1281
982 if (main_bm_inode) 1282 if (main_bm_inode)
983 iput(main_bm_inode); 1283 iput(main_bm_inode);
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h
index 3f76631e110c..ac5ea9f86653 100644
--- a/fs/ocfs2/localalloc.h
+++ b/fs/ocfs2/localalloc.h
@@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
52 u32 *bit_off, 52 u32 *bit_off,
53 u32 *num_bits); 53 u32 *num_bits);
54 54
55void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb,
56 unsigned int num_clusters);
57void ocfs2_la_enable_worker(struct work_struct *work);
58
55#endif /* OCFS2_LOCALALLOC_H */ 59#endif /* OCFS2_LOCALALLOC_H */
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
index 203f87143877..544ac6245175 100644
--- a/fs/ocfs2/locks.c
+++ b/fs/ocfs2/locks.c
@@ -24,6 +24,7 @@
24 */ 24 */
25 25
26#include <linux/fs.h> 26#include <linux/fs.h>
27#include <linux/fcntl.h>
27 28
28#define MLOG_MASK_PREFIX ML_INODE 29#define MLOG_MASK_PREFIX ML_INODE
29#include <cluster/masklog.h> 30#include <cluster/masklog.h>
@@ -32,6 +33,7 @@
32 33
33#include "dlmglue.h" 34#include "dlmglue.h"
34#include "file.h" 35#include "file.h"
36#include "inode.h"
35#include "locks.h" 37#include "locks.h"
36 38
37static int ocfs2_do_flock(struct file *file, struct inode *inode, 39static int ocfs2_do_flock(struct file *file, struct inode *inode,
@@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl)
123 else 125 else
124 return ocfs2_do_flock(file, inode, cmd, fl); 126 return ocfs2_do_flock(file, inode, cmd, fl);
125} 127}
128
129int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
130{
131 struct inode *inode = file->f_mapping->host;
132 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
133
134 if (!(fl->fl_flags & FL_POSIX))
135 return -ENOLCK;
136 if (__mandatory_lock(inode))
137 return -ENOLCK;
138
139 return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
140}
diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h
index 9743ef2324ec..496d488b271f 100644
--- a/fs/ocfs2/locks.h
+++ b/fs/ocfs2/locks.h
@@ -27,5 +27,6 @@
27#define OCFS2_LOCKS_H 27#define OCFS2_LOCKS_H
28 28
29int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); 29int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl);
30int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl);
30 31
31#endif /* OCFS2_LOCKS_H */ 32#endif /* OCFS2_LOCKS_H */
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index d5d808fe0140..485a6aa0ad39 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -60,6 +60,7 @@
60#include "symlink.h" 60#include "symlink.h"
61#include "sysfile.h" 61#include "sysfile.h"
62#include "uptodate.h" 62#include "uptodate.h"
63#include "xattr.h"
63 64
64#include "buffer_head_io.h" 65#include "buffer_head_io.h"
65 66
@@ -327,14 +328,9 @@ leave:
327 if (status == -ENOSPC) 328 if (status == -ENOSPC)
328 mlog(0, "Disk is full\n"); 329 mlog(0, "Disk is full\n");
329 330
330 if (new_fe_bh) 331 brelse(new_fe_bh);
331 brelse(new_fe_bh); 332 brelse(de_bh);
332 333 brelse(parent_fe_bh);
333 if (de_bh)
334 brelse(de_bh);
335
336 if (parent_fe_bh)
337 brelse(parent_fe_bh);
338 334
339 if ((status < 0) && inode) 335 if ((status < 0) && inode)
340 iput(inode); 336 iput(inode);
@@ -647,12 +643,9 @@ out_unlock_inode:
647out: 643out:
648 ocfs2_inode_unlock(dir, 1); 644 ocfs2_inode_unlock(dir, 1);
649 645
650 if (de_bh) 646 brelse(de_bh);
651 brelse(de_bh); 647 brelse(fe_bh);
652 if (fe_bh) 648 brelse(parent_fe_bh);
653 brelse(fe_bh);
654 if (parent_fe_bh)
655 brelse(parent_fe_bh);
656 649
657 mlog_exit(err); 650 mlog_exit(err);
658 651
@@ -851,17 +844,10 @@ leave:
851 iput(orphan_dir); 844 iput(orphan_dir);
852 } 845 }
853 846
854 if (fe_bh) 847 brelse(fe_bh);
855 brelse(fe_bh); 848 brelse(dirent_bh);
856 849 brelse(parent_node_bh);
857 if (dirent_bh) 850 brelse(orphan_entry_bh);
858 brelse(dirent_bh);
859
860 if (parent_node_bh)
861 brelse(parent_node_bh);
862
863 if (orphan_entry_bh)
864 brelse(orphan_entry_bh);
865 851
866 mlog_exit(status); 852 mlog_exit(status);
867 853
@@ -1372,24 +1358,15 @@ bail:
1372 1358
1373 if (new_inode) 1359 if (new_inode)
1374 iput(new_inode); 1360 iput(new_inode);
1375 if (newfe_bh) 1361 brelse(newfe_bh);
1376 brelse(newfe_bh); 1362 brelse(old_inode_bh);
1377 if (old_inode_bh) 1363 brelse(old_dir_bh);
1378 brelse(old_inode_bh); 1364 brelse(new_dir_bh);
1379 if (old_dir_bh) 1365 brelse(new_de_bh);
1380 brelse(old_dir_bh); 1366 brelse(old_de_bh);
1381 if (new_dir_bh) 1367 brelse(old_inode_de_bh);
1382 brelse(new_dir_bh); 1368 brelse(orphan_entry_bh);
1383 if (new_de_bh) 1369 brelse(insert_entry_bh);
1384 brelse(new_de_bh);
1385 if (old_de_bh)
1386 brelse(old_de_bh);
1387 if (old_inode_de_bh)
1388 brelse(old_inode_de_bh);
1389 if (orphan_entry_bh)
1390 brelse(orphan_entry_bh);
1391 if (insert_entry_bh)
1392 brelse(insert_entry_bh);
1393 1370
1394 mlog_exit(status); 1371 mlog_exit(status);
1395 1372
@@ -1492,8 +1469,7 @@ bail:
1492 1469
1493 if (bhs) { 1470 if (bhs) {
1494 for(i = 0; i < blocks; i++) 1471 for(i = 0; i < blocks; i++)
1495 if (bhs[i]) 1472 brelse(bhs[i]);
1496 brelse(bhs[i]);
1497 kfree(bhs); 1473 kfree(bhs);
1498 } 1474 }
1499 1475
@@ -1598,10 +1574,10 @@ static int ocfs2_symlink(struct inode *dir,
1598 u32 offset = 0; 1574 u32 offset = 0;
1599 1575
1600 inode->i_op = &ocfs2_symlink_inode_operations; 1576 inode->i_op = &ocfs2_symlink_inode_operations;
1601 status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, 1577 status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0,
1602 new_fe_bh, 1578 new_fe_bh,
1603 handle, data_ac, NULL, 1579 handle, data_ac, NULL,
1604 NULL); 1580 NULL);
1605 if (status < 0) { 1581 if (status < 0) {
1606 if (status != -ENOSPC && status != -EINTR) { 1582 if (status != -ENOSPC && status != -EINTR) {
1607 mlog(ML_ERROR, 1583 mlog(ML_ERROR,
@@ -1659,12 +1635,9 @@ bail:
1659 1635
1660 ocfs2_inode_unlock(dir, 1); 1636 ocfs2_inode_unlock(dir, 1);
1661 1637
1662 if (new_fe_bh) 1638 brelse(new_fe_bh);
1663 brelse(new_fe_bh); 1639 brelse(parent_fe_bh);
1664 if (parent_fe_bh) 1640 brelse(de_bh);
1665 brelse(parent_fe_bh);
1666 if (de_bh)
1667 brelse(de_bh);
1668 if (inode_ac) 1641 if (inode_ac)
1669 ocfs2_free_alloc_context(inode_ac); 1642 ocfs2_free_alloc_context(inode_ac);
1670 if (data_ac) 1643 if (data_ac)
@@ -1759,8 +1732,7 @@ leave:
1759 iput(orphan_dir_inode); 1732 iput(orphan_dir_inode);
1760 } 1733 }
1761 1734
1762 if (orphan_dir_bh) 1735 brelse(orphan_dir_bh);
1763 brelse(orphan_dir_bh);
1764 1736
1765 mlog_exit(status); 1737 mlog_exit(status);
1766 return status; 1738 return status;
@@ -1780,10 +1752,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1780 1752
1781 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); 1753 mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
1782 1754
1783 status = ocfs2_read_block(osb, 1755 status = ocfs2_read_block(orphan_dir_inode,
1784 OCFS2_I(orphan_dir_inode)->ip_blkno, 1756 OCFS2_I(orphan_dir_inode)->ip_blkno,
1785 &orphan_dir_bh, OCFS2_BH_CACHED, 1757 &orphan_dir_bh);
1786 orphan_dir_inode);
1787 if (status < 0) { 1758 if (status < 0) {
1788 mlog_errno(status); 1759 mlog_errno(status);
1789 goto leave; 1760 goto leave;
@@ -1829,8 +1800,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1829 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); 1800 (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
1830 1801
1831leave: 1802leave:
1832 if (orphan_dir_bh) 1803 brelse(orphan_dir_bh);
1833 brelse(orphan_dir_bh);
1834 1804
1835 mlog_exit(status); 1805 mlog_exit(status);
1836 return status; 1806 return status;
@@ -1898,8 +1868,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
1898 } 1868 }
1899 1869
1900leave: 1870leave:
1901 if (target_de_bh) 1871 brelse(target_de_bh);
1902 brelse(target_de_bh);
1903 1872
1904 mlog_exit(status); 1873 mlog_exit(status);
1905 return status; 1874 return status;
@@ -1918,4 +1887,8 @@ const struct inode_operations ocfs2_dir_iops = {
1918 .setattr = ocfs2_setattr, 1887 .setattr = ocfs2_setattr,
1919 .getattr = ocfs2_getattr, 1888 .getattr = ocfs2_getattr,
1920 .permission = ocfs2_permission, 1889 .permission = ocfs2_permission,
1890 .setxattr = generic_setxattr,
1891 .getxattr = generic_getxattr,
1892 .listxattr = ocfs2_listxattr,
1893 .removexattr = generic_removexattr,
1921}; 1894};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7f625f2b1117..a21a465490c4 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -34,7 +34,12 @@
34#include <linux/workqueue.h> 34#include <linux/workqueue.h>
35#include <linux/kref.h> 35#include <linux/kref.h>
36#include <linux/mutex.h> 36#include <linux/mutex.h>
37#include <linux/jbd.h> 37#ifndef CONFIG_OCFS2_COMPAT_JBD
38# include <linux/jbd2.h>
39#else
40# include <linux/jbd.h>
41# include "ocfs2_jbd_compat.h"
42#endif
38 43
39/* For union ocfs2_dlm_lksb */ 44/* For union ocfs2_dlm_lksb */
40#include "stackglue.h" 45#include "stackglue.h"
@@ -171,9 +176,13 @@ struct ocfs2_alloc_stats
171 176
172enum ocfs2_local_alloc_state 177enum ocfs2_local_alloc_state
173{ 178{
174 OCFS2_LA_UNUSED = 0, 179 OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for
175 OCFS2_LA_ENABLED, 180 * this mountpoint. */
176 OCFS2_LA_DISABLED 181 OCFS2_LA_ENABLED, /* Local alloc is in use. */
182 OCFS2_LA_THROTTLED, /* Local alloc is in use, but number
183 * of bits has been reduced. */
184 OCFS2_LA_DISABLED /* Local alloc has temporarily been
185 * disabled. */
177}; 186};
178 187
179enum ocfs2_mount_options 188enum ocfs2_mount_options
@@ -184,6 +193,8 @@ enum ocfs2_mount_options
184 OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ 193 OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */
185 OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ 194 OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */
186 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ 195 OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
196 OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
197 OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
187}; 198};
188 199
189#define OCFS2_OSB_SOFT_RO 0x0001 200#define OCFS2_OSB_SOFT_RO 0x0001
@@ -214,6 +225,7 @@ struct ocfs2_super
214 u32 bitmap_cpg; 225 u32 bitmap_cpg;
215 u8 *uuid; 226 u8 *uuid;
216 char *uuid_str; 227 char *uuid_str;
228 u32 uuid_hash;
217 u8 *vol_label; 229 u8 *vol_label;
218 u64 first_cluster_group_blkno; 230 u64 first_cluster_group_blkno;
219 u32 fs_generation; 231 u32 fs_generation;
@@ -241,6 +253,7 @@ struct ocfs2_super
241 int s_sectsize_bits; 253 int s_sectsize_bits;
242 int s_clustersize; 254 int s_clustersize;
243 int s_clustersize_bits; 255 int s_clustersize_bits;
256 unsigned int s_xattr_inline_size;
244 257
245 atomic_t vol_state; 258 atomic_t vol_state;
246 struct mutex recovery_lock; 259 struct mutex recovery_lock;
@@ -252,11 +265,27 @@ struct ocfs2_super
252 struct ocfs2_journal *journal; 265 struct ocfs2_journal *journal;
253 unsigned long osb_commit_interval; 266 unsigned long osb_commit_interval;
254 267
255 int local_alloc_size; 268 struct delayed_work la_enable_wq;
256 enum ocfs2_local_alloc_state local_alloc_state; 269
270 /*
271 * Must hold local alloc i_mutex and osb->osb_lock to change
272 * local_alloc_bits. Reads can be done under either lock.
273 */
274 unsigned int local_alloc_bits;
275 unsigned int local_alloc_default_bits;
276
277 enum ocfs2_local_alloc_state local_alloc_state; /* protected
278 * by osb_lock */
279
257 struct buffer_head *local_alloc_bh; 280 struct buffer_head *local_alloc_bh;
281
258 u64 la_last_gd; 282 u64 la_last_gd;
259 283
284#ifdef CONFIG_OCFS2_FS_STATS
285 struct dentry *local_alloc_debug;
286 char *local_alloc_debug_buf;
287#endif
288
260 /* Next two fields are for local node slot recovery during 289 /* Next two fields are for local node slot recovery during
261 * mount. */ 290 * mount. */
262 int dirty; 291 int dirty;
@@ -340,6 +369,13 @@ static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb)
340 return 0; 369 return 0;
341} 370}
342 371
372static inline int ocfs2_supports_xattr(struct ocfs2_super *osb)
373{
374 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR)
375 return 1;
376 return 0;
377}
378
343/* set / clear functions because cluster events can make these happen 379/* set / clear functions because cluster events can make these happen
344 * in parallel so we want the transitions to be atomic. this also 380 * in parallel so we want the transitions to be atomic. this also
345 * means that any future flags osb_flags must be protected by spinlock 381 * means that any future flags osb_flags must be protected by spinlock
@@ -554,6 +590,14 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
554 return pages_per_cluster; 590 return pages_per_cluster;
555} 591}
556 592
593static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb,
594 unsigned int megs)
595{
596 BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576);
597
598 return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits);
599}
600
557static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) 601static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
558{ 602{
559 spin_lock(&osb->osb_lock); 603 spin_lock(&osb->osb_lock);
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 4f619850ccf7..f24ce3d3f956 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -64,6 +64,7 @@
64#define OCFS2_INODE_SIGNATURE "INODE01" 64#define OCFS2_INODE_SIGNATURE "INODE01"
65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" 65#define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01"
66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" 66#define OCFS2_GROUP_DESC_SIGNATURE "GROUP01"
67#define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01"
67 68
68/* Compatibility flags */ 69/* Compatibility flags */
69#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ 70#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@@ -90,7 +91,8 @@
90 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ 91 | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \
91 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ 92 | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \
92 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ 93 | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \
93 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) 94 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
95 | OCFS2_FEATURE_INCOMPAT_XATTR)
94#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN 96#define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN
95 97
96/* 98/*
@@ -127,10 +129,6 @@
127/* Support for data packed into inode blocks */ 129/* Support for data packed into inode blocks */
128#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 130#define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040
129 131
130/* Support for the extended slot map */
131#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
132
133
134/* 132/*
135 * Support for alternate, userspace cluster stacks. If set, the superblock 133 * Support for alternate, userspace cluster stacks. If set, the superblock
136 * field s_cluster_info contains a tag for the alternate stack in use as 134 * field s_cluster_info contains a tag for the alternate stack in use as
@@ -142,6 +140,12 @@
142 */ 140 */
143#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 141#define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080
144 142
143/* Support for the extended slot map */
144#define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100
145
146/* Support for extended attributes */
147#define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200
148
145/* 149/*
146 * backup superblock flag is used to indicate that this volume 150 * backup superblock flag is used to indicate that this volume
147 * has backup superblocks. 151 * has backup superblocks.
@@ -299,6 +303,12 @@ struct ocfs2_new_group_input {
299 */ 303 */
300#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 304#define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8
301 305
306/*
307 * Inline extended attribute size (in bytes)
308 * The value chosen should be aligned to 16 byte boundaries.
309 */
310#define OCFS2_MIN_XATTR_INLINE_SIZE 256
311
302struct ocfs2_system_inode_info { 312struct ocfs2_system_inode_info {
303 char *si_name; 313 char *si_name;
304 int si_iflags; 314 int si_iflags;
@@ -563,7 +573,7 @@ struct ocfs2_super_block {
563/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts 573/*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts
564 before tunefs required */ 574 before tunefs required */
565 __le16 s_tunefs_flag; 575 __le16 s_tunefs_flag;
566 __le32 s_reserved1; 576 __le32 s_uuid_hash; /* hash value of uuid */
567 __le64 s_first_cluster_group; /* Block offset of 1st cluster 577 __le64 s_first_cluster_group; /* Block offset of 1st cluster
568 * group header */ 578 * group header */
569/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ 579/*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */
@@ -571,7 +581,11 @@ struct ocfs2_super_block {
571/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace 581/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace
572 stack. Only valid 582 stack. Only valid
573 with INCOMPAT flag. */ 583 with INCOMPAT flag. */
574/*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ 584/*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size
585 for this fs*/
586 __le16 s_reserved0;
587 __le32 s_reserved1;
588/*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */
575/*140*/ 589/*140*/
576 590
577 /* 591 /*
@@ -621,7 +635,8 @@ struct ocfs2_dinode {
621 belongs to */ 635 belongs to */
622 __le16 i_suballoc_bit; /* Bit offset in suballocator 636 __le16 i_suballoc_bit; /* Bit offset in suballocator
623 block group */ 637 block group */
624/*10*/ __le32 i_reserved0; 638/*10*/ __le16 i_reserved0;
639 __le16 i_xattr_inline_size;
625 __le32 i_clusters; /* Cluster count */ 640 __le32 i_clusters; /* Cluster count */
626 __le32 i_uid; /* Owner UID */ 641 __le32 i_uid; /* Owner UID */
627 __le32 i_gid; /* Owning GID */ 642 __le32 i_gid; /* Owning GID */
@@ -640,11 +655,12 @@ struct ocfs2_dinode {
640 __le32 i_atime_nsec; 655 __le32 i_atime_nsec;
641 __le32 i_ctime_nsec; 656 __le32 i_ctime_nsec;
642 __le32 i_mtime_nsec; 657 __le32 i_mtime_nsec;
643 __le32 i_attr; 658/*70*/ __le32 i_attr;
644 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL 659 __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL
645 was set in i_flags */ 660 was set in i_flags */
646 __le16 i_dyn_features; 661 __le16 i_dyn_features;
647/*70*/ __le64 i_reserved2[8]; 662 __le64 i_xattr_loc;
663/*80*/ __le64 i_reserved2[7];
648/*B8*/ union { 664/*B8*/ union {
649 __le64 i_pad1; /* Generic way to refer to this 665 __le64 i_pad1; /* Generic way to refer to this
650 64bit union */ 666 64bit union */
@@ -715,6 +731,136 @@ struct ocfs2_group_desc
715/*40*/ __u8 bg_bitmap[0]; 731/*40*/ __u8 bg_bitmap[0];
716}; 732};
717 733
734/*
735 * On disk extended attribute structure for OCFS2.
736 */
737
738/*
739 * ocfs2_xattr_entry indicates one extend attribute.
740 *
741 * Note that it can be stored in inode, one block or one xattr bucket.
742 */
743struct ocfs2_xattr_entry {
744 __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */
745 __le16 xe_name_offset; /* byte offset from the 1st etnry in the local
746 local xattr storage(inode, xattr block or
747 xattr bucket). */
748 __u8 xe_name_len; /* xattr name len, does't include prefix. */
749 __u8 xe_type; /* the low 7 bits indicates the name prefix's
750 * type and the highest 1 bits indicate whether
751 * the EA is stored in the local storage. */
752 __le64 xe_value_size; /* real xattr value length. */
753};
754
755/*
756 * On disk structure for xattr header.
757 *
758 * One ocfs2_xattr_header describes how many ocfs2_xattr_entry records in
759 * the local xattr storage.
760 */
761struct ocfs2_xattr_header {
762 __le16 xh_count; /* contains the count of how
763 many records are in the
764 local xattr storage. */
765 __le16 xh_free_start; /* current offset for storing
766 xattr. */
767 __le16 xh_name_value_len; /* total length of name/value
768 length in this bucket. */
769 __le16 xh_num_buckets; /* bucket nums in one extent
770 record, only valid in the
771 first bucket. */
772 __le64 xh_csum;
773 struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */
774};
775
776/*
777 * On disk structure for xattr value root.
778 *
779 * It is used when one extended attribute's size is larger, and we will save it
780 * in an outside cluster. It will stored in a b-tree like file content.
781 */
782struct ocfs2_xattr_value_root {
783/*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */
784 __le32 xr_reserved0;
785 __le64 xr_last_eb_blk; /* Pointer to last extent block */
786/*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */
787};
788
789/*
790 * On disk structure for xattr tree root.
791 *
792 * It is used when there are too many extended attributes for one file. These
793 * attributes will be organized and stored in an indexed-btree.
794 */
795struct ocfs2_xattr_tree_root {
796/*00*/ __le32 xt_clusters; /* clusters covered by xattr. */
797 __le32 xt_reserved0;
798 __le64 xt_last_eb_blk; /* Pointer to last extent block */
799/*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */
800};
801
802#define OCFS2_XATTR_INDEXED 0x1
803#define OCFS2_HASH_SHIFT 5
804#define OCFS2_XATTR_ROUND 3
805#define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \
806 ~(OCFS2_XATTR_ROUND))
807
808#define OCFS2_XATTR_BUCKET_SIZE 4096
809#define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \
810 / OCFS2_MIN_BLOCKSIZE)
811
812/*
813 * On disk structure for xattr block.
814 */
815struct ocfs2_xattr_block {
816/*00*/ __u8 xb_signature[8]; /* Signature for verification */
817 __le16 xb_suballoc_slot; /* Slot suballocator this
818 block belongs to. */
819 __le16 xb_suballoc_bit; /* Bit offset in suballocator
820 block group */
821 __le32 xb_fs_generation; /* Must match super block */
822/*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */
823 __le64 xb_csum;
824/*20*/ __le16 xb_flags; /* Indicates whether this block contains
825 real xattr or a xattr tree. */
826 __le16 xb_reserved0;
827 __le32 xb_reserved1;
828 __le64 xb_reserved2;
829/*30*/ union {
830 struct ocfs2_xattr_header xb_header; /* xattr header if this
831 block contains xattr */
832 struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this
833 block cotains xattr
834 tree. */
835 } xb_attrs;
836};
837
838#define OCFS2_XATTR_ENTRY_LOCAL 0x80
839#define OCFS2_XATTR_TYPE_MASK 0x7F
840static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe,
841 int local)
842{
843 if (local)
844 xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL;
845 else
846 xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL;
847}
848
849static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe)
850{
851 return xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL;
852}
853
854static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type)
855{
856 xe->xe_type |= type & OCFS2_XATTR_TYPE_MASK;
857}
858
859static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe)
860{
861 return xe->xe_type & OCFS2_XATTR_TYPE_MASK;
862}
863
718#ifdef __KERNEL__ 864#ifdef __KERNEL__
719static inline int ocfs2_fast_symlink_chars(struct super_block *sb) 865static inline int ocfs2_fast_symlink_chars(struct super_block *sb)
720{ 866{
@@ -728,6 +874,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb)
728 offsetof(struct ocfs2_dinode, id2.i_data.id_data); 874 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
729} 875}
730 876
877static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb,
878 struct ocfs2_dinode *di)
879{
880 unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
881
882 if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
883 return sb->s_blocksize -
884 offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
885 xattrsize;
886 else
887 return sb->s_blocksize -
888 offsetof(struct ocfs2_dinode, id2.i_data.id_data);
889}
890
731static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) 891static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
732{ 892{
733 int size; 893 int size;
@@ -738,6 +898,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb)
738 return size / sizeof(struct ocfs2_extent_rec); 898 return size / sizeof(struct ocfs2_extent_rec);
739} 899}
740 900
901static inline int ocfs2_extent_recs_per_inode_with_xattr(
902 struct super_block *sb,
903 struct ocfs2_dinode *di)
904{
905 int size;
906 unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size);
907
908 if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL)
909 size = sb->s_blocksize -
910 offsetof(struct ocfs2_dinode, id2.i_list.l_recs) -
911 xattrsize;
912 else
913 size = sb->s_blocksize -
914 offsetof(struct ocfs2_dinode, id2.i_list.l_recs);
915
916 return size / sizeof(struct ocfs2_extent_rec);
917}
918
741static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) 919static inline int ocfs2_chain_recs_per_inode(struct super_block *sb)
742{ 920{
743 int size; 921 int size;
@@ -801,6 +979,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index)
801 return 0; 979 return 0;
802 980
803} 981}
982
983static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
984{
985 int size;
986
987 size = sb->s_blocksize -
988 offsetof(struct ocfs2_xattr_block,
989 xb_attrs.xb_root.xt_list.l_recs);
990
991 return size / sizeof(struct ocfs2_extent_rec);
992}
804#else 993#else
805static inline int ocfs2_fast_symlink_chars(int blocksize) 994static inline int ocfs2_fast_symlink_chars(int blocksize)
806{ 995{
@@ -884,6 +1073,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index)
884 1073
885 return 0; 1074 return 0;
886} 1075}
1076
1077static inline int ocfs2_xattr_recs_per_xb(int blocksize)
1078{
1079 int size;
1080
1081 size = blocksize -
1082 offsetof(struct ocfs2_xattr_block,
1083 xb_attrs.xb_root.xt_list.l_recs);
1084
1085 return size / sizeof(struct ocfs2_extent_rec);
1086}
887#endif /* __KERNEL__ */ 1087#endif /* __KERNEL__ */
888 1088
889 1089
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h
new file mode 100644
index 000000000000..b91c78f8f558
--- /dev/null
+++ b/fs/ocfs2/ocfs2_jbd_compat.h
@@ -0,0 +1,82 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * ocfs2_jbd_compat.h
5 *
6 * Compatibility defines for JBD.
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License version 2 as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * General Public License for more details.
18 */
19
20#ifndef OCFS2_JBD_COMPAT_H
21#define OCFS2_JBD_COMPAT_H
22
23#ifndef CONFIG_OCFS2_COMPAT_JBD
24# error Should not have been included
25#endif
26
27struct jbd2_inode {
28 unsigned int dummy;
29};
30
31#define JBD2_BARRIER JFS_BARRIER
32#define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE
33
34#define jbd2_journal_ack_err journal_ack_err
35#define jbd2_journal_clear_err journal_clear_err
36#define jbd2_journal_destroy journal_destroy
37#define jbd2_journal_dirty_metadata journal_dirty_metadata
38#define jbd2_journal_errno journal_errno
39#define jbd2_journal_extend journal_extend
40#define jbd2_journal_flush journal_flush
41#define jbd2_journal_force_commit journal_force_commit
42#define jbd2_journal_get_write_access journal_get_write_access
43#define jbd2_journal_get_undo_access journal_get_undo_access
44#define jbd2_journal_init_inode journal_init_inode
45#define jbd2_journal_invalidatepage journal_invalidatepage
46#define jbd2_journal_load journal_load
47#define jbd2_journal_lock_updates journal_lock_updates
48#define jbd2_journal_restart journal_restart
49#define jbd2_journal_start journal_start
50#define jbd2_journal_start_commit journal_start_commit
51#define jbd2_journal_stop journal_stop
52#define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers
53#define jbd2_journal_unlock_updates journal_unlock_updates
54#define jbd2_journal_wipe journal_wipe
55#define jbd2_log_wait_commit log_wait_commit
56
57static inline int jbd2_journal_file_inode(handle_t *handle,
58 struct jbd2_inode *inode)
59{
60 return 0;
61}
62
63static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
64 loff_t new_size)
65{
66 return 0;
67}
68
69static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode,
70 struct inode *inode)
71{
72 return;
73}
74
75static inline void jbd2_journal_release_jbd_inode(journal_t *journal,
76 struct jbd2_inode *jinode)
77{
78 return;
79}
80
81
82#endif /* OCFS2_JBD_COMPAT_H */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 8166968e9015..ffd48db229a7 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -200,7 +200,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data)
200 if (cluster > clusters) 200 if (cluster > clusters)
201 break; 201 break;
202 202
203 ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); 203 ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
204 if (ret < 0) { 204 if (ret < 0) {
205 mlog_errno(ret); 205 mlog_errno(ret);
206 break; 206 break;
@@ -236,8 +236,8 @@ static void ocfs2_update_super_and_backups(struct inode *inode,
236 * update the superblock last. 236 * update the superblock last.
237 * It doesn't matter if the write failed. 237 * It doesn't matter if the write failed.
238 */ 238 */
239 ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, 239 ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1,
240 &super_bh, 0, NULL); 240 &super_bh);
241 if (ret < 0) { 241 if (ret < 0) {
242 mlog_errno(ret); 242 mlog_errno(ret);
243 goto out; 243 goto out;
@@ -332,8 +332,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters)
332 lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, 332 lgd_blkno = ocfs2_which_cluster_group(main_bm_inode,
333 first_new_cluster - 1); 333 first_new_cluster - 1);
334 334
335 ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, 335 ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh);
336 main_bm_inode);
337 if (ret < 0) { 336 if (ret < 0) {
338 mlog_errno(ret); 337 mlog_errno(ret);
339 goto out_unlock; 338 goto out_unlock;
@@ -540,7 +539,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
540 goto out_unlock; 539 goto out_unlock;
541 } 540 }
542 541
543 ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); 542 ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh);
544 if (ret < 0) { 543 if (ret < 0) {
545 mlog(ML_ERROR, "Can't read the group descriptor # %llu " 544 mlog(ML_ERROR, "Can't read the group descriptor # %llu "
546 "from the device.", (unsigned long long)input->group); 545 "from the device.", (unsigned long long)input->group);
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index bb5ff8939bf1..bdda2d8f8508 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If 150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
151 * this is not true, the read of -1 (UINT64_MAX) will fail. 151 * this is not true, the read of -1 (UINT64_MAX) will fail.
152 */ 152 */
153 ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, 153 ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh,
154 si->si_inode); 154 OCFS2_BH_IGNORE_CACHE);
155 if (ret == 0) { 155 if (ret == 0) {
156 spin_lock(&osb->osb_lock); 156 spin_lock(&osb->osb_lock);
157 ocfs2_update_slot_info(si); 157 ocfs2_update_slot_info(si);
@@ -404,7 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
404 (unsigned long long)blkno); 404 (unsigned long long)blkno);
405 405
406 bh = NULL; /* Acquire a fresh bh */ 406 bh = NULL; /* Acquire a fresh bh */
407 status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); 407 status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh,
408 OCFS2_BH_IGNORE_CACHE);
408 if (status < 0) { 409 if (status < 0) {
409 mlog_errno(status); 410 mlog_errno(status);
410 goto bail; 411 goto bail;
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 353fc35c6748..faec2d879357 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -28,6 +28,7 @@
28#include "ocfs2.h" /* For struct ocfs2_lock_res */ 28#include "ocfs2.h" /* For struct ocfs2_lock_res */
29#include "stackglue.h" 29#include "stackglue.h"
30 30
31#include <linux/dlm_plock.h>
31 32
32/* 33/*
33 * The control protocol starts with a handshake. Until the handshake 34 * The control protocol starts with a handshake. Until the handshake
@@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
746{ 747{
747} 748}
748 749
750static int user_plock(struct ocfs2_cluster_connection *conn,
751 u64 ino,
752 struct file *file,
753 int cmd,
754 struct file_lock *fl)
755{
756 /*
757 * This more or less just demuxes the plock request into any
758 * one of three dlm calls.
759 *
760 * Internally, fs/dlm will pass these to a misc device, which
761 * a userspace daemon will read and write to.
762 *
763 * For now, cancel requests (which happen internally only),
764 * are turned into unlocks. Most of this function taken from
765 * gfs2_lock.
766 */
767
768 if (cmd == F_CANCELLK) {
769 cmd = F_SETLK;
770 fl->fl_type = F_UNLCK;
771 }
772
773 if (IS_GETLK(cmd))
774 return dlm_posix_get(conn->cc_lockspace, ino, file, fl);
775 else if (fl->fl_type == F_UNLCK)
776 return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl);
777 else
778 return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl);
779}
780
749/* 781/*
750 * Compare a requested locking protocol version against the current one. 782 * Compare a requested locking protocol version against the current one.
751 * 783 *
@@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = {
839 .dlm_unlock = user_dlm_unlock, 871 .dlm_unlock = user_dlm_unlock,
840 .lock_status = user_dlm_lock_status, 872 .lock_status = user_dlm_lock_status,
841 .lock_lvb = user_dlm_lvb, 873 .lock_lvb = user_dlm_lvb,
874 .plock = user_plock,
842 .dump_lksb = user_dlm_dump_lksb, 875 .dump_lksb = user_dlm_dump_lksb,
843}; 876};
844 877
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 07f348b8d721..68b668b0e60a 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -288,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb)
288} 288}
289EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); 289EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb);
290 290
291int ocfs2_stack_supports_plocks(void)
292{
293 return active_stack && active_stack->sp_ops->plock;
294}
295EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks);
296
297/*
298 * ocfs2_plock() can only be safely called if
299 * ocfs2_stack_supports_plocks() returned true
300 */
301int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
302 struct file *file, int cmd, struct file_lock *fl)
303{
304 WARN_ON_ONCE(active_stack->sp_ops->plock == NULL);
305 if (active_stack->sp_ops->plock)
306 return active_stack->sp_ops->plock(conn, ino, file, cmd, fl);
307 return -EOPNOTSUPP;
308}
309EXPORT_SYMBOL_GPL(ocfs2_plock);
310
291int ocfs2_cluster_connect(const char *stack_name, 311int ocfs2_cluster_connect(const char *stack_name,
292 const char *group, 312 const char *group,
293 int grouplen, 313 int grouplen,
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index db56281dd1be..c571af375ef8 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -28,6 +28,10 @@
28#include "dlm/dlmapi.h" 28#include "dlm/dlmapi.h"
29#include <linux/dlm.h> 29#include <linux/dlm.h>
30 30
31/* Needed for plock-related prototypes */
32struct file;
33struct file_lock;
34
31/* 35/*
32 * dlmconstants.h does not have a LOCAL flag. We hope to remove it 36 * dlmconstants.h does not have a LOCAL flag. We hope to remove it
33 * some day, but right now we need it. Let's fake it. This value is larger 37 * some day, but right now we need it. Let's fake it. This value is larger
@@ -187,6 +191,17 @@ struct ocfs2_stack_operations {
187 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); 191 void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb);
188 192
189 /* 193 /*
194 * Cluster-aware posix locks
195 *
196 * This is NULL for stacks which do not support posix locks.
197 */
198 int (*plock)(struct ocfs2_cluster_connection *conn,
199 u64 ino,
200 struct file *file,
201 int cmd,
202 struct file_lock *fl);
203
204 /*
190 * This is an optoinal debugging hook. If provided, the 205 * This is an optoinal debugging hook. If provided, the
191 * stack can dump debugging information about this lock. 206 * stack can dump debugging information about this lock.
192 */ 207 */
@@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb);
240void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); 255void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb);
241void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); 256void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb);
242 257
258int ocfs2_stack_supports_plocks(void);
259int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino,
260 struct file *file, int cmd, struct file_lock *fl);
261
243void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); 262void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto);
244 263
245 264
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index d2d278fb9819..c5ff18b46b57 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle,
62 struct ocfs2_chain_list *cl); 62 struct ocfs2_chain_list *cl);
63static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 63static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
64 struct inode *alloc_inode, 64 struct inode *alloc_inode,
65 struct buffer_head *bh); 65 struct buffer_head *bh,
66 u64 max_block);
66 67
67static int ocfs2_cluster_group_search(struct inode *inode, 68static int ocfs2_cluster_group_search(struct inode *inode,
68 struct buffer_head *group_bh, 69 struct buffer_head *group_bh,
69 u32 bits_wanted, u32 min_bits, 70 u32 bits_wanted, u32 min_bits,
71 u64 max_block,
70 u16 *bit_off, u16 *bits_found); 72 u16 *bit_off, u16 *bits_found);
71static int ocfs2_block_group_search(struct inode *inode, 73static int ocfs2_block_group_search(struct inode *inode,
72 struct buffer_head *group_bh, 74 struct buffer_head *group_bh,
73 u32 bits_wanted, u32 min_bits, 75 u32 bits_wanted, u32 min_bits,
76 u64 max_block,
74 u16 *bit_off, u16 *bits_found); 77 u16 *bit_off, u16 *bits_found);
75static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, 78static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
76 struct ocfs2_alloc_context *ac, 79 struct ocfs2_alloc_context *ac,
@@ -110,8 +113,11 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode,
110 u64 data_blkno, 113 u64 data_blkno,
111 u64 *bg_blkno, 114 u64 *bg_blkno,
112 u16 *bg_bit_off); 115 u16 *bg_bit_off);
116static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
117 u32 bits_wanted, u64 max_block,
118 struct ocfs2_alloc_context **ac);
113 119
114static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) 120void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
115{ 121{
116 struct inode *inode = ac->ac_inode; 122 struct inode *inode = ac->ac_inode;
117 123
@@ -124,10 +130,8 @@ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
124 iput(inode); 130 iput(inode);
125 ac->ac_inode = NULL; 131 ac->ac_inode = NULL;
126 } 132 }
127 if (ac->ac_bh) { 133 brelse(ac->ac_bh);
128 brelse(ac->ac_bh); 134 ac->ac_bh = NULL;
129 ac->ac_bh = NULL;
130 }
131} 135}
132 136
133void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) 137void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
@@ -276,7 +280,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
276 */ 280 */
277static int ocfs2_block_group_alloc(struct ocfs2_super *osb, 281static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
278 struct inode *alloc_inode, 282 struct inode *alloc_inode,
279 struct buffer_head *bh) 283 struct buffer_head *bh,
284 u64 max_block)
280{ 285{
281 int status, credits; 286 int status, credits;
282 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; 287 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
@@ -294,9 +299,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
294 mlog_entry_void(); 299 mlog_entry_void();
295 300
296 cl = &fe->id2.i_chain; 301 cl = &fe->id2.i_chain;
297 status = ocfs2_reserve_clusters(osb, 302 status = ocfs2_reserve_clusters_with_limit(osb,
298 le16_to_cpu(cl->cl_cpg), 303 le16_to_cpu(cl->cl_cpg),
299 &ac); 304 max_block, &ac);
300 if (status < 0) { 305 if (status < 0) {
301 if (status != -ENOSPC) 306 if (status != -ENOSPC)
302 mlog_errno(status); 307 mlog_errno(status);
@@ -394,8 +399,7 @@ bail:
394 if (ac) 399 if (ac)
395 ocfs2_free_alloc_context(ac); 400 ocfs2_free_alloc_context(ac);
396 401
397 if (bg_bh) 402 brelse(bg_bh);
398 brelse(bg_bh);
399 403
400 mlog_exit(status); 404 mlog_exit(status);
401 return status; 405 return status;
@@ -469,7 +473,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
469 goto bail; 473 goto bail;
470 } 474 }
471 475
472 status = ocfs2_block_group_alloc(osb, alloc_inode, bh); 476 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
477 ac->ac_max_block);
473 if (status < 0) { 478 if (status < 0) {
474 if (status != -ENOSPC) 479 if (status != -ENOSPC)
475 mlog_errno(status); 480 mlog_errno(status);
@@ -486,16 +491,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
486 get_bh(bh); 491 get_bh(bh);
487 ac->ac_bh = bh; 492 ac->ac_bh = bh;
488bail: 493bail:
489 if (bh) 494 brelse(bh);
490 brelse(bh);
491 495
492 mlog_exit(status); 496 mlog_exit(status);
493 return status; 497 return status;
494} 498}
495 499
496int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 500int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
497 struct ocfs2_dinode *fe, 501 int blocks,
498 struct ocfs2_alloc_context **ac) 502 struct ocfs2_alloc_context **ac)
499{ 503{
500 int status; 504 int status;
501 u32 slot; 505 u32 slot;
@@ -507,7 +511,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
507 goto bail; 511 goto bail;
508 } 512 }
509 513
510 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); 514 (*ac)->ac_bits_wanted = blocks;
511 (*ac)->ac_which = OCFS2_AC_USE_META; 515 (*ac)->ac_which = OCFS2_AC_USE_META;
512 slot = osb->slot_num; 516 slot = osb->slot_num;
513 (*ac)->ac_group_search = ocfs2_block_group_search; 517 (*ac)->ac_group_search = ocfs2_block_group_search;
@@ -532,6 +536,15 @@ bail:
532 return status; 536 return status;
533} 537}
534 538
539int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
540 struct ocfs2_extent_list *root_el,
541 struct ocfs2_alloc_context **ac)
542{
543 return ocfs2_reserve_new_metadata_blocks(osb,
544 ocfs2_extend_meta_needed(root_el),
545 ac);
546}
547
535static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, 548static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb,
536 struct ocfs2_alloc_context *ac) 549 struct ocfs2_alloc_context *ac)
537{ 550{
@@ -582,6 +595,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
582 (*ac)->ac_group_search = ocfs2_block_group_search; 595 (*ac)->ac_group_search = ocfs2_block_group_search;
583 596
584 /* 597 /*
598 * stat(2) can't handle i_ino > 32bits, so we tell the
599 * lower levels not to allocate us a block group past that
600 * limit. The 'inode64' mount option avoids this behavior.
601 */
602 if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
603 (*ac)->ac_max_block = (u32)~0U;
604
605 /*
585 * slot is set when we successfully steal inode from other nodes. 606 * slot is set when we successfully steal inode from other nodes.
586 * It is reset in 3 places: 607 * It is reset in 3 places:
587 * 1. when we flush the truncate log 608 * 1. when we flush the truncate log
@@ -661,9 +682,9 @@ bail:
661/* Callers don't need to care which bitmap (local alloc or main) to 682/* Callers don't need to care which bitmap (local alloc or main) to
662 * use so we figure it out for them, but unfortunately this clutters 683 * use so we figure it out for them, but unfortunately this clutters
663 * things a bit. */ 684 * things a bit. */
664int ocfs2_reserve_clusters(struct ocfs2_super *osb, 685static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
665 u32 bits_wanted, 686 u32 bits_wanted, u64 max_block,
666 struct ocfs2_alloc_context **ac) 687 struct ocfs2_alloc_context **ac)
667{ 688{
668 int status; 689 int status;
669 690
@@ -677,24 +698,20 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb,
677 } 698 }
678 699
679 (*ac)->ac_bits_wanted = bits_wanted; 700 (*ac)->ac_bits_wanted = bits_wanted;
701 (*ac)->ac_max_block = max_block;
680 702
681 status = -ENOSPC; 703 status = -ENOSPC;
682 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { 704 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
683 status = ocfs2_reserve_local_alloc_bits(osb, 705 status = ocfs2_reserve_local_alloc_bits(osb,
684 bits_wanted, 706 bits_wanted,
685 *ac); 707 *ac);
686 if ((status < 0) && (status != -ENOSPC)) { 708 if (status == -EFBIG) {
709 /* The local alloc window is outside ac_max_block.
710 * use the main bitmap. */
711 status = -ENOSPC;
712 } else if ((status < 0) && (status != -ENOSPC)) {
687 mlog_errno(status); 713 mlog_errno(status);
688 goto bail; 714 goto bail;
689 } else if (status == -ENOSPC) {
690 /* reserve_local_bits will return enospc with
691 * the local alloc inode still locked, so we
692 * can change this safely here. */
693 mlog(0, "Disabling local alloc\n");
694 /* We set to OCFS2_LA_DISABLED so that umount
695 * can clean up what's left of the local
696 * allocation */
697 osb->local_alloc_state = OCFS2_LA_DISABLED;
698 } 715 }
699 } 716 }
700 717
@@ -718,6 +735,13 @@ bail:
718 return status; 735 return status;
719} 736}
720 737
738int ocfs2_reserve_clusters(struct ocfs2_super *osb,
739 u32 bits_wanted,
740 struct ocfs2_alloc_context **ac)
741{
742 return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac);
743}
744
721/* 745/*
722 * More or less lifted from ext3. I'll leave their description below: 746 * More or less lifted from ext3. I'll leave their description below:
723 * 747 *
@@ -1000,11 +1024,14 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg
1000static int ocfs2_cluster_group_search(struct inode *inode, 1024static int ocfs2_cluster_group_search(struct inode *inode,
1001 struct buffer_head *group_bh, 1025 struct buffer_head *group_bh,
1002 u32 bits_wanted, u32 min_bits, 1026 u32 bits_wanted, u32 min_bits,
1027 u64 max_block,
1003 u16 *bit_off, u16 *bits_found) 1028 u16 *bit_off, u16 *bits_found)
1004{ 1029{
1005 int search = -ENOSPC; 1030 int search = -ENOSPC;
1006 int ret; 1031 int ret;
1032 u64 blkoff;
1007 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; 1033 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
1034 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1008 u16 tmp_off, tmp_found; 1035 u16 tmp_off, tmp_found;
1009 unsigned int max_bits, gd_cluster_off; 1036 unsigned int max_bits, gd_cluster_off;
1010 1037
@@ -1037,6 +1064,17 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1037 if (ret) 1064 if (ret)
1038 return ret; 1065 return ret;
1039 1066
1067 if (max_block) {
1068 blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1069 gd_cluster_off +
1070 tmp_off + tmp_found);
1071 mlog(0, "Checking %llu against %llu\n",
1072 (unsigned long long)blkoff,
1073 (unsigned long long)max_block);
1074 if (blkoff > max_block)
1075 return -ENOSPC;
1076 }
1077
1040 /* ocfs2_block_group_find_clear_bits() might 1078 /* ocfs2_block_group_find_clear_bits() might
1041 * return success, but we still want to return 1079 * return success, but we still want to return
1042 * -ENOSPC unless it found the minimum number 1080 * -ENOSPC unless it found the minimum number
@@ -1045,6 +1083,12 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1045 *bit_off = tmp_off; 1083 *bit_off = tmp_off;
1046 *bits_found = tmp_found; 1084 *bits_found = tmp_found;
1047 search = 0; /* success */ 1085 search = 0; /* success */
1086 } else if (tmp_found) {
1087 /*
1088 * Don't show bits which we'll be returning
1089 * for allocation to the local alloc bitmap.
1090 */
1091 ocfs2_local_alloc_seen_free_bits(osb, tmp_found);
1048 } 1092 }
1049 } 1093 }
1050 1094
@@ -1054,19 +1098,31 @@ static int ocfs2_cluster_group_search(struct inode *inode,
1054static int ocfs2_block_group_search(struct inode *inode, 1098static int ocfs2_block_group_search(struct inode *inode,
1055 struct buffer_head *group_bh, 1099 struct buffer_head *group_bh,
1056 u32 bits_wanted, u32 min_bits, 1100 u32 bits_wanted, u32 min_bits,
1101 u64 max_block,
1057 u16 *bit_off, u16 *bits_found) 1102 u16 *bit_off, u16 *bits_found)
1058{ 1103{
1059 int ret = -ENOSPC; 1104 int ret = -ENOSPC;
1105 u64 blkoff;
1060 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; 1106 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
1061 1107
1062 BUG_ON(min_bits != 1); 1108 BUG_ON(min_bits != 1);
1063 BUG_ON(ocfs2_is_cluster_bitmap(inode)); 1109 BUG_ON(ocfs2_is_cluster_bitmap(inode));
1064 1110
1065 if (bg->bg_free_bits_count) 1111 if (bg->bg_free_bits_count) {
1066 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), 1112 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1067 group_bh, bits_wanted, 1113 group_bh, bits_wanted,
1068 le16_to_cpu(bg->bg_bits), 1114 le16_to_cpu(bg->bg_bits),
1069 bit_off, bits_found); 1115 bit_off, bits_found);
1116 if (!ret && max_block) {
1117 blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off +
1118 *bits_found;
1119 mlog(0, "Checking %llu against %llu\n",
1120 (unsigned long long)blkoff,
1121 (unsigned long long)max_block);
1122 if (blkoff > max_block)
1123 ret = -ENOSPC;
1124 }
1125 }
1070 1126
1071 return ret; 1127 return ret;
1072} 1128}
@@ -1116,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1116 struct ocfs2_group_desc *gd; 1172 struct ocfs2_group_desc *gd;
1117 struct inode *alloc_inode = ac->ac_inode; 1173 struct inode *alloc_inode = ac->ac_inode;
1118 1174
1119 ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, 1175 ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh);
1120 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1121 if (ret < 0) { 1176 if (ret < 0) {
1122 mlog_errno(ret); 1177 mlog_errno(ret);
1123 return ret; 1178 return ret;
@@ -1131,7 +1186,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1131 } 1186 }
1132 1187
1133 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, 1188 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1134 bit_off, &found); 1189 ac->ac_max_block, bit_off, &found);
1135 if (ret < 0) { 1190 if (ret < 0) {
1136 if (ret != -ENOSPC) 1191 if (ret != -ENOSPC)
1137 mlog_errno(ret); 1192 mlog_errno(ret);
@@ -1186,9 +1241,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1186 bits_wanted, chain, 1241 bits_wanted, chain,
1187 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); 1242 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
1188 1243
1189 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1244 status = ocfs2_read_block(alloc_inode,
1190 le64_to_cpu(cl->cl_recs[chain].c_blkno), 1245 le64_to_cpu(cl->cl_recs[chain].c_blkno),
1191 &group_bh, OCFS2_BH_CACHED, alloc_inode); 1246 &group_bh);
1192 if (status < 0) { 1247 if (status < 0) {
1193 mlog_errno(status); 1248 mlog_errno(status);
1194 goto bail; 1249 goto bail;
@@ -1204,21 +1259,20 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1204 /* for now, the chain search is a bit simplistic. We just use 1259 /* for now, the chain search is a bit simplistic. We just use
1205 * the 1st group with any empty bits. */ 1260 * the 1st group with any empty bits. */
1206 while ((status = ac->ac_group_search(alloc_inode, group_bh, 1261 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1207 bits_wanted, min_bits, bit_off, 1262 bits_wanted, min_bits,
1263 ac->ac_max_block, bit_off,
1208 &tmp_bits)) == -ENOSPC) { 1264 &tmp_bits)) == -ENOSPC) {
1209 if (!bg->bg_next_group) 1265 if (!bg->bg_next_group)
1210 break; 1266 break;
1211 1267
1212 if (prev_group_bh) { 1268 brelse(prev_group_bh);
1213 brelse(prev_group_bh); 1269 prev_group_bh = NULL;
1214 prev_group_bh = NULL; 1270
1215 }
1216 next_group = le64_to_cpu(bg->bg_next_group); 1271 next_group = le64_to_cpu(bg->bg_next_group);
1217 prev_group_bh = group_bh; 1272 prev_group_bh = group_bh;
1218 group_bh = NULL; 1273 group_bh = NULL;
1219 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), 1274 status = ocfs2_read_block(alloc_inode,
1220 next_group, &group_bh, 1275 next_group, &group_bh);
1221 OCFS2_BH_CACHED, alloc_inode);
1222 if (status < 0) { 1276 if (status < 0) {
1223 mlog_errno(status); 1277 mlog_errno(status);
1224 goto bail; 1278 goto bail;
@@ -1307,10 +1361,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1307 *bg_blkno = le64_to_cpu(bg->bg_blkno); 1361 *bg_blkno = le64_to_cpu(bg->bg_blkno);
1308 *bits_left = le16_to_cpu(bg->bg_free_bits_count); 1362 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
1309bail: 1363bail:
1310 if (group_bh) 1364 brelse(group_bh);
1311 brelse(group_bh); 1365 brelse(prev_group_bh);
1312 if (prev_group_bh)
1313 brelse(prev_group_bh);
1314 1366
1315 mlog_exit(status); 1367 mlog_exit(status);
1316 return status; 1368 return status;
@@ -1723,7 +1775,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1723{ 1775{
1724 int status = 0; 1776 int status = 0;
1725 u32 tmp_used; 1777 u32 tmp_used;
1726 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
1727 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; 1778 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
1728 struct ocfs2_chain_list *cl = &fe->id2.i_chain; 1779 struct ocfs2_chain_list *cl = &fe->id2.i_chain;
1729 struct buffer_head *group_bh = NULL; 1780 struct buffer_head *group_bh = NULL;
@@ -1742,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1742 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, 1793 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1743 (unsigned long long)bg_blkno, start_bit); 1794 (unsigned long long)bg_blkno, start_bit);
1744 1795
1745 status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, 1796 status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh);
1746 alloc_inode);
1747 if (status < 0) { 1797 if (status < 0) {
1748 mlog_errno(status); 1798 mlog_errno(status);
1749 goto bail; 1799 goto bail;
@@ -1784,8 +1834,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1784 } 1834 }
1785 1835
1786bail: 1836bail:
1787 if (group_bh) 1837 brelse(group_bh);
1788 brelse(group_bh);
1789 1838
1790 mlog_exit(status); 1839 mlog_exit(status);
1791 return status; 1840 return status;
@@ -1838,9 +1887,15 @@ int ocfs2_free_clusters(handle_t *handle,
1838 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, 1887 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
1839 bg_start_bit, bg_blkno, 1888 bg_start_bit, bg_blkno,
1840 num_clusters); 1889 num_clusters);
1841 if (status < 0) 1890 if (status < 0) {
1842 mlog_errno(status); 1891 mlog_errno(status);
1892 goto out;
1893 }
1843 1894
1895 ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
1896 num_clusters);
1897
1898out:
1844 mlog_exit(status); 1899 mlog_exit(status);
1845 return status; 1900 return status;
1846} 1901}
@@ -1891,3 +1946,84 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
1891 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); 1946 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
1892 } 1947 }
1893} 1948}
1949
1950/*
1951 * For a given allocation, determine which allocators will need to be
1952 * accessed, and lock them, reserving the appropriate number of bits.
1953 *
1954 * Sparse file systems call this from ocfs2_write_begin_nolock()
1955 * and ocfs2_allocate_unwritten_extents().
1956 *
1957 * File systems which don't support holes call this from
1958 * ocfs2_extend_allocation().
1959 */
1960int ocfs2_lock_allocators(struct inode *inode,
1961 struct ocfs2_extent_tree *et,
1962 u32 clusters_to_add, u32 extents_to_split,
1963 struct ocfs2_alloc_context **data_ac,
1964 struct ocfs2_alloc_context **meta_ac)
1965{
1966 int ret = 0, num_free_extents;
1967 unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
1968 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1969
1970 *meta_ac = NULL;
1971 if (data_ac)
1972 *data_ac = NULL;
1973
1974 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
1975
1976 num_free_extents = ocfs2_num_free_extents(osb, inode, et);
1977 if (num_free_extents < 0) {
1978 ret = num_free_extents;
1979 mlog_errno(ret);
1980 goto out;
1981 }
1982
1983 /*
1984 * Sparse allocation file systems need to be more conservative
1985 * with reserving room for expansion - the actual allocation
1986 * happens while we've got a journal handle open so re-taking
1987 * a cluster lock (because we ran out of room for another
1988 * extent) will violate ordering rules.
1989 *
1990 * Most of the time we'll only be seeing this 1 cluster at a time
1991 * anyway.
1992 *
1993 * Always lock for any unwritten extents - we might want to
1994 * add blocks during a split.
1995 */
1996 if (!num_free_extents ||
1997 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
1998 ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
1999 if (ret < 0) {
2000 if (ret != -ENOSPC)
2001 mlog_errno(ret);
2002 goto out;
2003 }
2004 }
2005
2006 if (clusters_to_add == 0)
2007 goto out;
2008
2009 ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
2010 if (ret < 0) {
2011 if (ret != -ENOSPC)
2012 mlog_errno(ret);
2013 goto out;
2014 }
2015
2016out:
2017 if (ret) {
2018 if (*meta_ac) {
2019 ocfs2_free_alloc_context(*meta_ac);
2020 *meta_ac = NULL;
2021 }
2022
2023 /*
2024 * We cannot have an error and a non null *data_ac.
2025 */
2026 }
2027
2028 return ret;
2029}
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h
index 544c600662bd..4df159d8f450 100644
--- a/fs/ocfs2/suballoc.h
+++ b/fs/ocfs2/suballoc.h
@@ -28,10 +28,11 @@
28 28
29typedef int (group_search_t)(struct inode *, 29typedef int (group_search_t)(struct inode *,
30 struct buffer_head *, 30 struct buffer_head *,
31 u32, 31 u32, /* bits_wanted */
32 u32, 32 u32, /* min_bits */
33 u16 *, 33 u64, /* max_block */
34 u16 *); 34 u16 *, /* *bit_off */
35 u16 *); /* *bits_found */
35 36
36struct ocfs2_alloc_context { 37struct ocfs2_alloc_context {
37 struct inode *ac_inode; /* which bitmap are we allocating from? */ 38 struct inode *ac_inode; /* which bitmap are we allocating from? */
@@ -51,6 +52,8 @@ struct ocfs2_alloc_context {
51 group_search_t *ac_group_search; 52 group_search_t *ac_group_search;
52 53
53 u64 ac_last_group; 54 u64 ac_last_group;
55 u64 ac_max_block; /* Highest block number to allocate. 0 is
56 is the same as ~0 - unlimited */
54}; 57};
55 58
56void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); 59void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac);
@@ -59,9 +62,17 @@ static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac)
59 return ac->ac_bits_wanted - ac->ac_bits_given; 62 return ac->ac_bits_wanted - ac->ac_bits_given;
60} 63}
61 64
65/*
66 * Please note that the caller must make sure that root_el is the root
67 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
68 * the result may be wrong.
69 */
62int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, 70int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
63 struct ocfs2_dinode *fe, 71 struct ocfs2_extent_list *root_el,
64 struct ocfs2_alloc_context **ac); 72 struct ocfs2_alloc_context **ac);
73int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
74 int blocks,
75 struct ocfs2_alloc_context **ac);
65int ocfs2_reserve_new_inode(struct ocfs2_super *osb, 76int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
66 struct ocfs2_alloc_context **ac); 77 struct ocfs2_alloc_context **ac);
67int ocfs2_reserve_clusters(struct ocfs2_super *osb, 78int ocfs2_reserve_clusters(struct ocfs2_super *osb,
@@ -147,6 +158,7 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode)
147 * apis above. */ 158 * apis above. */
148int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, 159int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
149 struct ocfs2_alloc_context *ac); 160 struct ocfs2_alloc_context *ac);
161void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac);
150 162
151/* given a cluster offset, calculate which block group it belongs to 163/* given a cluster offset, calculate which block group it belongs to
152 * and return that block offset. */ 164 * and return that block offset. */
@@ -156,4 +168,8 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster);
156int ocfs2_check_group_descriptor(struct super_block *sb, 168int ocfs2_check_group_descriptor(struct super_block *sb,
157 struct ocfs2_dinode *di, 169 struct ocfs2_dinode *di,
158 struct ocfs2_group_desc *gd); 170 struct ocfs2_group_desc *gd);
171int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et,
172 u32 clusters_to_add, u32 extents_to_split,
173 struct ocfs2_alloc_context **data_ac,
174 struct ocfs2_alloc_context **meta_ac);
159#endif /* _CHAINALLOC_H_ */ 175#endif /* _CHAINALLOC_H_ */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 70334d85aff1..304b63ac78cf 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -64,6 +64,7 @@
64#include "sysfile.h" 64#include "sysfile.h"
65#include "uptodate.h" 65#include "uptodate.h"
66#include "ver.h" 66#include "ver.h"
67#include "xattr.h"
67 68
68#include "buffer_head_io.h" 69#include "buffer_head_io.h"
69 70
@@ -154,6 +155,9 @@ enum {
154 Opt_localalloc, 155 Opt_localalloc,
155 Opt_localflocks, 156 Opt_localflocks,
156 Opt_stack, 157 Opt_stack,
158 Opt_user_xattr,
159 Opt_nouser_xattr,
160 Opt_inode64,
157 Opt_err, 161 Opt_err,
158}; 162};
159 163
@@ -173,6 +177,9 @@ static const match_table_t tokens = {
173 {Opt_localalloc, "localalloc=%d"}, 177 {Opt_localalloc, "localalloc=%d"},
174 {Opt_localflocks, "localflocks"}, 178 {Opt_localflocks, "localflocks"},
175 {Opt_stack, "cluster_stack=%s"}, 179 {Opt_stack, "cluster_stack=%s"},
180 {Opt_user_xattr, "user_xattr"},
181 {Opt_nouser_xattr, "nouser_xattr"},
182 {Opt_inode64, "inode64"},
176 {Opt_err, NULL} 183 {Opt_err, NULL}
177}; 184};
178 185
@@ -205,10 +212,11 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait)
205 ocfs2_schedule_truncate_log_flush(osb, 0); 212 ocfs2_schedule_truncate_log_flush(osb, 0);
206 } 213 }
207 214
208 if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) { 215 if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal,
216 &target)) {
209 if (wait) 217 if (wait)
210 log_wait_commit(OCFS2_SB(sb)->journal->j_journal, 218 jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal,
211 target); 219 target);
212 } 220 }
213 return 0; 221 return 0;
214} 222}
@@ -325,6 +333,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb)
325 if (!oi) 333 if (!oi)
326 return NULL; 334 return NULL;
327 335
336 jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode);
328 return &oi->vfs_inode; 337 return &oi->vfs_inode;
329} 338}
330 339
@@ -406,6 +415,15 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
406 goto out; 415 goto out;
407 } 416 }
408 417
418 /* Probably don't want this on remount; it might
419 * mess with other nodes */
420 if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) &&
421 (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) {
422 ret = -EINVAL;
423 mlog(ML_ERROR, "Cannot enable inode64 on remount\n");
424 goto out;
425 }
426
409 /* We're going to/from readonly mode. */ 427 /* We're going to/from readonly mode. */
410 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { 428 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) {
411 /* Lock here so the check of HARD_RO and the potential 429 /* Lock here so the check of HARD_RO and the potential
@@ -637,7 +655,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
637 osb->s_atime_quantum = parsed_options.atime_quantum; 655 osb->s_atime_quantum = parsed_options.atime_quantum;
638 osb->preferred_slot = parsed_options.slot; 656 osb->preferred_slot = parsed_options.slot;
639 osb->osb_commit_interval = parsed_options.commit_interval; 657 osb->osb_commit_interval = parsed_options.commit_interval;
640 osb->local_alloc_size = parsed_options.localalloc_opt; 658 osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
659 osb->local_alloc_bits = osb->local_alloc_default_bits;
641 660
642 status = ocfs2_verify_userspace_stack(osb, &parsed_options); 661 status = ocfs2_verify_userspace_stack(osb, &parsed_options);
643 if (status) 662 if (status)
@@ -743,8 +762,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
743 return status; 762 return status;
744 763
745read_super_error: 764read_super_error:
746 if (bh != NULL) 765 brelse(bh);
747 brelse(bh);
748 766
749 if (inode) 767 if (inode)
750 iput(inode); 768 iput(inode);
@@ -847,6 +865,12 @@ static int ocfs2_parse_options(struct super_block *sb,
847 case Opt_data_writeback: 865 case Opt_data_writeback:
848 mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; 866 mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK;
849 break; 867 break;
868 case Opt_user_xattr:
869 mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR;
870 break;
871 case Opt_nouser_xattr:
872 mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR;
873 break;
850 case Opt_atime_quantum: 874 case Opt_atime_quantum:
851 if (match_int(&args[0], &option)) { 875 if (match_int(&args[0], &option)) {
852 status = 0; 876 status = 0;
@@ -873,7 +897,7 @@ static int ocfs2_parse_options(struct super_block *sb,
873 if (option < 0) 897 if (option < 0)
874 return 0; 898 return 0;
875 if (option == 0) 899 if (option == 0)
876 option = JBD_DEFAULT_MAX_COMMIT_AGE; 900 option = JBD2_DEFAULT_MAX_COMMIT_AGE;
877 mopt->commit_interval = HZ * option; 901 mopt->commit_interval = HZ * option;
878 break; 902 break;
879 case Opt_localalloc: 903 case Opt_localalloc:
@@ -918,6 +942,9 @@ static int ocfs2_parse_options(struct super_block *sb,
918 OCFS2_STACK_LABEL_LEN); 942 OCFS2_STACK_LABEL_LEN);
919 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; 943 mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0';
920 break; 944 break;
945 case Opt_inode64:
946 mopt->mount_opt |= OCFS2_MOUNT_INODE64;
947 break;
921 default: 948 default:
922 mlog(ML_ERROR, 949 mlog(ML_ERROR,
923 "Unrecognized mount option \"%s\" " 950 "Unrecognized mount option \"%s\" "
@@ -938,6 +965,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
938{ 965{
939 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); 966 struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb);
940 unsigned long opts = osb->s_mount_opt; 967 unsigned long opts = osb->s_mount_opt;
968 unsigned int local_alloc_megs;
941 969
942 if (opts & OCFS2_MOUNT_HB_LOCAL) 970 if (opts & OCFS2_MOUNT_HB_LOCAL)
943 seq_printf(s, ",_netdev,heartbeat=local"); 971 seq_printf(s, ",_netdev,heartbeat=local");
@@ -970,8 +998,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
970 seq_printf(s, ",commit=%u", 998 seq_printf(s, ",commit=%u",
971 (unsigned) (osb->osb_commit_interval / HZ)); 999 (unsigned) (osb->osb_commit_interval / HZ));
972 1000
973 if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) 1001 local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits);
974 seq_printf(s, ",localalloc=%d", osb->local_alloc_size); 1002 if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE)
1003 seq_printf(s, ",localalloc=%d", local_alloc_megs);
975 1004
976 if (opts & OCFS2_MOUNT_LOCALFLOCKS) 1005 if (opts & OCFS2_MOUNT_LOCALFLOCKS)
977 seq_printf(s, ",localflocks,"); 1006 seq_printf(s, ",localflocks,");
@@ -980,6 +1009,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
980 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, 1009 seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
981 osb->osb_cluster_stack); 1010 osb->osb_cluster_stack);
982 1011
1012 if (opts & OCFS2_MOUNT_NOUSERXATTR)
1013 seq_printf(s, ",nouser_xattr");
1014 else
1015 seq_printf(s, ",user_xattr");
1016
1017 if (opts & OCFS2_MOUNT_INODE64)
1018 seq_printf(s, ",inode64");
1019
983 return 0; 1020 return 0;
984} 1021}
985 1022
@@ -1132,6 +1169,7 @@ static void ocfs2_inode_init_once(void *data)
1132 oi->ip_dir_start_lookup = 0; 1169 oi->ip_dir_start_lookup = 0;
1133 1170
1134 init_rwsem(&oi->ip_alloc_sem); 1171 init_rwsem(&oi->ip_alloc_sem);
1172 init_rwsem(&oi->ip_xattr_sem);
1135 mutex_init(&oi->ip_io_mutex); 1173 mutex_init(&oi->ip_io_mutex);
1136 1174
1137 oi->ip_blkno = 0ULL; 1175 oi->ip_blkno = 0ULL;
@@ -1375,6 +1413,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1375 sb->s_fs_info = osb; 1413 sb->s_fs_info = osb;
1376 sb->s_op = &ocfs2_sops; 1414 sb->s_op = &ocfs2_sops;
1377 sb->s_export_op = &ocfs2_export_ops; 1415 sb->s_export_op = &ocfs2_export_ops;
1416 sb->s_xattr = ocfs2_xattr_handlers;
1378 sb->s_time_gran = 1; 1417 sb->s_time_gran = 1;
1379 sb->s_flags |= MS_NOATIME; 1418 sb->s_flags |= MS_NOATIME;
1380 /* this is needed to support O_LARGEFILE */ 1419 /* this is needed to support O_LARGEFILE */
@@ -1421,8 +1460,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
1421 1460
1422 osb->slot_num = OCFS2_INVALID_SLOT; 1461 osb->slot_num = OCFS2_INVALID_SLOT;
1423 1462
1463 osb->s_xattr_inline_size = le16_to_cpu(
1464 di->id2.i_super.s_xattr_inline_size);
1465
1424 osb->local_alloc_state = OCFS2_LA_UNUSED; 1466 osb->local_alloc_state = OCFS2_LA_UNUSED;
1425 osb->local_alloc_bh = NULL; 1467 osb->local_alloc_bh = NULL;
1468 INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker);
1426 1469
1427 init_waitqueue_head(&osb->osb_mount_event); 1470 init_waitqueue_head(&osb->osb_mount_event);
1428 1471
@@ -1568,6 +1611,7 @@ static int ocfs2_initialize_super(struct super_block *sb,
1568 osb->first_cluster_group_blkno = 1611 osb->first_cluster_group_blkno =
1569 le64_to_cpu(di->id2.i_super.s_first_cluster_group); 1612 le64_to_cpu(di->id2.i_super.s_first_cluster_group);
1570 osb->fs_generation = le32_to_cpu(di->i_fs_generation); 1613 osb->fs_generation = le32_to_cpu(di->i_fs_generation);
1614 osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash);
1571 mlog(0, "vol_label: %s\n", osb->vol_label); 1615 mlog(0, "vol_label: %s\n", osb->vol_label);
1572 mlog(0, "uuid: %s\n", osb->uuid_str); 1616 mlog(0, "uuid: %s\n", osb->uuid_str);
1573 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", 1617 mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n",
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index ba9dbb51d25b..cbd03dfdc7b9 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -50,6 +50,7 @@
50#include "inode.h" 50#include "inode.h"
51#include "journal.h" 51#include "journal.h"
52#include "symlink.h" 52#include "symlink.h"
53#include "xattr.h"
53 54
54#include "buffer_head_io.h" 55#include "buffer_head_io.h"
55 56
@@ -83,11 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode,
83 84
84 mlog_entry_void(); 85 mlog_entry_void();
85 86
86 status = ocfs2_read_block(OCFS2_SB(inode->i_sb), 87 status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh);
87 OCFS2_I(inode)->ip_blkno,
88 bh,
89 OCFS2_BH_CACHED,
90 inode);
91 if (status < 0) { 88 if (status < 0) {
92 mlog_errno(status); 89 mlog_errno(status);
93 link = ERR_PTR(status); 90 link = ERR_PTR(status);
@@ -157,8 +154,7 @@ bail:
157 kunmap(page); 154 kunmap(page);
158 page_cache_release(page); 155 page_cache_release(page);
159 } 156 }
160 if (bh) 157 brelse(bh);
161 brelse(bh);
162 158
163 return ERR_PTR(status); 159 return ERR_PTR(status);
164} 160}
@@ -168,10 +164,18 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
168 .follow_link = ocfs2_follow_link, 164 .follow_link = ocfs2_follow_link,
169 .getattr = ocfs2_getattr, 165 .getattr = ocfs2_getattr,
170 .setattr = ocfs2_setattr, 166 .setattr = ocfs2_setattr,
167 .setxattr = generic_setxattr,
168 .getxattr = generic_getxattr,
169 .listxattr = ocfs2_listxattr,
170 .removexattr = generic_removexattr,
171}; 171};
172const struct inode_operations ocfs2_fast_symlink_inode_operations = { 172const struct inode_operations ocfs2_fast_symlink_inode_operations = {
173 .readlink = ocfs2_readlink, 173 .readlink = ocfs2_readlink,
174 .follow_link = ocfs2_follow_link, 174 .follow_link = ocfs2_follow_link,
175 .getattr = ocfs2_getattr, 175 .getattr = ocfs2_getattr,
176 .setattr = ocfs2_setattr, 176 .setattr = ocfs2_setattr,
177 .setxattr = generic_setxattr,
178 .getxattr = generic_getxattr,
179 .listxattr = ocfs2_listxattr,
180 .removexattr = generic_removexattr,
177}; 181};
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 4da8851f2b23..187b99ff0368 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -53,7 +53,11 @@
53#include <linux/highmem.h> 53#include <linux/highmem.h>
54#include <linux/buffer_head.h> 54#include <linux/buffer_head.h>
55#include <linux/rbtree.h> 55#include <linux/rbtree.h>
56#include <linux/jbd.h> 56#ifndef CONFIG_OCFS2_COMPAT_JBD
57# include <linux/jbd2.h>
58#else
59# include <linux/jbd.h>
60#endif
57 61
58#define MLOG_MASK_PREFIX ML_UPTODATE 62#define MLOG_MASK_PREFIX ML_UPTODATE
59 63
@@ -511,14 +515,10 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
511 ci->ci_num_cached--; 515 ci->ci_num_cached--;
512} 516}
513 517
514/* Called when we remove a chunk of metadata from an inode. We don't 518static void ocfs2_remove_block_from_cache(struct inode *inode,
515 * bother reverting things to an inlined array in the case of a remove 519 sector_t block)
516 * which moves us back under the limit. */
517void ocfs2_remove_from_cache(struct inode *inode,
518 struct buffer_head *bh)
519{ 520{
520 int index; 521 int index;
521 sector_t block = bh->b_blocknr;
522 struct ocfs2_meta_cache_item *item = NULL; 522 struct ocfs2_meta_cache_item *item = NULL;
523 struct ocfs2_inode_info *oi = OCFS2_I(inode); 523 struct ocfs2_inode_info *oi = OCFS2_I(inode);
524 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; 524 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
@@ -544,6 +544,30 @@ void ocfs2_remove_from_cache(struct inode *inode,
544 kmem_cache_free(ocfs2_uptodate_cachep, item); 544 kmem_cache_free(ocfs2_uptodate_cachep, item);
545} 545}
546 546
547/*
548 * Called when we remove a chunk of metadata from an inode. We don't
549 * bother reverting things to an inlined array in the case of a remove
550 * which moves us back under the limit.
551 */
552void ocfs2_remove_from_cache(struct inode *inode,
553 struct buffer_head *bh)
554{
555 sector_t block = bh->b_blocknr;
556
557 ocfs2_remove_block_from_cache(inode, block);
558}
559
560/* Called when we remove xattr clusters from an inode. */
561void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
562 sector_t block,
563 u32 c_len)
564{
565 unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len;
566
567 for (i = 0; i < b_len; i++, block++)
568 ocfs2_remove_block_from_cache(inode, block);
569}
570
547int __init init_ocfs2_uptodate_cache(void) 571int __init init_ocfs2_uptodate_cache(void)
548{ 572{
549 ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", 573 ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate",
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 2e73206059a8..531b4b3a0c47 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -40,6 +40,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode,
40 struct buffer_head *bh); 40 struct buffer_head *bh);
41void ocfs2_remove_from_cache(struct inode *inode, 41void ocfs2_remove_from_cache(struct inode *inode,
42 struct buffer_head *bh); 42 struct buffer_head *bh);
43void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode,
44 sector_t block,
45 u32 c_len);
43int ocfs2_buffer_read_ahead(struct inode *inode, 46int ocfs2_buffer_read_ahead(struct inode *inode,
44 struct buffer_head *bh); 47 struct buffer_head *bh);
45 48
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
new file mode 100644
index 000000000000..c25780a70dfd
--- /dev/null
+++ b/fs/ocfs2/xattr.c
@@ -0,0 +1,4834 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * xattr.c
5 *
6 * Copyright (C) 2008 Oracle. All rights reserved.
7 *
8 * CREDITS:
9 * Lots of code in this file is taken from ext3.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 021110-1307, USA.
25 */
26
27#include <linux/capability.h>
28#include <linux/fs.h>
29#include <linux/types.h>
30#include <linux/slab.h>
31#include <linux/highmem.h>
32#include <linux/pagemap.h>
33#include <linux/uio.h>
34#include <linux/sched.h>
35#include <linux/splice.h>
36#include <linux/mount.h>
37#include <linux/writeback.h>
38#include <linux/falloc.h>
39#include <linux/sort.h>
40#include <linux/init.h>
41#include <linux/module.h>
42#include <linux/string.h>
43
44#define MLOG_MASK_PREFIX ML_XATTR
45#include <cluster/masklog.h>
46
47#include "ocfs2.h"
48#include "alloc.h"
49#include "dlmglue.h"
50#include "file.h"
51#include "symlink.h"
52#include "sysfile.h"
53#include "inode.h"
54#include "journal.h"
55#include "ocfs2_fs.h"
56#include "suballoc.h"
57#include "uptodate.h"
58#include "buffer_head_io.h"
59#include "super.h"
60#include "xattr.h"
61
62
63struct ocfs2_xattr_def_value_root {
64 struct ocfs2_xattr_value_root xv;
65 struct ocfs2_extent_rec er;
66};
67
68struct ocfs2_xattr_bucket {
69 struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];
70 struct ocfs2_xattr_header *xh;
71};
72
73#define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root))
74#define OCFS2_XATTR_INLINE_SIZE 80
75
76static struct ocfs2_xattr_def_value_root def_xv = {
77 .xv.xr_list.l_count = cpu_to_le16(1),
78};
79
80struct xattr_handler *ocfs2_xattr_handlers[] = {
81 &ocfs2_xattr_user_handler,
82 &ocfs2_xattr_trusted_handler,
83 NULL
84};
85
86static struct xattr_handler *ocfs2_xattr_handler_map[] = {
87 [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
88 [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
89};
90
91struct ocfs2_xattr_info {
92 int name_index;
93 const char *name;
94 const void *value;
95 size_t value_len;
96};
97
98struct ocfs2_xattr_search {
99 struct buffer_head *inode_bh;
100 /*
101 * xattr_bh point to the block buffer head which has extended attribute
102 * when extended attribute in inode, xattr_bh is equal to inode_bh.
103 */
104 struct buffer_head *xattr_bh;
105 struct ocfs2_xattr_header *header;
106 struct ocfs2_xattr_bucket bucket;
107 void *base;
108 void *end;
109 struct ocfs2_xattr_entry *here;
110 int not_found;
111};
112
113static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
114 struct ocfs2_xattr_header *xh,
115 int index,
116 int *block_off,
117 int *new_offset);
118
119static int ocfs2_xattr_index_block_find(struct inode *inode,
120 struct buffer_head *root_bh,
121 int name_index,
122 const char *name,
123 struct ocfs2_xattr_search *xs);
124
125static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
126 struct ocfs2_xattr_tree_root *xt,
127 char *buffer,
128 size_t buffer_size);
129
130static int ocfs2_xattr_create_index_block(struct inode *inode,
131 struct ocfs2_xattr_search *xs);
132
133static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
134 struct ocfs2_xattr_info *xi,
135 struct ocfs2_xattr_search *xs);
136
137static int ocfs2_delete_xattr_index_block(struct inode *inode,
138 struct buffer_head *xb_bh);
139
140static inline const char *ocfs2_xattr_prefix(int name_index)
141{
142 struct xattr_handler *handler = NULL;
143
144 if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
145 handler = ocfs2_xattr_handler_map[name_index];
146
147 return handler ? handler->prefix : NULL;
148}
149
150static u32 ocfs2_xattr_name_hash(struct inode *inode,
151 const char *name,
152 int name_len)
153{
154 /* Get hash value of uuid from super block */
155 u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
156 int i;
157
158 /* hash extended attribute name */
159 for (i = 0; i < name_len; i++) {
160 hash = (hash << OCFS2_HASH_SHIFT) ^
161 (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
162 *name++;
163 }
164
165 return hash;
166}
167
168/*
169 * ocfs2_xattr_hash_entry()
170 *
171 * Compute the hash of an extended attribute.
172 */
173static void ocfs2_xattr_hash_entry(struct inode *inode,
174 struct ocfs2_xattr_header *header,
175 struct ocfs2_xattr_entry *entry)
176{
177 u32 hash = 0;
178 char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
179
180 hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
181 entry->xe_name_hash = cpu_to_le32(hash);
182
183 return;
184}
185
186static int ocfs2_xattr_extend_allocation(struct inode *inode,
187 u32 clusters_to_add,
188 struct buffer_head *xattr_bh,
189 struct ocfs2_xattr_value_root *xv)
190{
191 int status = 0;
192 int restart_func = 0;
193 int credits = 0;
194 handle_t *handle = NULL;
195 struct ocfs2_alloc_context *data_ac = NULL;
196 struct ocfs2_alloc_context *meta_ac = NULL;
197 enum ocfs2_alloc_restarted why;
198 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
199 u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters);
200 struct ocfs2_extent_tree et;
201
202 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
203
204 ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv);
205
206restart_all:
207
208 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
209 &data_ac, &meta_ac);
210 if (status) {
211 mlog_errno(status);
212 goto leave;
213 }
214
215 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
216 clusters_to_add);
217 handle = ocfs2_start_trans(osb, credits);
218 if (IS_ERR(handle)) {
219 status = PTR_ERR(handle);
220 handle = NULL;
221 mlog_errno(status);
222 goto leave;
223 }
224
225restarted_transaction:
226 status = ocfs2_journal_access(handle, inode, xattr_bh,
227 OCFS2_JOURNAL_ACCESS_WRITE);
228 if (status < 0) {
229 mlog_errno(status);
230 goto leave;
231 }
232
233 prev_clusters = le32_to_cpu(xv->xr_clusters);
234 status = ocfs2_add_clusters_in_btree(osb,
235 inode,
236 &logical_start,
237 clusters_to_add,
238 0,
239 &et,
240 handle,
241 data_ac,
242 meta_ac,
243 &why);
244 if ((status < 0) && (status != -EAGAIN)) {
245 if (status != -ENOSPC)
246 mlog_errno(status);
247 goto leave;
248 }
249
250 status = ocfs2_journal_dirty(handle, xattr_bh);
251 if (status < 0) {
252 mlog_errno(status);
253 goto leave;
254 }
255
256 clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters;
257
258 if (why != RESTART_NONE && clusters_to_add) {
259 if (why == RESTART_META) {
260 mlog(0, "restarting function.\n");
261 restart_func = 1;
262 } else {
263 BUG_ON(why != RESTART_TRANS);
264
265 mlog(0, "restarting transaction.\n");
266 /* TODO: This can be more intelligent. */
267 credits = ocfs2_calc_extend_credits(osb->sb,
268 et.et_root_el,
269 clusters_to_add);
270 status = ocfs2_extend_trans(handle, credits);
271 if (status < 0) {
272 /* handle still has to be committed at
273 * this point. */
274 status = -ENOMEM;
275 mlog_errno(status);
276 goto leave;
277 }
278 goto restarted_transaction;
279 }
280 }
281
282leave:
283 if (handle) {
284 ocfs2_commit_trans(osb, handle);
285 handle = NULL;
286 }
287 if (data_ac) {
288 ocfs2_free_alloc_context(data_ac);
289 data_ac = NULL;
290 }
291 if (meta_ac) {
292 ocfs2_free_alloc_context(meta_ac);
293 meta_ac = NULL;
294 }
295 if ((!status) && restart_func) {
296 restart_func = 0;
297 goto restart_all;
298 }
299
300 return status;
301}
302
303static int __ocfs2_remove_xattr_range(struct inode *inode,
304 struct buffer_head *root_bh,
305 struct ocfs2_xattr_value_root *xv,
306 u32 cpos, u32 phys_cpos, u32 len,
307 struct ocfs2_cached_dealloc_ctxt *dealloc)
308{
309 int ret;
310 u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
311 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
312 struct inode *tl_inode = osb->osb_tl_inode;
313 handle_t *handle;
314 struct ocfs2_alloc_context *meta_ac = NULL;
315 struct ocfs2_extent_tree et;
316
317 ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv);
318
319 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
320 if (ret) {
321 mlog_errno(ret);
322 return ret;
323 }
324
325 mutex_lock(&tl_inode->i_mutex);
326
327 if (ocfs2_truncate_log_needs_flush(osb)) {
328 ret = __ocfs2_flush_truncate_log(osb);
329 if (ret < 0) {
330 mlog_errno(ret);
331 goto out;
332 }
333 }
334
335 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
336 if (IS_ERR(handle)) {
337 ret = PTR_ERR(handle);
338 mlog_errno(ret);
339 goto out;
340 }
341
342 ret = ocfs2_journal_access(handle, inode, root_bh,
343 OCFS2_JOURNAL_ACCESS_WRITE);
344 if (ret) {
345 mlog_errno(ret);
346 goto out_commit;
347 }
348
349 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
350 dealloc);
351 if (ret) {
352 mlog_errno(ret);
353 goto out_commit;
354 }
355
356 le32_add_cpu(&xv->xr_clusters, -len);
357
358 ret = ocfs2_journal_dirty(handle, root_bh);
359 if (ret) {
360 mlog_errno(ret);
361 goto out_commit;
362 }
363
364 ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len);
365 if (ret)
366 mlog_errno(ret);
367
368out_commit:
369 ocfs2_commit_trans(osb, handle);
370out:
371 mutex_unlock(&tl_inode->i_mutex);
372
373 if (meta_ac)
374 ocfs2_free_alloc_context(meta_ac);
375
376 return ret;
377}
378
379static int ocfs2_xattr_shrink_size(struct inode *inode,
380 u32 old_clusters,
381 u32 new_clusters,
382 struct buffer_head *root_bh,
383 struct ocfs2_xattr_value_root *xv)
384{
385 int ret = 0;
386 u32 trunc_len, cpos, phys_cpos, alloc_size;
387 u64 block;
388 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
389 struct ocfs2_cached_dealloc_ctxt dealloc;
390
391 ocfs2_init_dealloc_ctxt(&dealloc);
392
393 if (old_clusters <= new_clusters)
394 return 0;
395
396 cpos = new_clusters;
397 trunc_len = old_clusters - new_clusters;
398 while (trunc_len) {
399 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
400 &alloc_size, &xv->xr_list);
401 if (ret) {
402 mlog_errno(ret);
403 goto out;
404 }
405
406 if (alloc_size > trunc_len)
407 alloc_size = trunc_len;
408
409 ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos,
410 phys_cpos, alloc_size,
411 &dealloc);
412 if (ret) {
413 mlog_errno(ret);
414 goto out;
415 }
416
417 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
418 ocfs2_remove_xattr_clusters_from_cache(inode, block,
419 alloc_size);
420 cpos += alloc_size;
421 trunc_len -= alloc_size;
422 }
423
424out:
425 ocfs2_schedule_truncate_log_flush(osb, 1);
426 ocfs2_run_deallocs(osb, &dealloc);
427
428 return ret;
429}
430
431static int ocfs2_xattr_value_truncate(struct inode *inode,
432 struct buffer_head *root_bh,
433 struct ocfs2_xattr_value_root *xv,
434 int len)
435{
436 int ret;
437 u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
438 u32 old_clusters = le32_to_cpu(xv->xr_clusters);
439
440 if (new_clusters == old_clusters)
441 return 0;
442
443 if (new_clusters > old_clusters)
444 ret = ocfs2_xattr_extend_allocation(inode,
445 new_clusters - old_clusters,
446 root_bh, xv);
447 else
448 ret = ocfs2_xattr_shrink_size(inode,
449 old_clusters, new_clusters,
450 root_bh, xv);
451
452 return ret;
453}
454
455static int ocfs2_xattr_list_entry(char *buffer, size_t size,
456 size_t *result, const char *prefix,
457 const char *name, int name_len)
458{
459 char *p = buffer + *result;
460 int prefix_len = strlen(prefix);
461 int total_len = prefix_len + name_len + 1;
462
463 *result += total_len;
464
465 /* we are just looking for how big our buffer needs to be */
466 if (!size)
467 return 0;
468
469 if (*result > size)
470 return -ERANGE;
471
472 memcpy(p, prefix, prefix_len);
473 memcpy(p + prefix_len, name, name_len);
474 p[prefix_len + name_len] = '\0';
475
476 return 0;
477}
478
479static int ocfs2_xattr_list_entries(struct inode *inode,
480 struct ocfs2_xattr_header *header,
481 char *buffer, size_t buffer_size)
482{
483 size_t result = 0;
484 int i, type, ret;
485 const char *prefix, *name;
486
487 for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
488 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
489 type = ocfs2_xattr_get_type(entry);
490 prefix = ocfs2_xattr_prefix(type);
491
492 if (prefix) {
493 name = (const char *)header +
494 le16_to_cpu(entry->xe_name_offset);
495
496 ret = ocfs2_xattr_list_entry(buffer, buffer_size,
497 &result, prefix, name,
498 entry->xe_name_len);
499 if (ret)
500 return ret;
501 }
502 }
503
504 return result;
505}
506
507static int ocfs2_xattr_ibody_list(struct inode *inode,
508 struct ocfs2_dinode *di,
509 char *buffer,
510 size_t buffer_size)
511{
512 struct ocfs2_xattr_header *header = NULL;
513 struct ocfs2_inode_info *oi = OCFS2_I(inode);
514 int ret = 0;
515
516 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
517 return ret;
518
519 header = (struct ocfs2_xattr_header *)
520 ((void *)di + inode->i_sb->s_blocksize -
521 le16_to_cpu(di->i_xattr_inline_size));
522
523 ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
524
525 return ret;
526}
527
528static int ocfs2_xattr_block_list(struct inode *inode,
529 struct ocfs2_dinode *di,
530 char *buffer,
531 size_t buffer_size)
532{
533 struct buffer_head *blk_bh = NULL;
534 struct ocfs2_xattr_block *xb;
535 int ret = 0;
536
537 if (!di->i_xattr_loc)
538 return ret;
539
540 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
541 if (ret < 0) {
542 mlog_errno(ret);
543 return ret;
544 }
545 /*Verify the signature of xattr block*/
546 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
547 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
548 ret = -EFAULT;
549 goto cleanup;
550 }
551
552 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
553
554 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
555 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
556 ret = ocfs2_xattr_list_entries(inode, header,
557 buffer, buffer_size);
558 } else {
559 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root;
560 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
561 buffer, buffer_size);
562 }
563cleanup:
564 brelse(blk_bh);
565
566 return ret;
567}
568
569ssize_t ocfs2_listxattr(struct dentry *dentry,
570 char *buffer,
571 size_t size)
572{
573 int ret = 0, i_ret = 0, b_ret = 0;
574 struct buffer_head *di_bh = NULL;
575 struct ocfs2_dinode *di = NULL;
576 struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
577
578 if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
579 return -EOPNOTSUPP;
580
581 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
582 return ret;
583
584 ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
585 if (ret < 0) {
586 mlog_errno(ret);
587 return ret;
588 }
589
590 di = (struct ocfs2_dinode *)di_bh->b_data;
591
592 down_read(&oi->ip_xattr_sem);
593 i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
594 if (i_ret < 0)
595 b_ret = 0;
596 else {
597 if (buffer) {
598 buffer += i_ret;
599 size -= i_ret;
600 }
601 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
602 buffer, size);
603 if (b_ret < 0)
604 i_ret = 0;
605 }
606 up_read(&oi->ip_xattr_sem);
607 ocfs2_inode_unlock(dentry->d_inode, 0);
608
609 brelse(di_bh);
610
611 return i_ret + b_ret;
612}
613
614static int ocfs2_xattr_find_entry(int name_index,
615 const char *name,
616 struct ocfs2_xattr_search *xs)
617{
618 struct ocfs2_xattr_entry *entry;
619 size_t name_len;
620 int i, cmp = 1;
621
622 if (name == NULL)
623 return -EINVAL;
624
625 name_len = strlen(name);
626 entry = xs->here;
627 for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
628 cmp = name_index - ocfs2_xattr_get_type(entry);
629 if (!cmp)
630 cmp = name_len - entry->xe_name_len;
631 if (!cmp)
632 cmp = memcmp(name, (xs->base +
633 le16_to_cpu(entry->xe_name_offset)),
634 name_len);
635 if (cmp == 0)
636 break;
637 entry += 1;
638 }
639 xs->here = entry;
640
641 return cmp ? -ENODATA : 0;
642}
643
644static int ocfs2_xattr_get_value_outside(struct inode *inode,
645 struct ocfs2_xattr_value_root *xv,
646 void *buffer,
647 size_t len)
648{
649 u32 cpos, p_cluster, num_clusters, bpc, clusters;
650 u64 blkno;
651 int i, ret = 0;
652 size_t cplen, blocksize;
653 struct buffer_head *bh = NULL;
654 struct ocfs2_extent_list *el;
655
656 el = &xv->xr_list;
657 clusters = le32_to_cpu(xv->xr_clusters);
658 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
659 blocksize = inode->i_sb->s_blocksize;
660
661 cpos = 0;
662 while (cpos < clusters) {
663 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
664 &num_clusters, el);
665 if (ret) {
666 mlog_errno(ret);
667 goto out;
668 }
669
670 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
671 /* Copy ocfs2_xattr_value */
672 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
673 ret = ocfs2_read_block(inode, blkno, &bh);
674 if (ret) {
675 mlog_errno(ret);
676 goto out;
677 }
678
679 cplen = len >= blocksize ? blocksize : len;
680 memcpy(buffer, bh->b_data, cplen);
681 len -= cplen;
682 buffer += cplen;
683
684 brelse(bh);
685 bh = NULL;
686 if (len == 0)
687 break;
688 }
689 cpos += num_clusters;
690 }
691out:
692 return ret;
693}
694
695static int ocfs2_xattr_ibody_get(struct inode *inode,
696 int name_index,
697 const char *name,
698 void *buffer,
699 size_t buffer_size,
700 struct ocfs2_xattr_search *xs)
701{
702 struct ocfs2_inode_info *oi = OCFS2_I(inode);
703 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
704 struct ocfs2_xattr_value_root *xv;
705 size_t size;
706 int ret = 0;
707
708 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
709 return -ENODATA;
710
711 xs->end = (void *)di + inode->i_sb->s_blocksize;
712 xs->header = (struct ocfs2_xattr_header *)
713 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
714 xs->base = (void *)xs->header;
715 xs->here = xs->header->xh_entries;
716
717 ret = ocfs2_xattr_find_entry(name_index, name, xs);
718 if (ret)
719 return ret;
720 size = le64_to_cpu(xs->here->xe_value_size);
721 if (buffer) {
722 if (size > buffer_size)
723 return -ERANGE;
724 if (ocfs2_xattr_is_local(xs->here)) {
725 memcpy(buffer, (void *)xs->base +
726 le16_to_cpu(xs->here->xe_name_offset) +
727 OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
728 } else {
729 xv = (struct ocfs2_xattr_value_root *)
730 (xs->base + le16_to_cpu(
731 xs->here->xe_name_offset) +
732 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
733 ret = ocfs2_xattr_get_value_outside(inode, xv,
734 buffer, size);
735 if (ret < 0) {
736 mlog_errno(ret);
737 return ret;
738 }
739 }
740 }
741
742 return size;
743}
744
745static int ocfs2_xattr_block_get(struct inode *inode,
746 int name_index,
747 const char *name,
748 void *buffer,
749 size_t buffer_size,
750 struct ocfs2_xattr_search *xs)
751{
752 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
753 struct buffer_head *blk_bh = NULL;
754 struct ocfs2_xattr_block *xb;
755 struct ocfs2_xattr_value_root *xv;
756 size_t size;
757 int ret = -ENODATA, name_offset, name_len, block_off, i;
758
759 if (!di->i_xattr_loc)
760 return ret;
761
762 memset(&xs->bucket, 0, sizeof(xs->bucket));
763
764 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
765 if (ret < 0) {
766 mlog_errno(ret);
767 return ret;
768 }
769 /*Verify the signature of xattr block*/
770 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
771 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
772 ret = -EFAULT;
773 goto cleanup;
774 }
775
776 xs->xattr_bh = blk_bh;
777 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
778
779 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
780 xs->header = &xb->xb_attrs.xb_header;
781 xs->base = (void *)xs->header;
782 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
783 xs->here = xs->header->xh_entries;
784
785 ret = ocfs2_xattr_find_entry(name_index, name, xs);
786 } else
787 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
788 name_index,
789 name, xs);
790
791 if (ret)
792 goto cleanup;
793 size = le64_to_cpu(xs->here->xe_value_size);
794 if (buffer) {
795 ret = -ERANGE;
796 if (size > buffer_size)
797 goto cleanup;
798
799 name_offset = le16_to_cpu(xs->here->xe_name_offset);
800 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
801 i = xs->here - xs->header->xh_entries;
802
803 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
804 ret = ocfs2_xattr_bucket_get_name_value(inode,
805 xs->bucket.xh,
806 i,
807 &block_off,
808 &name_offset);
809 xs->base = xs->bucket.bhs[block_off]->b_data;
810 }
811 if (ocfs2_xattr_is_local(xs->here)) {
812 memcpy(buffer, (void *)xs->base +
813 name_offset + name_len, size);
814 } else {
815 xv = (struct ocfs2_xattr_value_root *)
816 (xs->base + name_offset + name_len);
817 ret = ocfs2_xattr_get_value_outside(inode, xv,
818 buffer, size);
819 if (ret < 0) {
820 mlog_errno(ret);
821 goto cleanup;
822 }
823 }
824 }
825 ret = size;
826cleanup:
827 for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++)
828 brelse(xs->bucket.bhs[i]);
829 memset(&xs->bucket, 0, sizeof(xs->bucket));
830
831 brelse(blk_bh);
832 return ret;
833}
834
835/* ocfs2_xattr_get()
836 *
837 * Copy an extended attribute into the buffer provided.
838 * Buffer is NULL to compute the size of buffer required.
839 */
840int ocfs2_xattr_get(struct inode *inode,
841 int name_index,
842 const char *name,
843 void *buffer,
844 size_t buffer_size)
845{
846 int ret;
847 struct ocfs2_dinode *di = NULL;
848 struct buffer_head *di_bh = NULL;
849 struct ocfs2_inode_info *oi = OCFS2_I(inode);
850 struct ocfs2_xattr_search xis = {
851 .not_found = -ENODATA,
852 };
853 struct ocfs2_xattr_search xbs = {
854 .not_found = -ENODATA,
855 };
856
857 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
858 return -EOPNOTSUPP;
859
860 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
861 ret = -ENODATA;
862
863 ret = ocfs2_inode_lock(inode, &di_bh, 0);
864 if (ret < 0) {
865 mlog_errno(ret);
866 return ret;
867 }
868 xis.inode_bh = xbs.inode_bh = di_bh;
869 di = (struct ocfs2_dinode *)di_bh->b_data;
870
871 down_read(&oi->ip_xattr_sem);
872 ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
873 buffer_size, &xis);
874 if (ret == -ENODATA)
875 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
876 buffer_size, &xbs);
877 up_read(&oi->ip_xattr_sem);
878 ocfs2_inode_unlock(inode, 0);
879
880 brelse(di_bh);
881
882 return ret;
883}
884
885static int __ocfs2_xattr_set_value_outside(struct inode *inode,
886 struct ocfs2_xattr_value_root *xv,
887 const void *value,
888 int value_len)
889{
890 int ret = 0, i, cp_len, credits;
891 u16 blocksize = inode->i_sb->s_blocksize;
892 u32 p_cluster, num_clusters;
893 u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
894 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
895 u64 blkno;
896 struct buffer_head *bh = NULL;
897 handle_t *handle;
898
899 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
900
901 credits = clusters * bpc;
902 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits);
903 if (IS_ERR(handle)) {
904 ret = PTR_ERR(handle);
905 mlog_errno(ret);
906 goto out;
907 }
908
909 while (cpos < clusters) {
910 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
911 &num_clusters, &xv->xr_list);
912 if (ret) {
913 mlog_errno(ret);
914 goto out_commit;
915 }
916
917 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
918
919 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
920 ret = ocfs2_read_block(inode, blkno, &bh);
921 if (ret) {
922 mlog_errno(ret);
923 goto out_commit;
924 }
925
926 ret = ocfs2_journal_access(handle,
927 inode,
928 bh,
929 OCFS2_JOURNAL_ACCESS_WRITE);
930 if (ret < 0) {
931 mlog_errno(ret);
932 goto out_commit;
933 }
934
935 cp_len = value_len > blocksize ? blocksize : value_len;
936 memcpy(bh->b_data, value, cp_len);
937 value_len -= cp_len;
938 value += cp_len;
939 if (cp_len < blocksize)
940 memset(bh->b_data + cp_len, 0,
941 blocksize - cp_len);
942
943 ret = ocfs2_journal_dirty(handle, bh);
944 if (ret < 0) {
945 mlog_errno(ret);
946 goto out_commit;
947 }
948 brelse(bh);
949 bh = NULL;
950
951 /*
952 * XXX: do we need to empty all the following
953 * blocks in this cluster?
954 */
955 if (!value_len)
956 break;
957 }
958 cpos += num_clusters;
959 }
960out_commit:
961 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
962out:
963 brelse(bh);
964
965 return ret;
966}
967
968static int ocfs2_xattr_cleanup(struct inode *inode,
969 struct ocfs2_xattr_info *xi,
970 struct ocfs2_xattr_search *xs,
971 size_t offs)
972{
973 handle_t *handle = NULL;
974 int ret = 0;
975 size_t name_len = strlen(xi->name);
976 void *val = xs->base + offs;
977 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
978
979 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
980 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
981 if (IS_ERR(handle)) {
982 ret = PTR_ERR(handle);
983 mlog_errno(ret);
984 goto out;
985 }
986 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
987 OCFS2_JOURNAL_ACCESS_WRITE);
988 if (ret) {
989 mlog_errno(ret);
990 goto out_commit;
991 }
992 /* Decrease xattr count */
993 le16_add_cpu(&xs->header->xh_count, -1);
994 /* Remove the xattr entry and tree root which has already be set*/
995 memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
996 memset(val, 0, size);
997
998 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
999 if (ret < 0)
1000 mlog_errno(ret);
1001out_commit:
1002 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1003out:
1004 return ret;
1005}
1006
1007static int ocfs2_xattr_update_entry(struct inode *inode,
1008 struct ocfs2_xattr_info *xi,
1009 struct ocfs2_xattr_search *xs,
1010 size_t offs)
1011{
1012 handle_t *handle = NULL;
1013 int ret = 0;
1014
1015 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1016 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1017 if (IS_ERR(handle)) {
1018 ret = PTR_ERR(handle);
1019 mlog_errno(ret);
1020 goto out;
1021 }
1022 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1023 OCFS2_JOURNAL_ACCESS_WRITE);
1024 if (ret) {
1025 mlog_errno(ret);
1026 goto out_commit;
1027 }
1028
1029 xs->here->xe_name_offset = cpu_to_le16(offs);
1030 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1031 if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE)
1032 ocfs2_xattr_set_local(xs->here, 1);
1033 else
1034 ocfs2_xattr_set_local(xs->here, 0);
1035 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1036
1037 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1038 if (ret < 0)
1039 mlog_errno(ret);
1040out_commit:
1041 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1042out:
1043 return ret;
1044}
1045
1046/*
1047 * ocfs2_xattr_set_value_outside()
1048 *
1049 * Set large size value in B tree.
1050 */
1051static int ocfs2_xattr_set_value_outside(struct inode *inode,
1052 struct ocfs2_xattr_info *xi,
1053 struct ocfs2_xattr_search *xs,
1054 size_t offs)
1055{
1056 size_t name_len = strlen(xi->name);
1057 void *val = xs->base + offs;
1058 struct ocfs2_xattr_value_root *xv = NULL;
1059 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1060 int ret = 0;
1061
1062 memset(val, 0, size);
1063 memcpy(val, xi->name, name_len);
1064 xv = (struct ocfs2_xattr_value_root *)
1065 (val + OCFS2_XATTR_SIZE(name_len));
1066 xv->xr_clusters = 0;
1067 xv->xr_last_eb_blk = 0;
1068 xv->xr_list.l_tree_depth = 0;
1069 xv->xr_list.l_count = cpu_to_le16(1);
1070 xv->xr_list.l_next_free_rec = 0;
1071
1072 ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv,
1073 xi->value_len);
1074 if (ret < 0) {
1075 mlog_errno(ret);
1076 return ret;
1077 }
1078 ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value,
1079 xi->value_len);
1080 if (ret < 0) {
1081 mlog_errno(ret);
1082 return ret;
1083 }
1084 ret = ocfs2_xattr_update_entry(inode, xi, xs, offs);
1085 if (ret < 0)
1086 mlog_errno(ret);
1087
1088 return ret;
1089}
1090
1091/*
1092 * ocfs2_xattr_set_entry_local()
1093 *
1094 * Set, replace or remove extended attribute in local.
1095 */
1096static void ocfs2_xattr_set_entry_local(struct inode *inode,
1097 struct ocfs2_xattr_info *xi,
1098 struct ocfs2_xattr_search *xs,
1099 struct ocfs2_xattr_entry *last,
1100 size_t min_offs)
1101{
1102 size_t name_len = strlen(xi->name);
1103 int i;
1104
1105 if (xi->value && xs->not_found) {
1106 /* Insert the new xattr entry. */
1107 le16_add_cpu(&xs->header->xh_count, 1);
1108 ocfs2_xattr_set_type(last, xi->name_index);
1109 ocfs2_xattr_set_local(last, 1);
1110 last->xe_name_len = name_len;
1111 } else {
1112 void *first_val;
1113 void *val;
1114 size_t offs, size;
1115
1116 first_val = xs->base + min_offs;
1117 offs = le16_to_cpu(xs->here->xe_name_offset);
1118 val = xs->base + offs;
1119
1120 if (le64_to_cpu(xs->here->xe_value_size) >
1121 OCFS2_XATTR_INLINE_SIZE)
1122 size = OCFS2_XATTR_SIZE(name_len) +
1123 OCFS2_XATTR_ROOT_SIZE;
1124 else
1125 size = OCFS2_XATTR_SIZE(name_len) +
1126 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1127
1128 if (xi->value && size == OCFS2_XATTR_SIZE(name_len) +
1129 OCFS2_XATTR_SIZE(xi->value_len)) {
1130 /* The old and the new value have the
1131 same size. Just replace the value. */
1132 ocfs2_xattr_set_local(xs->here, 1);
1133 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1134 /* Clear value bytes. */
1135 memset(val + OCFS2_XATTR_SIZE(name_len),
1136 0,
1137 OCFS2_XATTR_SIZE(xi->value_len));
1138 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1139 xi->value,
1140 xi->value_len);
1141 return;
1142 }
1143 /* Remove the old name+value. */
1144 memmove(first_val + size, first_val, val - first_val);
1145 memset(first_val, 0, size);
1146 xs->here->xe_name_hash = 0;
1147 xs->here->xe_name_offset = 0;
1148 ocfs2_xattr_set_local(xs->here, 1);
1149 xs->here->xe_value_size = 0;
1150
1151 min_offs += size;
1152
1153 /* Adjust all value offsets. */
1154 last = xs->header->xh_entries;
1155 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1156 size_t o = le16_to_cpu(last->xe_name_offset);
1157
1158 if (o < offs)
1159 last->xe_name_offset = cpu_to_le16(o + size);
1160 last += 1;
1161 }
1162
1163 if (!xi->value) {
1164 /* Remove the old entry. */
1165 last -= 1;
1166 memmove(xs->here, xs->here + 1,
1167 (void *)last - (void *)xs->here);
1168 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
1169 le16_add_cpu(&xs->header->xh_count, -1);
1170 }
1171 }
1172 if (xi->value) {
1173 /* Insert the new name+value. */
1174 size_t size = OCFS2_XATTR_SIZE(name_len) +
1175 OCFS2_XATTR_SIZE(xi->value_len);
1176 void *val = xs->base + min_offs - size;
1177
1178 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1179 memset(val, 0, size);
1180 memcpy(val, xi->name, name_len);
1181 memcpy(val + OCFS2_XATTR_SIZE(name_len),
1182 xi->value,
1183 xi->value_len);
1184 xs->here->xe_value_size = cpu_to_le64(xi->value_len);
1185 ocfs2_xattr_set_local(xs->here, 1);
1186 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1187 }
1188
1189 return;
1190}
1191
1192/*
1193 * ocfs2_xattr_set_entry()
1194 *
1195 * Set extended attribute entry into inode or block.
1196 *
1197 * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1198 * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1199 * then set value in B tree with set_value_outside().
1200 */
1201static int ocfs2_xattr_set_entry(struct inode *inode,
1202 struct ocfs2_xattr_info *xi,
1203 struct ocfs2_xattr_search *xs,
1204 int flag)
1205{
1206 struct ocfs2_xattr_entry *last;
1207 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1208 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1209 size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name);
1210 size_t size_l = 0;
1211 handle_t *handle = NULL;
1212 int free, i, ret;
1213 struct ocfs2_xattr_info xi_l = {
1214 .name_index = xi->name_index,
1215 .name = xi->name,
1216 .value = xi->value,
1217 .value_len = xi->value_len,
1218 };
1219
1220 /* Compute min_offs, last and free space. */
1221 last = xs->header->xh_entries;
1222
1223 for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1224 size_t offs = le16_to_cpu(last->xe_name_offset);
1225 if (offs < min_offs)
1226 min_offs = offs;
1227 last += 1;
1228 }
1229
1230 free = min_offs - ((void *)last - xs->base) - sizeof(__u32);
1231 if (free < 0)
1232 return -EFAULT;
1233
1234 if (!xs->not_found) {
1235 size_t size = 0;
1236 if (ocfs2_xattr_is_local(xs->here))
1237 size = OCFS2_XATTR_SIZE(name_len) +
1238 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1239 else
1240 size = OCFS2_XATTR_SIZE(name_len) +
1241 OCFS2_XATTR_ROOT_SIZE;
1242 free += (size + sizeof(struct ocfs2_xattr_entry));
1243 }
1244 /* Check free space in inode or block */
1245 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1246 if (free < sizeof(struct ocfs2_xattr_entry) +
1247 OCFS2_XATTR_SIZE(name_len) +
1248 OCFS2_XATTR_ROOT_SIZE) {
1249 ret = -ENOSPC;
1250 goto out;
1251 }
1252 size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1253 xi_l.value = (void *)&def_xv;
1254 xi_l.value_len = OCFS2_XATTR_ROOT_SIZE;
1255 } else if (xi->value) {
1256 if (free < sizeof(struct ocfs2_xattr_entry) +
1257 OCFS2_XATTR_SIZE(name_len) +
1258 OCFS2_XATTR_SIZE(xi->value_len)) {
1259 ret = -ENOSPC;
1260 goto out;
1261 }
1262 }
1263
1264 if (!xs->not_found) {
1265 /* For existing extended attribute */
1266 size_t size = OCFS2_XATTR_SIZE(name_len) +
1267 OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1268 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1269 void *val = xs->base + offs;
1270
1271 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1272 /* Replace existing local xattr with tree root */
1273 ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1274 offs);
1275 if (ret < 0)
1276 mlog_errno(ret);
1277 goto out;
1278 } else if (!ocfs2_xattr_is_local(xs->here)) {
1279 /* For existing xattr which has value outside */
1280 struct ocfs2_xattr_value_root *xv = NULL;
1281 xv = (struct ocfs2_xattr_value_root *)(val +
1282 OCFS2_XATTR_SIZE(name_len));
1283
1284 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1285 /*
1286 * If new value need set outside also,
1287 * first truncate old value to new value,
1288 * then set new value with set_value_outside().
1289 */
1290 ret = ocfs2_xattr_value_truncate(inode,
1291 xs->xattr_bh,
1292 xv,
1293 xi->value_len);
1294 if (ret < 0) {
1295 mlog_errno(ret);
1296 goto out;
1297 }
1298
1299 ret = __ocfs2_xattr_set_value_outside(inode,
1300 xv,
1301 xi->value,
1302 xi->value_len);
1303 if (ret < 0) {
1304 mlog_errno(ret);
1305 goto out;
1306 }
1307
1308 ret = ocfs2_xattr_update_entry(inode,
1309 xi,
1310 xs,
1311 offs);
1312 if (ret < 0)
1313 mlog_errno(ret);
1314 goto out;
1315 } else {
1316 /*
1317 * If new value need set in local,
1318 * just trucate old value to zero.
1319 */
1320 ret = ocfs2_xattr_value_truncate(inode,
1321 xs->xattr_bh,
1322 xv,
1323 0);
1324 if (ret < 0)
1325 mlog_errno(ret);
1326 }
1327 }
1328 }
1329
1330 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1331 OCFS2_INODE_UPDATE_CREDITS);
1332 if (IS_ERR(handle)) {
1333 ret = PTR_ERR(handle);
1334 mlog_errno(ret);
1335 goto out;
1336 }
1337
1338 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1339 OCFS2_JOURNAL_ACCESS_WRITE);
1340 if (ret) {
1341 mlog_errno(ret);
1342 goto out_commit;
1343 }
1344
1345 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1346 /* set extended attribute in external block. */
1347 ret = ocfs2_extend_trans(handle,
1348 OCFS2_INODE_UPDATE_CREDITS +
1349 OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1350 if (ret) {
1351 mlog_errno(ret);
1352 goto out_commit;
1353 }
1354 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1355 OCFS2_JOURNAL_ACCESS_WRITE);
1356 if (ret) {
1357 mlog_errno(ret);
1358 goto out_commit;
1359 }
1360 }
1361
1362 /*
1363 * Set value in local, include set tree root in local.
1364 * This is the first step for value size >INLINE_SIZE.
1365 */
1366 ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1367
1368 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1369 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1370 if (ret < 0) {
1371 mlog_errno(ret);
1372 goto out_commit;
1373 }
1374 }
1375
1376 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1377 (flag & OCFS2_INLINE_XATTR_FL)) {
1378 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1379 unsigned int xattrsize = osb->s_xattr_inline_size;
1380
1381 /*
1382 * Adjust extent record count or inline data size
1383 * to reserve space for extended attribute.
1384 */
1385 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1386 struct ocfs2_inline_data *idata = &di->id2.i_data;
1387 le16_add_cpu(&idata->id_count, -xattrsize);
1388 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1389 struct ocfs2_extent_list *el = &di->id2.i_list;
1390 le16_add_cpu(&el->l_count, -(xattrsize /
1391 sizeof(struct ocfs2_extent_rec)));
1392 }
1393 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1394 }
1395 /* Update xattr flag */
1396 spin_lock(&oi->ip_lock);
1397 oi->ip_dyn_features |= flag;
1398 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1399 spin_unlock(&oi->ip_lock);
1400 /* Update inode ctime */
1401 inode->i_ctime = CURRENT_TIME;
1402 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
1403 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
1404
1405 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1406 if (ret < 0)
1407 mlog_errno(ret);
1408
1409out_commit:
1410 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1411
1412 if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) {
1413 /*
1414 * Set value outside in B tree.
1415 * This is the second step for value size > INLINE_SIZE.
1416 */
1417 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1418 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs);
1419 if (ret < 0) {
1420 int ret2;
1421
1422 mlog_errno(ret);
1423 /*
1424 * If set value outside failed, we have to clean
1425 * the junk tree root we have already set in local.
1426 */
1427 ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs);
1428 if (ret2 < 0)
1429 mlog_errno(ret2);
1430 }
1431 }
1432out:
1433 return ret;
1434
1435}
1436
1437static int ocfs2_remove_value_outside(struct inode*inode,
1438 struct buffer_head *bh,
1439 struct ocfs2_xattr_header *header)
1440{
1441 int ret = 0, i;
1442
1443 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1444 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1445
1446 if (!ocfs2_xattr_is_local(entry)) {
1447 struct ocfs2_xattr_value_root *xv;
1448 void *val;
1449
1450 val = (void *)header +
1451 le16_to_cpu(entry->xe_name_offset);
1452 xv = (struct ocfs2_xattr_value_root *)
1453 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1454 ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0);
1455 if (ret < 0) {
1456 mlog_errno(ret);
1457 return ret;
1458 }
1459 }
1460 }
1461
1462 return ret;
1463}
1464
1465static int ocfs2_xattr_ibody_remove(struct inode *inode,
1466 struct buffer_head *di_bh)
1467{
1468
1469 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1470 struct ocfs2_xattr_header *header;
1471 int ret;
1472
1473 header = (struct ocfs2_xattr_header *)
1474 ((void *)di + inode->i_sb->s_blocksize -
1475 le16_to_cpu(di->i_xattr_inline_size));
1476
1477 ret = ocfs2_remove_value_outside(inode, di_bh, header);
1478
1479 return ret;
1480}
1481
1482static int ocfs2_xattr_block_remove(struct inode *inode,
1483 struct buffer_head *blk_bh)
1484{
1485 struct ocfs2_xattr_block *xb;
1486 int ret = 0;
1487
1488 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1489 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1490 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1491 ret = ocfs2_remove_value_outside(inode, blk_bh, header);
1492 } else
1493 ret = ocfs2_delete_xattr_index_block(inode, blk_bh);
1494
1495 return ret;
1496}
1497
1498static int ocfs2_xattr_free_block(struct inode *inode,
1499 u64 block)
1500{
1501 struct inode *xb_alloc_inode;
1502 struct buffer_head *xb_alloc_bh = NULL;
1503 struct buffer_head *blk_bh = NULL;
1504 struct ocfs2_xattr_block *xb;
1505 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1506 handle_t *handle;
1507 int ret = 0;
1508 u64 blk, bg_blkno;
1509 u16 bit;
1510
1511 ret = ocfs2_read_block(inode, block, &blk_bh);
1512 if (ret < 0) {
1513 mlog_errno(ret);
1514 goto out;
1515 }
1516
1517 /*Verify the signature of xattr block*/
1518 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1519 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1520 ret = -EFAULT;
1521 goto out;
1522 }
1523
1524 ret = ocfs2_xattr_block_remove(inode, blk_bh);
1525 if (ret < 0) {
1526 mlog_errno(ret);
1527 goto out;
1528 }
1529
1530 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1531 blk = le64_to_cpu(xb->xb_blkno);
1532 bit = le16_to_cpu(xb->xb_suballoc_bit);
1533 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1534
1535 xb_alloc_inode = ocfs2_get_system_file_inode(osb,
1536 EXTENT_ALLOC_SYSTEM_INODE,
1537 le16_to_cpu(xb->xb_suballoc_slot));
1538 if (!xb_alloc_inode) {
1539 ret = -ENOMEM;
1540 mlog_errno(ret);
1541 goto out;
1542 }
1543 mutex_lock(&xb_alloc_inode->i_mutex);
1544
1545 ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
1546 if (ret < 0) {
1547 mlog_errno(ret);
1548 goto out_mutex;
1549 }
1550
1551 handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
1552 if (IS_ERR(handle)) {
1553 ret = PTR_ERR(handle);
1554 mlog_errno(ret);
1555 goto out_unlock;
1556 }
1557
1558 ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
1559 bit, bg_blkno, 1);
1560 if (ret < 0)
1561 mlog_errno(ret);
1562
1563 ocfs2_commit_trans(osb, handle);
1564out_unlock:
1565 ocfs2_inode_unlock(xb_alloc_inode, 1);
1566 brelse(xb_alloc_bh);
1567out_mutex:
1568 mutex_unlock(&xb_alloc_inode->i_mutex);
1569 iput(xb_alloc_inode);
1570out:
1571 brelse(blk_bh);
1572 return ret;
1573}
1574
1575/*
1576 * ocfs2_xattr_remove()
1577 *
1578 * Free extended attribute resources associated with this inode.
1579 */
1580int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1581{
1582 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1583 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1584 handle_t *handle;
1585 int ret;
1586
1587 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1588 return 0;
1589
1590 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1591 return 0;
1592
1593 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1594 ret = ocfs2_xattr_ibody_remove(inode, di_bh);
1595 if (ret < 0) {
1596 mlog_errno(ret);
1597 goto out;
1598 }
1599 }
1600
1601 if (di->i_xattr_loc) {
1602 ret = ocfs2_xattr_free_block(inode,
1603 le64_to_cpu(di->i_xattr_loc));
1604 if (ret < 0) {
1605 mlog_errno(ret);
1606 goto out;
1607 }
1608 }
1609
1610 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
1611 OCFS2_INODE_UPDATE_CREDITS);
1612 if (IS_ERR(handle)) {
1613 ret = PTR_ERR(handle);
1614 mlog_errno(ret);
1615 goto out;
1616 }
1617 ret = ocfs2_journal_access(handle, inode, di_bh,
1618 OCFS2_JOURNAL_ACCESS_WRITE);
1619 if (ret) {
1620 mlog_errno(ret);
1621 goto out_commit;
1622 }
1623
1624 di->i_xattr_loc = 0;
1625
1626 spin_lock(&oi->ip_lock);
1627 oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
1628 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1629 spin_unlock(&oi->ip_lock);
1630
1631 ret = ocfs2_journal_dirty(handle, di_bh);
1632 if (ret < 0)
1633 mlog_errno(ret);
1634out_commit:
1635 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1636out:
1637 return ret;
1638}
1639
1640static int ocfs2_xattr_has_space_inline(struct inode *inode,
1641 struct ocfs2_dinode *di)
1642{
1643 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1644 unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1645 int free;
1646
1647 if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
1648 return 0;
1649
1650 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1651 struct ocfs2_inline_data *idata = &di->id2.i_data;
1652 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
1653 } else if (ocfs2_inode_is_fast_symlink(inode)) {
1654 free = ocfs2_fast_symlink_chars(inode->i_sb) -
1655 le64_to_cpu(di->i_size);
1656 } else {
1657 struct ocfs2_extent_list *el = &di->id2.i_list;
1658 free = (le16_to_cpu(el->l_count) -
1659 le16_to_cpu(el->l_next_free_rec)) *
1660 sizeof(struct ocfs2_extent_rec);
1661 }
1662 if (free >= xattrsize)
1663 return 1;
1664
1665 return 0;
1666}
1667
1668/*
1669 * ocfs2_xattr_ibody_find()
1670 *
1671 * Find extended attribute in inode block and
1672 * fill search info into struct ocfs2_xattr_search.
1673 */
1674static int ocfs2_xattr_ibody_find(struct inode *inode,
1675 int name_index,
1676 const char *name,
1677 struct ocfs2_xattr_search *xs)
1678{
1679 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1680 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1681 int ret;
1682 int has_space = 0;
1683
1684 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1685 return 0;
1686
1687 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1688 down_read(&oi->ip_alloc_sem);
1689 has_space = ocfs2_xattr_has_space_inline(inode, di);
1690 up_read(&oi->ip_alloc_sem);
1691 if (!has_space)
1692 return 0;
1693 }
1694
1695 xs->xattr_bh = xs->inode_bh;
1696 xs->end = (void *)di + inode->i_sb->s_blocksize;
1697 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1698 xs->header = (struct ocfs2_xattr_header *)
1699 (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1700 else
1701 xs->header = (struct ocfs2_xattr_header *)
1702 (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
1703 xs->base = (void *)xs->header;
1704 xs->here = xs->header->xh_entries;
1705
1706 /* Find the named attribute. */
1707 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1708 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1709 if (ret && ret != -ENODATA)
1710 return ret;
1711 xs->not_found = ret;
1712 }
1713
1714 return 0;
1715}
1716
1717/*
1718 * ocfs2_xattr_ibody_set()
1719 *
1720 * Set, replace or remove an extended attribute into inode block.
1721 *
1722 */
1723static int ocfs2_xattr_ibody_set(struct inode *inode,
1724 struct ocfs2_xattr_info *xi,
1725 struct ocfs2_xattr_search *xs)
1726{
1727 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1728 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1729 int ret;
1730
1731 if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
1732 return -ENOSPC;
1733
1734 down_write(&oi->ip_alloc_sem);
1735 if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
1736 if (!ocfs2_xattr_has_space_inline(inode, di)) {
1737 ret = -ENOSPC;
1738 goto out;
1739 }
1740 }
1741
1742 ret = ocfs2_xattr_set_entry(inode, xi, xs,
1743 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
1744out:
1745 up_write(&oi->ip_alloc_sem);
1746
1747 return ret;
1748}
1749
1750/*
1751 * ocfs2_xattr_block_find()
1752 *
1753 * Find extended attribute in external block and
1754 * fill search info into struct ocfs2_xattr_search.
1755 */
1756static int ocfs2_xattr_block_find(struct inode *inode,
1757 int name_index,
1758 const char *name,
1759 struct ocfs2_xattr_search *xs)
1760{
1761 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1762 struct buffer_head *blk_bh = NULL;
1763 struct ocfs2_xattr_block *xb;
1764 int ret = 0;
1765
1766 if (!di->i_xattr_loc)
1767 return ret;
1768
1769 ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh);
1770 if (ret < 0) {
1771 mlog_errno(ret);
1772 return ret;
1773 }
1774 /*Verify the signature of xattr block*/
1775 if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE,
1776 strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) {
1777 ret = -EFAULT;
1778 goto cleanup;
1779 }
1780
1781 xs->xattr_bh = blk_bh;
1782 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1783
1784 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1785 xs->header = &xb->xb_attrs.xb_header;
1786 xs->base = (void *)xs->header;
1787 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
1788 xs->here = xs->header->xh_entries;
1789
1790 ret = ocfs2_xattr_find_entry(name_index, name, xs);
1791 } else
1792 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
1793 name_index,
1794 name, xs);
1795
1796 if (ret && ret != -ENODATA) {
1797 xs->xattr_bh = NULL;
1798 goto cleanup;
1799 }
1800 xs->not_found = ret;
1801 return 0;
1802cleanup:
1803 brelse(blk_bh);
1804
1805 return ret;
1806}
1807
1808/*
1809 * When all the xattrs are deleted from index btree, the ocfs2_xattr_tree
1810 * will be erased and ocfs2_xattr_block will have its ocfs2_xattr_header
1811 * re-initialized.
1812 */
1813static int ocfs2_restore_xattr_block(struct inode *inode,
1814 struct ocfs2_xattr_search *xs)
1815{
1816 int ret;
1817 handle_t *handle;
1818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1819 struct ocfs2_xattr_block *xb =
1820 (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1821 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
1822 u16 xb_flags = le16_to_cpu(xb->xb_flags);
1823
1824 BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) ||
1825 le16_to_cpu(el->l_next_free_rec) != 0);
1826
1827 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS);
1828 if (IS_ERR(handle)) {
1829 ret = PTR_ERR(handle);
1830 handle = NULL;
1831 goto out;
1832 }
1833
1834 ret = ocfs2_journal_access(handle, inode, xs->xattr_bh,
1835 OCFS2_JOURNAL_ACCESS_WRITE);
1836 if (ret < 0) {
1837 mlog_errno(ret);
1838 goto out_commit;
1839 }
1840
1841 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
1842 offsetof(struct ocfs2_xattr_block, xb_attrs));
1843
1844 xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED);
1845
1846 ocfs2_journal_dirty(handle, xs->xattr_bh);
1847
1848out_commit:
1849 ocfs2_commit_trans(osb, handle);
1850out:
1851 return ret;
1852}
1853
1854/*
1855 * ocfs2_xattr_block_set()
1856 *
1857 * Set, replace or remove an extended attribute into external block.
1858 *
1859 */
1860static int ocfs2_xattr_block_set(struct inode *inode,
1861 struct ocfs2_xattr_info *xi,
1862 struct ocfs2_xattr_search *xs)
1863{
1864 struct buffer_head *new_bh = NULL;
1865 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1866 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1867 struct ocfs2_alloc_context *meta_ac = NULL;
1868 handle_t *handle = NULL;
1869 struct ocfs2_xattr_block *xblk = NULL;
1870 u16 suballoc_bit_start;
1871 u32 num_got;
1872 u64 first_blkno;
1873 int ret;
1874
1875 if (!xs->xattr_bh) {
1876 /*
1877 * Alloc one external block for extended attribute
1878 * outside of inode.
1879 */
1880 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
1881 if (ret < 0) {
1882 mlog_errno(ret);
1883 goto out;
1884 }
1885 handle = ocfs2_start_trans(osb,
1886 OCFS2_XATTR_BLOCK_CREATE_CREDITS);
1887 if (IS_ERR(handle)) {
1888 ret = PTR_ERR(handle);
1889 mlog_errno(ret);
1890 goto out;
1891 }
1892 ret = ocfs2_journal_access(handle, inode, xs->inode_bh,
1893 OCFS2_JOURNAL_ACCESS_CREATE);
1894 if (ret < 0) {
1895 mlog_errno(ret);
1896 goto out_commit;
1897 }
1898
1899 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
1900 &suballoc_bit_start, &num_got,
1901 &first_blkno);
1902 if (ret < 0) {
1903 mlog_errno(ret);
1904 goto out_commit;
1905 }
1906
1907 new_bh = sb_getblk(inode->i_sb, first_blkno);
1908 ocfs2_set_new_buffer_uptodate(inode, new_bh);
1909
1910 ret = ocfs2_journal_access(handle, inode, new_bh,
1911 OCFS2_JOURNAL_ACCESS_CREATE);
1912 if (ret < 0) {
1913 mlog_errno(ret);
1914 goto out_commit;
1915 }
1916
1917 /* Initialize ocfs2_xattr_block */
1918 xs->xattr_bh = new_bh;
1919 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
1920 memset(xblk, 0, inode->i_sb->s_blocksize);
1921 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
1922 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
1923 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1924 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
1925 xblk->xb_blkno = cpu_to_le64(first_blkno);
1926
1927 xs->header = &xblk->xb_attrs.xb_header;
1928 xs->base = (void *)xs->header;
1929 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
1930 xs->here = xs->header->xh_entries;
1931
1932
1933 ret = ocfs2_journal_dirty(handle, new_bh);
1934 if (ret < 0) {
1935 mlog_errno(ret);
1936 goto out_commit;
1937 }
1938 di->i_xattr_loc = cpu_to_le64(first_blkno);
1939 ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1940 if (ret < 0)
1941 mlog_errno(ret);
1942out_commit:
1943 ocfs2_commit_trans(osb, handle);
1944out:
1945 if (meta_ac)
1946 ocfs2_free_alloc_context(meta_ac);
1947 if (ret < 0)
1948 return ret;
1949 } else
1950 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1951
1952 if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
1953 /* Set extended attribute into external block */
1954 ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL);
1955 if (!ret || ret != -ENOSPC)
1956 goto end;
1957
1958 ret = ocfs2_xattr_create_index_block(inode, xs);
1959 if (ret)
1960 goto end;
1961 }
1962
1963 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs);
1964 if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0)
1965 ret = ocfs2_restore_xattr_block(inode, xs);
1966
1967end:
1968
1969 return ret;
1970}
1971
1972/*
1973 * ocfs2_xattr_set()
1974 *
1975 * Set, replace or remove an extended attribute for this inode.
1976 * value is NULL to remove an existing extended attribute, else either
1977 * create or replace an extended attribute.
1978 */
1979int ocfs2_xattr_set(struct inode *inode,
1980 int name_index,
1981 const char *name,
1982 const void *value,
1983 size_t value_len,
1984 int flags)
1985{
1986 struct buffer_head *di_bh = NULL;
1987 struct ocfs2_dinode *di;
1988 int ret;
1989 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
1990
1991 struct ocfs2_xattr_info xi = {
1992 .name_index = name_index,
1993 .name = name,
1994 .value = value,
1995 .value_len = value_len,
1996 };
1997
1998 struct ocfs2_xattr_search xis = {
1999 .not_found = -ENODATA,
2000 };
2001
2002 struct ocfs2_xattr_search xbs = {
2003 .not_found = -ENODATA,
2004 };
2005
2006 if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2007 return -EOPNOTSUPP;
2008
2009 ret = ocfs2_inode_lock(inode, &di_bh, 1);
2010 if (ret < 0) {
2011 mlog_errno(ret);
2012 return ret;
2013 }
2014 xis.inode_bh = xbs.inode_bh = di_bh;
2015 di = (struct ocfs2_dinode *)di_bh->b_data;
2016
2017 down_write(&OCFS2_I(inode)->ip_xattr_sem);
2018 /*
2019 * Scan inode and external block to find the same name
2020 * extended attribute and collect search infomation.
2021 */
2022 ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
2023 if (ret)
2024 goto cleanup;
2025 if (xis.not_found) {
2026 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
2027 if (ret)
2028 goto cleanup;
2029 }
2030
2031 if (xis.not_found && xbs.not_found) {
2032 ret = -ENODATA;
2033 if (flags & XATTR_REPLACE)
2034 goto cleanup;
2035 ret = 0;
2036 if (!value)
2037 goto cleanup;
2038 } else {
2039 ret = -EEXIST;
2040 if (flags & XATTR_CREATE)
2041 goto cleanup;
2042 }
2043
2044 if (!value) {
2045 /* Remove existing extended attribute */
2046 if (!xis.not_found)
2047 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2048 else if (!xbs.not_found)
2049 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2050 } else {
2051 /* We always try to set extended attribute into inode first*/
2052 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2053 if (!ret && !xbs.not_found) {
2054 /*
2055 * If succeed and that extended attribute existing in
2056 * external block, then we will remove it.
2057 */
2058 xi.value = NULL;
2059 xi.value_len = 0;
2060 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2061 } else if (ret == -ENOSPC) {
2062 if (di->i_xattr_loc && !xbs.xattr_bh) {
2063 ret = ocfs2_xattr_block_find(inode, name_index,
2064 name, &xbs);
2065 if (ret)
2066 goto cleanup;
2067 }
2068 /*
2069 * If no space in inode, we will set extended attribute
2070 * into external block.
2071 */
2072 ret = ocfs2_xattr_block_set(inode, &xi, &xbs);
2073 if (ret)
2074 goto cleanup;
2075 if (!xis.not_found) {
2076 /*
2077 * If succeed and that extended attribute
2078 * existing in inode, we will remove it.
2079 */
2080 xi.value = NULL;
2081 xi.value_len = 0;
2082 ret = ocfs2_xattr_ibody_set(inode, &xi, &xis);
2083 }
2084 }
2085 }
2086cleanup:
2087 up_write(&OCFS2_I(inode)->ip_xattr_sem);
2088 ocfs2_inode_unlock(inode, 1);
2089 brelse(di_bh);
2090 brelse(xbs.xattr_bh);
2091 for (i = 0; i < blk_per_bucket; i++)
2092 brelse(xbs.bucket.bhs[i]);
2093
2094 return ret;
2095}
2096
2097/*
2098 * Find the xattr extent rec which may contains name_hash.
2099 * e_cpos will be the first name hash of the xattr rec.
2100 * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
2101 */
2102static int ocfs2_xattr_get_rec(struct inode *inode,
2103 u32 name_hash,
2104 u64 *p_blkno,
2105 u32 *e_cpos,
2106 u32 *num_clusters,
2107 struct ocfs2_extent_list *el)
2108{
2109 int ret = 0, i;
2110 struct buffer_head *eb_bh = NULL;
2111 struct ocfs2_extent_block *eb;
2112 struct ocfs2_extent_rec *rec = NULL;
2113 u64 e_blkno = 0;
2114
2115 if (el->l_tree_depth) {
2116 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh);
2117 if (ret) {
2118 mlog_errno(ret);
2119 goto out;
2120 }
2121
2122 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2123 el = &eb->h_list;
2124
2125 if (el->l_tree_depth) {
2126 ocfs2_error(inode->i_sb,
2127 "Inode %lu has non zero tree depth in "
2128 "xattr tree block %llu\n", inode->i_ino,
2129 (unsigned long long)eb_bh->b_blocknr);
2130 ret = -EROFS;
2131 goto out;
2132 }
2133 }
2134
2135 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
2136 rec = &el->l_recs[i];
2137
2138 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
2139 e_blkno = le64_to_cpu(rec->e_blkno);
2140 break;
2141 }
2142 }
2143
2144 if (!e_blkno) {
2145 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
2146 "record (%u, %u, 0) in xattr", inode->i_ino,
2147 le32_to_cpu(rec->e_cpos),
2148 ocfs2_rec_clusters(el, rec));
2149 ret = -EROFS;
2150 goto out;
2151 }
2152
2153 *p_blkno = le64_to_cpu(rec->e_blkno);
2154 *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
2155 if (e_cpos)
2156 *e_cpos = le32_to_cpu(rec->e_cpos);
2157out:
2158 brelse(eb_bh);
2159 return ret;
2160}
2161
2162typedef int (xattr_bucket_func)(struct inode *inode,
2163 struct ocfs2_xattr_bucket *bucket,
2164 void *para);
2165
2166static int ocfs2_find_xe_in_bucket(struct inode *inode,
2167 struct buffer_head *header_bh,
2168 int name_index,
2169 const char *name,
2170 u32 name_hash,
2171 u16 *xe_index,
2172 int *found)
2173{
2174 int i, ret = 0, cmp = 1, block_off, new_offset;
2175 struct ocfs2_xattr_header *xh =
2176 (struct ocfs2_xattr_header *)header_bh->b_data;
2177 size_t name_len = strlen(name);
2178 struct ocfs2_xattr_entry *xe = NULL;
2179 struct buffer_head *name_bh = NULL;
2180 char *xe_name;
2181
2182 /*
2183 * We don't use binary search in the bucket because there
2184 * may be multiple entries with the same name hash.
2185 */
2186 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
2187 xe = &xh->xh_entries[i];
2188
2189 if (name_hash > le32_to_cpu(xe->xe_name_hash))
2190 continue;
2191 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
2192 break;
2193
2194 cmp = name_index - ocfs2_xattr_get_type(xe);
2195 if (!cmp)
2196 cmp = name_len - xe->xe_name_len;
2197 if (cmp)
2198 continue;
2199
2200 ret = ocfs2_xattr_bucket_get_name_value(inode,
2201 xh,
2202 i,
2203 &block_off,
2204 &new_offset);
2205 if (ret) {
2206 mlog_errno(ret);
2207 break;
2208 }
2209
2210 ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off,
2211 &name_bh);
2212 if (ret) {
2213 mlog_errno(ret);
2214 break;
2215 }
2216 xe_name = name_bh->b_data + new_offset;
2217
2218 cmp = memcmp(name, xe_name, name_len);
2219 brelse(name_bh);
2220 name_bh = NULL;
2221
2222 if (cmp == 0) {
2223 *xe_index = i;
2224 *found = 1;
2225 ret = 0;
2226 break;
2227 }
2228 }
2229
2230 return ret;
2231}
2232
2233/*
2234 * Find the specified xattr entry in a series of buckets.
2235 * This series start from p_blkno and last for num_clusters.
2236 * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
2237 * the num of the valid buckets.
2238 *
2239 * Return the buffer_head this xattr should reside in. And if the xattr's
2240 * hash is in the gap of 2 buckets, return the lower bucket.
2241 */
2242static int ocfs2_xattr_bucket_find(struct inode *inode,
2243 int name_index,
2244 const char *name,
2245 u32 name_hash,
2246 u64 p_blkno,
2247 u32 first_hash,
2248 u32 num_clusters,
2249 struct ocfs2_xattr_search *xs)
2250{
2251 int ret, found = 0;
2252 struct buffer_head *bh = NULL;
2253 struct buffer_head *lower_bh = NULL;
2254 struct ocfs2_xattr_header *xh = NULL;
2255 struct ocfs2_xattr_entry *xe = NULL;
2256 u16 index = 0;
2257 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2258 int low_bucket = 0, bucket, high_bucket;
2259 u32 last_hash;
2260 u64 blkno;
2261
2262 ret = ocfs2_read_block(inode, p_blkno, &bh);
2263 if (ret) {
2264 mlog_errno(ret);
2265 goto out;
2266 }
2267
2268 xh = (struct ocfs2_xattr_header *)bh->b_data;
2269 high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
2270
2271 while (low_bucket <= high_bucket) {
2272 brelse(bh);
2273 bh = NULL;
2274 bucket = (low_bucket + high_bucket) / 2;
2275
2276 blkno = p_blkno + bucket * blk_per_bucket;
2277
2278 ret = ocfs2_read_block(inode, blkno, &bh);
2279 if (ret) {
2280 mlog_errno(ret);
2281 goto out;
2282 }
2283
2284 xh = (struct ocfs2_xattr_header *)bh->b_data;
2285 xe = &xh->xh_entries[0];
2286 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
2287 high_bucket = bucket - 1;
2288 continue;
2289 }
2290
2291 /*
2292 * Check whether the hash of the last entry in our
2293 * bucket is larger than the search one. for an empty
2294 * bucket, the last one is also the first one.
2295 */
2296 if (xh->xh_count)
2297 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
2298
2299 last_hash = le32_to_cpu(xe->xe_name_hash);
2300
2301 /* record lower_bh which may be the insert place. */
2302 brelse(lower_bh);
2303 lower_bh = bh;
2304 bh = NULL;
2305
2306 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
2307 low_bucket = bucket + 1;
2308 continue;
2309 }
2310
2311 /* the searched xattr should reside in this bucket if exists. */
2312 ret = ocfs2_find_xe_in_bucket(inode, lower_bh,
2313 name_index, name, name_hash,
2314 &index, &found);
2315 if (ret) {
2316 mlog_errno(ret);
2317 goto out;
2318 }
2319 break;
2320 }
2321
2322 /*
2323 * Record the bucket we have found.
2324 * When the xattr's hash value is in the gap of 2 buckets, we will
2325 * always set it to the previous bucket.
2326 */
2327 if (!lower_bh) {
2328 /*
2329 * We can't find any bucket whose first name_hash is less
2330 * than the find name_hash.
2331 */
2332 BUG_ON(bh->b_blocknr != p_blkno);
2333 lower_bh = bh;
2334 bh = NULL;
2335 }
2336 xs->bucket.bhs[0] = lower_bh;
2337 xs->bucket.xh = (struct ocfs2_xattr_header *)
2338 xs->bucket.bhs[0]->b_data;
2339 lower_bh = NULL;
2340
2341 xs->header = xs->bucket.xh;
2342 xs->base = xs->bucket.bhs[0]->b_data;
2343 xs->end = xs->base + inode->i_sb->s_blocksize;
2344
2345 if (found) {
2346 /*
2347 * If we have found the xattr enty, read all the blocks in
2348 * this bucket.
2349 */
2350 ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1,
2351 blk_per_bucket - 1, &xs->bucket.bhs[1],
2352 OCFS2_BH_CACHED);
2353 if (ret) {
2354 mlog_errno(ret);
2355 goto out;
2356 }
2357
2358 xs->here = &xs->header->xh_entries[index];
2359 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
2360 (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index);
2361 } else
2362 ret = -ENODATA;
2363
2364out:
2365 brelse(bh);
2366 brelse(lower_bh);
2367 return ret;
2368}
2369
2370static int ocfs2_xattr_index_block_find(struct inode *inode,
2371 struct buffer_head *root_bh,
2372 int name_index,
2373 const char *name,
2374 struct ocfs2_xattr_search *xs)
2375{
2376 int ret;
2377 struct ocfs2_xattr_block *xb =
2378 (struct ocfs2_xattr_block *)root_bh->b_data;
2379 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
2380 struct ocfs2_extent_list *el = &xb_root->xt_list;
2381 u64 p_blkno = 0;
2382 u32 first_hash, num_clusters = 0;
2383 u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
2384
2385 if (le16_to_cpu(el->l_next_free_rec) == 0)
2386 return -ENODATA;
2387
2388 mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
2389 name, name_hash, name_index);
2390
2391 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
2392 &num_clusters, el);
2393 if (ret) {
2394 mlog_errno(ret);
2395 goto out;
2396 }
2397
2398 BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
2399
2400 mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
2401 "in the rec is %u\n", num_clusters, p_blkno, first_hash);
2402
2403 ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
2404 p_blkno, first_hash, num_clusters, xs);
2405
2406out:
2407 return ret;
2408}
2409
2410static int ocfs2_iterate_xattr_buckets(struct inode *inode,
2411 u64 blkno,
2412 u32 clusters,
2413 xattr_bucket_func *func,
2414 void *para)
2415{
2416 int i, j, ret = 0;
2417 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2418 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
2419 u32 num_buckets = clusters * bpc;
2420 struct ocfs2_xattr_bucket bucket;
2421
2422 memset(&bucket, 0, sizeof(bucket));
2423
2424 mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
2425 clusters, blkno);
2426
2427 for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) {
2428 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket,
2429 bucket.bhs, OCFS2_BH_CACHED);
2430 if (ret) {
2431 mlog_errno(ret);
2432 goto out;
2433 }
2434
2435 bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data;
2436 /*
2437 * The real bucket num in this series of blocks is stored
2438 * in the 1st bucket.
2439 */
2440 if (i == 0)
2441 num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets);
2442
2443 mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno,
2444 le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash));
2445 if (func) {
2446 ret = func(inode, &bucket, para);
2447 if (ret) {
2448 mlog_errno(ret);
2449 break;
2450 }
2451 }
2452
2453 for (j = 0; j < blk_per_bucket; j++)
2454 brelse(bucket.bhs[j]);
2455 memset(&bucket, 0, sizeof(bucket));
2456 }
2457
2458out:
2459 for (j = 0; j < blk_per_bucket; j++)
2460 brelse(bucket.bhs[j]);
2461
2462 return ret;
2463}
2464
2465struct ocfs2_xattr_tree_list {
2466 char *buffer;
2467 size_t buffer_size;
2468 size_t result;
2469};
2470
2471static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
2472 struct ocfs2_xattr_header *xh,
2473 int index,
2474 int *block_off,
2475 int *new_offset)
2476{
2477 u16 name_offset;
2478
2479 if (index < 0 || index >= le16_to_cpu(xh->xh_count))
2480 return -EINVAL;
2481
2482 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
2483
2484 *block_off = name_offset >> inode->i_sb->s_blocksize_bits;
2485 *new_offset = name_offset % inode->i_sb->s_blocksize;
2486
2487 return 0;
2488}
2489
2490static int ocfs2_list_xattr_bucket(struct inode *inode,
2491 struct ocfs2_xattr_bucket *bucket,
2492 void *para)
2493{
2494 int ret = 0, type;
2495 struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
2496 int i, block_off, new_offset;
2497 const char *prefix, *name;
2498
2499 for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) {
2500 struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i];
2501 type = ocfs2_xattr_get_type(entry);
2502 prefix = ocfs2_xattr_prefix(type);
2503
2504 if (prefix) {
2505 ret = ocfs2_xattr_bucket_get_name_value(inode,
2506 bucket->xh,
2507 i,
2508 &block_off,
2509 &new_offset);
2510 if (ret)
2511 break;
2512
2513 name = (const char *)bucket->bhs[block_off]->b_data +
2514 new_offset;
2515 ret = ocfs2_xattr_list_entry(xl->buffer,
2516 xl->buffer_size,
2517 &xl->result,
2518 prefix, name,
2519 entry->xe_name_len);
2520 if (ret)
2521 break;
2522 }
2523 }
2524
2525 return ret;
2526}
2527
2528static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
2529 struct ocfs2_xattr_tree_root *xt,
2530 char *buffer,
2531 size_t buffer_size)
2532{
2533 struct ocfs2_extent_list *el = &xt->xt_list;
2534 int ret = 0;
2535 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
2536 u64 p_blkno = 0;
2537 struct ocfs2_xattr_tree_list xl = {
2538 .buffer = buffer,
2539 .buffer_size = buffer_size,
2540 .result = 0,
2541 };
2542
2543 if (le16_to_cpu(el->l_next_free_rec) == 0)
2544 return 0;
2545
2546 while (name_hash > 0) {
2547 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
2548 &e_cpos, &num_clusters, el);
2549 if (ret) {
2550 mlog_errno(ret);
2551 goto out;
2552 }
2553
2554 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
2555 ocfs2_list_xattr_bucket,
2556 &xl);
2557 if (ret) {
2558 mlog_errno(ret);
2559 goto out;
2560 }
2561
2562 if (e_cpos == 0)
2563 break;
2564
2565 name_hash = e_cpos - 1;
2566 }
2567
2568 ret = xl.result;
2569out:
2570 return ret;
2571}
2572
2573static int cmp_xe(const void *a, const void *b)
2574{
2575 const struct ocfs2_xattr_entry *l = a, *r = b;
2576 u32 l_hash = le32_to_cpu(l->xe_name_hash);
2577 u32 r_hash = le32_to_cpu(r->xe_name_hash);
2578
2579 if (l_hash > r_hash)
2580 return 1;
2581 if (l_hash < r_hash)
2582 return -1;
2583 return 0;
2584}
2585
2586static void swap_xe(void *a, void *b, int size)
2587{
2588 struct ocfs2_xattr_entry *l = a, *r = b, tmp;
2589
2590 tmp = *l;
2591 memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
2592 memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
2593}
2594
2595/*
2596 * When the ocfs2_xattr_block is filled up, new bucket will be created
2597 * and all the xattr entries will be moved to the new bucket.
2598 * Note: we need to sort the entries since they are not saved in order
2599 * in the ocfs2_xattr_block.
2600 */
2601static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
2602 struct buffer_head *xb_bh,
2603 struct buffer_head *xh_bh,
2604 struct buffer_head *data_bh)
2605{
2606 int i, blocksize = inode->i_sb->s_blocksize;
2607 u16 offset, size, off_change;
2608 struct ocfs2_xattr_entry *xe;
2609 struct ocfs2_xattr_block *xb =
2610 (struct ocfs2_xattr_block *)xb_bh->b_data;
2611 struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
2612 struct ocfs2_xattr_header *xh =
2613 (struct ocfs2_xattr_header *)xh_bh->b_data;
2614 u16 count = le16_to_cpu(xb_xh->xh_count);
2615 char *target = xh_bh->b_data, *src = xb_bh->b_data;
2616
2617 mlog(0, "cp xattr from block %llu to bucket %llu\n",
2618 (unsigned long long)xb_bh->b_blocknr,
2619 (unsigned long long)xh_bh->b_blocknr);
2620
2621 memset(xh_bh->b_data, 0, blocksize);
2622 if (data_bh)
2623 memset(data_bh->b_data, 0, blocksize);
2624 /*
2625 * Since the xe_name_offset is based on ocfs2_xattr_header,
2626 * there is a offset change corresponding to the change of
2627 * ocfs2_xattr_header's position.
2628 */
2629 off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2630 xe = &xb_xh->xh_entries[count - 1];
2631 offset = le16_to_cpu(xe->xe_name_offset) + off_change;
2632 size = blocksize - offset;
2633
2634 /* copy all the names and values. */
2635 if (data_bh)
2636 target = data_bh->b_data;
2637 memcpy(target + offset, src + offset, size);
2638
2639 /* Init new header now. */
2640 xh->xh_count = xb_xh->xh_count;
2641 xh->xh_num_buckets = cpu_to_le16(1);
2642 xh->xh_name_value_len = cpu_to_le16(size);
2643 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
2644
2645 /* copy all the entries. */
2646 target = xh_bh->b_data;
2647 offset = offsetof(struct ocfs2_xattr_header, xh_entries);
2648 size = count * sizeof(struct ocfs2_xattr_entry);
2649 memcpy(target + offset, (char *)xb_xh + offset, size);
2650
2651 /* Change the xe offset for all the xe because of the move. */
2652 off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
2653 offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
2654 for (i = 0; i < count; i++)
2655 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
2656
2657 mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
2658 offset, size, off_change);
2659
2660 sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
2661 cmp_xe, swap_xe);
2662}
2663
2664/*
2665 * After we move xattr from block to index btree, we have to
2666 * update ocfs2_xattr_search to the new xe and base.
2667 *
2668 * When the entry is in xattr block, xattr_bh indicates the storage place.
2669 * While if the entry is in index b-tree, "bucket" indicates the
2670 * real place of the xattr.
2671 */
2672static int ocfs2_xattr_update_xattr_search(struct inode *inode,
2673 struct ocfs2_xattr_search *xs,
2674 struct buffer_head *old_bh,
2675 struct buffer_head *new_bh)
2676{
2677 int ret = 0;
2678 char *buf = old_bh->b_data;
2679 struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
2680 struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
2681 int i, blocksize = inode->i_sb->s_blocksize;
2682 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2683
2684 xs->bucket.bhs[0] = new_bh;
2685 get_bh(new_bh);
2686 xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data;
2687 xs->header = xs->bucket.xh;
2688
2689 xs->base = new_bh->b_data;
2690 xs->end = xs->base + inode->i_sb->s_blocksize;
2691
2692 if (!xs->not_found) {
2693 if (OCFS2_XATTR_BUCKET_SIZE != blocksize) {
2694 ret = ocfs2_read_blocks(inode,
2695 xs->bucket.bhs[0]->b_blocknr + 1,
2696 blk_per_bucket - 1, &xs->bucket.bhs[1],
2697 OCFS2_BH_CACHED);
2698 if (ret) {
2699 mlog_errno(ret);
2700 return ret;
2701 }
2702
2703 i = xs->here - old_xh->xh_entries;
2704 xs->here = &xs->header->xh_entries[i];
2705 }
2706 }
2707
2708 return ret;
2709}
2710
2711static int ocfs2_xattr_create_index_block(struct inode *inode,
2712 struct ocfs2_xattr_search *xs)
2713{
2714 int ret, credits = OCFS2_SUBALLOC_ALLOC;
2715 u32 bit_off, len;
2716 u64 blkno;
2717 handle_t *handle;
2718 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2719 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2720 struct ocfs2_alloc_context *data_ac;
2721 struct buffer_head *xh_bh = NULL, *data_bh = NULL;
2722 struct buffer_head *xb_bh = xs->xattr_bh;
2723 struct ocfs2_xattr_block *xb =
2724 (struct ocfs2_xattr_block *)xb_bh->b_data;
2725 struct ocfs2_xattr_tree_root *xr;
2726 u16 xb_flags = le16_to_cpu(xb->xb_flags);
2727 u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2728
2729 mlog(0, "create xattr index block for %llu\n",
2730 (unsigned long long)xb_bh->b_blocknr);
2731
2732 BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
2733
2734 ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
2735 if (ret) {
2736 mlog_errno(ret);
2737 goto out;
2738 }
2739
2740 /*
2741 * XXX:
2742 * We can use this lock for now, and maybe move to a dedicated mutex
2743 * if performance becomes a problem later.
2744 */
2745 down_write(&oi->ip_alloc_sem);
2746
2747 /*
2748 * 3 more credits, one for xattr block update, one for the 1st block
2749 * of the new xattr bucket and one for the value/data.
2750 */
2751 credits += 3;
2752 handle = ocfs2_start_trans(osb, credits);
2753 if (IS_ERR(handle)) {
2754 ret = PTR_ERR(handle);
2755 mlog_errno(ret);
2756 goto out_sem;
2757 }
2758
2759 ret = ocfs2_journal_access(handle, inode, xb_bh,
2760 OCFS2_JOURNAL_ACCESS_WRITE);
2761 if (ret) {
2762 mlog_errno(ret);
2763 goto out_commit;
2764 }
2765
2766 ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len);
2767 if (ret) {
2768 mlog_errno(ret);
2769 goto out_commit;
2770 }
2771
2772 /*
2773 * The bucket may spread in many blocks, and
2774 * we will only touch the 1st block and the last block
2775 * in the whole bucket(one for entry and one for data).
2776 */
2777 blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
2778
2779 mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno);
2780
2781 xh_bh = sb_getblk(inode->i_sb, blkno);
2782 if (!xh_bh) {
2783 ret = -EIO;
2784 mlog_errno(ret);
2785 goto out_commit;
2786 }
2787
2788 ocfs2_set_new_buffer_uptodate(inode, xh_bh);
2789
2790 ret = ocfs2_journal_access(handle, inode, xh_bh,
2791 OCFS2_JOURNAL_ACCESS_CREATE);
2792 if (ret) {
2793 mlog_errno(ret);
2794 goto out_commit;
2795 }
2796
2797 if (bpb > 1) {
2798 data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1);
2799 if (!data_bh) {
2800 ret = -EIO;
2801 mlog_errno(ret);
2802 goto out_commit;
2803 }
2804
2805 ocfs2_set_new_buffer_uptodate(inode, data_bh);
2806
2807 ret = ocfs2_journal_access(handle, inode, data_bh,
2808 OCFS2_JOURNAL_ACCESS_CREATE);
2809 if (ret) {
2810 mlog_errno(ret);
2811 goto out_commit;
2812 }
2813 }
2814
2815 ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh);
2816
2817 ocfs2_journal_dirty(handle, xh_bh);
2818 if (data_bh)
2819 ocfs2_journal_dirty(handle, data_bh);
2820
2821 ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh);
2822
2823 /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
2824 memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
2825 offsetof(struct ocfs2_xattr_block, xb_attrs));
2826
2827 xr = &xb->xb_attrs.xb_root;
2828 xr->xt_clusters = cpu_to_le32(1);
2829 xr->xt_last_eb_blk = 0;
2830 xr->xt_list.l_tree_depth = 0;
2831 xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
2832 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2833
2834 xr->xt_list.l_recs[0].e_cpos = 0;
2835 xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
2836 xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
2837
2838 xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
2839
2840 ret = ocfs2_journal_dirty(handle, xb_bh);
2841 if (ret) {
2842 mlog_errno(ret);
2843 goto out_commit;
2844 }
2845
2846out_commit:
2847 ocfs2_commit_trans(osb, handle);
2848
2849out_sem:
2850 up_write(&oi->ip_alloc_sem);
2851
2852out:
2853 if (data_ac)
2854 ocfs2_free_alloc_context(data_ac);
2855
2856 brelse(xh_bh);
2857 brelse(data_bh);
2858
2859 return ret;
2860}
2861
2862static int cmp_xe_offset(const void *a, const void *b)
2863{
2864 const struct ocfs2_xattr_entry *l = a, *r = b;
2865 u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
2866 u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
2867
2868 if (l_name_offset < r_name_offset)
2869 return 1;
2870 if (l_name_offset > r_name_offset)
2871 return -1;
2872 return 0;
2873}
2874
2875/*
2876 * defrag a xattr bucket if we find that the bucket has some
2877 * holes beteen name/value pairs.
2878 * We will move all the name/value pairs to the end of the bucket
2879 * so that we can spare some space for insertion.
2880 */
2881static int ocfs2_defrag_xattr_bucket(struct inode *inode,
2882 struct ocfs2_xattr_bucket *bucket)
2883{
2884 int ret, i;
2885 size_t end, offset, len, value_len;
2886 struct ocfs2_xattr_header *xh;
2887 char *entries, *buf, *bucket_buf = NULL;
2888 u64 blkno = bucket->bhs[0]->b_blocknr;
2889 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2890 u16 xh_free_start;
2891 size_t blocksize = inode->i_sb->s_blocksize;
2892 handle_t *handle;
2893 struct buffer_head **bhs;
2894 struct ocfs2_xattr_entry *xe;
2895
2896 bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
2897 GFP_NOFS);
2898 if (!bhs)
2899 return -ENOMEM;
2900
2901 ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs,
2902 OCFS2_BH_CACHED);
2903 if (ret)
2904 goto out;
2905
2906 /*
2907 * In order to make the operation more efficient and generic,
2908 * we copy all the blocks into a contiguous memory and do the
2909 * defragment there, so if anything is error, we will not touch
2910 * the real block.
2911 */
2912 bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
2913 if (!bucket_buf) {
2914 ret = -EIO;
2915 goto out;
2916 }
2917
2918 buf = bucket_buf;
2919 for (i = 0; i < blk_per_bucket; i++, buf += blocksize)
2920 memcpy(buf, bhs[i]->b_data, blocksize);
2921
2922 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket);
2923 if (IS_ERR(handle)) {
2924 ret = PTR_ERR(handle);
2925 handle = NULL;
2926 mlog_errno(ret);
2927 goto out;
2928 }
2929
2930 for (i = 0; i < blk_per_bucket; i++) {
2931 ret = ocfs2_journal_access(handle, inode, bhs[i],
2932 OCFS2_JOURNAL_ACCESS_WRITE);
2933 if (ret < 0) {
2934 mlog_errno(ret);
2935 goto commit;
2936 }
2937 }
2938
2939 xh = (struct ocfs2_xattr_header *)bucket_buf;
2940 entries = (char *)xh->xh_entries;
2941 xh_free_start = le16_to_cpu(xh->xh_free_start);
2942
2943 mlog(0, "adjust xattr bucket in %llu, count = %u, "
2944 "xh_free_start = %u, xh_name_value_len = %u.\n",
2945 blkno, le16_to_cpu(xh->xh_count), xh_free_start,
2946 le16_to_cpu(xh->xh_name_value_len));
2947
2948 /*
2949 * sort all the entries by their offset.
2950 * the largest will be the first, so that we can
2951 * move them to the end one by one.
2952 */
2953 sort(entries, le16_to_cpu(xh->xh_count),
2954 sizeof(struct ocfs2_xattr_entry),
2955 cmp_xe_offset, swap_xe);
2956
2957 /* Move all name/values to the end of the bucket. */
2958 xe = xh->xh_entries;
2959 end = OCFS2_XATTR_BUCKET_SIZE;
2960 for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
2961 offset = le16_to_cpu(xe->xe_name_offset);
2962 if (ocfs2_xattr_is_local(xe))
2963 value_len = OCFS2_XATTR_SIZE(
2964 le64_to_cpu(xe->xe_value_size));
2965 else
2966 value_len = OCFS2_XATTR_ROOT_SIZE;
2967 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
2968
2969 /*
2970 * We must make sure that the name/value pair
2971 * exist in the same block. So adjust end to
2972 * the previous block end if needed.
2973 */
2974 if (((end - len) / blocksize !=
2975 (end - 1) / blocksize))
2976 end = end - end % blocksize;
2977
2978 if (end > offset + len) {
2979 memmove(bucket_buf + end - len,
2980 bucket_buf + offset, len);
2981 xe->xe_name_offset = cpu_to_le16(end - len);
2982 }
2983
2984 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
2985 "bucket %llu\n", (unsigned long long)blkno);
2986
2987 end -= len;
2988 }
2989
2990 mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
2991 "bucket %llu\n", (unsigned long long)blkno);
2992
2993 if (xh_free_start == end)
2994 goto commit;
2995
2996 memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
2997 xh->xh_free_start = cpu_to_le16(end);
2998
2999 /* sort the entries by their name_hash. */
3000 sort(entries, le16_to_cpu(xh->xh_count),
3001 sizeof(struct ocfs2_xattr_entry),
3002 cmp_xe, swap_xe);
3003
3004 buf = bucket_buf;
3005 for (i = 0; i < blk_per_bucket; i++, buf += blocksize) {
3006 memcpy(bhs[i]->b_data, buf, blocksize);
3007 ocfs2_journal_dirty(handle, bhs[i]);
3008 }
3009
3010commit:
3011 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
3012out:
3013
3014 if (bhs) {
3015 for (i = 0; i < blk_per_bucket; i++)
3016 brelse(bhs[i]);
3017 }
3018 kfree(bhs);
3019
3020 kfree(bucket_buf);
3021 return ret;
3022}
3023
3024/*
3025 * Move half nums of the xattr bucket in the previous cluster to this new
3026 * cluster. We only touch the last cluster of the previous extend record.
3027 *
3028 * first_bh is the first buffer_head of a series of bucket in the same
3029 * extent rec and header_bh is the header of one bucket in this cluster.
3030 * They will be updated if we move the data header_bh contains to the new
3031 * cluster. first_hash will be set as the 1st xe's name_hash of the new cluster.
3032 */
3033static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
3034 handle_t *handle,
3035 struct buffer_head **first_bh,
3036 struct buffer_head **header_bh,
3037 u64 new_blkno,
3038 u64 prev_blkno,
3039 u32 num_clusters,
3040 u32 *first_hash)
3041{
3042 int i, ret, credits;
3043 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3044 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3045 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3046 int blocksize = inode->i_sb->s_blocksize;
3047 struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL;
3048 struct ocfs2_xattr_header *new_xh;
3049 struct ocfs2_xattr_header *xh =
3050 (struct ocfs2_xattr_header *)((*first_bh)->b_data);
3051
3052 BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets);
3053 BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize);
3054
3055 prev_bh = *first_bh;
3056 get_bh(prev_bh);
3057 xh = (struct ocfs2_xattr_header *)prev_bh->b_data;
3058
3059 prev_blkno += (num_clusters - 1) * bpc + bpc / 2;
3060
3061 mlog(0, "move half of xattrs in cluster %llu to %llu\n",
3062 prev_blkno, new_blkno);
3063
3064 /*
3065 * We need to update the 1st half of the new cluster and
3066 * 1 more for the update of the 1st bucket of the previous
3067 * extent record.
3068 */
3069 credits = bpc / 2 + 1;
3070 ret = ocfs2_extend_trans(handle, credits);
3071 if (ret) {
3072 mlog_errno(ret);
3073 goto out;
3074 }
3075
3076 ret = ocfs2_journal_access(handle, inode, prev_bh,
3077 OCFS2_JOURNAL_ACCESS_WRITE);
3078 if (ret) {
3079 mlog_errno(ret);
3080 goto out;
3081 }
3082
3083 for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) {
3084 old_bh = new_bh = NULL;
3085 new_bh = sb_getblk(inode->i_sb, new_blkno);
3086 if (!new_bh) {
3087 ret = -EIO;
3088 mlog_errno(ret);
3089 goto out;
3090 }
3091
3092 ocfs2_set_new_buffer_uptodate(inode, new_bh);
3093
3094 ret = ocfs2_journal_access(handle, inode, new_bh,
3095 OCFS2_JOURNAL_ACCESS_CREATE);
3096 if (ret < 0) {
3097 mlog_errno(ret);
3098 brelse(new_bh);
3099 goto out;
3100 }
3101
3102 ret = ocfs2_read_block(inode, prev_blkno, &old_bh);
3103 if (ret < 0) {
3104 mlog_errno(ret);
3105 brelse(new_bh);
3106 goto out;
3107 }
3108
3109 memcpy(new_bh->b_data, old_bh->b_data, blocksize);
3110
3111 if (i == 0) {
3112 new_xh = (struct ocfs2_xattr_header *)new_bh->b_data;
3113 new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2);
3114
3115 if (first_hash)
3116 *first_hash = le32_to_cpu(
3117 new_xh->xh_entries[0].xe_name_hash);
3118 new_first_bh = new_bh;
3119 get_bh(new_first_bh);
3120 }
3121
3122 ocfs2_journal_dirty(handle, new_bh);
3123
3124 if (*header_bh == old_bh) {
3125 brelse(*header_bh);
3126 *header_bh = new_bh;
3127 get_bh(*header_bh);
3128
3129 brelse(*first_bh);
3130 *first_bh = new_first_bh;
3131 get_bh(*first_bh);
3132 }
3133 brelse(new_bh);
3134 brelse(old_bh);
3135 }
3136
3137 le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2));
3138
3139 ocfs2_journal_dirty(handle, prev_bh);
3140out:
3141 brelse(prev_bh);
3142 brelse(new_first_bh);
3143 return ret;
3144}
3145
3146static int ocfs2_read_xattr_bucket(struct inode *inode,
3147 u64 blkno,
3148 struct buffer_head **bhs,
3149 int new)
3150{
3151 int ret = 0;
3152 u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3153
3154 if (!new)
3155 return ocfs2_read_blocks(inode, blkno,
3156 blk_per_bucket, bhs,
3157 OCFS2_BH_CACHED);
3158
3159 for (i = 0; i < blk_per_bucket; i++) {
3160 bhs[i] = sb_getblk(inode->i_sb, blkno + i);
3161 if (bhs[i] == NULL) {
3162 ret = -EIO;
3163 mlog_errno(ret);
3164 break;
3165 }
3166 ocfs2_set_new_buffer_uptodate(inode, bhs[i]);
3167 }
3168
3169 return ret;
3170}
3171
3172/*
3173 * Move half num of the xattrs in old bucket(blk) to new bucket(new_blk).
3174 * first_hash will record the 1st hash of the new bucket.
3175 */
3176static int ocfs2_half_xattr_bucket(struct inode *inode,
3177 handle_t *handle,
3178 u64 blk,
3179 u64 new_blk,
3180 u32 *first_hash,
3181 int new_bucket_head)
3182{
3183 int ret, i;
3184 u16 count, start, len, name_value_len, xe_len, name_offset;
3185 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3186 struct buffer_head **s_bhs, **t_bhs = NULL;
3187 struct ocfs2_xattr_header *xh;
3188 struct ocfs2_xattr_entry *xe;
3189 int blocksize = inode->i_sb->s_blocksize;
3190
3191 mlog(0, "move half of xattrs from bucket %llu to %llu\n",
3192 blk, new_blk);
3193
3194 s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3195 if (!s_bhs)
3196 return -ENOMEM;
3197
3198 ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0);
3199 if (ret) {
3200 mlog_errno(ret);
3201 goto out;
3202 }
3203
3204 ret = ocfs2_journal_access(handle, inode, s_bhs[0],
3205 OCFS2_JOURNAL_ACCESS_WRITE);
3206 if (ret) {
3207 mlog_errno(ret);
3208 goto out;
3209 }
3210
3211 t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS);
3212 if (!t_bhs) {
3213 ret = -ENOMEM;
3214 goto out;
3215 }
3216
3217 ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head);
3218 if (ret) {
3219 mlog_errno(ret);
3220 goto out;
3221 }
3222
3223 for (i = 0; i < blk_per_bucket; i++) {
3224 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3225 OCFS2_JOURNAL_ACCESS_CREATE);
3226 if (ret) {
3227 mlog_errno(ret);
3228 goto out;
3229 }
3230 }
3231
3232 /* copy the whole bucket to the new first. */
3233 for (i = 0; i < blk_per_bucket; i++)
3234 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3235
3236 /* update the new bucket. */
3237 xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data;
3238 count = le16_to_cpu(xh->xh_count);
3239 start = count / 2;
3240
3241 /*
3242 * Calculate the total name/value len and xh_free_start for
3243 * the old bucket first.
3244 */
3245 name_offset = OCFS2_XATTR_BUCKET_SIZE;
3246 name_value_len = 0;
3247 for (i = 0; i < start; i++) {
3248 xe = &xh->xh_entries[i];
3249 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3250 if (ocfs2_xattr_is_local(xe))
3251 xe_len +=
3252 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3253 else
3254 xe_len += OCFS2_XATTR_ROOT_SIZE;
3255 name_value_len += xe_len;
3256 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
3257 name_offset = le16_to_cpu(xe->xe_name_offset);
3258 }
3259
3260 /*
3261 * Now begin the modification to the new bucket.
3262 *
3263 * In the new bucket, We just move the xattr entry to the beginning
3264 * and don't touch the name/value. So there will be some holes in the
3265 * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
3266 * called.
3267 */
3268 xe = &xh->xh_entries[start];
3269 len = sizeof(struct ocfs2_xattr_entry) * (count - start);
3270 mlog(0, "mv xattr entry len %d from %d to %d\n", len,
3271 (int)((char *)xe - (char *)xh),
3272 (int)((char *)xh->xh_entries - (char *)xh));
3273 memmove((char *)xh->xh_entries, (char *)xe, len);
3274 xe = &xh->xh_entries[count - start];
3275 len = sizeof(struct ocfs2_xattr_entry) * start;
3276 memset((char *)xe, 0, len);
3277
3278 le16_add_cpu(&xh->xh_count, -start);
3279 le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
3280
3281 /* Calculate xh_free_start for the new bucket. */
3282 xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3283 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3284 xe = &xh->xh_entries[i];
3285 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3286 if (ocfs2_xattr_is_local(xe))
3287 xe_len +=
3288 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3289 else
3290 xe_len += OCFS2_XATTR_ROOT_SIZE;
3291 if (le16_to_cpu(xe->xe_name_offset) <
3292 le16_to_cpu(xh->xh_free_start))
3293 xh->xh_free_start = xe->xe_name_offset;
3294 }
3295
3296 /* set xh->xh_num_buckets for the new xh. */
3297 if (new_bucket_head)
3298 xh->xh_num_buckets = cpu_to_le16(1);
3299 else
3300 xh->xh_num_buckets = 0;
3301
3302 for (i = 0; i < blk_per_bucket; i++) {
3303 ocfs2_journal_dirty(handle, t_bhs[i]);
3304 if (ret)
3305 mlog_errno(ret);
3306 }
3307
3308 /* store the first_hash of the new bucket. */
3309 if (first_hash)
3310 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3311
3312 /*
3313 * Now only update the 1st block of the old bucket.
3314 * Please note that the entry has been sorted already above.
3315 */
3316 xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data;
3317 memset(&xh->xh_entries[start], 0,
3318 sizeof(struct ocfs2_xattr_entry) * (count - start));
3319 xh->xh_count = cpu_to_le16(start);
3320 xh->xh_free_start = cpu_to_le16(name_offset);
3321 xh->xh_name_value_len = cpu_to_le16(name_value_len);
3322
3323 ocfs2_journal_dirty(handle, s_bhs[0]);
3324 if (ret)
3325 mlog_errno(ret);
3326
3327out:
3328 if (s_bhs) {
3329 for (i = 0; i < blk_per_bucket; i++)
3330 brelse(s_bhs[i]);
3331 }
3332 kfree(s_bhs);
3333
3334 if (t_bhs) {
3335 for (i = 0; i < blk_per_bucket; i++)
3336 brelse(t_bhs[i]);
3337 }
3338 kfree(t_bhs);
3339
3340 return ret;
3341}
3342
3343/*
3344 * Copy xattr from one bucket to another bucket.
3345 *
3346 * The caller must make sure that the journal transaction
3347 * has enough space for journaling.
3348 */
3349static int ocfs2_cp_xattr_bucket(struct inode *inode,
3350 handle_t *handle,
3351 u64 s_blkno,
3352 u64 t_blkno,
3353 int t_is_new)
3354{
3355 int ret, i;
3356 int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3357 int blocksize = inode->i_sb->s_blocksize;
3358 struct buffer_head **s_bhs, **t_bhs = NULL;
3359
3360 BUG_ON(s_blkno == t_blkno);
3361
3362 mlog(0, "cp bucket %llu to %llu, target is %d\n",
3363 s_blkno, t_blkno, t_is_new);
3364
3365 s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3366 GFP_NOFS);
3367 if (!s_bhs)
3368 return -ENOMEM;
3369
3370 ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0);
3371 if (ret)
3372 goto out;
3373
3374 t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket,
3375 GFP_NOFS);
3376 if (!t_bhs) {
3377 ret = -ENOMEM;
3378 goto out;
3379 }
3380
3381 ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new);
3382 if (ret)
3383 goto out;
3384
3385 for (i = 0; i < blk_per_bucket; i++) {
3386 ret = ocfs2_journal_access(handle, inode, t_bhs[i],
3387 OCFS2_JOURNAL_ACCESS_WRITE);
3388 if (ret)
3389 goto out;
3390 }
3391
3392 for (i = 0; i < blk_per_bucket; i++) {
3393 memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize);
3394 ocfs2_journal_dirty(handle, t_bhs[i]);
3395 }
3396
3397out:
3398 if (s_bhs) {
3399 for (i = 0; i < blk_per_bucket; i++)
3400 brelse(s_bhs[i]);
3401 }
3402 kfree(s_bhs);
3403
3404 if (t_bhs) {
3405 for (i = 0; i < blk_per_bucket; i++)
3406 brelse(t_bhs[i]);
3407 }
3408 kfree(t_bhs);
3409
3410 return ret;
3411}
3412
3413/*
3414 * Copy one xattr cluster from src_blk to to_blk.
3415 * The to_blk will become the first bucket header of the cluster, so its
3416 * xh_num_buckets will be initialized as the bucket num in the cluster.
3417 */
3418static int ocfs2_cp_xattr_cluster(struct inode *inode,
3419 handle_t *handle,
3420 struct buffer_head *first_bh,
3421 u64 src_blk,
3422 u64 to_blk,
3423 u32 *first_hash)
3424{
3425 int i, ret, credits;
3426 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3427 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3428 int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
3429 struct buffer_head *bh = NULL;
3430 struct ocfs2_xattr_header *xh;
3431 u64 to_blk_start = to_blk;
3432
3433 mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk);
3434
3435 /*
3436 * We need to update the new cluster and 1 more for the update of
3437 * the 1st bucket of the previous extent rec.
3438 */
3439 credits = bpc + 1;
3440 ret = ocfs2_extend_trans(handle, credits);
3441 if (ret) {
3442 mlog_errno(ret);
3443 goto out;
3444 }
3445
3446 ret = ocfs2_journal_access(handle, inode, first_bh,
3447 OCFS2_JOURNAL_ACCESS_WRITE);
3448 if (ret) {
3449 mlog_errno(ret);
3450 goto out;
3451 }
3452
3453 for (i = 0; i < num_buckets; i++) {
3454 ret = ocfs2_cp_xattr_bucket(inode, handle,
3455 src_blk, to_blk, 1);
3456 if (ret) {
3457 mlog_errno(ret);
3458 goto out;
3459 }
3460
3461 src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3462 to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3463 }
3464
3465 /* update the old bucket header. */
3466 xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3467 le16_add_cpu(&xh->xh_num_buckets, -num_buckets);
3468
3469 ocfs2_journal_dirty(handle, first_bh);
3470
3471 /* update the new bucket header. */
3472 ret = ocfs2_read_block(inode, to_blk_start, &bh);
3473 if (ret < 0) {
3474 mlog_errno(ret);
3475 goto out;
3476 }
3477
3478 ret = ocfs2_journal_access(handle, inode, bh,
3479 OCFS2_JOURNAL_ACCESS_WRITE);
3480 if (ret) {
3481 mlog_errno(ret);
3482 goto out;
3483 }
3484
3485 xh = (struct ocfs2_xattr_header *)bh->b_data;
3486 xh->xh_num_buckets = cpu_to_le16(num_buckets);
3487
3488 ocfs2_journal_dirty(handle, bh);
3489
3490 if (first_hash)
3491 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3492out:
3493 brelse(bh);
3494 return ret;
3495}
3496
3497/*
3498 * Move half of the xattrs in this cluster to the new cluster.
3499 * This function should only be called when bucket size == cluster size.
3500 * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
3501 */
3502static int ocfs2_half_xattr_cluster(struct inode *inode,
3503 handle_t *handle,
3504 u64 prev_blk,
3505 u64 new_blk,
3506 u32 *first_hash)
3507{
3508 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3509 int ret, credits = 2 * blk_per_bucket;
3510
3511 BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
3512
3513 ret = ocfs2_extend_trans(handle, credits);
3514 if (ret) {
3515 mlog_errno(ret);
3516 return ret;
3517 }
3518
3519 /* Move half of the xattr in start_blk to the next bucket. */
3520 return ocfs2_half_xattr_bucket(inode, handle, prev_blk,
3521 new_blk, first_hash, 1);
3522}
3523
3524/*
3525 * Move some xattrs from the old cluster to the new one since they are not
3526 * contiguous in ocfs2 xattr tree.
3527 *
3528 * new_blk starts a new separate cluster, and we will move some xattrs from
3529 * prev_blk to it. v_start will be set as the first name hash value in this
3530 * new cluster so that it can be used as e_cpos during tree insertion and
3531 * don't collide with our original b-tree operations. first_bh and header_bh
3532 * will also be updated since they will be used in ocfs2_extend_xattr_bucket
3533 * to extend the insert bucket.
3534 *
3535 * The problem is how much xattr should we move to the new one and when should
3536 * we update first_bh and header_bh?
3537 * 1. If cluster size > bucket size, that means the previous cluster has more
3538 * than 1 bucket, so just move half nums of bucket into the new cluster and
3539 * update the first_bh and header_bh if the insert bucket has been moved
3540 * to the new cluster.
3541 * 2. If cluster_size == bucket_size:
3542 * a) If the previous extent rec has more than one cluster and the insert
3543 * place isn't in the last cluster, copy the entire last cluster to the
3544 * new one. This time, we don't need to upate the first_bh and header_bh
3545 * since they will not be moved into the new cluster.
3546 * b) Otherwise, move the bottom half of the xattrs in the last cluster into
3547 * the new one. And we set the extend flag to zero if the insert place is
3548 * moved into the new allocated cluster since no extend is needed.
3549 */
3550static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
3551 handle_t *handle,
3552 struct buffer_head **first_bh,
3553 struct buffer_head **header_bh,
3554 u64 new_blk,
3555 u64 prev_blk,
3556 u32 prev_clusters,
3557 u32 *v_start,
3558 int *extend)
3559{
3560 int ret = 0;
3561 int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3562
3563 mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
3564 prev_blk, prev_clusters, new_blk);
3565
3566 if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1)
3567 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
3568 handle,
3569 first_bh,
3570 header_bh,
3571 new_blk,
3572 prev_blk,
3573 prev_clusters,
3574 v_start);
3575 else {
3576 u64 last_blk = prev_blk + bpc * (prev_clusters - 1);
3577
3578 if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk)
3579 ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh,
3580 last_blk, new_blk,
3581 v_start);
3582 else {
3583 ret = ocfs2_half_xattr_cluster(inode, handle,
3584 last_blk, new_blk,
3585 v_start);
3586
3587 if ((*header_bh)->b_blocknr == last_blk && extend)
3588 *extend = 0;
3589 }
3590 }
3591
3592 return ret;
3593}
3594
3595/*
3596 * Add a new cluster for xattr storage.
3597 *
3598 * If the new cluster is contiguous with the previous one, it will be
3599 * appended to the same extent record, and num_clusters will be updated.
3600 * If not, we will insert a new extent for it and move some xattrs in
3601 * the last cluster into the new allocated one.
3602 * We also need to limit the maximum size of a btree leaf, otherwise we'll
3603 * lose the benefits of hashing because we'll have to search large leaves.
3604 * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
3605 * if it's bigger).
3606 *
3607 * first_bh is the first block of the previous extent rec and header_bh
3608 * indicates the bucket we will insert the new xattrs. They will be updated
3609 * when the header_bh is moved into the new cluster.
3610 */
3611static int ocfs2_add_new_xattr_cluster(struct inode *inode,
3612 struct buffer_head *root_bh,
3613 struct buffer_head **first_bh,
3614 struct buffer_head **header_bh,
3615 u32 *num_clusters,
3616 u32 prev_cpos,
3617 u64 prev_blkno,
3618 int *extend)
3619{
3620 int ret, credits;
3621 u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
3622 u32 prev_clusters = *num_clusters;
3623 u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
3624 u64 block;
3625 handle_t *handle = NULL;
3626 struct ocfs2_alloc_context *data_ac = NULL;
3627 struct ocfs2_alloc_context *meta_ac = NULL;
3628 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3629 struct ocfs2_extent_tree et;
3630
3631 mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
3632 "previous xattr blkno = %llu\n",
3633 (unsigned long long)OCFS2_I(inode)->ip_blkno,
3634 prev_cpos, prev_blkno);
3635
3636 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
3637
3638 ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
3639 &data_ac, &meta_ac);
3640 if (ret) {
3641 mlog_errno(ret);
3642 goto leave;
3643 }
3644
3645 credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el,
3646 clusters_to_add);
3647 handle = ocfs2_start_trans(osb, credits);
3648 if (IS_ERR(handle)) {
3649 ret = PTR_ERR(handle);
3650 handle = NULL;
3651 mlog_errno(ret);
3652 goto leave;
3653 }
3654
3655 ret = ocfs2_journal_access(handle, inode, root_bh,
3656 OCFS2_JOURNAL_ACCESS_WRITE);
3657 if (ret < 0) {
3658 mlog_errno(ret);
3659 goto leave;
3660 }
3661
3662 ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1,
3663 clusters_to_add, &bit_off, &num_bits);
3664 if (ret < 0) {
3665 if (ret != -ENOSPC)
3666 mlog_errno(ret);
3667 goto leave;
3668 }
3669
3670 BUG_ON(num_bits > clusters_to_add);
3671
3672 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
3673 mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
3674 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
3675
3676 if (prev_blkno + prev_clusters * bpc == block &&
3677 (prev_clusters + num_bits) << osb->s_clustersize_bits <=
3678 OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
3679 /*
3680 * If this cluster is contiguous with the old one and
3681 * adding this new cluster, we don't surpass the limit of
3682 * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
3683 * initialized and used like other buckets in the previous
3684 * cluster.
3685 * So add it as a contiguous one. The caller will handle
3686 * its init process.
3687 */
3688 v_start = prev_cpos + prev_clusters;
3689 *num_clusters = prev_clusters + num_bits;
3690 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
3691 num_bits);
3692 } else {
3693 ret = ocfs2_adjust_xattr_cross_cluster(inode,
3694 handle,
3695 first_bh,
3696 header_bh,
3697 block,
3698 prev_blkno,
3699 prev_clusters,
3700 &v_start,
3701 extend);
3702 if (ret) {
3703 mlog_errno(ret);
3704 goto leave;
3705 }
3706 }
3707
3708 if (handle->h_buffer_credits < credits) {
3709 /*
3710 * The journal has been restarted before, and don't
3711 * have enough space for the insertion, so extend it
3712 * here.
3713 */
3714 ret = ocfs2_extend_trans(handle, credits);
3715 if (ret) {
3716 mlog_errno(ret);
3717 goto leave;
3718 }
3719 }
3720 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
3721 num_bits, block, v_start);
3722 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block,
3723 num_bits, 0, meta_ac);
3724 if (ret < 0) {
3725 mlog_errno(ret);
3726 goto leave;
3727 }
3728
3729 ret = ocfs2_journal_dirty(handle, root_bh);
3730 if (ret < 0) {
3731 mlog_errno(ret);
3732 goto leave;
3733 }
3734
3735leave:
3736 if (handle)
3737 ocfs2_commit_trans(osb, handle);
3738 if (data_ac)
3739 ocfs2_free_alloc_context(data_ac);
3740 if (meta_ac)
3741 ocfs2_free_alloc_context(meta_ac);
3742
3743 return ret;
3744}
3745
3746/*
3747 * Extend a new xattr bucket and move xattrs to the end one by one until
3748 * We meet with start_bh. Only move half of the xattrs to the bucket after it.
3749 */
3750static int ocfs2_extend_xattr_bucket(struct inode *inode,
3751 struct buffer_head *first_bh,
3752 struct buffer_head *start_bh,
3753 u32 num_clusters)
3754{
3755 int ret, credits;
3756 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3757 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3758 u64 start_blk = start_bh->b_blocknr, end_blk;
3759 u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb);
3760 handle_t *handle;
3761 struct ocfs2_xattr_header *first_xh =
3762 (struct ocfs2_xattr_header *)first_bh->b_data;
3763 u16 bucket = le16_to_cpu(first_xh->xh_num_buckets);
3764
3765 mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
3766 "from %llu, len = %u\n", start_blk,
3767 (unsigned long long)first_bh->b_blocknr, num_clusters);
3768
3769 BUG_ON(bucket >= num_buckets);
3770
3771 end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket;
3772
3773 /*
3774 * We will touch all the buckets after the start_bh(include it).
3775 * Add one more bucket and modify the first_bh.
3776 */
3777 credits = end_blk - start_blk + 2 * blk_per_bucket + 1;
3778 handle = ocfs2_start_trans(osb, credits);
3779 if (IS_ERR(handle)) {
3780 ret = PTR_ERR(handle);
3781 handle = NULL;
3782 mlog_errno(ret);
3783 goto out;
3784 }
3785
3786 ret = ocfs2_journal_access(handle, inode, first_bh,
3787 OCFS2_JOURNAL_ACCESS_WRITE);
3788 if (ret) {
3789 mlog_errno(ret);
3790 goto commit;
3791 }
3792
3793 while (end_blk != start_blk) {
3794 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
3795 end_blk + blk_per_bucket, 0);
3796 if (ret)
3797 goto commit;
3798 end_blk -= blk_per_bucket;
3799 }
3800
3801 /* Move half of the xattr in start_blk to the next bucket. */
3802 ret = ocfs2_half_xattr_bucket(inode, handle, start_blk,
3803 start_blk + blk_per_bucket, NULL, 0);
3804
3805 le16_add_cpu(&first_xh->xh_num_buckets, 1);
3806 ocfs2_journal_dirty(handle, first_bh);
3807
3808commit:
3809 ocfs2_commit_trans(osb, handle);
3810out:
3811 return ret;
3812}
3813
3814/*
3815 * Add new xattr bucket in an extent record and adjust the buckets accordingly.
3816 * xb_bh is the ocfs2_xattr_block.
3817 * We will move all the buckets starting from header_bh to the next place. As
3818 * for this one, half num of its xattrs will be moved to the next one.
3819 *
3820 * We will allocate a new cluster if current cluster is full and adjust
3821 * header_bh and first_bh if the insert place is moved to the new cluster.
3822 */
3823static int ocfs2_add_new_xattr_bucket(struct inode *inode,
3824 struct buffer_head *xb_bh,
3825 struct buffer_head *header_bh)
3826{
3827 struct ocfs2_xattr_header *first_xh = NULL;
3828 struct buffer_head *first_bh = NULL;
3829 struct ocfs2_xattr_block *xb =
3830 (struct ocfs2_xattr_block *)xb_bh->b_data;
3831 struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3832 struct ocfs2_extent_list *el = &xb_root->xt_list;
3833 struct ocfs2_xattr_header *xh =
3834 (struct ocfs2_xattr_header *)header_bh->b_data;
3835 u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
3836 struct super_block *sb = inode->i_sb;
3837 struct ocfs2_super *osb = OCFS2_SB(sb);
3838 int ret, num_buckets, extend = 1;
3839 u64 p_blkno;
3840 u32 e_cpos, num_clusters;
3841
3842 mlog(0, "Add new xattr bucket starting form %llu\n",
3843 (unsigned long long)header_bh->b_blocknr);
3844
3845 /*
3846 * Add refrence for header_bh here because it may be
3847 * changed in ocfs2_add_new_xattr_cluster and we need
3848 * to free it in the end.
3849 */
3850 get_bh(header_bh);
3851
3852 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
3853 &num_clusters, el);
3854 if (ret) {
3855 mlog_errno(ret);
3856 goto out;
3857 }
3858
3859 ret = ocfs2_read_block(inode, p_blkno, &first_bh);
3860 if (ret) {
3861 mlog_errno(ret);
3862 goto out;
3863 }
3864
3865 num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
3866 first_xh = (struct ocfs2_xattr_header *)first_bh->b_data;
3867
3868 if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) {
3869 ret = ocfs2_add_new_xattr_cluster(inode,
3870 xb_bh,
3871 &first_bh,
3872 &header_bh,
3873 &num_clusters,
3874 e_cpos,
3875 p_blkno,
3876 &extend);
3877 if (ret) {
3878 mlog_errno(ret);
3879 goto out;
3880 }
3881 }
3882
3883 if (extend)
3884 ret = ocfs2_extend_xattr_bucket(inode,
3885 first_bh,
3886 header_bh,
3887 num_clusters);
3888 if (ret)
3889 mlog_errno(ret);
3890out:
3891 brelse(first_bh);
3892 brelse(header_bh);
3893 return ret;
3894}
3895
3896static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
3897 struct ocfs2_xattr_bucket *bucket,
3898 int offs)
3899{
3900 int block_off = offs >> inode->i_sb->s_blocksize_bits;
3901
3902 offs = offs % inode->i_sb->s_blocksize;
3903 return bucket->bhs[block_off]->b_data + offs;
3904}
3905
3906/*
3907 * Handle the normal xattr set, including replace, delete and new.
3908 *
3909 * Note: "local" indicates the real data's locality. So we can't
3910 * just its bucket locality by its length.
3911 */
3912static void ocfs2_xattr_set_entry_normal(struct inode *inode,
3913 struct ocfs2_xattr_info *xi,
3914 struct ocfs2_xattr_search *xs,
3915 u32 name_hash,
3916 int local)
3917{
3918 struct ocfs2_xattr_entry *last, *xe;
3919 int name_len = strlen(xi->name);
3920 struct ocfs2_xattr_header *xh = xs->header;
3921 u16 count = le16_to_cpu(xh->xh_count), start;
3922 size_t blocksize = inode->i_sb->s_blocksize;
3923 char *val;
3924 size_t offs, size, new_size;
3925
3926 last = &xh->xh_entries[count];
3927 if (!xs->not_found) {
3928 xe = xs->here;
3929 offs = le16_to_cpu(xe->xe_name_offset);
3930 if (ocfs2_xattr_is_local(xe))
3931 size = OCFS2_XATTR_SIZE(name_len) +
3932 OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
3933 else
3934 size = OCFS2_XATTR_SIZE(name_len) +
3935 OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
3936
3937 /*
3938 * If the new value will be stored outside, xi->value has been
3939 * initalized as an empty ocfs2_xattr_value_root, and the same
3940 * goes with xi->value_len, so we can set new_size safely here.
3941 * See ocfs2_xattr_set_in_bucket.
3942 */
3943 new_size = OCFS2_XATTR_SIZE(name_len) +
3944 OCFS2_XATTR_SIZE(xi->value_len);
3945
3946 le16_add_cpu(&xh->xh_name_value_len, -size);
3947 if (xi->value) {
3948 if (new_size > size)
3949 goto set_new_name_value;
3950
3951 /* Now replace the old value with new one. */
3952 if (local)
3953 xe->xe_value_size = cpu_to_le64(xi->value_len);
3954 else
3955 xe->xe_value_size = 0;
3956
3957 val = ocfs2_xattr_bucket_get_val(inode,
3958 &xs->bucket, offs);
3959 memset(val + OCFS2_XATTR_SIZE(name_len), 0,
3960 size - OCFS2_XATTR_SIZE(name_len));
3961 if (OCFS2_XATTR_SIZE(xi->value_len) > 0)
3962 memcpy(val + OCFS2_XATTR_SIZE(name_len),
3963 xi->value, xi->value_len);
3964
3965 le16_add_cpu(&xh->xh_name_value_len, new_size);
3966 ocfs2_xattr_set_local(xe, local);
3967 return;
3968 } else {
3969 /*
3970 * Remove the old entry if there is more than one.
3971 * We don't remove the last entry so that we can
3972 * use it to indicate the hash value of the empty
3973 * bucket.
3974 */
3975 last -= 1;
3976 le16_add_cpu(&xh->xh_count, -1);
3977 if (xh->xh_count) {
3978 memmove(xe, xe + 1,
3979 (void *)last - (void *)xe);
3980 memset(last, 0,
3981 sizeof(struct ocfs2_xattr_entry));
3982 } else
3983 xh->xh_free_start =
3984 cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
3985
3986 return;
3987 }
3988 } else {
3989 /* find a new entry for insert. */
3990 int low = 0, high = count - 1, tmp;
3991 struct ocfs2_xattr_entry *tmp_xe;
3992
3993 while (low <= high && count) {
3994 tmp = (low + high) / 2;
3995 tmp_xe = &xh->xh_entries[tmp];
3996
3997 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
3998 low = tmp + 1;
3999 else if (name_hash <
4000 le32_to_cpu(tmp_xe->xe_name_hash))
4001 high = tmp - 1;
4002 else {
4003 low = tmp;
4004 break;
4005 }
4006 }
4007
4008 xe = &xh->xh_entries[low];
4009 if (low != count)
4010 memmove(xe + 1, xe, (void *)last - (void *)xe);
4011
4012 le16_add_cpu(&xh->xh_count, 1);
4013 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
4014 xe->xe_name_hash = cpu_to_le32(name_hash);
4015 xe->xe_name_len = name_len;
4016 ocfs2_xattr_set_type(xe, xi->name_index);
4017 }
4018
4019set_new_name_value:
4020 /* Insert the new name+value. */
4021 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len);
4022
4023 /*
4024 * We must make sure that the name/value pair
4025 * exists in the same block.
4026 */
4027 offs = le16_to_cpu(xh->xh_free_start);
4028 start = offs - size;
4029
4030 if (start >> inode->i_sb->s_blocksize_bits !=
4031 (offs - 1) >> inode->i_sb->s_blocksize_bits) {
4032 offs = offs - offs % blocksize;
4033 xh->xh_free_start = cpu_to_le16(offs);
4034 }
4035
4036 val = ocfs2_xattr_bucket_get_val(inode,
4037 &xs->bucket, offs - size);
4038 xe->xe_name_offset = cpu_to_le16(offs - size);
4039
4040 memset(val, 0, size);
4041 memcpy(val, xi->name, name_len);
4042 memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len);
4043
4044 xe->xe_value_size = cpu_to_le64(xi->value_len);
4045 ocfs2_xattr_set_local(xe, local);
4046 xs->here = xe;
4047 le16_add_cpu(&xh->xh_free_start, -size);
4048 le16_add_cpu(&xh->xh_name_value_len, size);
4049
4050 return;
4051}
4052
4053static int ocfs2_xattr_bucket_handle_journal(struct inode *inode,
4054 handle_t *handle,
4055 struct ocfs2_xattr_search *xs,
4056 struct buffer_head **bhs,
4057 u16 bh_num)
4058{
4059 int ret = 0, off, block_off;
4060 struct ocfs2_xattr_entry *xe = xs->here;
4061
4062 /*
4063 * First calculate all the blocks we should journal_access
4064 * and journal_dirty. The first block should always be touched.
4065 */
4066 ret = ocfs2_journal_dirty(handle, bhs[0]);
4067 if (ret)
4068 mlog_errno(ret);
4069
4070 /* calc the data. */
4071 off = le16_to_cpu(xe->xe_name_offset);
4072 block_off = off >> inode->i_sb->s_blocksize_bits;
4073 ret = ocfs2_journal_dirty(handle, bhs[block_off]);
4074 if (ret)
4075 mlog_errno(ret);
4076
4077 return ret;
4078}
4079
4080/*
4081 * Set the xattr entry in the specified bucket.
4082 * The bucket is indicated by xs->bucket and it should have the enough
4083 * space for the xattr insertion.
4084 */
4085static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
4086 struct ocfs2_xattr_info *xi,
4087 struct ocfs2_xattr_search *xs,
4088 u32 name_hash,
4089 int local)
4090{
4091 int i, ret;
4092 handle_t *handle = NULL;
4093 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4094 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4095
4096 mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
4097 (unsigned long)xi->value_len, xi->name_index,
4098 (unsigned long long)xs->bucket.bhs[0]->b_blocknr);
4099
4100 if (!xs->bucket.bhs[1]) {
4101 ret = ocfs2_read_blocks(inode,
4102 xs->bucket.bhs[0]->b_blocknr + 1,
4103 blk_per_bucket - 1, &xs->bucket.bhs[1],
4104 OCFS2_BH_CACHED);
4105 if (ret) {
4106 mlog_errno(ret);
4107 goto out;
4108 }
4109 }
4110
4111 handle = ocfs2_start_trans(osb, blk_per_bucket);
4112 if (IS_ERR(handle)) {
4113 ret = PTR_ERR(handle);
4114 handle = NULL;
4115 mlog_errno(ret);
4116 goto out;
4117 }
4118
4119 for (i = 0; i < blk_per_bucket; i++) {
4120 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i],
4121 OCFS2_JOURNAL_ACCESS_WRITE);
4122 if (ret < 0) {
4123 mlog_errno(ret);
4124 goto out;
4125 }
4126 }
4127
4128 ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
4129
4130 /*Only dirty the blocks we have touched in set xattr. */
4131 ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs,
4132 xs->bucket.bhs, blk_per_bucket);
4133 if (ret)
4134 mlog_errno(ret);
4135out:
4136 ocfs2_commit_trans(osb, handle);
4137
4138 return ret;
4139}
4140
4141static int ocfs2_xattr_value_update_size(struct inode *inode,
4142 struct buffer_head *xe_bh,
4143 struct ocfs2_xattr_entry *xe,
4144 u64 new_size)
4145{
4146 int ret;
4147 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4148 handle_t *handle = NULL;
4149
4150 handle = ocfs2_start_trans(osb, 1);
4151 if (handle == NULL) {
4152 ret = -ENOMEM;
4153 mlog_errno(ret);
4154 goto out;
4155 }
4156
4157 ret = ocfs2_journal_access(handle, inode, xe_bh,
4158 OCFS2_JOURNAL_ACCESS_WRITE);
4159 if (ret < 0) {
4160 mlog_errno(ret);
4161 goto out_commit;
4162 }
4163
4164 xe->xe_value_size = cpu_to_le64(new_size);
4165
4166 ret = ocfs2_journal_dirty(handle, xe_bh);
4167 if (ret < 0)
4168 mlog_errno(ret);
4169
4170out_commit:
4171 ocfs2_commit_trans(osb, handle);
4172out:
4173 return ret;
4174}
4175
4176/*
4177 * Truncate the specified xe_off entry in xattr bucket.
4178 * bucket is indicated by header_bh and len is the new length.
4179 * Both the ocfs2_xattr_value_root and the entry will be updated here.
4180 *
4181 * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
4182 */
4183static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
4184 struct buffer_head *header_bh,
4185 int xe_off,
4186 int len)
4187{
4188 int ret, offset;
4189 u64 value_blk;
4190 struct buffer_head *value_bh = NULL;
4191 struct ocfs2_xattr_value_root *xv;
4192 struct ocfs2_xattr_entry *xe;
4193 struct ocfs2_xattr_header *xh =
4194 (struct ocfs2_xattr_header *)header_bh->b_data;
4195 size_t blocksize = inode->i_sb->s_blocksize;
4196
4197 xe = &xh->xh_entries[xe_off];
4198
4199 BUG_ON(!xe || ocfs2_xattr_is_local(xe));
4200
4201 offset = le16_to_cpu(xe->xe_name_offset) +
4202 OCFS2_XATTR_SIZE(xe->xe_name_len);
4203
4204 value_blk = offset / blocksize;
4205
4206 /* We don't allow ocfs2_xattr_value to be stored in different block. */
4207 BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
4208 value_blk += header_bh->b_blocknr;
4209
4210 ret = ocfs2_read_block(inode, value_blk, &value_bh);
4211 if (ret) {
4212 mlog_errno(ret);
4213 goto out;
4214 }
4215
4216 xv = (struct ocfs2_xattr_value_root *)
4217 (value_bh->b_data + offset % blocksize);
4218
4219 mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
4220 xe_off, (unsigned long long)header_bh->b_blocknr, len);
4221 ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len);
4222 if (ret) {
4223 mlog_errno(ret);
4224 goto out;
4225 }
4226
4227 ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len);
4228 if (ret) {
4229 mlog_errno(ret);
4230 goto out;
4231 }
4232
4233out:
4234 brelse(value_bh);
4235 return ret;
4236}
4237
4238static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
4239 struct ocfs2_xattr_search *xs,
4240 int len)
4241{
4242 int ret, offset;
4243 struct ocfs2_xattr_entry *xe = xs->here;
4244 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
4245
4246 BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe));
4247
4248 offset = xe - xh->xh_entries;
4249 ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0],
4250 offset, len);
4251 if (ret)
4252 mlog_errno(ret);
4253
4254 return ret;
4255}
4256
4257static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4258 struct ocfs2_xattr_search *xs,
4259 char *val,
4260 int value_len)
4261{
4262 int offset;
4263 struct ocfs2_xattr_value_root *xv;
4264 struct ocfs2_xattr_entry *xe = xs->here;
4265
4266 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4267
4268 offset = le16_to_cpu(xe->xe_name_offset) +
4269 OCFS2_XATTR_SIZE(xe->xe_name_len);
4270
4271 xv = (struct ocfs2_xattr_value_root *)(xs->base + offset);
4272
4273 return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len);
4274}
4275
4276static int ocfs2_rm_xattr_cluster(struct inode *inode,
4277 struct buffer_head *root_bh,
4278 u64 blkno,
4279 u32 cpos,
4280 u32 len)
4281{
4282 int ret;
4283 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4284 struct inode *tl_inode = osb->osb_tl_inode;
4285 handle_t *handle;
4286 struct ocfs2_xattr_block *xb =
4287 (struct ocfs2_xattr_block *)root_bh->b_data;
4288 struct ocfs2_alloc_context *meta_ac = NULL;
4289 struct ocfs2_cached_dealloc_ctxt dealloc;
4290 struct ocfs2_extent_tree et;
4291
4292 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh);
4293
4294 ocfs2_init_dealloc_ctxt(&dealloc);
4295
4296 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4297 cpos, len, (unsigned long long)blkno);
4298
4299 ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len);
4300
4301 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4302 if (ret) {
4303 mlog_errno(ret);
4304 return ret;
4305 }
4306
4307 mutex_lock(&tl_inode->i_mutex);
4308
4309 if (ocfs2_truncate_log_needs_flush(osb)) {
4310 ret = __ocfs2_flush_truncate_log(osb);
4311 if (ret < 0) {
4312 mlog_errno(ret);
4313 goto out;
4314 }
4315 }
4316
4317 handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS);
4318 if (handle == NULL) {
4319 ret = -ENOMEM;
4320 mlog_errno(ret);
4321 goto out;
4322 }
4323
4324 ret = ocfs2_journal_access(handle, inode, root_bh,
4325 OCFS2_JOURNAL_ACCESS_WRITE);
4326 if (ret) {
4327 mlog_errno(ret);
4328 goto out_commit;
4329 }
4330
4331 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac,
4332 &dealloc);
4333 if (ret) {
4334 mlog_errno(ret);
4335 goto out_commit;
4336 }
4337
4338 le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
4339
4340 ret = ocfs2_journal_dirty(handle, root_bh);
4341 if (ret) {
4342 mlog_errno(ret);
4343 goto out_commit;
4344 }
4345
4346 ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
4347 if (ret)
4348 mlog_errno(ret);
4349
4350out_commit:
4351 ocfs2_commit_trans(osb, handle);
4352out:
4353 ocfs2_schedule_truncate_log_flush(osb, 1);
4354
4355 mutex_unlock(&tl_inode->i_mutex);
4356
4357 if (meta_ac)
4358 ocfs2_free_alloc_context(meta_ac);
4359
4360 ocfs2_run_deallocs(osb, &dealloc);
4361
4362 return ret;
4363}
4364
4365static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
4366 struct ocfs2_xattr_search *xs)
4367{
4368 handle_t *handle = NULL;
4369 struct ocfs2_xattr_header *xh = xs->bucket.xh;
4370 struct ocfs2_xattr_entry *last = &xh->xh_entries[
4371 le16_to_cpu(xh->xh_count) - 1];
4372 int ret = 0;
4373
4374 handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1);
4375 if (IS_ERR(handle)) {
4376 ret = PTR_ERR(handle);
4377 mlog_errno(ret);
4378 return;
4379 }
4380
4381 ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0],
4382 OCFS2_JOURNAL_ACCESS_WRITE);
4383 if (ret) {
4384 mlog_errno(ret);
4385 goto out_commit;
4386 }
4387
4388 /* Remove the old entry. */
4389 memmove(xs->here, xs->here + 1,
4390 (void *)last - (void *)xs->here);
4391 memset(last, 0, sizeof(struct ocfs2_xattr_entry));
4392 le16_add_cpu(&xh->xh_count, -1);
4393
4394 ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]);
4395 if (ret < 0)
4396 mlog_errno(ret);
4397out_commit:
4398 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
4399}
4400
4401/*
4402 * Set the xattr name/value in the bucket specified in xs.
4403 *
4404 * As the new value in xi may be stored in the bucket or in an outside cluster,
4405 * we divide the whole process into 3 steps:
4406 * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
4407 * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
4408 * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
4409 * 4. If the clusters for the new outside value can't be allocated, we need
4410 * to free the xattr we allocated in set.
4411 */
4412static int ocfs2_xattr_set_in_bucket(struct inode *inode,
4413 struct ocfs2_xattr_info *xi,
4414 struct ocfs2_xattr_search *xs)
4415{
4416 int ret, local = 1;
4417 size_t value_len;
4418 char *val = (char *)xi->value;
4419 struct ocfs2_xattr_entry *xe = xs->here;
4420 u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name,
4421 strlen(xi->name));
4422
4423 if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
4424 /*
4425 * We need to truncate the xattr storage first.
4426 *
4427 * If both the old and new value are stored to
4428 * outside block, we only need to truncate
4429 * the storage and then set the value outside.
4430 *
4431 * If the new value should be stored within block,
4432 * we should free all the outside block first and
4433 * the modification to the xattr block will be done
4434 * by following steps.
4435 */
4436 if (xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4437 value_len = xi->value_len;
4438 else
4439 value_len = 0;
4440
4441 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4442 value_len);
4443 if (ret)
4444 goto out;
4445
4446 if (value_len)
4447 goto set_value_outside;
4448 }
4449
4450 value_len = xi->value_len;
4451 /* So we have to handle the inside block change now. */
4452 if (value_len > OCFS2_XATTR_INLINE_SIZE) {
4453 /*
4454 * If the new value will be stored outside of block,
4455 * initalize a new empty value root and insert it first.
4456 */
4457 local = 0;
4458 xi->value = &def_xv;
4459 xi->value_len = OCFS2_XATTR_ROOT_SIZE;
4460 }
4461
4462 ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local);
4463 if (ret) {
4464 mlog_errno(ret);
4465 goto out;
4466 }
4467
4468 if (value_len <= OCFS2_XATTR_INLINE_SIZE)
4469 goto out;
4470
4471 /* allocate the space now for the outside block storage. */
4472 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
4473 value_len);
4474 if (ret) {
4475 mlog_errno(ret);
4476
4477 if (xs->not_found) {
4478 /*
4479 * We can't allocate enough clusters for outside
4480 * storage and we have allocated xattr already,
4481 * so need to remove it.
4482 */
4483 ocfs2_xattr_bucket_remove_xs(inode, xs);
4484 }
4485 goto out;
4486 }
4487
4488set_value_outside:
4489 ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len);
4490out:
4491 return ret;
4492}
4493
4494/* check whether the xattr bucket is filled up with the same hash value. */
4495static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
4496 struct ocfs2_xattr_bucket *bucket)
4497{
4498 struct ocfs2_xattr_header *xh = bucket->xh;
4499
4500 if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
4501 xh->xh_entries[0].xe_name_hash) {
4502 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
4503 "hash = %u\n",
4504 (unsigned long long)bucket->bhs[0]->b_blocknr,
4505 le32_to_cpu(xh->xh_entries[0].xe_name_hash));
4506 return -ENOSPC;
4507 }
4508
4509 return 0;
4510}
4511
4512static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
4513 struct ocfs2_xattr_info *xi,
4514 struct ocfs2_xattr_search *xs)
4515{
4516 struct ocfs2_xattr_header *xh;
4517 struct ocfs2_xattr_entry *xe;
4518 u16 count, header_size, xh_free_start;
4519 int i, free, max_free, need, old;
4520 size_t value_size = 0, name_len = strlen(xi->name);
4521 size_t blocksize = inode->i_sb->s_blocksize;
4522 int ret, allocation = 0;
4523 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4524
4525 mlog_entry("Set xattr %s in xattr index block\n", xi->name);
4526
4527try_again:
4528 xh = xs->header;
4529 count = le16_to_cpu(xh->xh_count);
4530 xh_free_start = le16_to_cpu(xh->xh_free_start);
4531 header_size = sizeof(struct ocfs2_xattr_header) +
4532 count * sizeof(struct ocfs2_xattr_entry);
4533 max_free = OCFS2_XATTR_BUCKET_SIZE -
4534 le16_to_cpu(xh->xh_name_value_len) - header_size;
4535
4536 mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
4537 "of %u which exceed block size\n",
4538 (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4539 header_size);
4540
4541 if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE)
4542 value_size = OCFS2_XATTR_ROOT_SIZE;
4543 else if (xi->value)
4544 value_size = OCFS2_XATTR_SIZE(xi->value_len);
4545
4546 if (xs->not_found)
4547 need = sizeof(struct ocfs2_xattr_entry) +
4548 OCFS2_XATTR_SIZE(name_len) + value_size;
4549 else {
4550 need = value_size + OCFS2_XATTR_SIZE(name_len);
4551
4552 /*
4553 * We only replace the old value if the new length is smaller
4554 * than the old one. Otherwise we will allocate new space in the
4555 * bucket to store it.
4556 */
4557 xe = xs->here;
4558 if (ocfs2_xattr_is_local(xe))
4559 old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4560 else
4561 old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
4562
4563 if (old >= value_size)
4564 need = 0;
4565 }
4566
4567 free = xh_free_start - header_size;
4568 /*
4569 * We need to make sure the new name/value pair
4570 * can exist in the same block.
4571 */
4572 if (xh_free_start % blocksize < need)
4573 free -= xh_free_start % blocksize;
4574
4575 mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
4576 "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
4577 " %u\n", xs->not_found,
4578 (unsigned long long)xs->bucket.bhs[0]->b_blocknr,
4579 free, need, max_free, le16_to_cpu(xh->xh_free_start),
4580 le16_to_cpu(xh->xh_name_value_len));
4581
4582 if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4583 if (need <= max_free &&
4584 count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
4585 /*
4586 * We can create the space by defragment. Since only the
4587 * name/value will be moved, the xe shouldn't be changed
4588 * in xs.
4589 */
4590 ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket);
4591 if (ret) {
4592 mlog_errno(ret);
4593 goto out;
4594 }
4595
4596 xh_free_start = le16_to_cpu(xh->xh_free_start);
4597 free = xh_free_start - header_size;
4598 if (xh_free_start % blocksize < need)
4599 free -= xh_free_start % blocksize;
4600
4601 if (free >= need)
4602 goto xattr_set;
4603
4604 mlog(0, "Can't get enough space for xattr insert by "
4605 "defragment. Need %u bytes, but we have %d, so "
4606 "allocate new bucket for it.\n", need, free);
4607 }
4608
4609 /*
4610 * We have to add new buckets or clusters and one
4611 * allocation should leave us enough space for insert.
4612 */
4613 BUG_ON(allocation);
4614
4615 /*
4616 * We do not allow for overlapping ranges between buckets. And
4617 * the maximum number of collisions we will allow for then is
4618 * one bucket's worth, so check it here whether we need to
4619 * add a new bucket for the insert.
4620 */
4621 ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket);
4622 if (ret) {
4623 mlog_errno(ret);
4624 goto out;
4625 }
4626
4627 ret = ocfs2_add_new_xattr_bucket(inode,
4628 xs->xattr_bh,
4629 xs->bucket.bhs[0]);
4630 if (ret) {
4631 mlog_errno(ret);
4632 goto out;
4633 }
4634
4635 for (i = 0; i < blk_per_bucket; i++)
4636 brelse(xs->bucket.bhs[i]);
4637
4638 memset(&xs->bucket, 0, sizeof(xs->bucket));
4639
4640 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
4641 xi->name_index,
4642 xi->name, xs);
4643 if (ret && ret != -ENODATA)
4644 goto out;
4645 xs->not_found = ret;
4646 allocation = 1;
4647 goto try_again;
4648 }
4649
4650xattr_set:
4651 ret = ocfs2_xattr_set_in_bucket(inode, xi, xs);
4652out:
4653 mlog_exit(ret);
4654 return ret;
4655}
4656
4657static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
4658 struct ocfs2_xattr_bucket *bucket,
4659 void *para)
4660{
4661 int ret = 0;
4662 struct ocfs2_xattr_header *xh = bucket->xh;
4663 u16 i;
4664 struct ocfs2_xattr_entry *xe;
4665
4666 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4667 xe = &xh->xh_entries[i];
4668 if (ocfs2_xattr_is_local(xe))
4669 continue;
4670
4671 ret = ocfs2_xattr_bucket_value_truncate(inode,
4672 bucket->bhs[0],
4673 i, 0);
4674 if (ret) {
4675 mlog_errno(ret);
4676 break;
4677 }
4678 }
4679
4680 return ret;
4681}
4682
4683static int ocfs2_delete_xattr_index_block(struct inode *inode,
4684 struct buffer_head *xb_bh)
4685{
4686 struct ocfs2_xattr_block *xb =
4687 (struct ocfs2_xattr_block *)xb_bh->b_data;
4688 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4689 int ret = 0;
4690 u32 name_hash = UINT_MAX, e_cpos, num_clusters;
4691 u64 p_blkno;
4692
4693 if (le16_to_cpu(el->l_next_free_rec) == 0)
4694 return 0;
4695
4696 while (name_hash > 0) {
4697 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4698 &e_cpos, &num_clusters, el);
4699 if (ret) {
4700 mlog_errno(ret);
4701 goto out;
4702 }
4703
4704 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters,
4705 ocfs2_delete_xattr_in_bucket,
4706 NULL);
4707 if (ret) {
4708 mlog_errno(ret);
4709 goto out;
4710 }
4711
4712 ret = ocfs2_rm_xattr_cluster(inode, xb_bh,
4713 p_blkno, e_cpos, num_clusters);
4714 if (ret) {
4715 mlog_errno(ret);
4716 break;
4717 }
4718
4719 if (e_cpos == 0)
4720 break;
4721
4722 name_hash = e_cpos - 1;
4723 }
4724
4725out:
4726 return ret;
4727}
4728
4729/*
4730 * 'trusted' attributes support
4731 */
4732
4733#define XATTR_TRUSTED_PREFIX "trusted."
4734
4735static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
4736 size_t list_size, const char *name,
4737 size_t name_len)
4738{
4739 const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1;
4740 const size_t total_len = prefix_len + name_len + 1;
4741
4742 if (list && total_len <= list_size) {
4743 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
4744 memcpy(list + prefix_len, name, name_len);
4745 list[prefix_len + name_len] = '\0';
4746 }
4747 return total_len;
4748}
4749
4750static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
4751 void *buffer, size_t size)
4752{
4753 if (strcmp(name, "") == 0)
4754 return -EINVAL;
4755 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
4756 buffer, size);
4757}
4758
4759static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
4760 const void *value, size_t size, int flags)
4761{
4762 if (strcmp(name, "") == 0)
4763 return -EINVAL;
4764
4765 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
4766 size, flags);
4767}
4768
4769struct xattr_handler ocfs2_xattr_trusted_handler = {
4770 .prefix = XATTR_TRUSTED_PREFIX,
4771 .list = ocfs2_xattr_trusted_list,
4772 .get = ocfs2_xattr_trusted_get,
4773 .set = ocfs2_xattr_trusted_set,
4774};
4775
4776
4777/*
4778 * 'user' attributes support
4779 */
4780
4781#define XATTR_USER_PREFIX "user."
4782
4783static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
4784 size_t list_size, const char *name,
4785 size_t name_len)
4786{
4787 const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1;
4788 const size_t total_len = prefix_len + name_len + 1;
4789 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4790
4791 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4792 return 0;
4793
4794 if (list && total_len <= list_size) {
4795 memcpy(list, XATTR_USER_PREFIX, prefix_len);
4796 memcpy(list + prefix_len, name, name_len);
4797 list[prefix_len + name_len] = '\0';
4798 }
4799 return total_len;
4800}
4801
4802static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
4803 void *buffer, size_t size)
4804{
4805 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4806
4807 if (strcmp(name, "") == 0)
4808 return -EINVAL;
4809 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4810 return -EOPNOTSUPP;
4811 return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
4812 buffer, size);
4813}
4814
4815static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
4816 const void *value, size_t size, int flags)
4817{
4818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4819
4820 if (strcmp(name, "") == 0)
4821 return -EINVAL;
4822 if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
4823 return -EOPNOTSUPP;
4824
4825 return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
4826 size, flags);
4827}
4828
4829struct xattr_handler ocfs2_xattr_user_handler = {
4830 .prefix = XATTR_USER_PREFIX,
4831 .list = ocfs2_xattr_user_list,
4832 .get = ocfs2_xattr_user_get,
4833 .set = ocfs2_xattr_user_set,
4834};
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
new file mode 100644
index 000000000000..c25c7c62a059
--- /dev/null
+++ b/fs/ocfs2/xattr.h
@@ -0,0 +1,68 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * xattr.h
5 *
6 * Function prototypes
7 *
8 * Copyright (C) 2008 Oracle. All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public
12 * License as published by the Free Software Foundation; either
13 * version 2 of the License, or (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public
21 * License along with this program; if not, write to the
22 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 * Boston, MA 021110-1307, USA.
24 */
25
26#ifndef OCFS2_XATTR_H
27#define OCFS2_XATTR_H
28
29#include <linux/init.h>
30#include <linux/xattr.h>
31
32enum ocfs2_xattr_type {
33 OCFS2_XATTR_INDEX_USER = 1,
34 OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS,
35 OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
36 OCFS2_XATTR_INDEX_TRUSTED,
37 OCFS2_XATTR_INDEX_SECURITY,
38 OCFS2_XATTR_MAX
39};
40
41extern struct xattr_handler ocfs2_xattr_user_handler;
42extern struct xattr_handler ocfs2_xattr_trusted_handler;
43
44extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
45extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t);
46extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *,
47 size_t, int);
48extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh);
49extern struct xattr_handler *ocfs2_xattr_handlers[];
50
51static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
52{
53 return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
54}
55
56static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
57{
58 return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
59}
60
61static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
62{
63 u16 len = sb->s_blocksize -
64 offsetof(struct ocfs2_xattr_header, xh_entries);
65
66 return len / sizeof(struct ocfs2_xattr_entry);
67}
68#endif /* OCFS2_XATTR_H */