about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-23 12:29:20 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-09-23 12:29:20 -0400
commit: b64ada6b23d4a305fb3ca59b79dd38707fc53b69 (patch)
tree: 61bc87dc3a2549f54231261aaa544acffcd12281 /fs
parent: be90a49ca22a95f184d9f32d35b5247b44032849 (diff)
parent: b80474b432913f73cce8db001e9fa3104f9b79ee (diff)
Merge branch 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (85 commits)
  ocfs2: Use buffer IO if we are appending a file.
  ocfs2: add spinlock protection when dealing with lockres->purge.
  dlmglue.c: add missed mlog lines
  ocfs2: __ocfs2_abort() should not enable panic for local mounts
  ocfs2: Add ioctl for reflink.
  ocfs2: Enable refcount tree support.
  ocfs2: Implement ocfs2_reflink.
  ocfs2: Add preserve to reflink.
  ocfs2: Create reflinked file in orphan dir.
  ocfs2: Use proper parameter for some inode operation.
  ocfs2: Make transaction extend more efficient.
  ocfs2: Don't merge in 1st refcount ops of reflink.
  ocfs2: Modify removing xattr process for refcount.
  ocfs2: Add reflink support for xattr.
  ocfs2: Create an xattr indexed block if needed.
  ocfs2: Call refcount tree remove process properly.
  ocfs2: Attach xattr clusters to refcount tree.
  ocfs2: Abstract ocfs2 xattr tree extend rec iteration process.
  ocfs2: Abstract the creation of xattr block.
  ocfs2: Remove inode from ocfs2_xattr_bucket_get_name_value.
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/Makefile | 1
-rw-r--r-- fs/ocfs2/alloc.c | 1342
-rw-r--r-- fs/ocfs2/alloc.h | 101
-rw-r--r-- fs/ocfs2/aops.c | 37
-rw-r--r-- fs/ocfs2/aops.h | 2
-rw-r--r-- fs/ocfs2/buffer_head_io.c | 47
-rw-r--r-- fs/ocfs2/buffer_head_io.h | 8
-rw-r--r-- fs/ocfs2/cluster/masklog.c | 1
-rw-r--r-- fs/ocfs2/cluster/masklog.h | 1
-rw-r--r-- fs/ocfs2/dir.c | 107
-rw-r--r-- fs/ocfs2/dlm/dlmthread.c | 6
-rw-r--r-- fs/ocfs2/dlmglue.c | 105
-rw-r--r-- fs/ocfs2/dlmglue.h | 6
-rw-r--r-- fs/ocfs2/extent_map.c | 33
-rw-r--r-- fs/ocfs2/extent_map.h | 8
-rw-r--r-- fs/ocfs2/file.c | 151
-rw-r--r-- fs/ocfs2/file.h | 2
-rw-r--r-- fs/ocfs2/inode.c | 86
-rw-r--r-- fs/ocfs2/inode.h | 20
-rw-r--r-- fs/ocfs2/ioctl.c | 14
-rw-r--r-- fs/ocfs2/journal.c | 82
-rw-r--r-- fs/ocfs2/journal.h | 94
-rw-r--r-- fs/ocfs2/localalloc.c | 12
-rw-r--r-- fs/ocfs2/namei.c | 341
-rw-r--r-- fs/ocfs2/namei.h | 6
-rw-r--r-- fs/ocfs2/ocfs2.h | 52
-rw-r--r-- fs/ocfs2/ocfs2_fs.h | 107
-rw-r--r-- fs/ocfs2/ocfs2_lockid.h | 5
-rw-r--r-- fs/ocfs2/quota_global.c | 5
-rw-r--r-- fs/ocfs2/quota_local.c | 26
-rw-r--r-- fs/ocfs2/refcounttree.c | 4313
-rw-r--r-- fs/ocfs2/refcounttree.h | 106
-rw-r--r-- fs/ocfs2/resize.c | 16
-rw-r--r-- fs/ocfs2/slot_map.c | 10
-rw-r--r-- fs/ocfs2/suballoc.c | 35
-rw-r--r-- fs/ocfs2/super.c | 13
-rw-r--r-- fs/ocfs2/uptodate.c | 265
-rw-r--r-- fs/ocfs2/uptodate.h | 51
-rw-r--r-- fs/ocfs2/xattr.c | 2056
-rw-r--r-- fs/ocfs2/xattr.h | 15
40 files changed, 8512 insertions, 1176 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 01596079dd63..31f25ce32c97 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -28,6 +28,7 @@ ocfs2-objs := \
28 locks.o \ 28 locks.o \
29 mmap.o \ 29 mmap.o \
30 namei.o \ 30 namei.o \
31 refcounttree.o \
31 resize.o \ 32 resize.o \
32 slot_map.o \ 33 slot_map.o \
33 suballoc.o \ 34 suballoc.o \
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index ab513ddaeff2..38a42f5d59ff 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -49,10 +49,21 @@
49#include "super.h" 49#include "super.h"
50#include "uptodate.h" 50#include "uptodate.h"
51#include "xattr.h" 51#include "xattr.h"
52#include "refcounttree.h"
52 53
53#include "buffer_head_io.h" 54#include "buffer_head_io.h"
54 55
56enum ocfs2_contig_type {
57 CONTIG_NONE = 0,
58 CONTIG_LEFT,
59 CONTIG_RIGHT,
60 CONTIG_LEFTRIGHT,
61};
55 62
63static enum ocfs2_contig_type
64 ocfs2_extent_rec_contig(struct super_block *sb,
65 struct ocfs2_extent_rec *ext,
66 struct ocfs2_extent_rec *insert_rec);
56/* 67/*
57 * Operations for a specific extent tree type. 68 * Operations for a specific extent tree type.
58 * 69 *
@@ -79,18 +90,30 @@ struct ocfs2_extent_tree_operations {
79 * that value. new_clusters is the delta, and must be 90 * that value. new_clusters is the delta, and must be
80 * added to the total. Required. 91 * added to the total. Required.
81 */ 92 */
82 void (*eo_update_clusters)(struct inode *inode, 93 void (*eo_update_clusters)(struct ocfs2_extent_tree *et,
83 struct ocfs2_extent_tree *et,
84 u32 new_clusters); 94 u32 new_clusters);
85 95
86 /* 96 /*
97 * If this extent tree is supported by an extent map, insert
98 * a record into the map.
99 */
100 void (*eo_extent_map_insert)(struct ocfs2_extent_tree *et,
101 struct ocfs2_extent_rec *rec);
102
103 /*
104 * If this extent tree is supported by an extent map, truncate the
105 * map to clusters,
106 */
107 void (*eo_extent_map_truncate)(struct ocfs2_extent_tree *et,
108 u32 clusters);
109
110 /*
87 * If ->eo_insert_check() exists, it is called before rec is 111 * If ->eo_insert_check() exists, it is called before rec is
88 * inserted into the extent tree. It is optional. 112 * inserted into the extent tree. It is optional.
89 */ 113 */
90 int (*eo_insert_check)(struct inode *inode, 114 int (*eo_insert_check)(struct ocfs2_extent_tree *et,
91 struct ocfs2_extent_tree *et,
92 struct ocfs2_extent_rec *rec); 115 struct ocfs2_extent_rec *rec);
93 int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et); 116 int (*eo_sanity_check)(struct ocfs2_extent_tree *et);
94 117
95 /* 118 /*
96 * -------------------------------------------------------------- 119 * --------------------------------------------------------------
@@ -109,8 +132,17 @@ struct ocfs2_extent_tree_operations {
109 * it exists. If it does not, et->et_max_leaf_clusters is set 132 * it exists. If it does not, et->et_max_leaf_clusters is set
110 * to 0 (unlimited). Optional. 133 * to 0 (unlimited). Optional.
111 */ 134 */
112 void (*eo_fill_max_leaf_clusters)(struct inode *inode, 135 void (*eo_fill_max_leaf_clusters)(struct ocfs2_extent_tree *et);
113 struct ocfs2_extent_tree *et); 136
137 /*
138 * ->eo_extent_contig test whether the 2 ocfs2_extent_rec
139 * are contiguous or not. Optional. Don't need to set it if use
140 * ocfs2_extent_rec as the tree leaf.
141 */
142 enum ocfs2_contig_type
143 (*eo_extent_contig)(struct ocfs2_extent_tree *et,
144 struct ocfs2_extent_rec *ext,
145 struct ocfs2_extent_rec *insert_rec);
114}; 146};
115 147
116 148
@@ -121,19 +153,22 @@ struct ocfs2_extent_tree_operations {
121static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et); 153static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et);
122static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, 154static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et,
123 u64 blkno); 155 u64 blkno);
124static void ocfs2_dinode_update_clusters(struct inode *inode, 156static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
125 struct ocfs2_extent_tree *et,
126 u32 clusters); 157 u32 clusters);
127static int ocfs2_dinode_insert_check(struct inode *inode, 158static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
128 struct ocfs2_extent_tree *et, 159 struct ocfs2_extent_rec *rec);
160static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
161 u32 clusters);
162static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
129 struct ocfs2_extent_rec *rec); 163 struct ocfs2_extent_rec *rec);
130static int ocfs2_dinode_sanity_check(struct inode *inode, 164static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et);
131 struct ocfs2_extent_tree *et);
132static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); 165static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et);
133static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { 166static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = {
134 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, 167 .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk,
135 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, 168 .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk,
136 .eo_update_clusters = ocfs2_dinode_update_clusters, 169 .eo_update_clusters = ocfs2_dinode_update_clusters,
170 .eo_extent_map_insert = ocfs2_dinode_extent_map_insert,
171 .eo_extent_map_truncate = ocfs2_dinode_extent_map_truncate,
137 .eo_insert_check = ocfs2_dinode_insert_check, 172 .eo_insert_check = ocfs2_dinode_insert_check,
138 .eo_sanity_check = ocfs2_dinode_sanity_check, 173 .eo_sanity_check = ocfs2_dinode_sanity_check,
139 .eo_fill_root_el = ocfs2_dinode_fill_root_el, 174 .eo_fill_root_el = ocfs2_dinode_fill_root_el,
@@ -156,40 +191,53 @@ static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et)
156 return le64_to_cpu(di->i_last_eb_blk); 191 return le64_to_cpu(di->i_last_eb_blk);
157} 192}
158 193
159static void ocfs2_dinode_update_clusters(struct inode *inode, 194static void ocfs2_dinode_update_clusters(struct ocfs2_extent_tree *et,
160 struct ocfs2_extent_tree *et,
161 u32 clusters) 195 u32 clusters)
162{ 196{
197 struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
163 struct ocfs2_dinode *di = et->et_object; 198 struct ocfs2_dinode *di = et->et_object;
164 199
165 le32_add_cpu(&di->i_clusters, clusters); 200 le32_add_cpu(&di->i_clusters, clusters);
166 spin_lock(&OCFS2_I(inode)->ip_lock); 201 spin_lock(&oi->ip_lock);
167 OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); 202 oi->ip_clusters = le32_to_cpu(di->i_clusters);
168 spin_unlock(&OCFS2_I(inode)->ip_lock); 203 spin_unlock(&oi->ip_lock);
169} 204}
170 205
171static int ocfs2_dinode_insert_check(struct inode *inode, 206static void ocfs2_dinode_extent_map_insert(struct ocfs2_extent_tree *et,
172 struct ocfs2_extent_tree *et, 207 struct ocfs2_extent_rec *rec)
208{
209 struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
210
211 ocfs2_extent_map_insert_rec(inode, rec);
212}
213
214static void ocfs2_dinode_extent_map_truncate(struct ocfs2_extent_tree *et,
215 u32 clusters)
216{
217 struct inode *inode = &cache_info_to_inode(et->et_ci)->vfs_inode;
218
219 ocfs2_extent_map_trunc(inode, clusters);
220}
221
222static int ocfs2_dinode_insert_check(struct ocfs2_extent_tree *et,
173 struct ocfs2_extent_rec *rec) 223 struct ocfs2_extent_rec *rec)
174{ 224{
175 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 225 struct ocfs2_inode_info *oi = cache_info_to_inode(et->et_ci);
226 struct ocfs2_super *osb = OCFS2_SB(oi->vfs_inode.i_sb);
176 227
177 BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); 228 BUG_ON(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL);
178 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && 229 mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) &&
179 (OCFS2_I(inode)->ip_clusters != 230 (oi->ip_clusters != le32_to_cpu(rec->e_cpos)),
180 le32_to_cpu(rec->e_cpos)),
181 "Device %s, asking for sparse allocation: inode %llu, " 231 "Device %s, asking for sparse allocation: inode %llu, "
182 "cpos %u, clusters %u\n", 232 "cpos %u, clusters %u\n",
183 osb->dev_str, 233 osb->dev_str,
184 (unsigned long long)OCFS2_I(inode)->ip_blkno, 234 (unsigned long long)oi->ip_blkno,
185 rec->e_cpos, 235 rec->e_cpos, oi->ip_clusters);
186 OCFS2_I(inode)->ip_clusters);
187 236
188 return 0; 237 return 0;
189} 238}
190 239
191static int ocfs2_dinode_sanity_check(struct inode *inode, 240static int ocfs2_dinode_sanity_check(struct ocfs2_extent_tree *et)
192 struct ocfs2_extent_tree *et)
193{ 241{
194 struct ocfs2_dinode *di = et->et_object; 242 struct ocfs2_dinode *di = et->et_object;
195 243
@@ -229,8 +277,7 @@ static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et)
229 return le64_to_cpu(vb->vb_xv->xr_last_eb_blk); 277 return le64_to_cpu(vb->vb_xv->xr_last_eb_blk);
230} 278}
231 279
232static void ocfs2_xattr_value_update_clusters(struct inode *inode, 280static void ocfs2_xattr_value_update_clusters(struct ocfs2_extent_tree *et,
233 struct ocfs2_extent_tree *et,
234 u32 clusters) 281 u32 clusters)
235{ 282{
236 struct ocfs2_xattr_value_buf *vb = et->et_object; 283 struct ocfs2_xattr_value_buf *vb = et->et_object;
@@ -252,12 +299,11 @@ static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et)
252 et->et_root_el = &xb->xb_attrs.xb_root.xt_list; 299 et->et_root_el = &xb->xb_attrs.xb_root.xt_list;
253} 300}
254 301
255static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode, 302static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct ocfs2_extent_tree *et)
256 struct ocfs2_extent_tree *et)
257{ 303{
304 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
258 et->et_max_leaf_clusters = 305 et->et_max_leaf_clusters =
259 ocfs2_clusters_for_bytes(inode->i_sb, 306 ocfs2_clusters_for_bytes(sb, OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
260 OCFS2_MAX_XATTR_TREE_LEAF_SIZE);
261} 307}
262 308
263static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, 309static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
@@ -277,8 +323,7 @@ static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
277 return le64_to_cpu(xt->xt_last_eb_blk); 323 return le64_to_cpu(xt->xt_last_eb_blk);
278} 324}
279 325
280static void ocfs2_xattr_tree_update_clusters(struct inode *inode, 326static void ocfs2_xattr_tree_update_clusters(struct ocfs2_extent_tree *et,
281 struct ocfs2_extent_tree *et,
282 u32 clusters) 327 u32 clusters)
283{ 328{
284 struct ocfs2_xattr_block *xb = et->et_object; 329 struct ocfs2_xattr_block *xb = et->et_object;
@@ -309,8 +354,7 @@ static u64 ocfs2_dx_root_get_last_eb_blk(struct ocfs2_extent_tree *et)
309 return le64_to_cpu(dx_root->dr_last_eb_blk); 354 return le64_to_cpu(dx_root->dr_last_eb_blk);
310} 355}
311 356
312static void ocfs2_dx_root_update_clusters(struct inode *inode, 357static void ocfs2_dx_root_update_clusters(struct ocfs2_extent_tree *et,
313 struct ocfs2_extent_tree *et,
314 u32 clusters) 358 u32 clusters)
315{ 359{
316 struct ocfs2_dx_root_block *dx_root = et->et_object; 360 struct ocfs2_dx_root_block *dx_root = et->et_object;
@@ -318,8 +362,7 @@ static void ocfs2_dx_root_update_clusters(struct inode *inode,
318 le32_add_cpu(&dx_root->dr_clusters, clusters); 362 le32_add_cpu(&dx_root->dr_clusters, clusters);
319} 363}
320 364
321static int ocfs2_dx_root_sanity_check(struct inode *inode, 365static int ocfs2_dx_root_sanity_check(struct ocfs2_extent_tree *et)
322 struct ocfs2_extent_tree *et)
323{ 366{
324 struct ocfs2_dx_root_block *dx_root = et->et_object; 367 struct ocfs2_dx_root_block *dx_root = et->et_object;
325 368
@@ -343,8 +386,54 @@ static struct ocfs2_extent_tree_operations ocfs2_dx_root_et_ops = {
343 .eo_fill_root_el = ocfs2_dx_root_fill_root_el, 386 .eo_fill_root_el = ocfs2_dx_root_fill_root_el,
344}; 387};
345 388
389static void ocfs2_refcount_tree_fill_root_el(struct ocfs2_extent_tree *et)
390{
391 struct ocfs2_refcount_block *rb = et->et_object;
392
393 et->et_root_el = &rb->rf_list;
394}
395
396static void ocfs2_refcount_tree_set_last_eb_blk(struct ocfs2_extent_tree *et,
397 u64 blkno)
398{
399 struct ocfs2_refcount_block *rb = et->et_object;
400
401 rb->rf_last_eb_blk = cpu_to_le64(blkno);
402}
403
404static u64 ocfs2_refcount_tree_get_last_eb_blk(struct ocfs2_extent_tree *et)
405{
406 struct ocfs2_refcount_block *rb = et->et_object;
407
408 return le64_to_cpu(rb->rf_last_eb_blk);
409}
410
411static void ocfs2_refcount_tree_update_clusters(struct ocfs2_extent_tree *et,
412 u32 clusters)
413{
414 struct ocfs2_refcount_block *rb = et->et_object;
415
416 le32_add_cpu(&rb->rf_clusters, clusters);
417}
418
419static enum ocfs2_contig_type
420ocfs2_refcount_tree_extent_contig(struct ocfs2_extent_tree *et,
421 struct ocfs2_extent_rec *ext,
422 struct ocfs2_extent_rec *insert_rec)
423{
424 return CONTIG_NONE;
425}
426
427static struct ocfs2_extent_tree_operations ocfs2_refcount_tree_et_ops = {
428 .eo_set_last_eb_blk = ocfs2_refcount_tree_set_last_eb_blk,
429 .eo_get_last_eb_blk = ocfs2_refcount_tree_get_last_eb_blk,
430 .eo_update_clusters = ocfs2_refcount_tree_update_clusters,
431 .eo_fill_root_el = ocfs2_refcount_tree_fill_root_el,
432 .eo_extent_contig = ocfs2_refcount_tree_extent_contig,
433};
434
346static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, 435static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
347 struct inode *inode, 436 struct ocfs2_caching_info *ci,
348 struct buffer_head *bh, 437 struct buffer_head *bh,
349 ocfs2_journal_access_func access, 438 ocfs2_journal_access_func access,
350 void *obj, 439 void *obj,
@@ -352,6 +441,7 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
352{ 441{
353 et->et_ops = ops; 442 et->et_ops = ops;
354 et->et_root_bh = bh; 443 et->et_root_bh = bh;
444 et->et_ci = ci;
355 et->et_root_journal_access = access; 445 et->et_root_journal_access = access;
356 if (!obj) 446 if (!obj)
357 obj = (void *)bh->b_data; 447 obj = (void *)bh->b_data;
@@ -361,41 +451,49 @@ static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et,
361 if (!et->et_ops->eo_fill_max_leaf_clusters) 451 if (!et->et_ops->eo_fill_max_leaf_clusters)
362 et->et_max_leaf_clusters = 0; 452 et->et_max_leaf_clusters = 0;
363 else 453 else
364 et->et_ops->eo_fill_max_leaf_clusters(inode, et); 454 et->et_ops->eo_fill_max_leaf_clusters(et);
365} 455}
366 456
367void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, 457void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
368 struct inode *inode, 458 struct ocfs2_caching_info *ci,
369 struct buffer_head *bh) 459 struct buffer_head *bh)
370{ 460{
371 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_di, 461 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_di,
372 NULL, &ocfs2_dinode_et_ops); 462 NULL, &ocfs2_dinode_et_ops);
373} 463}
374 464
375void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, 465void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
376 struct inode *inode, 466 struct ocfs2_caching_info *ci,
377 struct buffer_head *bh) 467 struct buffer_head *bh)
378{ 468{
379 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_xb, 469 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_xb,
380 NULL, &ocfs2_xattr_tree_et_ops); 470 NULL, &ocfs2_xattr_tree_et_ops);
381} 471}
382 472
383void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, 473void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
384 struct inode *inode, 474 struct ocfs2_caching_info *ci,
385 struct ocfs2_xattr_value_buf *vb) 475 struct ocfs2_xattr_value_buf *vb)
386{ 476{
387 __ocfs2_init_extent_tree(et, inode, vb->vb_bh, vb->vb_access, vb, 477 __ocfs2_init_extent_tree(et, ci, vb->vb_bh, vb->vb_access, vb,
388 &ocfs2_xattr_value_et_ops); 478 &ocfs2_xattr_value_et_ops);
389} 479}
390 480
391void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, 481void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
392 struct inode *inode, 482 struct ocfs2_caching_info *ci,
393 struct buffer_head *bh) 483 struct buffer_head *bh)
394{ 484{
395 __ocfs2_init_extent_tree(et, inode, bh, ocfs2_journal_access_dr, 485 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_dr,
396 NULL, &ocfs2_dx_root_et_ops); 486 NULL, &ocfs2_dx_root_et_ops);
397} 487}
398 488
489void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
490 struct ocfs2_caching_info *ci,
491 struct buffer_head *bh)
492{
493 __ocfs2_init_extent_tree(et, ci, bh, ocfs2_journal_access_rb,
494 NULL, &ocfs2_refcount_tree_et_ops);
495}
496
399static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, 497static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et,
400 u64 new_last_eb_blk) 498 u64 new_last_eb_blk)
401{ 499{
@@ -407,78 +505,71 @@ static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et)
407 return et->et_ops->eo_get_last_eb_blk(et); 505 return et->et_ops->eo_get_last_eb_blk(et);
408} 506}
409 507
410static inline void ocfs2_et_update_clusters(struct inode *inode, 508static inline void ocfs2_et_update_clusters(struct ocfs2_extent_tree *et,
411 struct ocfs2_extent_tree *et,
412 u32 clusters) 509 u32 clusters)
413{ 510{
414 et->et_ops->eo_update_clusters(inode, et, clusters); 511 et->et_ops->eo_update_clusters(et, clusters);
512}
513
514static inline void ocfs2_et_extent_map_insert(struct ocfs2_extent_tree *et,
515 struct ocfs2_extent_rec *rec)
516{
517 if (et->et_ops->eo_extent_map_insert)
518 et->et_ops->eo_extent_map_insert(et, rec);
519}
520
521static inline void ocfs2_et_extent_map_truncate(struct ocfs2_extent_tree *et,
522 u32 clusters)
523{
524 if (et->et_ops->eo_extent_map_truncate)
525 et->et_ops->eo_extent_map_truncate(et, clusters);
415} 526}
416 527
417static inline int ocfs2_et_root_journal_access(handle_t *handle, 528static inline int ocfs2_et_root_journal_access(handle_t *handle,
418 struct inode *inode,
419 struct ocfs2_extent_tree *et, 529 struct ocfs2_extent_tree *et,
420 int type) 530 int type)
421{ 531{
422 return et->et_root_journal_access(handle, inode, et->et_root_bh, 532 return et->et_root_journal_access(handle, et->et_ci, et->et_root_bh,
423 type); 533 type);
424} 534}
425 535
426static inline int ocfs2_et_insert_check(struct inode *inode, 536static inline enum ocfs2_contig_type
427 struct ocfs2_extent_tree *et, 537 ocfs2_et_extent_contig(struct ocfs2_extent_tree *et,
538 struct ocfs2_extent_rec *rec,
539 struct ocfs2_extent_rec *insert_rec)
540{
541 if (et->et_ops->eo_extent_contig)
542 return et->et_ops->eo_extent_contig(et, rec, insert_rec);
543
544 return ocfs2_extent_rec_contig(
545 ocfs2_metadata_cache_get_super(et->et_ci),
546 rec, insert_rec);
547}
548
549static inline int ocfs2_et_insert_check(struct ocfs2_extent_tree *et,
428 struct ocfs2_extent_rec *rec) 550 struct ocfs2_extent_rec *rec)
429{ 551{
430 int ret = 0; 552 int ret = 0;
431 553
432 if (et->et_ops->eo_insert_check) 554 if (et->et_ops->eo_insert_check)
433 ret = et->et_ops->eo_insert_check(inode, et, rec); 555 ret = et->et_ops->eo_insert_check(et, rec);
434 return ret; 556 return ret;
435} 557}
436 558
437static inline int ocfs2_et_sanity_check(struct inode *inode, 559static inline int ocfs2_et_sanity_check(struct ocfs2_extent_tree *et)
438 struct ocfs2_extent_tree *et)
439{ 560{
440 int ret = 0; 561 int ret = 0;
441 562
442 if (et->et_ops->eo_sanity_check) 563 if (et->et_ops->eo_sanity_check)
443 ret = et->et_ops->eo_sanity_check(inode, et); 564 ret = et->et_ops->eo_sanity_check(et);
444 return ret; 565 return ret;
445} 566}
446 567
447static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); 568static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc);
448static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, 569static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt,
449 struct ocfs2_extent_block *eb); 570 struct ocfs2_extent_block *eb);
450 571static void ocfs2_adjust_rightmost_records(handle_t *handle,
451/* 572 struct ocfs2_extent_tree *et,
452 * Structures which describe a path through a btree, and functions to
453 * manipulate them.
454 *
455 * The idea here is to be as generic as possible with the tree
456 * manipulation code.
457 */
458struct ocfs2_path_item {
459 struct buffer_head *bh;
460 struct ocfs2_extent_list *el;
461};
462
463#define OCFS2_MAX_PATH_DEPTH 5
464
465struct ocfs2_path {
466 int p_tree_depth;
467 ocfs2_journal_access_func p_root_access;
468 struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
469};
470
471#define path_root_bh(_path) ((_path)->p_node[0].bh)
472#define path_root_el(_path) ((_path)->p_node[0].el)
473#define path_root_access(_path)((_path)->p_root_access)
474#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
475#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
476#define path_num_items(_path) ((_path)->p_tree_depth + 1)
477
478static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path,
479 u32 cpos);
480static void ocfs2_adjust_rightmost_records(struct inode *inode,
481 handle_t *handle,
482 struct ocfs2_path *path, 573 struct ocfs2_path *path,
483 struct ocfs2_extent_rec *insert_rec); 574 struct ocfs2_extent_rec *insert_rec);
484/* 575/*
@@ -486,7 +577,7 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
486 * to build another path. Generally, this involves freeing the buffer 577 * to build another path. Generally, this involves freeing the buffer
487 * heads. 578 * heads.
488 */ 579 */
489static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root) 580void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
490{ 581{
491 int i, start = 0, depth = 0; 582 int i, start = 0, depth = 0;
492 struct ocfs2_path_item *node; 583 struct ocfs2_path_item *node;
@@ -515,7 +606,7 @@ static void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root)
515 path->p_tree_depth = depth; 606 path->p_tree_depth = depth;
516} 607}
517 608
518static void ocfs2_free_path(struct ocfs2_path *path) 609void ocfs2_free_path(struct ocfs2_path *path)
519{ 610{
520 if (path) { 611 if (path) {
521 ocfs2_reinit_path(path, 0); 612 ocfs2_reinit_path(path, 0);
@@ -613,13 +704,13 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh,
613 return path; 704 return path;
614} 705}
615 706
616static struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path) 707struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path)
617{ 708{
618 return ocfs2_new_path(path_root_bh(path), path_root_el(path), 709 return ocfs2_new_path(path_root_bh(path), path_root_el(path),
619 path_root_access(path)); 710 path_root_access(path));
620} 711}
621 712
622static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et) 713struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
623{ 714{
624 return ocfs2_new_path(et->et_root_bh, et->et_root_el, 715 return ocfs2_new_path(et->et_root_bh, et->et_root_el,
625 et->et_root_journal_access); 716 et->et_root_journal_access);
@@ -632,10 +723,10 @@ static struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et)
632 * I don't like the way this function's name looks next to 723 * I don't like the way this function's name looks next to
633 * ocfs2_journal_access_path(), but I don't have a better one. 724 * ocfs2_journal_access_path(), but I don't have a better one.
634 */ 725 */
635static int ocfs2_path_bh_journal_access(handle_t *handle, 726int ocfs2_path_bh_journal_access(handle_t *handle,
636 struct inode *inode, 727 struct ocfs2_caching_info *ci,
637 struct ocfs2_path *path, 728 struct ocfs2_path *path,
638 int idx) 729 int idx)
639{ 730{
640 ocfs2_journal_access_func access = path_root_access(path); 731 ocfs2_journal_access_func access = path_root_access(path);
641 732
@@ -645,15 +736,16 @@ static int ocfs2_path_bh_journal_access(handle_t *handle,
645 if (idx) 736 if (idx)
646 access = ocfs2_journal_access_eb; 737 access = ocfs2_journal_access_eb;
647 738
648 return access(handle, inode, path->p_node[idx].bh, 739 return access(handle, ci, path->p_node[idx].bh,
649 OCFS2_JOURNAL_ACCESS_WRITE); 740 OCFS2_JOURNAL_ACCESS_WRITE);
650} 741}
651 742
652/* 743/*
653 * Convenience function to journal all components in a path. 744 * Convenience function to journal all components in a path.
654 */ 745 */
655static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, 746int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
656 struct ocfs2_path *path) 747 handle_t *handle,
748 struct ocfs2_path *path)
657{ 749{
658 int i, ret = 0; 750 int i, ret = 0;
659 751
@@ -661,7 +753,7 @@ static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle,
661 goto out; 753 goto out;
662 754
663 for(i = 0; i < path_num_items(path); i++) { 755 for(i = 0; i < path_num_items(path); i++) {
664 ret = ocfs2_path_bh_journal_access(handle, inode, path, i); 756 ret = ocfs2_path_bh_journal_access(handle, ci, path, i);
665 if (ret < 0) { 757 if (ret < 0) {
666 mlog_errno(ret); 758 mlog_errno(ret);
667 goto out; 759 goto out;
@@ -702,17 +794,9 @@ int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster)
702 return ret; 794 return ret;
703} 795}
704 796
705enum ocfs2_contig_type {
706 CONTIG_NONE = 0,
707 CONTIG_LEFT,
708 CONTIG_RIGHT,
709 CONTIG_LEFTRIGHT,
710};
711
712
713/* 797/*
714 * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and 798 * NOTE: ocfs2_block_extent_contig(), ocfs2_extents_adjacent() and
715 * ocfs2_extent_contig only work properly against leaf nodes! 799 * ocfs2_extent_rec_contig only work properly against leaf nodes!
716 */ 800 */
717static int ocfs2_block_extent_contig(struct super_block *sb, 801static int ocfs2_block_extent_contig(struct super_block *sb,
718 struct ocfs2_extent_rec *ext, 802 struct ocfs2_extent_rec *ext,
@@ -738,9 +822,9 @@ static int ocfs2_extents_adjacent(struct ocfs2_extent_rec *left,
738} 822}
739 823
740static enum ocfs2_contig_type 824static enum ocfs2_contig_type
741 ocfs2_extent_contig(struct inode *inode, 825 ocfs2_extent_rec_contig(struct super_block *sb,
742 struct ocfs2_extent_rec *ext, 826 struct ocfs2_extent_rec *ext,
743 struct ocfs2_extent_rec *insert_rec) 827 struct ocfs2_extent_rec *insert_rec)
744{ 828{
745 u64 blkno = le64_to_cpu(insert_rec->e_blkno); 829 u64 blkno = le64_to_cpu(insert_rec->e_blkno);
746 830
@@ -753,12 +837,12 @@ static enum ocfs2_contig_type
753 return CONTIG_NONE; 837 return CONTIG_NONE;
754 838
755 if (ocfs2_extents_adjacent(ext, insert_rec) && 839 if (ocfs2_extents_adjacent(ext, insert_rec) &&
756 ocfs2_block_extent_contig(inode->i_sb, ext, blkno)) 840 ocfs2_block_extent_contig(sb, ext, blkno))
757 return CONTIG_RIGHT; 841 return CONTIG_RIGHT;
758 842
759 blkno = le64_to_cpu(ext->e_blkno); 843 blkno = le64_to_cpu(ext->e_blkno);
760 if (ocfs2_extents_adjacent(insert_rec, ext) && 844 if (ocfs2_extents_adjacent(insert_rec, ext) &&
761 ocfs2_block_extent_contig(inode->i_sb, insert_rec, blkno)) 845 ocfs2_block_extent_contig(sb, insert_rec, blkno))
762 return CONTIG_LEFT; 846 return CONTIG_LEFT;
763 847
764 return CONTIG_NONE; 848 return CONTIG_NONE;
@@ -853,13 +937,13 @@ static int ocfs2_validate_extent_block(struct super_block *sb,
853 return 0; 937 return 0;
854} 938}
855 939
856int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, 940int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
857 struct buffer_head **bh) 941 struct buffer_head **bh)
858{ 942{
859 int rc; 943 int rc;
860 struct buffer_head *tmp = *bh; 944 struct buffer_head *tmp = *bh;
861 945
862 rc = ocfs2_read_block(inode, eb_blkno, &tmp, 946 rc = ocfs2_read_block(ci, eb_blkno, &tmp,
863 ocfs2_validate_extent_block); 947 ocfs2_validate_extent_block);
864 948
865 /* If ocfs2_read_block() got us a new bh, pass it up. */ 949 /* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -874,7 +958,6 @@ int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno,
874 * How many free extents have we got before we need more meta data? 958 * How many free extents have we got before we need more meta data?
875 */ 959 */
876int ocfs2_num_free_extents(struct ocfs2_super *osb, 960int ocfs2_num_free_extents(struct ocfs2_super *osb,
877 struct inode *inode,
878 struct ocfs2_extent_tree *et) 961 struct ocfs2_extent_tree *et)
879{ 962{
880 int retval; 963 int retval;
@@ -889,7 +972,8 @@ int ocfs2_num_free_extents(struct ocfs2_super *osb,
889 last_eb_blk = ocfs2_et_get_last_eb_blk(et); 972 last_eb_blk = ocfs2_et_get_last_eb_blk(et);
890 973
891 if (last_eb_blk) { 974 if (last_eb_blk) {
892 retval = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); 975 retval = ocfs2_read_extent_block(et->et_ci, last_eb_blk,
976 &eb_bh);
893 if (retval < 0) { 977 if (retval < 0) {
894 mlog_errno(retval); 978 mlog_errno(retval);
895 goto bail; 979 goto bail;
@@ -913,9 +997,8 @@ bail:
913 * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and 997 * sets h_signature, h_blkno, h_suballoc_bit, h_suballoc_slot, and
914 * l_count for you 998 * l_count for you
915 */ 999 */
916static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, 1000static int ocfs2_create_new_meta_bhs(handle_t *handle,
917 handle_t *handle, 1001 struct ocfs2_extent_tree *et,
918 struct inode *inode,
919 int wanted, 1002 int wanted,
920 struct ocfs2_alloc_context *meta_ac, 1003 struct ocfs2_alloc_context *meta_ac,
921 struct buffer_head *bhs[]) 1004 struct buffer_head *bhs[])
@@ -924,6 +1007,8 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
924 u16 suballoc_bit_start; 1007 u16 suballoc_bit_start;
925 u32 num_got; 1008 u32 num_got;
926 u64 first_blkno; 1009 u64 first_blkno;
1010 struct ocfs2_super *osb =
1011 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
927 struct ocfs2_extent_block *eb; 1012 struct ocfs2_extent_block *eb;
928 1013
929 mlog_entry_void(); 1014 mlog_entry_void();
@@ -949,9 +1034,10 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb,
949 mlog_errno(status); 1034 mlog_errno(status);
950 goto bail; 1035 goto bail;
951 } 1036 }
952 ocfs2_set_new_buffer_uptodate(inode, bhs[i]); 1037 ocfs2_set_new_buffer_uptodate(et->et_ci, bhs[i]);
953 1038
954 status = ocfs2_journal_access_eb(handle, inode, bhs[i], 1039 status = ocfs2_journal_access_eb(handle, et->et_ci,
1040 bhs[i],
955 OCFS2_JOURNAL_ACCESS_CREATE); 1041 OCFS2_JOURNAL_ACCESS_CREATE);
956 if (status < 0) { 1042 if (status < 0) {
957 mlog_errno(status); 1043 mlog_errno(status);
@@ -1023,7 +1109,6 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el)
1023 * extent block's rightmost record. 1109 * extent block's rightmost record.
1024 */ 1110 */
1025static int ocfs2_adjust_rightmost_branch(handle_t *handle, 1111static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1026 struct inode *inode,
1027 struct ocfs2_extent_tree *et) 1112 struct ocfs2_extent_tree *et)
1028{ 1113{
1029 int status; 1114 int status;
@@ -1037,7 +1122,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1037 return status; 1122 return status;
1038 } 1123 }
1039 1124
1040 status = ocfs2_find_path(inode, path, UINT_MAX); 1125 status = ocfs2_find_path(et->et_ci, path, UINT_MAX);
1041 if (status < 0) { 1126 if (status < 0) {
1042 mlog_errno(status); 1127 mlog_errno(status);
1043 goto out; 1128 goto out;
@@ -1050,7 +1135,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1050 goto out; 1135 goto out;
1051 } 1136 }
1052 1137
1053 status = ocfs2_journal_access_path(inode, handle, path); 1138 status = ocfs2_journal_access_path(et->et_ci, handle, path);
1054 if (status < 0) { 1139 if (status < 0) {
1055 mlog_errno(status); 1140 mlog_errno(status);
1056 goto out; 1141 goto out;
@@ -1059,7 +1144,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle,
1059 el = path_leaf_el(path); 1144 el = path_leaf_el(path);
1060 rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; 1145 rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1];
1061 1146
1062 ocfs2_adjust_rightmost_records(inode, handle, path, rec); 1147 ocfs2_adjust_rightmost_records(handle, et, path, rec);
1063 1148
1064out: 1149out:
1065 ocfs2_free_path(path); 1150 ocfs2_free_path(path);
@@ -1068,7 +1153,7 @@ out:
1068 1153
1069/* 1154/*
1070 * Add an entire tree branch to our inode. eb_bh is the extent block 1155 * Add an entire tree branch to our inode. eb_bh is the extent block
1071 * to start at, if we don't want to start the branch at the dinode 1156 * to start at, if we don't want to start the branch at the root
1072 * structure. 1157 * structure.
1073 * 1158 *
1074 * last_eb_bh is required as we have to update it's next_leaf pointer 1159 * last_eb_bh is required as we have to update it's next_leaf pointer
@@ -1077,9 +1162,7 @@ out:
1077 * the new branch will be 'empty' in the sense that every block will 1162 * the new branch will be 'empty' in the sense that every block will
1078 * contain a single record with cluster count == 0. 1163 * contain a single record with cluster count == 0.
1079 */ 1164 */
1080static int ocfs2_add_branch(struct ocfs2_super *osb, 1165static int ocfs2_add_branch(handle_t *handle,
1081 handle_t *handle,
1082 struct inode *inode,
1083 struct ocfs2_extent_tree *et, 1166 struct ocfs2_extent_tree *et,
1084 struct buffer_head *eb_bh, 1167 struct buffer_head *eb_bh,
1085 struct buffer_head **last_eb_bh, 1168 struct buffer_head **last_eb_bh,
@@ -1123,7 +1206,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1123 if (root_end > new_cpos) { 1206 if (root_end > new_cpos) {
1124 mlog(0, "adjust the cluster end from %u to %u\n", 1207 mlog(0, "adjust the cluster end from %u to %u\n",
1125 root_end, new_cpos); 1208 root_end, new_cpos);
1126 status = ocfs2_adjust_rightmost_branch(handle, inode, et); 1209 status = ocfs2_adjust_rightmost_branch(handle, et);
1127 if (status) { 1210 if (status) {
1128 mlog_errno(status); 1211 mlog_errno(status);
1129 goto bail; 1212 goto bail;
@@ -1139,7 +1222,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1139 goto bail; 1222 goto bail;
1140 } 1223 }
1141 1224
1142 status = ocfs2_create_new_meta_bhs(osb, handle, inode, new_blocks, 1225 status = ocfs2_create_new_meta_bhs(handle, et, new_blocks,
1143 meta_ac, new_eb_bhs); 1226 meta_ac, new_eb_bhs);
1144 if (status < 0) { 1227 if (status < 0) {
1145 mlog_errno(status); 1228 mlog_errno(status);
@@ -1161,7 +1244,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1161 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb)); 1244 BUG_ON(!OCFS2_IS_VALID_EXTENT_BLOCK(eb));
1162 eb_el = &eb->h_list; 1245 eb_el = &eb->h_list;
1163 1246
1164 status = ocfs2_journal_access_eb(handle, inode, bh, 1247 status = ocfs2_journal_access_eb(handle, et->et_ci, bh,
1165 OCFS2_JOURNAL_ACCESS_CREATE); 1248 OCFS2_JOURNAL_ACCESS_CREATE);
1166 if (status < 0) { 1249 if (status < 0) {
1167 mlog_errno(status); 1250 mlog_errno(status);
@@ -1201,20 +1284,20 @@ static int ocfs2_add_branch(struct ocfs2_super *osb,
1201 * journal_dirty erroring as it won't unless we've aborted the 1284 * journal_dirty erroring as it won't unless we've aborted the
1202 * handle (in which case we would never be here) so reserving 1285 * handle (in which case we would never be here) so reserving
1203 * the write with journal_access is all we need to do. */ 1286 * the write with journal_access is all we need to do. */
1204 status = ocfs2_journal_access_eb(handle, inode, *last_eb_bh, 1287 status = ocfs2_journal_access_eb(handle, et->et_ci, *last_eb_bh,
1205 OCFS2_JOURNAL_ACCESS_WRITE); 1288 OCFS2_JOURNAL_ACCESS_WRITE);
1206 if (status < 0) { 1289 if (status < 0) {
1207 mlog_errno(status); 1290 mlog_errno(status);
1208 goto bail; 1291 goto bail;
1209 } 1292 }
1210 status = ocfs2_et_root_journal_access(handle, inode, et, 1293 status = ocfs2_et_root_journal_access(handle, et,
1211 OCFS2_JOURNAL_ACCESS_WRITE); 1294 OCFS2_JOURNAL_ACCESS_WRITE);
1212 if (status < 0) { 1295 if (status < 0) {
1213 mlog_errno(status); 1296 mlog_errno(status);
1214 goto bail; 1297 goto bail;
1215 } 1298 }
1216 if (eb_bh) { 1299 if (eb_bh) {
1217 status = ocfs2_journal_access_eb(handle, inode, eb_bh, 1300 status = ocfs2_journal_access_eb(handle, et->et_ci, eb_bh,
1218 OCFS2_JOURNAL_ACCESS_WRITE); 1301 OCFS2_JOURNAL_ACCESS_WRITE);
1219 if (status < 0) { 1302 if (status < 0) {
1220 mlog_errno(status); 1303 mlog_errno(status);
@@ -1274,9 +1357,7 @@ bail:
1274 * returns back the new extent block so you can add a branch to it 1357 * returns back the new extent block so you can add a branch to it
1275 * after this call. 1358 * after this call.
1276 */ 1359 */
1277static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, 1360static int ocfs2_shift_tree_depth(handle_t *handle,
1278 handle_t *handle,
1279 struct inode *inode,
1280 struct ocfs2_extent_tree *et, 1361 struct ocfs2_extent_tree *et,
1281 struct ocfs2_alloc_context *meta_ac, 1362 struct ocfs2_alloc_context *meta_ac,
1282 struct buffer_head **ret_new_eb_bh) 1363 struct buffer_head **ret_new_eb_bh)
@@ -1290,7 +1371,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1290 1371
1291 mlog_entry_void(); 1372 mlog_entry_void();
1292 1373
1293 status = ocfs2_create_new_meta_bhs(osb, handle, inode, 1, meta_ac, 1374 status = ocfs2_create_new_meta_bhs(handle, et, 1, meta_ac,
1294 &new_eb_bh); 1375 &new_eb_bh);
1295 if (status < 0) { 1376 if (status < 0) {
1296 mlog_errno(status); 1377 mlog_errno(status);
@@ -1304,7 +1385,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1304 eb_el = &eb->h_list; 1385 eb_el = &eb->h_list;
1305 root_el = et->et_root_el; 1386 root_el = et->et_root_el;
1306 1387
1307 status = ocfs2_journal_access_eb(handle, inode, new_eb_bh, 1388 status = ocfs2_journal_access_eb(handle, et->et_ci, new_eb_bh,
1308 OCFS2_JOURNAL_ACCESS_CREATE); 1389 OCFS2_JOURNAL_ACCESS_CREATE);
1309 if (status < 0) { 1390 if (status < 0) {
1310 mlog_errno(status); 1391 mlog_errno(status);
@@ -1323,7 +1404,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb,
1323 goto bail; 1404 goto bail;
1324 } 1405 }
1325 1406
1326 status = ocfs2_et_root_journal_access(handle, inode, et, 1407 status = ocfs2_et_root_journal_access(handle, et,
1327 OCFS2_JOURNAL_ACCESS_WRITE); 1408 OCFS2_JOURNAL_ACCESS_WRITE);
1328 if (status < 0) { 1409 if (status < 0) {
1329 mlog_errno(status); 1410 mlog_errno(status);
@@ -1379,9 +1460,7 @@ bail:
1379 * 1460 *
1380 * return status < 0 indicates an error. 1461 * return status < 0 indicates an error.
1381 */ 1462 */
1382static int ocfs2_find_branch_target(struct ocfs2_super *osb, 1463static int ocfs2_find_branch_target(struct ocfs2_extent_tree *et,
1383 struct inode *inode,
1384 struct ocfs2_extent_tree *et,
1385 struct buffer_head **target_bh) 1464 struct buffer_head **target_bh)
1386{ 1465{
1387 int status = 0, i; 1466 int status = 0, i;
@@ -1399,19 +1478,21 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
1399 1478
1400 while(le16_to_cpu(el->l_tree_depth) > 1) { 1479 while(le16_to_cpu(el->l_tree_depth) > 1) {
1401 if (le16_to_cpu(el->l_next_free_rec) == 0) { 1480 if (le16_to_cpu(el->l_next_free_rec) == 0) {
1402 ocfs2_error(inode->i_sb, "Dinode %llu has empty " 1481 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
1482 "Owner %llu has empty "
1403 "extent list (next_free_rec == 0)", 1483 "extent list (next_free_rec == 0)",
1404 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1484 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
1405 status = -EIO; 1485 status = -EIO;
1406 goto bail; 1486 goto bail;
1407 } 1487 }
1408 i = le16_to_cpu(el->l_next_free_rec) - 1; 1488 i = le16_to_cpu(el->l_next_free_rec) - 1;
1409 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 1489 blkno = le64_to_cpu(el->l_recs[i].e_blkno);
1410 if (!blkno) { 1490 if (!blkno) {
1411 ocfs2_error(inode->i_sb, "Dinode %llu has extent " 1491 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
1492 "Owner %llu has extent "
1412 "list where extent # %d has no physical " 1493 "list where extent # %d has no physical "
1413 "block start", 1494 "block start",
1414 (unsigned long long)OCFS2_I(inode)->ip_blkno, i); 1495 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci), i);
1415 status = -EIO; 1496 status = -EIO;
1416 goto bail; 1497 goto bail;
1417 } 1498 }
@@ -1419,7 +1500,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb,
1419 brelse(bh); 1500 brelse(bh);
1420 bh = NULL; 1501 bh = NULL;
1421 1502
1422 status = ocfs2_read_extent_block(inode, blkno, &bh); 1503 status = ocfs2_read_extent_block(et->et_ci, blkno, &bh);
1423 if (status < 0) { 1504 if (status < 0) {
1424 mlog_errno(status); 1505 mlog_errno(status);
1425 goto bail; 1506 goto bail;
@@ -1460,20 +1541,18 @@ bail:
1460 * 1541 *
1461 * *last_eb_bh will be updated by ocfs2_add_branch(). 1542 * *last_eb_bh will be updated by ocfs2_add_branch().
1462 */ 1543 */
1463static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, 1544static int ocfs2_grow_tree(handle_t *handle, struct ocfs2_extent_tree *et,
1464 struct ocfs2_extent_tree *et, int *final_depth, 1545 int *final_depth, struct buffer_head **last_eb_bh,
1465 struct buffer_head **last_eb_bh,
1466 struct ocfs2_alloc_context *meta_ac) 1546 struct ocfs2_alloc_context *meta_ac)
1467{ 1547{
1468 int ret, shift; 1548 int ret, shift;
1469 struct ocfs2_extent_list *el = et->et_root_el; 1549 struct ocfs2_extent_list *el = et->et_root_el;
1470 int depth = le16_to_cpu(el->l_tree_depth); 1550 int depth = le16_to_cpu(el->l_tree_depth);
1471 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1472 struct buffer_head *bh = NULL; 1551 struct buffer_head *bh = NULL;
1473 1552
1474 BUG_ON(meta_ac == NULL); 1553 BUG_ON(meta_ac == NULL);
1475 1554
1476 shift = ocfs2_find_branch_target(osb, inode, et, &bh); 1555 shift = ocfs2_find_branch_target(et, &bh);
1477 if (shift < 0) { 1556 if (shift < 0) {
1478 ret = shift; 1557 ret = shift;
1479 mlog_errno(ret); 1558 mlog_errno(ret);
@@ -1490,8 +1569,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
1490 /* ocfs2_shift_tree_depth will return us a buffer with 1569 /* ocfs2_shift_tree_depth will return us a buffer with
1491 * the new extent block (so we can pass that to 1570 * the new extent block (so we can pass that to
1492 * ocfs2_add_branch). */ 1571 * ocfs2_add_branch). */
1493 ret = ocfs2_shift_tree_depth(osb, handle, inode, et, 1572 ret = ocfs2_shift_tree_depth(handle, et, meta_ac, &bh);
1494 meta_ac, &bh);
1495 if (ret < 0) { 1573 if (ret < 0) {
1496 mlog_errno(ret); 1574 mlog_errno(ret);
1497 goto out; 1575 goto out;
@@ -1517,7 +1595,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle,
1517 /* call ocfs2_add_branch to add the final part of the tree with 1595 /* call ocfs2_add_branch to add the final part of the tree with
1518 * the new data. */ 1596 * the new data. */
1519 mlog(0, "add branch. bh = %p\n", bh); 1597 mlog(0, "add branch. bh = %p\n", bh);
1520 ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh, 1598 ret = ocfs2_add_branch(handle, et, bh, last_eb_bh,
1521 meta_ac); 1599 meta_ac);
1522 if (ret < 0) { 1600 if (ret < 0) {
1523 mlog_errno(ret); 1601 mlog_errno(ret);
@@ -1687,7 +1765,7 @@ set_and_inc:
1687 * 1765 *
1688 * The array index of the subtree root is passed back. 1766 * The array index of the subtree root is passed back.
1689 */ 1767 */
1690static int ocfs2_find_subtree_root(struct inode *inode, 1768static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
1691 struct ocfs2_path *left, 1769 struct ocfs2_path *left,
1692 struct ocfs2_path *right) 1770 struct ocfs2_path *right)
1693{ 1771{
@@ -1705,10 +1783,10 @@ static int ocfs2_find_subtree_root(struct inode *inode,
1705 * The caller didn't pass two adjacent paths. 1783 * The caller didn't pass two adjacent paths.
1706 */ 1784 */
1707 mlog_bug_on_msg(i > left->p_tree_depth, 1785 mlog_bug_on_msg(i > left->p_tree_depth,
1708 "Inode %lu, left depth %u, right depth %u\n" 1786 "Owner %llu, left depth %u, right depth %u\n"
1709 "left leaf blk %llu, right leaf blk %llu\n", 1787 "left leaf blk %llu, right leaf blk %llu\n",
1710 inode->i_ino, left->p_tree_depth, 1788 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
1711 right->p_tree_depth, 1789 left->p_tree_depth, right->p_tree_depth,
1712 (unsigned long long)path_leaf_bh(left)->b_blocknr, 1790 (unsigned long long)path_leaf_bh(left)->b_blocknr,
1713 (unsigned long long)path_leaf_bh(right)->b_blocknr); 1791 (unsigned long long)path_leaf_bh(right)->b_blocknr);
1714 } while (left->p_node[i].bh->b_blocknr == 1792 } while (left->p_node[i].bh->b_blocknr ==
@@ -1725,7 +1803,7 @@ typedef void (path_insert_t)(void *, struct buffer_head *);
1725 * This code can be called with a cpos larger than the tree, in which 1803 * This code can be called with a cpos larger than the tree, in which
1726 * case it will return the rightmost path. 1804 * case it will return the rightmost path.
1727 */ 1805 */
1728static int __ocfs2_find_path(struct inode *inode, 1806static int __ocfs2_find_path(struct ocfs2_caching_info *ci,
1729 struct ocfs2_extent_list *root_el, u32 cpos, 1807 struct ocfs2_extent_list *root_el, u32 cpos,
1730 path_insert_t *func, void *data) 1808 path_insert_t *func, void *data)
1731{ 1809{
@@ -1736,15 +1814,14 @@ static int __ocfs2_find_path(struct inode *inode,
1736 struct ocfs2_extent_block *eb; 1814 struct ocfs2_extent_block *eb;
1737 struct ocfs2_extent_list *el; 1815 struct ocfs2_extent_list *el;
1738 struct ocfs2_extent_rec *rec; 1816 struct ocfs2_extent_rec *rec;
1739 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1740 1817
1741 el = root_el; 1818 el = root_el;
1742 while (el->l_tree_depth) { 1819 while (el->l_tree_depth) {
1743 if (le16_to_cpu(el->l_next_free_rec) == 0) { 1820 if (le16_to_cpu(el->l_next_free_rec) == 0) {
1744 ocfs2_error(inode->i_sb, 1821 ocfs2_error(ocfs2_metadata_cache_get_super(ci),
1745 "Inode %llu has empty extent list at " 1822 "Owner %llu has empty extent list at "
1746 "depth %u\n", 1823 "depth %u\n",
1747 (unsigned long long)oi->ip_blkno, 1824 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1748 le16_to_cpu(el->l_tree_depth)); 1825 le16_to_cpu(el->l_tree_depth));
1749 ret = -EROFS; 1826 ret = -EROFS;
1750 goto out; 1827 goto out;
@@ -1767,10 +1844,10 @@ static int __ocfs2_find_path(struct inode *inode,
1767 1844
1768 blkno = le64_to_cpu(el->l_recs[i].e_blkno); 1845 blkno = le64_to_cpu(el->l_recs[i].e_blkno);
1769 if (blkno == 0) { 1846 if (blkno == 0) {
1770 ocfs2_error(inode->i_sb, 1847 ocfs2_error(ocfs2_metadata_cache_get_super(ci),
1771 "Inode %llu has bad blkno in extent list " 1848 "Owner %llu has bad blkno in extent list "
1772 "at depth %u (index %d)\n", 1849 "at depth %u (index %d)\n",
1773 (unsigned long long)oi->ip_blkno, 1850 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1774 le16_to_cpu(el->l_tree_depth), i); 1851 le16_to_cpu(el->l_tree_depth), i);
1775 ret = -EROFS; 1852 ret = -EROFS;
1776 goto out; 1853 goto out;
@@ -1778,7 +1855,7 @@ static int __ocfs2_find_path(struct inode *inode,
1778 1855
1779 brelse(bh); 1856 brelse(bh);
1780 bh = NULL; 1857 bh = NULL;
1781 ret = ocfs2_read_extent_block(inode, blkno, &bh); 1858 ret = ocfs2_read_extent_block(ci, blkno, &bh);
1782 if (ret) { 1859 if (ret) {
1783 mlog_errno(ret); 1860 mlog_errno(ret);
1784 goto out; 1861 goto out;
@@ -1789,10 +1866,10 @@ static int __ocfs2_find_path(struct inode *inode,
1789 1866
1790 if (le16_to_cpu(el->l_next_free_rec) > 1867 if (le16_to_cpu(el->l_next_free_rec) >
1791 le16_to_cpu(el->l_count)) { 1868 le16_to_cpu(el->l_count)) {
1792 ocfs2_error(inode->i_sb, 1869 ocfs2_error(ocfs2_metadata_cache_get_super(ci),
1793 "Inode %llu has bad count in extent list " 1870 "Owner %llu has bad count in extent list "
1794 "at block %llu (next free=%u, count=%u)\n", 1871 "at block %llu (next free=%u, count=%u)\n",
1795 (unsigned long long)oi->ip_blkno, 1872 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1796 (unsigned long long)bh->b_blocknr, 1873 (unsigned long long)bh->b_blocknr,
1797 le16_to_cpu(el->l_next_free_rec), 1874 le16_to_cpu(el->l_next_free_rec),
1798 le16_to_cpu(el->l_count)); 1875 le16_to_cpu(el->l_count));
@@ -1836,14 +1913,14 @@ static void find_path_ins(void *data, struct buffer_head *bh)
1836 ocfs2_path_insert_eb(fp->path, fp->index, bh); 1913 ocfs2_path_insert_eb(fp->path, fp->index, bh);
1837 fp->index++; 1914 fp->index++;
1838} 1915}
1839static int ocfs2_find_path(struct inode *inode, struct ocfs2_path *path, 1916int ocfs2_find_path(struct ocfs2_caching_info *ci,
1840 u32 cpos) 1917 struct ocfs2_path *path, u32 cpos)
1841{ 1918{
1842 struct find_path_data data; 1919 struct find_path_data data;
1843 1920
1844 data.index = 1; 1921 data.index = 1;
1845 data.path = path; 1922 data.path = path;
1846 return __ocfs2_find_path(inode, path_root_el(path), cpos, 1923 return __ocfs2_find_path(ci, path_root_el(path), cpos,
1847 find_path_ins, &data); 1924 find_path_ins, &data);
1848} 1925}
1849 1926
@@ -1868,13 +1945,14 @@ static void find_leaf_ins(void *data, struct buffer_head *bh)
1868 * 1945 *
1869 * This function doesn't handle non btree extent lists. 1946 * This function doesn't handle non btree extent lists.
1870 */ 1947 */
1871int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 1948int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
1872 u32 cpos, struct buffer_head **leaf_bh) 1949 struct ocfs2_extent_list *root_el, u32 cpos,
1950 struct buffer_head **leaf_bh)
1873{ 1951{
1874 int ret; 1952 int ret;
1875 struct buffer_head *bh = NULL; 1953 struct buffer_head *bh = NULL;
1876 1954
1877 ret = __ocfs2_find_path(inode, root_el, cpos, find_leaf_ins, &bh); 1955 ret = __ocfs2_find_path(ci, root_el, cpos, find_leaf_ins, &bh);
1878 if (ret) { 1956 if (ret) {
1879 mlog_errno(ret); 1957 mlog_errno(ret);
1880 goto out; 1958 goto out;
@@ -1980,7 +2058,7 @@ static void ocfs2_adjust_root_records(struct ocfs2_extent_list *root_el,
1980 * - When we've adjusted the last extent record in the left path leaf and the 2058 * - When we've adjusted the last extent record in the left path leaf and the
1981 * 1st extent record in the right path leaf during cross extent block merge. 2059 * 1st extent record in the right path leaf during cross extent block merge.
1982 */ 2060 */
1983static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle, 2061static void ocfs2_complete_edge_insert(handle_t *handle,
1984 struct ocfs2_path *left_path, 2062 struct ocfs2_path *left_path,
1985 struct ocfs2_path *right_path, 2063 struct ocfs2_path *right_path,
1986 int subtree_index) 2064 int subtree_index)
@@ -2058,8 +2136,8 @@ static void ocfs2_complete_edge_insert(struct inode *inode, handle_t *handle,
2058 mlog_errno(ret); 2136 mlog_errno(ret);
2059} 2137}
2060 2138
2061static int ocfs2_rotate_subtree_right(struct inode *inode, 2139static int ocfs2_rotate_subtree_right(handle_t *handle,
2062 handle_t *handle, 2140 struct ocfs2_extent_tree *et,
2063 struct ocfs2_path *left_path, 2141 struct ocfs2_path *left_path,
2064 struct ocfs2_path *right_path, 2142 struct ocfs2_path *right_path,
2065 int subtree_index) 2143 int subtree_index)
@@ -2075,10 +2153,10 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2075 left_el = path_leaf_el(left_path); 2153 left_el = path_leaf_el(left_path);
2076 2154
2077 if (left_el->l_next_free_rec != left_el->l_count) { 2155 if (left_el->l_next_free_rec != left_el->l_count) {
2078 ocfs2_error(inode->i_sb, 2156 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
2079 "Inode %llu has non-full interior leaf node %llu" 2157 "Inode %llu has non-full interior leaf node %llu"
2080 "(next free = %u)", 2158 "(next free = %u)",
2081 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2159 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2082 (unsigned long long)left_leaf_bh->b_blocknr, 2160 (unsigned long long)left_leaf_bh->b_blocknr,
2083 le16_to_cpu(left_el->l_next_free_rec)); 2161 le16_to_cpu(left_el->l_next_free_rec));
2084 return -EROFS; 2162 return -EROFS;
@@ -2094,7 +2172,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2094 root_bh = left_path->p_node[subtree_index].bh; 2172 root_bh = left_path->p_node[subtree_index].bh;
2095 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 2173 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
2096 2174
2097 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 2175 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
2098 subtree_index); 2176 subtree_index);
2099 if (ret) { 2177 if (ret) {
2100 mlog_errno(ret); 2178 mlog_errno(ret);
@@ -2102,14 +2180,14 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2102 } 2180 }
2103 2181
2104 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 2182 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
2105 ret = ocfs2_path_bh_journal_access(handle, inode, 2183 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2106 right_path, i); 2184 right_path, i);
2107 if (ret) { 2185 if (ret) {
2108 mlog_errno(ret); 2186 mlog_errno(ret);
2109 goto out; 2187 goto out;
2110 } 2188 }
2111 2189
2112 ret = ocfs2_path_bh_journal_access(handle, inode, 2190 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2113 left_path, i); 2191 left_path, i);
2114 if (ret) { 2192 if (ret) {
2115 mlog_errno(ret); 2193 mlog_errno(ret);
@@ -2123,7 +2201,7 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2123 /* This is a code error, not a disk corruption. */ 2201 /* This is a code error, not a disk corruption. */
2124 mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails " 2202 mlog_bug_on_msg(!right_el->l_next_free_rec, "Inode %llu: Rotate fails "
2125 "because rightmost leaf block %llu is empty\n", 2203 "because rightmost leaf block %llu is empty\n",
2126 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2204 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2127 (unsigned long long)right_leaf_bh->b_blocknr); 2205 (unsigned long long)right_leaf_bh->b_blocknr);
2128 2206
2129 ocfs2_create_empty_extent(right_el); 2207 ocfs2_create_empty_extent(right_el);
@@ -2157,8 +2235,8 @@ static int ocfs2_rotate_subtree_right(struct inode *inode,
2157 goto out; 2235 goto out;
2158 } 2236 }
2159 2237
2160 ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 2238 ocfs2_complete_edge_insert(handle, left_path, right_path,
2161 subtree_index); 2239 subtree_index);
2162 2240
2163out: 2241out:
2164 return ret; 2242 return ret;
@@ -2248,10 +2326,18 @@ static int ocfs2_extend_rotate_transaction(handle_t *handle, int subtree_depth,
2248 int op_credits, 2326 int op_credits,
2249 struct ocfs2_path *path) 2327 struct ocfs2_path *path)
2250{ 2328{
2329 int ret;
2251 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits; 2330 int credits = (path->p_tree_depth - subtree_depth) * 2 + 1 + op_credits;
2252 2331
2253 if (handle->h_buffer_credits < credits) 2332 if (handle->h_buffer_credits < credits) {
2254 return ocfs2_extend_trans(handle, credits); 2333 ret = ocfs2_extend_trans(handle,
2334 credits - handle->h_buffer_credits);
2335 if (ret)
2336 return ret;
2337
2338 if (unlikely(handle->h_buffer_credits < credits))
2339 return ocfs2_extend_trans(handle, credits);
2340 }
2255 2341
2256 return 0; 2342 return 0;
2257} 2343}
@@ -2321,8 +2407,8 @@ static int ocfs2_leftmost_rec_contains(struct ocfs2_extent_list *el, u32 cpos)
2321 * *ret_left_path will contain a valid path which can be passed to 2407 * *ret_left_path will contain a valid path which can be passed to
2322 * ocfs2_insert_path(). 2408 * ocfs2_insert_path().
2323 */ 2409 */
2324static int ocfs2_rotate_tree_right(struct inode *inode, 2410static int ocfs2_rotate_tree_right(handle_t *handle,
2325 handle_t *handle, 2411 struct ocfs2_extent_tree *et,
2326 enum ocfs2_split_type split, 2412 enum ocfs2_split_type split,
2327 u32 insert_cpos, 2413 u32 insert_cpos,
2328 struct ocfs2_path *right_path, 2414 struct ocfs2_path *right_path,
@@ -2331,6 +2417,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2331 int ret, start, orig_credits = handle->h_buffer_credits; 2417 int ret, start, orig_credits = handle->h_buffer_credits;
2332 u32 cpos; 2418 u32 cpos;
2333 struct ocfs2_path *left_path = NULL; 2419 struct ocfs2_path *left_path = NULL;
2420 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
2334 2421
2335 *ret_left_path = NULL; 2422 *ret_left_path = NULL;
2336 2423
@@ -2341,7 +2428,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2341 goto out; 2428 goto out;
2342 } 2429 }
2343 2430
2344 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, &cpos); 2431 ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
2345 if (ret) { 2432 if (ret) {
2346 mlog_errno(ret); 2433 mlog_errno(ret);
2347 goto out; 2434 goto out;
@@ -2379,7 +2466,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2379 mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n", 2466 mlog(0, "Rotating a tree: ins. cpos: %u, left path cpos: %u\n",
2380 insert_cpos, cpos); 2467 insert_cpos, cpos);
2381 2468
2382 ret = ocfs2_find_path(inode, left_path, cpos); 2469 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
2383 if (ret) { 2470 if (ret) {
2384 mlog_errno(ret); 2471 mlog_errno(ret);
2385 goto out; 2472 goto out;
@@ -2387,10 +2474,11 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2387 2474
2388 mlog_bug_on_msg(path_leaf_bh(left_path) == 2475 mlog_bug_on_msg(path_leaf_bh(left_path) ==
2389 path_leaf_bh(right_path), 2476 path_leaf_bh(right_path),
2390 "Inode %lu: error during insert of %u " 2477 "Owner %llu: error during insert of %u "
2391 "(left path cpos %u) results in two identical " 2478 "(left path cpos %u) results in two identical "
2392 "paths ending at %llu\n", 2479 "paths ending at %llu\n",
2393 inode->i_ino, insert_cpos, cpos, 2480 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2481 insert_cpos, cpos,
2394 (unsigned long long) 2482 (unsigned long long)
2395 path_leaf_bh(left_path)->b_blocknr); 2483 path_leaf_bh(left_path)->b_blocknr);
2396 2484
@@ -2416,7 +2504,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2416 goto out_ret_path; 2504 goto out_ret_path;
2417 } 2505 }
2418 2506
2419 start = ocfs2_find_subtree_root(inode, left_path, right_path); 2507 start = ocfs2_find_subtree_root(et, left_path, right_path);
2420 2508
2421 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 2509 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
2422 start, 2510 start,
@@ -2430,7 +2518,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2430 goto out; 2518 goto out;
2431 } 2519 }
2432 2520
2433 ret = ocfs2_rotate_subtree_right(inode, handle, left_path, 2521 ret = ocfs2_rotate_subtree_right(handle, et, left_path,
2434 right_path, start); 2522 right_path, start);
2435 if (ret) { 2523 if (ret) {
2436 mlog_errno(ret); 2524 mlog_errno(ret);
@@ -2462,8 +2550,7 @@ static int ocfs2_rotate_tree_right(struct inode *inode,
2462 */ 2550 */
2463 ocfs2_mv_path(right_path, left_path); 2551 ocfs2_mv_path(right_path, left_path);
2464 2552
2465 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 2553 ret = ocfs2_find_cpos_for_left_leaf(sb, right_path, &cpos);
2466 &cpos);
2467 if (ret) { 2554 if (ret) {
2468 mlog_errno(ret); 2555 mlog_errno(ret);
2469 goto out; 2556 goto out;
@@ -2477,7 +2564,8 @@ out_ret_path:
2477 return ret; 2564 return ret;
2478} 2565}
2479 2566
2480static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle, 2567static int ocfs2_update_edge_lengths(handle_t *handle,
2568 struct ocfs2_extent_tree *et,
2481 int subtree_index, struct ocfs2_path *path) 2569 int subtree_index, struct ocfs2_path *path)
2482{ 2570{
2483 int i, idx, ret; 2571 int i, idx, ret;
@@ -2502,7 +2590,7 @@ static int ocfs2_update_edge_lengths(struct inode *inode, handle_t *handle,
2502 goto out; 2590 goto out;
2503 } 2591 }
2504 2592
2505 ret = ocfs2_journal_access_path(inode, handle, path); 2593 ret = ocfs2_journal_access_path(et->et_ci, handle, path);
2506 if (ret) { 2594 if (ret) {
2507 mlog_errno(ret); 2595 mlog_errno(ret);
2508 goto out; 2596 goto out;
@@ -2532,7 +2620,8 @@ out:
2532 return ret; 2620 return ret;
2533} 2621}
2534 2622
2535static void ocfs2_unlink_path(struct inode *inode, handle_t *handle, 2623static void ocfs2_unlink_path(handle_t *handle,
2624 struct ocfs2_extent_tree *et,
2536 struct ocfs2_cached_dealloc_ctxt *dealloc, 2625 struct ocfs2_cached_dealloc_ctxt *dealloc,
2537 struct ocfs2_path *path, int unlink_start) 2626 struct ocfs2_path *path, int unlink_start)
2538{ 2627{
@@ -2554,12 +2643,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
2554 mlog(ML_ERROR, 2643 mlog(ML_ERROR,
2555 "Inode %llu, attempted to remove extent block " 2644 "Inode %llu, attempted to remove extent block "
2556 "%llu with %u records\n", 2645 "%llu with %u records\n",
2557 (unsigned long long)OCFS2_I(inode)->ip_blkno, 2646 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
2558 (unsigned long long)le64_to_cpu(eb->h_blkno), 2647 (unsigned long long)le64_to_cpu(eb->h_blkno),
2559 le16_to_cpu(el->l_next_free_rec)); 2648 le16_to_cpu(el->l_next_free_rec));
2560 2649
2561 ocfs2_journal_dirty(handle, bh); 2650 ocfs2_journal_dirty(handle, bh);
2562 ocfs2_remove_from_cache(inode, bh); 2651 ocfs2_remove_from_cache(et->et_ci, bh);
2563 continue; 2652 continue;
2564 } 2653 }
2565 2654
@@ -2572,11 +2661,12 @@ static void ocfs2_unlink_path(struct inode *inode, handle_t *handle,
2572 if (ret) 2661 if (ret)
2573 mlog_errno(ret); 2662 mlog_errno(ret);
2574 2663
2575 ocfs2_remove_from_cache(inode, bh); 2664 ocfs2_remove_from_cache(et->et_ci, bh);
2576 } 2665 }
2577} 2666}
2578 2667
2579static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle, 2668static void ocfs2_unlink_subtree(handle_t *handle,
2669 struct ocfs2_extent_tree *et,
2580 struct ocfs2_path *left_path, 2670 struct ocfs2_path *left_path,
2581 struct ocfs2_path *right_path, 2671 struct ocfs2_path *right_path,
2582 int subtree_index, 2672 int subtree_index,
@@ -2607,17 +2697,17 @@ static void ocfs2_unlink_subtree(struct inode *inode, handle_t *handle,
2607 ocfs2_journal_dirty(handle, root_bh); 2697 ocfs2_journal_dirty(handle, root_bh);
2608 ocfs2_journal_dirty(handle, path_leaf_bh(left_path)); 2698 ocfs2_journal_dirty(handle, path_leaf_bh(left_path));
2609 2699
2610 ocfs2_unlink_path(inode, handle, dealloc, right_path, 2700 ocfs2_unlink_path(handle, et, dealloc, right_path,
2611 subtree_index + 1); 2701 subtree_index + 1);
2612} 2702}
2613 2703
2614static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, 2704static int ocfs2_rotate_subtree_left(handle_t *handle,
2705 struct ocfs2_extent_tree *et,
2615 struct ocfs2_path *left_path, 2706 struct ocfs2_path *left_path,
2616 struct ocfs2_path *right_path, 2707 struct ocfs2_path *right_path,
2617 int subtree_index, 2708 int subtree_index,
2618 struct ocfs2_cached_dealloc_ctxt *dealloc, 2709 struct ocfs2_cached_dealloc_ctxt *dealloc,
2619 int *deleted, 2710 int *deleted)
2620 struct ocfs2_extent_tree *et)
2621{ 2711{
2622 int ret, i, del_right_subtree = 0, right_has_empty = 0; 2712 int ret, i, del_right_subtree = 0, right_has_empty = 0;
2623 struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); 2713 struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path);
@@ -2653,7 +2743,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2653 return -EAGAIN; 2743 return -EAGAIN;
2654 2744
2655 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) { 2745 if (le16_to_cpu(right_leaf_el->l_next_free_rec) > 1) {
2656 ret = ocfs2_journal_access_eb(handle, inode, 2746 ret = ocfs2_journal_access_eb(handle, et->et_ci,
2657 path_leaf_bh(right_path), 2747 path_leaf_bh(right_path),
2658 OCFS2_JOURNAL_ACCESS_WRITE); 2748 OCFS2_JOURNAL_ACCESS_WRITE);
2659 if (ret) { 2749 if (ret) {
@@ -2672,7 +2762,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2672 * We have to update i_last_eb_blk during the meta 2762 * We have to update i_last_eb_blk during the meta
2673 * data delete. 2763 * data delete.
2674 */ 2764 */
2675 ret = ocfs2_et_root_journal_access(handle, inode, et, 2765 ret = ocfs2_et_root_journal_access(handle, et,
2676 OCFS2_JOURNAL_ACCESS_WRITE); 2766 OCFS2_JOURNAL_ACCESS_WRITE);
2677 if (ret) { 2767 if (ret) {
2678 mlog_errno(ret); 2768 mlog_errno(ret);
@@ -2688,7 +2778,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2688 */ 2778 */
2689 BUG_ON(right_has_empty && !del_right_subtree); 2779 BUG_ON(right_has_empty && !del_right_subtree);
2690 2780
2691 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 2781 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
2692 subtree_index); 2782 subtree_index);
2693 if (ret) { 2783 if (ret) {
2694 mlog_errno(ret); 2784 mlog_errno(ret);
@@ -2696,14 +2786,14 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2696 } 2786 }
2697 2787
2698 for(i = subtree_index + 1; i < path_num_items(right_path); i++) { 2788 for(i = subtree_index + 1; i < path_num_items(right_path); i++) {
2699 ret = ocfs2_path_bh_journal_access(handle, inode, 2789 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2700 right_path, i); 2790 right_path, i);
2701 if (ret) { 2791 if (ret) {
2702 mlog_errno(ret); 2792 mlog_errno(ret);
2703 goto out; 2793 goto out;
2704 } 2794 }
2705 2795
2706 ret = ocfs2_path_bh_journal_access(handle, inode, 2796 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2707 left_path, i); 2797 left_path, i);
2708 if (ret) { 2798 if (ret) {
2709 mlog_errno(ret); 2799 mlog_errno(ret);
@@ -2740,9 +2830,9 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2740 mlog_errno(ret); 2830 mlog_errno(ret);
2741 2831
2742 if (del_right_subtree) { 2832 if (del_right_subtree) {
2743 ocfs2_unlink_subtree(inode, handle, left_path, right_path, 2833 ocfs2_unlink_subtree(handle, et, left_path, right_path,
2744 subtree_index, dealloc); 2834 subtree_index, dealloc);
2745 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, 2835 ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
2746 left_path); 2836 left_path);
2747 if (ret) { 2837 if (ret) {
2748 mlog_errno(ret); 2838 mlog_errno(ret);
@@ -2766,7 +2856,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle,
2766 2856
2767 *deleted = 1; 2857 *deleted = 1;
2768 } else 2858 } else
2769 ocfs2_complete_edge_insert(inode, handle, left_path, right_path, 2859 ocfs2_complete_edge_insert(handle, left_path, right_path,
2770 subtree_index); 2860 subtree_index);
2771 2861
2772out: 2862out:
@@ -2852,8 +2942,8 @@ out:
2852 return ret; 2942 return ret;
2853} 2943}
2854 2944
2855static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode, 2945static int ocfs2_rotate_rightmost_leaf_left(handle_t *handle,
2856 handle_t *handle, 2946 struct ocfs2_extent_tree *et,
2857 struct ocfs2_path *path) 2947 struct ocfs2_path *path)
2858{ 2948{
2859 int ret; 2949 int ret;
@@ -2863,7 +2953,7 @@ static int ocfs2_rotate_rightmost_leaf_left(struct inode *inode,
2863 if (!ocfs2_is_empty_extent(&el->l_recs[0])) 2953 if (!ocfs2_is_empty_extent(&el->l_recs[0]))
2864 return 0; 2954 return 0;
2865 2955
2866 ret = ocfs2_path_bh_journal_access(handle, inode, path, 2956 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
2867 path_num_items(path) - 1); 2957 path_num_items(path) - 1);
2868 if (ret) { 2958 if (ret) {
2869 mlog_errno(ret); 2959 mlog_errno(ret);
@@ -2880,24 +2970,24 @@ out:
2880 return ret; 2970 return ret;
2881} 2971}
2882 2972
2883static int __ocfs2_rotate_tree_left(struct inode *inode, 2973static int __ocfs2_rotate_tree_left(handle_t *handle,
2884 handle_t *handle, int orig_credits, 2974 struct ocfs2_extent_tree *et,
2975 int orig_credits,
2885 struct ocfs2_path *path, 2976 struct ocfs2_path *path,
2886 struct ocfs2_cached_dealloc_ctxt *dealloc, 2977 struct ocfs2_cached_dealloc_ctxt *dealloc,
2887 struct ocfs2_path **empty_extent_path, 2978 struct ocfs2_path **empty_extent_path)
2888 struct ocfs2_extent_tree *et)
2889{ 2979{
2890 int ret, subtree_root, deleted; 2980 int ret, subtree_root, deleted;
2891 u32 right_cpos; 2981 u32 right_cpos;
2892 struct ocfs2_path *left_path = NULL; 2982 struct ocfs2_path *left_path = NULL;
2893 struct ocfs2_path *right_path = NULL; 2983 struct ocfs2_path *right_path = NULL;
2984 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
2894 2985
2895 BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0]))); 2986 BUG_ON(!ocfs2_is_empty_extent(&(path_leaf_el(path)->l_recs[0])));
2896 2987
2897 *empty_extent_path = NULL; 2988 *empty_extent_path = NULL;
2898 2989
2899 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, path, 2990 ret = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
2900 &right_cpos);
2901 if (ret) { 2991 if (ret) {
2902 mlog_errno(ret); 2992 mlog_errno(ret);
2903 goto out; 2993 goto out;
@@ -2920,13 +3010,13 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2920 } 3010 }
2921 3011
2922 while (right_cpos) { 3012 while (right_cpos) {
2923 ret = ocfs2_find_path(inode, right_path, right_cpos); 3013 ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
2924 if (ret) { 3014 if (ret) {
2925 mlog_errno(ret); 3015 mlog_errno(ret);
2926 goto out; 3016 goto out;
2927 } 3017 }
2928 3018
2929 subtree_root = ocfs2_find_subtree_root(inode, left_path, 3019 subtree_root = ocfs2_find_subtree_root(et, left_path,
2930 right_path); 3020 right_path);
2931 3021
2932 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n", 3022 mlog(0, "Subtree root at index %d (blk %llu, depth %d)\n",
@@ -2946,16 +3036,16 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2946 * Caller might still want to make changes to the 3036 * Caller might still want to make changes to the
2947 * tree root, so re-add it to the journal here. 3037 * tree root, so re-add it to the journal here.
2948 */ 3038 */
2949 ret = ocfs2_path_bh_journal_access(handle, inode, 3039 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
2950 left_path, 0); 3040 left_path, 0);
2951 if (ret) { 3041 if (ret) {
2952 mlog_errno(ret); 3042 mlog_errno(ret);
2953 goto out; 3043 goto out;
2954 } 3044 }
2955 3045
2956 ret = ocfs2_rotate_subtree_left(inode, handle, left_path, 3046 ret = ocfs2_rotate_subtree_left(handle, et, left_path,
2957 right_path, subtree_root, 3047 right_path, subtree_root,
2958 dealloc, &deleted, et); 3048 dealloc, &deleted);
2959 if (ret == -EAGAIN) { 3049 if (ret == -EAGAIN) {
2960 /* 3050 /*
2961 * The rotation has to temporarily stop due to 3051 * The rotation has to temporarily stop due to
@@ -2982,7 +3072,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode,
2982 3072
2983 ocfs2_mv_path(left_path, right_path); 3073 ocfs2_mv_path(left_path, right_path);
2984 3074
2985 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, 3075 ret = ocfs2_find_cpos_for_right_leaf(sb, left_path,
2986 &right_cpos); 3076 &right_cpos);
2987 if (ret) { 3077 if (ret) {
2988 mlog_errno(ret); 3078 mlog_errno(ret);
@@ -2997,10 +3087,10 @@ out:
2997 return ret; 3087 return ret;
2998} 3088}
2999 3089
3000static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, 3090static int ocfs2_remove_rightmost_path(handle_t *handle,
3091 struct ocfs2_extent_tree *et,
3001 struct ocfs2_path *path, 3092 struct ocfs2_path *path,
3002 struct ocfs2_cached_dealloc_ctxt *dealloc, 3093 struct ocfs2_cached_dealloc_ctxt *dealloc)
3003 struct ocfs2_extent_tree *et)
3004{ 3094{
3005 int ret, subtree_index; 3095 int ret, subtree_index;
3006 u32 cpos; 3096 u32 cpos;
@@ -3009,7 +3099,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3009 struct ocfs2_extent_list *el; 3099 struct ocfs2_extent_list *el;
3010 3100
3011 3101
3012 ret = ocfs2_et_sanity_check(inode, et); 3102 ret = ocfs2_et_sanity_check(et);
3013 if (ret) 3103 if (ret)
3014 goto out; 3104 goto out;
3015 /* 3105 /*
@@ -3024,13 +3114,14 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3024 goto out; 3114 goto out;
3025 } 3115 }
3026 3116
3027 ret = ocfs2_journal_access_path(inode, handle, path); 3117 ret = ocfs2_journal_access_path(et->et_ci, handle, path);
3028 if (ret) { 3118 if (ret) {
3029 mlog_errno(ret); 3119 mlog_errno(ret);
3030 goto out; 3120 goto out;
3031 } 3121 }
3032 3122
3033 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, &cpos); 3123 ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3124 path, &cpos);
3034 if (ret) { 3125 if (ret) {
3035 mlog_errno(ret); 3126 mlog_errno(ret);
3036 goto out; 3127 goto out;
@@ -3048,23 +3139,23 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3048 goto out; 3139 goto out;
3049 } 3140 }
3050 3141
3051 ret = ocfs2_find_path(inode, left_path, cpos); 3142 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
3052 if (ret) { 3143 if (ret) {
3053 mlog_errno(ret); 3144 mlog_errno(ret);
3054 goto out; 3145 goto out;
3055 } 3146 }
3056 3147
3057 ret = ocfs2_journal_access_path(inode, handle, left_path); 3148 ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
3058 if (ret) { 3149 if (ret) {
3059 mlog_errno(ret); 3150 mlog_errno(ret);
3060 goto out; 3151 goto out;
3061 } 3152 }
3062 3153
3063 subtree_index = ocfs2_find_subtree_root(inode, left_path, path); 3154 subtree_index = ocfs2_find_subtree_root(et, left_path, path);
3064 3155
3065 ocfs2_unlink_subtree(inode, handle, left_path, path, 3156 ocfs2_unlink_subtree(handle, et, left_path, path,
3066 subtree_index, dealloc); 3157 subtree_index, dealloc);
3067 ret = ocfs2_update_edge_lengths(inode, handle, subtree_index, 3158 ret = ocfs2_update_edge_lengths(handle, et, subtree_index,
3068 left_path); 3159 left_path);
3069 if (ret) { 3160 if (ret) {
3070 mlog_errno(ret); 3161 mlog_errno(ret);
@@ -3078,10 +3169,10 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle,
3078 * 'path' is also the leftmost path which 3169 * 'path' is also the leftmost path which
3079 * means it must be the only one. This gets 3170 * means it must be the only one. This gets
3080 * handled differently because we want to 3171 * handled differently because we want to
3081 * revert the inode back to having extents 3172 * revert the root back to having extents
3082 * in-line. 3173 * in-line.
3083 */ 3174 */
3084 ocfs2_unlink_path(inode, handle, dealloc, path, 1); 3175 ocfs2_unlink_path(handle, et, dealloc, path, 1);
3085 3176
3086 el = et->et_root_el; 3177 el = et->et_root_el;
3087 el->l_tree_depth = 0; 3178 el->l_tree_depth = 0;
@@ -3114,10 +3205,10 @@ out:
3114 * the rightmost tree leaf record is removed so the caller is 3205 * the rightmost tree leaf record is removed so the caller is
3115 * responsible for detecting and correcting that. 3206 * responsible for detecting and correcting that.
3116 */ 3207 */
3117static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, 3208static int ocfs2_rotate_tree_left(handle_t *handle,
3209 struct ocfs2_extent_tree *et,
3118 struct ocfs2_path *path, 3210 struct ocfs2_path *path,
3119 struct ocfs2_cached_dealloc_ctxt *dealloc, 3211 struct ocfs2_cached_dealloc_ctxt *dealloc)
3120 struct ocfs2_extent_tree *et)
3121{ 3212{
3122 int ret, orig_credits = handle->h_buffer_credits; 3213 int ret, orig_credits = handle->h_buffer_credits;
3123 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; 3214 struct ocfs2_path *tmp_path = NULL, *restart_path = NULL;
@@ -3134,8 +3225,7 @@ rightmost_no_delete:
3134 * Inline extents. This is trivially handled, so do 3225 * Inline extents. This is trivially handled, so do
3135 * it up front. 3226 * it up front.
3136 */ 3227 */
3137 ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, 3228 ret = ocfs2_rotate_rightmost_leaf_left(handle, et, path);
3138 path);
3139 if (ret) 3229 if (ret)
3140 mlog_errno(ret); 3230 mlog_errno(ret);
3141 goto out; 3231 goto out;
@@ -3151,7 +3241,7 @@ rightmost_no_delete:
3151 * 3241 *
3152 * 1) is handled via ocfs2_rotate_rightmost_leaf_left() 3242 * 1) is handled via ocfs2_rotate_rightmost_leaf_left()
3153 * 2a) we need the left branch so that we can update it with the unlink 3243 * 2a) we need the left branch so that we can update it with the unlink
3154 * 2b) we need to bring the inode back to inline extents. 3244 * 2b) we need to bring the root back to inline extents.
3155 */ 3245 */
3156 3246
3157 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data; 3247 eb = (struct ocfs2_extent_block *)path_leaf_bh(path)->b_data;
@@ -3167,9 +3257,9 @@ rightmost_no_delete:
3167 3257
3168 if (le16_to_cpu(el->l_next_free_rec) == 0) { 3258 if (le16_to_cpu(el->l_next_free_rec) == 0) {
3169 ret = -EIO; 3259 ret = -EIO;
3170 ocfs2_error(inode->i_sb, 3260 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
3171 "Inode %llu has empty extent block at %llu", 3261 "Owner %llu has empty extent block at %llu",
3172 (unsigned long long)OCFS2_I(inode)->ip_blkno, 3262 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
3173 (unsigned long long)le64_to_cpu(eb->h_blkno)); 3263 (unsigned long long)le64_to_cpu(eb->h_blkno));
3174 goto out; 3264 goto out;
3175 } 3265 }
@@ -3183,8 +3273,8 @@ rightmost_no_delete:
3183 * nonempty list. 3273 * nonempty list.
3184 */ 3274 */
3185 3275
3186 ret = ocfs2_remove_rightmost_path(inode, handle, path, 3276 ret = ocfs2_remove_rightmost_path(handle, et, path,
3187 dealloc, et); 3277 dealloc);
3188 if (ret) 3278 if (ret)
3189 mlog_errno(ret); 3279 mlog_errno(ret);
3190 goto out; 3280 goto out;
@@ -3195,8 +3285,8 @@ rightmost_no_delete:
3195 * and restarting from there. 3285 * and restarting from there.
3196 */ 3286 */
3197try_rotate: 3287try_rotate:
3198 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, 3288 ret = __ocfs2_rotate_tree_left(handle, et, orig_credits, path,
3199 dealloc, &restart_path, et); 3289 dealloc, &restart_path);
3200 if (ret && ret != -EAGAIN) { 3290 if (ret && ret != -EAGAIN) {
3201 mlog_errno(ret); 3291 mlog_errno(ret);
3202 goto out; 3292 goto out;
@@ -3206,9 +3296,9 @@ try_rotate:
3206 tmp_path = restart_path; 3296 tmp_path = restart_path;
3207 restart_path = NULL; 3297 restart_path = NULL;
3208 3298
3209 ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, 3299 ret = __ocfs2_rotate_tree_left(handle, et, orig_credits,
3210 tmp_path, dealloc, 3300 tmp_path, dealloc,
3211 &restart_path, et); 3301 &restart_path);
3212 if (ret && ret != -EAGAIN) { 3302 if (ret && ret != -EAGAIN) {
3213 mlog_errno(ret); 3303 mlog_errno(ret);
3214 goto out; 3304 goto out;
@@ -3259,7 +3349,7 @@ static void ocfs2_cleanup_merge(struct ocfs2_extent_list *el,
3259 } 3349 }
3260} 3350}
3261 3351
3262static int ocfs2_get_right_path(struct inode *inode, 3352static int ocfs2_get_right_path(struct ocfs2_extent_tree *et,
3263 struct ocfs2_path *left_path, 3353 struct ocfs2_path *left_path,
3264 struct ocfs2_path **ret_right_path) 3354 struct ocfs2_path **ret_right_path)
3265{ 3355{
@@ -3276,8 +3366,8 @@ static int ocfs2_get_right_path(struct inode *inode,
3276 left_el = path_leaf_el(left_path); 3366 left_el = path_leaf_el(left_path);
3277 BUG_ON(left_el->l_next_free_rec != left_el->l_count); 3367 BUG_ON(left_el->l_next_free_rec != left_el->l_count);
3278 3368
3279 ret = ocfs2_find_cpos_for_right_leaf(inode->i_sb, left_path, 3369 ret = ocfs2_find_cpos_for_right_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3280 &right_cpos); 3370 left_path, &right_cpos);
3281 if (ret) { 3371 if (ret) {
3282 mlog_errno(ret); 3372 mlog_errno(ret);
3283 goto out; 3373 goto out;
@@ -3293,7 +3383,7 @@ static int ocfs2_get_right_path(struct inode *inode,
3293 goto out; 3383 goto out;
3294 } 3384 }
3295 3385
3296 ret = ocfs2_find_path(inode, right_path, right_cpos); 3386 ret = ocfs2_find_path(et->et_ci, right_path, right_cpos);
3297 if (ret) { 3387 if (ret) {
3298 mlog_errno(ret); 3388 mlog_errno(ret);
3299 goto out; 3389 goto out;
@@ -3313,9 +3403,9 @@ out:
3313 * For index == l_count - 1, the "next" means the 1st extent rec of the 3403 * For index == l_count - 1, the "next" means the 1st extent rec of the
3314 * next extent block. 3404 * next extent block.
3315 */ 3405 */
3316static int ocfs2_merge_rec_right(struct inode *inode, 3406static int ocfs2_merge_rec_right(struct ocfs2_path *left_path,
3317 struct ocfs2_path *left_path,
3318 handle_t *handle, 3407 handle_t *handle,
3408 struct ocfs2_extent_tree *et,
3319 struct ocfs2_extent_rec *split_rec, 3409 struct ocfs2_extent_rec *split_rec,
3320 int index) 3410 int index)
3321{ 3411{
@@ -3336,7 +3426,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3336 if (index == le16_to_cpu(el->l_next_free_rec) - 1 && 3426 if (index == le16_to_cpu(el->l_next_free_rec) - 1 &&
3337 le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) { 3427 le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count)) {
3338 /* we meet with a cross extent block merge. */ 3428 /* we meet with a cross extent block merge. */
3339 ret = ocfs2_get_right_path(inode, left_path, &right_path); 3429 ret = ocfs2_get_right_path(et, left_path, &right_path);
3340 if (ret) { 3430 if (ret) {
3341 mlog_errno(ret); 3431 mlog_errno(ret);
3342 goto out; 3432 goto out;
@@ -3355,8 +3445,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3355 le16_to_cpu(left_rec->e_leaf_clusters) != 3445 le16_to_cpu(left_rec->e_leaf_clusters) !=
3356 le32_to_cpu(right_rec->e_cpos)); 3446 le32_to_cpu(right_rec->e_cpos));
3357 3447
3358 subtree_index = ocfs2_find_subtree_root(inode, 3448 subtree_index = ocfs2_find_subtree_root(et, left_path,
3359 left_path, right_path); 3449 right_path);
3360 3450
3361 ret = ocfs2_extend_rotate_transaction(handle, subtree_index, 3451 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
3362 handle->h_buffer_credits, 3452 handle->h_buffer_credits,
@@ -3369,7 +3459,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3369 root_bh = left_path->p_node[subtree_index].bh; 3459 root_bh = left_path->p_node[subtree_index].bh;
3370 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3460 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3371 3461
3372 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 3462 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
3373 subtree_index); 3463 subtree_index);
3374 if (ret) { 3464 if (ret) {
3375 mlog_errno(ret); 3465 mlog_errno(ret);
@@ -3378,14 +3468,14 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3378 3468
3379 for (i = subtree_index + 1; 3469 for (i = subtree_index + 1;
3380 i < path_num_items(right_path); i++) { 3470 i < path_num_items(right_path); i++) {
3381 ret = ocfs2_path_bh_journal_access(handle, inode, 3471 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3382 right_path, i); 3472 right_path, i);
3383 if (ret) { 3473 if (ret) {
3384 mlog_errno(ret); 3474 mlog_errno(ret);
3385 goto out; 3475 goto out;
3386 } 3476 }
3387 3477
3388 ret = ocfs2_path_bh_journal_access(handle, inode, 3478 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3389 left_path, i); 3479 left_path, i);
3390 if (ret) { 3480 if (ret) {
3391 mlog_errno(ret); 3481 mlog_errno(ret);
@@ -3398,7 +3488,7 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3398 right_rec = &el->l_recs[index + 1]; 3488 right_rec = &el->l_recs[index + 1];
3399 } 3489 }
3400 3490
3401 ret = ocfs2_path_bh_journal_access(handle, inode, left_path, 3491 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, left_path,
3402 path_num_items(left_path) - 1); 3492 path_num_items(left_path) - 1);
3403 if (ret) { 3493 if (ret) {
3404 mlog_errno(ret); 3494 mlog_errno(ret);
@@ -3409,7 +3499,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3409 3499
3410 le32_add_cpu(&right_rec->e_cpos, -split_clusters); 3500 le32_add_cpu(&right_rec->e_cpos, -split_clusters);
3411 le64_add_cpu(&right_rec->e_blkno, 3501 le64_add_cpu(&right_rec->e_blkno,
3412 -ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); 3502 -ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
3503 split_clusters));
3413 le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters); 3504 le16_add_cpu(&right_rec->e_leaf_clusters, split_clusters);
3414 3505
3415 ocfs2_cleanup_merge(el, index); 3506 ocfs2_cleanup_merge(el, index);
@@ -3423,8 +3514,8 @@ static int ocfs2_merge_rec_right(struct inode *inode,
3423 if (ret) 3514 if (ret)
3424 mlog_errno(ret); 3515 mlog_errno(ret);
3425 3516
3426 ocfs2_complete_edge_insert(inode, handle, left_path, 3517 ocfs2_complete_edge_insert(handle, left_path, right_path,
3427 right_path, subtree_index); 3518 subtree_index);
3428 } 3519 }
3429out: 3520out:
3430 if (right_path) 3521 if (right_path)
@@ -3432,7 +3523,7 @@ out:
3432 return ret; 3523 return ret;
3433} 3524}
3434 3525
3435static int ocfs2_get_left_path(struct inode *inode, 3526static int ocfs2_get_left_path(struct ocfs2_extent_tree *et,
3436 struct ocfs2_path *right_path, 3527 struct ocfs2_path *right_path,
3437 struct ocfs2_path **ret_left_path) 3528 struct ocfs2_path **ret_left_path)
3438{ 3529{
@@ -3445,7 +3536,7 @@ static int ocfs2_get_left_path(struct inode *inode,
3445 /* This function shouldn't be called for non-trees. */ 3536 /* This function shouldn't be called for non-trees. */
3446 BUG_ON(right_path->p_tree_depth == 0); 3537 BUG_ON(right_path->p_tree_depth == 0);
3447 3538
3448 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, 3539 ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3449 right_path, &left_cpos); 3540 right_path, &left_cpos);
3450 if (ret) { 3541 if (ret) {
3451 mlog_errno(ret); 3542 mlog_errno(ret);
@@ -3462,7 +3553,7 @@ static int ocfs2_get_left_path(struct inode *inode,
3462 goto out; 3553 goto out;
3463 } 3554 }
3464 3555
3465 ret = ocfs2_find_path(inode, left_path, left_cpos); 3556 ret = ocfs2_find_path(et->et_ci, left_path, left_cpos);
3466 if (ret) { 3557 if (ret) {
3467 mlog_errno(ret); 3558 mlog_errno(ret);
3468 goto out; 3559 goto out;
@@ -3485,12 +3576,11 @@ out:
3485 * remove the rightmost leaf extent block in the right_path and change 3576 * remove the rightmost leaf extent block in the right_path and change
3486 * the right path to indicate the new rightmost path. 3577 * the right path to indicate the new rightmost path.
3487 */ 3578 */
3488static int ocfs2_merge_rec_left(struct inode *inode, 3579static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
3489 struct ocfs2_path *right_path,
3490 handle_t *handle, 3580 handle_t *handle,
3581 struct ocfs2_extent_tree *et,
3491 struct ocfs2_extent_rec *split_rec, 3582 struct ocfs2_extent_rec *split_rec,
3492 struct ocfs2_cached_dealloc_ctxt *dealloc, 3583 struct ocfs2_cached_dealloc_ctxt *dealloc,
3493 struct ocfs2_extent_tree *et,
3494 int index) 3584 int index)
3495{ 3585{
3496 int ret, i, subtree_index = 0, has_empty_extent = 0; 3586 int ret, i, subtree_index = 0, has_empty_extent = 0;
@@ -3508,7 +3598,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3508 right_rec = &el->l_recs[index]; 3598 right_rec = &el->l_recs[index];
3509 if (index == 0) { 3599 if (index == 0) {
3510 /* we meet with a cross extent block merge. */ 3600 /* we meet with a cross extent block merge. */
3511 ret = ocfs2_get_left_path(inode, right_path, &left_path); 3601 ret = ocfs2_get_left_path(et, right_path, &left_path);
3512 if (ret) { 3602 if (ret) {
3513 mlog_errno(ret); 3603 mlog_errno(ret);
3514 goto out; 3604 goto out;
@@ -3524,8 +3614,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3524 le16_to_cpu(left_rec->e_leaf_clusters) != 3614 le16_to_cpu(left_rec->e_leaf_clusters) !=
3525 le32_to_cpu(split_rec->e_cpos)); 3615 le32_to_cpu(split_rec->e_cpos));
3526 3616
3527 subtree_index = ocfs2_find_subtree_root(inode, 3617 subtree_index = ocfs2_find_subtree_root(et, left_path,
3528 left_path, right_path); 3618 right_path);
3529 3619
3530 ret = ocfs2_extend_rotate_transaction(handle, subtree_index, 3620 ret = ocfs2_extend_rotate_transaction(handle, subtree_index,
3531 handle->h_buffer_credits, 3621 handle->h_buffer_credits,
@@ -3538,7 +3628,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3538 root_bh = left_path->p_node[subtree_index].bh; 3628 root_bh = left_path->p_node[subtree_index].bh;
3539 BUG_ON(root_bh != right_path->p_node[subtree_index].bh); 3629 BUG_ON(root_bh != right_path->p_node[subtree_index].bh);
3540 3630
3541 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 3631 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
3542 subtree_index); 3632 subtree_index);
3543 if (ret) { 3633 if (ret) {
3544 mlog_errno(ret); 3634 mlog_errno(ret);
@@ -3547,14 +3637,14 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3547 3637
3548 for (i = subtree_index + 1; 3638 for (i = subtree_index + 1;
3549 i < path_num_items(right_path); i++) { 3639 i < path_num_items(right_path); i++) {
3550 ret = ocfs2_path_bh_journal_access(handle, inode, 3640 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3551 right_path, i); 3641 right_path, i);
3552 if (ret) { 3642 if (ret) {
3553 mlog_errno(ret); 3643 mlog_errno(ret);
3554 goto out; 3644 goto out;
3555 } 3645 }
3556 3646
3557 ret = ocfs2_path_bh_journal_access(handle, inode, 3647 ret = ocfs2_path_bh_journal_access(handle, et->et_ci,
3558 left_path, i); 3648 left_path, i);
3559 if (ret) { 3649 if (ret) {
3560 mlog_errno(ret); 3650 mlog_errno(ret);
@@ -3567,7 +3657,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3567 has_empty_extent = 1; 3657 has_empty_extent = 1;
3568 } 3658 }
3569 3659
3570 ret = ocfs2_path_bh_journal_access(handle, inode, right_path, 3660 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, right_path,
3571 path_num_items(right_path) - 1); 3661 path_num_items(right_path) - 1);
3572 if (ret) { 3662 if (ret) {
3573 mlog_errno(ret); 3663 mlog_errno(ret);
@@ -3586,7 +3676,8 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3586 3676
3587 le32_add_cpu(&right_rec->e_cpos, split_clusters); 3677 le32_add_cpu(&right_rec->e_cpos, split_clusters);
3588 le64_add_cpu(&right_rec->e_blkno, 3678 le64_add_cpu(&right_rec->e_blkno,
3589 ocfs2_clusters_to_blocks(inode->i_sb, split_clusters)); 3679 ocfs2_clusters_to_blocks(ocfs2_metadata_cache_get_super(et->et_ci),
3680 split_clusters));
3590 le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters); 3681 le16_add_cpu(&right_rec->e_leaf_clusters, -split_clusters);
3591 3682
3592 ocfs2_cleanup_merge(el, index); 3683 ocfs2_cleanup_merge(el, index);
@@ -3608,9 +3699,9 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3608 if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 && 3699 if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
3609 le16_to_cpu(el->l_next_free_rec) == 1) { 3700 le16_to_cpu(el->l_next_free_rec) == 1) {
3610 3701
3611 ret = ocfs2_remove_rightmost_path(inode, handle, 3702 ret = ocfs2_remove_rightmost_path(handle, et,
3612 right_path, 3703 right_path,
3613 dealloc, et); 3704 dealloc);
3614 if (ret) { 3705 if (ret) {
3615 mlog_errno(ret); 3706 mlog_errno(ret);
3616 goto out; 3707 goto out;
@@ -3622,7 +3713,7 @@ static int ocfs2_merge_rec_left(struct inode *inode,
3622 ocfs2_mv_path(right_path, left_path); 3713 ocfs2_mv_path(right_path, left_path);
3623 left_path = NULL; 3714 left_path = NULL;
3624 } else 3715 } else
3625 ocfs2_complete_edge_insert(inode, handle, left_path, 3716 ocfs2_complete_edge_insert(handle, left_path,
3626 right_path, subtree_index); 3717 right_path, subtree_index);
3627 } 3718 }
3628out: 3719out:
@@ -3631,15 +3722,13 @@ out:
3631 return ret; 3722 return ret;
3632} 3723}
3633 3724
3634static int ocfs2_try_to_merge_extent(struct inode *inode, 3725static int ocfs2_try_to_merge_extent(handle_t *handle,
3635 handle_t *handle, 3726 struct ocfs2_extent_tree *et,
3636 struct ocfs2_path *path, 3727 struct ocfs2_path *path,
3637 int split_index, 3728 int split_index,
3638 struct ocfs2_extent_rec *split_rec, 3729 struct ocfs2_extent_rec *split_rec,
3639 struct ocfs2_cached_dealloc_ctxt *dealloc, 3730 struct ocfs2_cached_dealloc_ctxt *dealloc,
3640 struct ocfs2_merge_ctxt *ctxt, 3731 struct ocfs2_merge_ctxt *ctxt)
3641 struct ocfs2_extent_tree *et)
3642
3643{ 3732{
3644 int ret = 0; 3733 int ret = 0;
3645 struct ocfs2_extent_list *el = path_leaf_el(path); 3734 struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -3655,8 +3744,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3655 * extents - having more than one in a leaf is 3744 * extents - having more than one in a leaf is
3656 * illegal. 3745 * illegal.
3657 */ 3746 */
3658 ret = ocfs2_rotate_tree_left(inode, handle, path, 3747 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
3659 dealloc, et);
3660 if (ret) { 3748 if (ret) {
3661 mlog_errno(ret); 3749 mlog_errno(ret);
3662 goto out; 3750 goto out;
@@ -3685,8 +3773,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3685 * previous extent block. It is more efficient and easier 3773 * previous extent block. It is more efficient and easier
3686 * if we do merge_right first and merge_left later. 3774 * if we do merge_right first and merge_left later.
3687 */ 3775 */
3688 ret = ocfs2_merge_rec_right(inode, path, 3776 ret = ocfs2_merge_rec_right(path, handle, et, split_rec,
3689 handle, split_rec,
3690 split_index); 3777 split_index);
3691 if (ret) { 3778 if (ret) {
3692 mlog_errno(ret); 3779 mlog_errno(ret);
@@ -3699,8 +3786,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3699 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); 3786 BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
3700 3787
3701 /* The merge left us with an empty extent, remove it. */ 3788 /* The merge left us with an empty extent, remove it. */
3702 ret = ocfs2_rotate_tree_left(inode, handle, path, 3789 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
3703 dealloc, et);
3704 if (ret) { 3790 if (ret) {
3705 mlog_errno(ret); 3791 mlog_errno(ret);
3706 goto out; 3792 goto out;
@@ -3712,18 +3798,15 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3712 * Note that we don't pass split_rec here on purpose - 3798 * Note that we don't pass split_rec here on purpose -
3713 * we've merged it into the rec already. 3799 * we've merged it into the rec already.
3714 */ 3800 */
3715 ret = ocfs2_merge_rec_left(inode, path, 3801 ret = ocfs2_merge_rec_left(path, handle, et, rec,
3716 handle, rec, 3802 dealloc, split_index);
3717 dealloc, et,
3718 split_index);
3719 3803
3720 if (ret) { 3804 if (ret) {
3721 mlog_errno(ret); 3805 mlog_errno(ret);
3722 goto out; 3806 goto out;
3723 } 3807 }
3724 3808
3725 ret = ocfs2_rotate_tree_left(inode, handle, path, 3809 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
3726 dealloc, et);
3727 /* 3810 /*
3728 * Error from this last rotate is not critical, so 3811 * Error from this last rotate is not critical, so
3729 * print but don't bubble it up. 3812 * print but don't bubble it up.
@@ -3740,19 +3823,16 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3740 * the record on the left (hence the left merge). 3823 * the record on the left (hence the left merge).
3741 */ 3824 */
3742 if (ctxt->c_contig_type == CONTIG_RIGHT) { 3825 if (ctxt->c_contig_type == CONTIG_RIGHT) {
3743 ret = ocfs2_merge_rec_left(inode, 3826 ret = ocfs2_merge_rec_left(path, handle, et,
3744 path, 3827 split_rec, dealloc,
3745 handle, split_rec,
3746 dealloc, et,
3747 split_index); 3828 split_index);
3748 if (ret) { 3829 if (ret) {
3749 mlog_errno(ret); 3830 mlog_errno(ret);
3750 goto out; 3831 goto out;
3751 } 3832 }
3752 } else { 3833 } else {
3753 ret = ocfs2_merge_rec_right(inode, 3834 ret = ocfs2_merge_rec_right(path, handle,
3754 path, 3835 et, split_rec,
3755 handle, split_rec,
3756 split_index); 3836 split_index);
3757 if (ret) { 3837 if (ret) {
3758 mlog_errno(ret); 3838 mlog_errno(ret);
@@ -3765,8 +3845,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode,
3765 * The merge may have left an empty extent in 3845 * The merge may have left an empty extent in
3766 * our leaf. Try to rotate it away. 3846 * our leaf. Try to rotate it away.
3767 */ 3847 */
3768 ret = ocfs2_rotate_tree_left(inode, handle, path, 3848 ret = ocfs2_rotate_tree_left(handle, et, path,
3769 dealloc, et); 3849 dealloc);
3770 if (ret) 3850 if (ret)
3771 mlog_errno(ret); 3851 mlog_errno(ret);
3772 ret = 0; 3852 ret = 0;
@@ -3812,10 +3892,10 @@ static void ocfs2_subtract_from_rec(struct super_block *sb,
3812 * list. If this leaf is part of an allocation tree, it is assumed 3892 * list. If this leaf is part of an allocation tree, it is assumed
3813 * that the tree above has been prepared. 3893 * that the tree above has been prepared.
3814 */ 3894 */
3815static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec, 3895static void ocfs2_insert_at_leaf(struct ocfs2_extent_tree *et,
3896 struct ocfs2_extent_rec *insert_rec,
3816 struct ocfs2_extent_list *el, 3897 struct ocfs2_extent_list *el,
3817 struct ocfs2_insert_type *insert, 3898 struct ocfs2_insert_type *insert)
3818 struct inode *inode)
3819{ 3899{
3820 int i = insert->ins_contig_index; 3900 int i = insert->ins_contig_index;
3821 unsigned int range; 3901 unsigned int range;
@@ -3827,7 +3907,8 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
3827 i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos)); 3907 i = ocfs2_search_extent_list(el, le32_to_cpu(insert_rec->e_cpos));
3828 BUG_ON(i == -1); 3908 BUG_ON(i == -1);
3829 rec = &el->l_recs[i]; 3909 rec = &el->l_recs[i];
3830 ocfs2_subtract_from_rec(inode->i_sb, insert->ins_split, rec, 3910 ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
3911 insert->ins_split, rec,
3831 insert_rec); 3912 insert_rec);
3832 goto rotate; 3913 goto rotate;
3833 } 3914 }
@@ -3869,10 +3950,10 @@ static void ocfs2_insert_at_leaf(struct ocfs2_extent_rec *insert_rec,
3869 3950
3870 mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >= 3951 mlog_bug_on_msg(le16_to_cpu(el->l_next_free_rec) >=
3871 le16_to_cpu(el->l_count), 3952 le16_to_cpu(el->l_count),
3872 "inode %lu, depth %u, count %u, next free %u, " 3953 "owner %llu, depth %u, count %u, next free %u, "
3873 "rec.cpos %u, rec.clusters %u, " 3954 "rec.cpos %u, rec.clusters %u, "
3874 "insert.cpos %u, insert.clusters %u\n", 3955 "insert.cpos %u, insert.clusters %u\n",
3875 inode->i_ino, 3956 ocfs2_metadata_cache_owner(et->et_ci),
3876 le16_to_cpu(el->l_tree_depth), 3957 le16_to_cpu(el->l_tree_depth),
3877 le16_to_cpu(el->l_count), 3958 le16_to_cpu(el->l_count),
3878 le16_to_cpu(el->l_next_free_rec), 3959 le16_to_cpu(el->l_next_free_rec),
@@ -3900,8 +3981,8 @@ rotate:
3900 ocfs2_rotate_leaf(el, insert_rec); 3981 ocfs2_rotate_leaf(el, insert_rec);
3901} 3982}
3902 3983
3903static void ocfs2_adjust_rightmost_records(struct inode *inode, 3984static void ocfs2_adjust_rightmost_records(handle_t *handle,
3904 handle_t *handle, 3985 struct ocfs2_extent_tree *et,
3905 struct ocfs2_path *path, 3986 struct ocfs2_path *path,
3906 struct ocfs2_extent_rec *insert_rec) 3987 struct ocfs2_extent_rec *insert_rec)
3907{ 3988{
@@ -3919,9 +4000,9 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
3919 4000
3920 next_free = le16_to_cpu(el->l_next_free_rec); 4001 next_free = le16_to_cpu(el->l_next_free_rec);
3921 if (next_free == 0) { 4002 if (next_free == 0) {
3922 ocfs2_error(inode->i_sb, 4003 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
3923 "Dinode %llu has a bad extent list", 4004 "Owner %llu has a bad extent list",
3924 (unsigned long long)OCFS2_I(inode)->ip_blkno); 4005 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
3925 ret = -EIO; 4006 ret = -EIO;
3926 return; 4007 return;
3927 } 4008 }
@@ -3941,7 +4022,8 @@ static void ocfs2_adjust_rightmost_records(struct inode *inode,
3941 } 4022 }
3942} 4023}
3943 4024
3944static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle, 4025static int ocfs2_append_rec_to_path(handle_t *handle,
4026 struct ocfs2_extent_tree *et,
3945 struct ocfs2_extent_rec *insert_rec, 4027 struct ocfs2_extent_rec *insert_rec,
3946 struct ocfs2_path *right_path, 4028 struct ocfs2_path *right_path,
3947 struct ocfs2_path **ret_left_path) 4029 struct ocfs2_path **ret_left_path)
@@ -3969,8 +4051,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
3969 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) { 4051 (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) {
3970 u32 left_cpos; 4052 u32 left_cpos;
3971 4053
3972 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, right_path, 4054 ret = ocfs2_find_cpos_for_left_leaf(ocfs2_metadata_cache_get_super(et->et_ci),
3973 &left_cpos); 4055 right_path, &left_cpos);
3974 if (ret) { 4056 if (ret) {
3975 mlog_errno(ret); 4057 mlog_errno(ret);
3976 goto out; 4058 goto out;
@@ -3992,7 +4074,8 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
3992 goto out; 4074 goto out;
3993 } 4075 }
3994 4076
3995 ret = ocfs2_find_path(inode, left_path, left_cpos); 4077 ret = ocfs2_find_path(et->et_ci, left_path,
4078 left_cpos);
3996 if (ret) { 4079 if (ret) {
3997 mlog_errno(ret); 4080 mlog_errno(ret);
3998 goto out; 4081 goto out;
@@ -4005,13 +4088,13 @@ static int ocfs2_append_rec_to_path(struct inode *inode, handle_t *handle,
4005 } 4088 }
4006 } 4089 }
4007 4090
4008 ret = ocfs2_journal_access_path(inode, handle, right_path); 4091 ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
4009 if (ret) { 4092 if (ret) {
4010 mlog_errno(ret); 4093 mlog_errno(ret);
4011 goto out; 4094 goto out;
4012 } 4095 }
4013 4096
4014 ocfs2_adjust_rightmost_records(inode, handle, right_path, insert_rec); 4097 ocfs2_adjust_rightmost_records(handle, et, right_path, insert_rec);
4015 4098
4016 *ret_left_path = left_path; 4099 *ret_left_path = left_path;
4017 ret = 0; 4100 ret = 0;
@@ -4022,7 +4105,7 @@ out:
4022 return ret; 4105 return ret;
4023} 4106}
4024 4107
4025static void ocfs2_split_record(struct inode *inode, 4108static void ocfs2_split_record(struct ocfs2_extent_tree *et,
4026 struct ocfs2_path *left_path, 4109 struct ocfs2_path *left_path,
4027 struct ocfs2_path *right_path, 4110 struct ocfs2_path *right_path,
4028 struct ocfs2_extent_rec *split_rec, 4111 struct ocfs2_extent_rec *split_rec,
@@ -4095,7 +4178,8 @@ static void ocfs2_split_record(struct inode *inode,
4095 } 4178 }
4096 4179
4097 rec = &el->l_recs[index]; 4180 rec = &el->l_recs[index];
4098 ocfs2_subtract_from_rec(inode->i_sb, split, rec, split_rec); 4181 ocfs2_subtract_from_rec(ocfs2_metadata_cache_get_super(et->et_ci),
4182 split, rec, split_rec);
4099 ocfs2_rotate_leaf(insert_el, split_rec); 4183 ocfs2_rotate_leaf(insert_el, split_rec);
4100} 4184}
4101 4185
@@ -4107,8 +4191,8 @@ static void ocfs2_split_record(struct inode *inode,
4107 * in. left_path should only be passed in if we need to update that 4191 * in. left_path should only be passed in if we need to update that
4108 * portion of the tree after an edge insert. 4192 * portion of the tree after an edge insert.
4109 */ 4193 */
4110static int ocfs2_insert_path(struct inode *inode, 4194static int ocfs2_insert_path(handle_t *handle,
4111 handle_t *handle, 4195 struct ocfs2_extent_tree *et,
4112 struct ocfs2_path *left_path, 4196 struct ocfs2_path *left_path,
4113 struct ocfs2_path *right_path, 4197 struct ocfs2_path *right_path,
4114 struct ocfs2_extent_rec *insert_rec, 4198 struct ocfs2_extent_rec *insert_rec,
@@ -4134,7 +4218,7 @@ static int ocfs2_insert_path(struct inode *inode,
4134 goto out; 4218 goto out;
4135 } 4219 }
4136 4220
4137 ret = ocfs2_journal_access_path(inode, handle, left_path); 4221 ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
4138 if (ret < 0) { 4222 if (ret < 0) {
4139 mlog_errno(ret); 4223 mlog_errno(ret);
4140 goto out; 4224 goto out;
@@ -4145,7 +4229,7 @@ static int ocfs2_insert_path(struct inode *inode,
4145 * Pass both paths to the journal. The majority of inserts 4229 * Pass both paths to the journal. The majority of inserts
4146 * will be touching all components anyway. 4230 * will be touching all components anyway.
4147 */ 4231 */
4148 ret = ocfs2_journal_access_path(inode, handle, right_path); 4232 ret = ocfs2_journal_access_path(et->et_ci, handle, right_path);
4149 if (ret < 0) { 4233 if (ret < 0) {
4150 mlog_errno(ret); 4234 mlog_errno(ret);
4151 goto out; 4235 goto out;
@@ -4157,7 +4241,7 @@ static int ocfs2_insert_path(struct inode *inode,
4157 * of splits, but it's easier to just let one separate 4241 * of splits, but it's easier to just let one separate
4158 * function sort it all out. 4242 * function sort it all out.
4159 */ 4243 */
4160 ocfs2_split_record(inode, left_path, right_path, 4244 ocfs2_split_record(et, left_path, right_path,
4161 insert_rec, insert->ins_split); 4245 insert_rec, insert->ins_split);
4162 4246
4163 /* 4247 /*
@@ -4171,8 +4255,8 @@ static int ocfs2_insert_path(struct inode *inode,
4171 if (ret) 4255 if (ret)
4172 mlog_errno(ret); 4256 mlog_errno(ret);
4173 } else 4257 } else
4174 ocfs2_insert_at_leaf(insert_rec, path_leaf_el(right_path), 4258 ocfs2_insert_at_leaf(et, insert_rec, path_leaf_el(right_path),
4175 insert, inode); 4259 insert);
4176 4260
4177 ret = ocfs2_journal_dirty(handle, leaf_bh); 4261 ret = ocfs2_journal_dirty(handle, leaf_bh);
4178 if (ret) 4262 if (ret)
@@ -4185,10 +4269,10 @@ static int ocfs2_insert_path(struct inode *inode,
4185 * 4269 *
4186 * XXX: Should we extend the transaction here? 4270 * XXX: Should we extend the transaction here?
4187 */ 4271 */
4188 subtree_index = ocfs2_find_subtree_root(inode, left_path, 4272 subtree_index = ocfs2_find_subtree_root(et, left_path,
4189 right_path); 4273 right_path);
4190 ocfs2_complete_edge_insert(inode, handle, left_path, 4274 ocfs2_complete_edge_insert(handle, left_path, right_path,
4191 right_path, subtree_index); 4275 subtree_index);
4192 } 4276 }
4193 4277
4194 ret = 0; 4278 ret = 0;
@@ -4196,8 +4280,7 @@ out:
4196 return ret; 4280 return ret;
4197} 4281}
4198 4282
4199static int ocfs2_do_insert_extent(struct inode *inode, 4283static int ocfs2_do_insert_extent(handle_t *handle,
4200 handle_t *handle,
4201 struct ocfs2_extent_tree *et, 4284 struct ocfs2_extent_tree *et,
4202 struct ocfs2_extent_rec *insert_rec, 4285 struct ocfs2_extent_rec *insert_rec,
4203 struct ocfs2_insert_type *type) 4286 struct ocfs2_insert_type *type)
@@ -4210,7 +4293,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4210 4293
4211 el = et->et_root_el; 4294 el = et->et_root_el;
4212 4295
4213 ret = ocfs2_et_root_journal_access(handle, inode, et, 4296 ret = ocfs2_et_root_journal_access(handle, et,
4214 OCFS2_JOURNAL_ACCESS_WRITE); 4297 OCFS2_JOURNAL_ACCESS_WRITE);
4215 if (ret) { 4298 if (ret) {
4216 mlog_errno(ret); 4299 mlog_errno(ret);
@@ -4218,7 +4301,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4218 } 4301 }
4219 4302
4220 if (le16_to_cpu(el->l_tree_depth) == 0) { 4303 if (le16_to_cpu(el->l_tree_depth) == 0) {
4221 ocfs2_insert_at_leaf(insert_rec, el, type, inode); 4304 ocfs2_insert_at_leaf(et, insert_rec, el, type);
4222 goto out_update_clusters; 4305 goto out_update_clusters;
4223 } 4306 }
4224 4307
@@ -4241,7 +4324,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4241 cpos = UINT_MAX; 4324 cpos = UINT_MAX;
4242 } 4325 }
4243 4326
4244 ret = ocfs2_find_path(inode, right_path, cpos); 4327 ret = ocfs2_find_path(et->et_ci, right_path, cpos);
4245 if (ret) { 4328 if (ret) {
4246 mlog_errno(ret); 4329 mlog_errno(ret);
4247 goto out; 4330 goto out;
@@ -4260,7 +4343,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4260 * can wind up skipping both of these two special cases... 4343 * can wind up skipping both of these two special cases...
4261 */ 4344 */
4262 if (rotate) { 4345 if (rotate) {
4263 ret = ocfs2_rotate_tree_right(inode, handle, type->ins_split, 4346 ret = ocfs2_rotate_tree_right(handle, et, type->ins_split,
4264 le32_to_cpu(insert_rec->e_cpos), 4347 le32_to_cpu(insert_rec->e_cpos),
4265 right_path, &left_path); 4348 right_path, &left_path);
4266 if (ret) { 4349 if (ret) {
@@ -4272,7 +4355,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4272 * ocfs2_rotate_tree_right() might have extended the 4355 * ocfs2_rotate_tree_right() might have extended the
4273 * transaction without re-journaling our tree root. 4356 * transaction without re-journaling our tree root.
4274 */ 4357 */
4275 ret = ocfs2_et_root_journal_access(handle, inode, et, 4358 ret = ocfs2_et_root_journal_access(handle, et,
4276 OCFS2_JOURNAL_ACCESS_WRITE); 4359 OCFS2_JOURNAL_ACCESS_WRITE);
4277 if (ret) { 4360 if (ret) {
4278 mlog_errno(ret); 4361 mlog_errno(ret);
@@ -4280,7 +4363,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4280 } 4363 }
4281 } else if (type->ins_appending == APPEND_TAIL 4364 } else if (type->ins_appending == APPEND_TAIL
4282 && type->ins_contig != CONTIG_LEFT) { 4365 && type->ins_contig != CONTIG_LEFT) {
4283 ret = ocfs2_append_rec_to_path(inode, handle, insert_rec, 4366 ret = ocfs2_append_rec_to_path(handle, et, insert_rec,
4284 right_path, &left_path); 4367 right_path, &left_path);
4285 if (ret) { 4368 if (ret) {
4286 mlog_errno(ret); 4369 mlog_errno(ret);
@@ -4288,7 +4371,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4288 } 4371 }
4289 } 4372 }
4290 4373
4291 ret = ocfs2_insert_path(inode, handle, left_path, right_path, 4374 ret = ocfs2_insert_path(handle, et, left_path, right_path,
4292 insert_rec, type); 4375 insert_rec, type);
4293 if (ret) { 4376 if (ret) {
4294 mlog_errno(ret); 4377 mlog_errno(ret);
@@ -4297,7 +4380,7 @@ static int ocfs2_do_insert_extent(struct inode *inode,
4297 4380
4298out_update_clusters: 4381out_update_clusters:
4299 if (type->ins_split == SPLIT_NONE) 4382 if (type->ins_split == SPLIT_NONE)
4300 ocfs2_et_update_clusters(inode, et, 4383 ocfs2_et_update_clusters(et,
4301 le16_to_cpu(insert_rec->e_leaf_clusters)); 4384 le16_to_cpu(insert_rec->e_leaf_clusters));
4302 4385
4303 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 4386 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
@@ -4312,7 +4395,8 @@ out:
4312} 4395}
4313 4396
4314static enum ocfs2_contig_type 4397static enum ocfs2_contig_type
4315ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path, 4398ocfs2_figure_merge_contig_type(struct ocfs2_extent_tree *et,
4399 struct ocfs2_path *path,
4316 struct ocfs2_extent_list *el, int index, 4400 struct ocfs2_extent_list *el, int index,
4317 struct ocfs2_extent_rec *split_rec) 4401 struct ocfs2_extent_rec *split_rec)
4318{ 4402{
@@ -4324,12 +4408,12 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4324 struct ocfs2_path *left_path = NULL, *right_path = NULL; 4408 struct ocfs2_path *left_path = NULL, *right_path = NULL;
4325 struct buffer_head *bh; 4409 struct buffer_head *bh;
4326 struct ocfs2_extent_block *eb; 4410 struct ocfs2_extent_block *eb;
4411 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
4327 4412
4328 if (index > 0) { 4413 if (index > 0) {
4329 rec = &el->l_recs[index - 1]; 4414 rec = &el->l_recs[index - 1];
4330 } else if (path->p_tree_depth > 0) { 4415 } else if (path->p_tree_depth > 0) {
4331 status = ocfs2_find_cpos_for_left_leaf(inode->i_sb, 4416 status = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
4332 path, &left_cpos);
4333 if (status) 4417 if (status)
4334 goto out; 4418 goto out;
4335 4419
@@ -4338,7 +4422,8 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4338 if (!left_path) 4422 if (!left_path)
4339 goto out; 4423 goto out;
4340 4424
4341 status = ocfs2_find_path(inode, left_path, left_cpos); 4425 status = ocfs2_find_path(et->et_ci, left_path,
4426 left_cpos);
4342 if (status) 4427 if (status)
4343 goto out; 4428 goto out;
4344 4429
@@ -4348,7 +4433,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4348 le16_to_cpu(new_el->l_count)) { 4433 le16_to_cpu(new_el->l_count)) {
4349 bh = path_leaf_bh(left_path); 4434 bh = path_leaf_bh(left_path);
4350 eb = (struct ocfs2_extent_block *)bh->b_data; 4435 eb = (struct ocfs2_extent_block *)bh->b_data;
4351 ocfs2_error(inode->i_sb, 4436 ocfs2_error(sb,
4352 "Extent block #%llu has an " 4437 "Extent block #%llu has an "
4353 "invalid l_next_free_rec of " 4438 "invalid l_next_free_rec of "
4354 "%d. It should have " 4439 "%d. It should have "
@@ -4373,7 +4458,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4373 if (split_rec->e_cpos == el->l_recs[index].e_cpos) 4458 if (split_rec->e_cpos == el->l_recs[index].e_cpos)
4374 ret = CONTIG_RIGHT; 4459 ret = CONTIG_RIGHT;
4375 } else { 4460 } else {
4376 ret = ocfs2_extent_contig(inode, rec, split_rec); 4461 ret = ocfs2_et_extent_contig(et, rec, split_rec);
4377 } 4462 }
4378 } 4463 }
4379 4464
@@ -4382,8 +4467,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4382 rec = &el->l_recs[index + 1]; 4467 rec = &el->l_recs[index + 1];
4383 else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) && 4468 else if (le16_to_cpu(el->l_next_free_rec) == le16_to_cpu(el->l_count) &&
4384 path->p_tree_depth > 0) { 4469 path->p_tree_depth > 0) {
4385 status = ocfs2_find_cpos_for_right_leaf(inode->i_sb, 4470 status = ocfs2_find_cpos_for_right_leaf(sb, path, &right_cpos);
4386 path, &right_cpos);
4387 if (status) 4471 if (status)
4388 goto out; 4472 goto out;
4389 4473
@@ -4394,7 +4478,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4394 if (!right_path) 4478 if (!right_path)
4395 goto out; 4479 goto out;
4396 4480
4397 status = ocfs2_find_path(inode, right_path, right_cpos); 4481 status = ocfs2_find_path(et->et_ci, right_path, right_cpos);
4398 if (status) 4482 if (status)
4399 goto out; 4483 goto out;
4400 4484
@@ -4404,7 +4488,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4404 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) { 4488 if (le16_to_cpu(new_el->l_next_free_rec) <= 1) {
4405 bh = path_leaf_bh(right_path); 4489 bh = path_leaf_bh(right_path);
4406 eb = (struct ocfs2_extent_block *)bh->b_data; 4490 eb = (struct ocfs2_extent_block *)bh->b_data;
4407 ocfs2_error(inode->i_sb, 4491 ocfs2_error(sb,
4408 "Extent block #%llu has an " 4492 "Extent block #%llu has an "
4409 "invalid l_next_free_rec of %d", 4493 "invalid l_next_free_rec of %d",
4410 (unsigned long long)le64_to_cpu(eb->h_blkno), 4494 (unsigned long long)le64_to_cpu(eb->h_blkno),
@@ -4419,7 +4503,7 @@ ocfs2_figure_merge_contig_type(struct inode *inode, struct ocfs2_path *path,
4419 if (rec) { 4503 if (rec) {
4420 enum ocfs2_contig_type contig_type; 4504 enum ocfs2_contig_type contig_type;
4421 4505
4422 contig_type = ocfs2_extent_contig(inode, rec, split_rec); 4506 contig_type = ocfs2_et_extent_contig(et, rec, split_rec);
4423 4507
4424 if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT) 4508 if (contig_type == CONTIG_LEFT && ret == CONTIG_RIGHT)
4425 ret = CONTIG_LEFTRIGHT; 4509 ret = CONTIG_LEFTRIGHT;
@@ -4436,11 +4520,10 @@ out:
4436 return ret; 4520 return ret;
4437} 4521}
4438 4522
4439static void ocfs2_figure_contig_type(struct inode *inode, 4523static void ocfs2_figure_contig_type(struct ocfs2_extent_tree *et,
4440 struct ocfs2_insert_type *insert, 4524 struct ocfs2_insert_type *insert,
4441 struct ocfs2_extent_list *el, 4525 struct ocfs2_extent_list *el,
4442 struct ocfs2_extent_rec *insert_rec, 4526 struct ocfs2_extent_rec *insert_rec)
4443 struct ocfs2_extent_tree *et)
4444{ 4527{
4445 int i; 4528 int i;
4446 enum ocfs2_contig_type contig_type = CONTIG_NONE; 4529 enum ocfs2_contig_type contig_type = CONTIG_NONE;
@@ -4448,8 +4531,8 @@ static void ocfs2_figure_contig_type(struct inode *inode,
4448 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0); 4531 BUG_ON(le16_to_cpu(el->l_tree_depth) != 0);
4449 4532
4450 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) { 4533 for(i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
4451 contig_type = ocfs2_extent_contig(inode, &el->l_recs[i], 4534 contig_type = ocfs2_et_extent_contig(et, &el->l_recs[i],
4452 insert_rec); 4535 insert_rec);
4453 if (contig_type != CONTIG_NONE) { 4536 if (contig_type != CONTIG_NONE) {
4454 insert->ins_contig_index = i; 4537 insert->ins_contig_index = i;
4455 break; 4538 break;
@@ -4530,8 +4613,7 @@ set_tail_append:
4530 * All of the information is stored on the ocfs2_insert_type 4613 * All of the information is stored on the ocfs2_insert_type
4531 * structure. 4614 * structure.
4532 */ 4615 */
4533static int ocfs2_figure_insert_type(struct inode *inode, 4616static int ocfs2_figure_insert_type(struct ocfs2_extent_tree *et,
4534 struct ocfs2_extent_tree *et,
4535 struct buffer_head **last_eb_bh, 4617 struct buffer_head **last_eb_bh,
4536 struct ocfs2_extent_rec *insert_rec, 4618 struct ocfs2_extent_rec *insert_rec,
4537 int *free_records, 4619 int *free_records,
@@ -4555,7 +4637,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4555 * ocfs2_figure_insert_type() and ocfs2_add_branch() 4637 * ocfs2_figure_insert_type() and ocfs2_add_branch()
4556 * may want it later. 4638 * may want it later.
4557 */ 4639 */
4558 ret = ocfs2_read_extent_block(inode, 4640 ret = ocfs2_read_extent_block(et->et_ci,
4559 ocfs2_et_get_last_eb_blk(et), 4641 ocfs2_et_get_last_eb_blk(et),
4560 &bh); 4642 &bh);
4561 if (ret) { 4643 if (ret) {
@@ -4578,7 +4660,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4578 le16_to_cpu(el->l_next_free_rec); 4660 le16_to_cpu(el->l_next_free_rec);
4579 4661
4580 if (!insert->ins_tree_depth) { 4662 if (!insert->ins_tree_depth) {
4581 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); 4663 ocfs2_figure_contig_type(et, insert, el, insert_rec);
4582 ocfs2_figure_appending_type(insert, el, insert_rec); 4664 ocfs2_figure_appending_type(insert, el, insert_rec);
4583 return 0; 4665 return 0;
4584 } 4666 }
@@ -4596,7 +4678,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4596 * us the rightmost tree path. This is accounted for below in 4678 * us the rightmost tree path. This is accounted for below in
4597 * the appending code. 4679 * the appending code.
4598 */ 4680 */
4599 ret = ocfs2_find_path(inode, path, le32_to_cpu(insert_rec->e_cpos)); 4681 ret = ocfs2_find_path(et->et_ci, path, le32_to_cpu(insert_rec->e_cpos));
4600 if (ret) { 4682 if (ret) {
4601 mlog_errno(ret); 4683 mlog_errno(ret);
4602 goto out; 4684 goto out;
@@ -4612,7 +4694,7 @@ static int ocfs2_figure_insert_type(struct inode *inode,
4612 * into two types of appends: simple record append, or a 4694 * into two types of appends: simple record append, or a
4613 * rotate inside the tail leaf. 4695 * rotate inside the tail leaf.
4614 */ 4696 */
4615 ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); 4697 ocfs2_figure_contig_type(et, insert, el, insert_rec);
4616 4698
4617 /* 4699 /*
4618 * The insert code isn't quite ready to deal with all cases of 4700 * The insert code isn't quite ready to deal with all cases of
@@ -4657,13 +4739,11 @@ out:
4657} 4739}
4658 4740
4659/* 4741/*
4660 * Insert an extent into an inode btree. 4742 * Insert an extent into a btree.
4661 * 4743 *
4662 * The caller needs to update fe->i_clusters 4744 * The caller needs to update the owning btree's cluster count.
4663 */ 4745 */
4664int ocfs2_insert_extent(struct ocfs2_super *osb, 4746int ocfs2_insert_extent(handle_t *handle,
4665 handle_t *handle,
4666 struct inode *inode,
4667 struct ocfs2_extent_tree *et, 4747 struct ocfs2_extent_tree *et,
4668 u32 cpos, 4748 u32 cpos,
4669 u64 start_blk, 4749 u64 start_blk,
@@ -4677,21 +4757,22 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4677 struct ocfs2_insert_type insert = {0, }; 4757 struct ocfs2_insert_type insert = {0, };
4678 struct ocfs2_extent_rec rec; 4758 struct ocfs2_extent_rec rec;
4679 4759
4680 mlog(0, "add %u clusters at position %u to inode %llu\n", 4760 mlog(0, "add %u clusters at position %u to owner %llu\n",
4681 new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4761 new_clusters, cpos,
4762 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
4682 4763
4683 memset(&rec, 0, sizeof(rec)); 4764 memset(&rec, 0, sizeof(rec));
4684 rec.e_cpos = cpu_to_le32(cpos); 4765 rec.e_cpos = cpu_to_le32(cpos);
4685 rec.e_blkno = cpu_to_le64(start_blk); 4766 rec.e_blkno = cpu_to_le64(start_blk);
4686 rec.e_leaf_clusters = cpu_to_le16(new_clusters); 4767 rec.e_leaf_clusters = cpu_to_le16(new_clusters);
4687 rec.e_flags = flags; 4768 rec.e_flags = flags;
4688 status = ocfs2_et_insert_check(inode, et, &rec); 4769 status = ocfs2_et_insert_check(et, &rec);
4689 if (status) { 4770 if (status) {
4690 mlog_errno(status); 4771 mlog_errno(status);
4691 goto bail; 4772 goto bail;
4692 } 4773 }
4693 4774
4694 status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec, 4775 status = ocfs2_figure_insert_type(et, &last_eb_bh, &rec,
4695 &free_records, &insert); 4776 &free_records, &insert);
4696 if (status < 0) { 4777 if (status < 0) {
4697 mlog_errno(status); 4778 mlog_errno(status);
@@ -4705,7 +4786,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4705 free_records, insert.ins_tree_depth); 4786 free_records, insert.ins_tree_depth);
4706 4787
4707 if (insert.ins_contig == CONTIG_NONE && free_records == 0) { 4788 if (insert.ins_contig == CONTIG_NONE && free_records == 0) {
4708 status = ocfs2_grow_tree(inode, handle, et, 4789 status = ocfs2_grow_tree(handle, et,
4709 &insert.ins_tree_depth, &last_eb_bh, 4790 &insert.ins_tree_depth, &last_eb_bh,
4710 meta_ac); 4791 meta_ac);
4711 if (status) { 4792 if (status) {
@@ -4715,11 +4796,11 @@ int ocfs2_insert_extent(struct ocfs2_super *osb,
4715 } 4796 }
4716 4797
4717 /* Finally, we can add clusters. This might rotate the tree for us. */ 4798 /* Finally, we can add clusters. This might rotate the tree for us. */
4718 status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert); 4799 status = ocfs2_do_insert_extent(handle, et, &rec, &insert);
4719 if (status < 0) 4800 if (status < 0)
4720 mlog_errno(status); 4801 mlog_errno(status);
4721 else if (et->et_ops == &ocfs2_dinode_et_ops) 4802 else
4722 ocfs2_extent_map_insert_rec(inode, &rec); 4803 ocfs2_et_extent_map_insert(et, &rec);
4723 4804
4724bail: 4805bail:
4725 brelse(last_eb_bh); 4806 brelse(last_eb_bh);
@@ -4735,13 +4816,11 @@ bail:
4735 * it is not limited to the file storage. Any extent tree can use this 4816 * it is not limited to the file storage. Any extent tree can use this
4736 * function if it implements the proper ocfs2_extent_tree. 4817 * function if it implements the proper ocfs2_extent_tree.
4737 */ 4818 */
4738int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, 4819int ocfs2_add_clusters_in_btree(handle_t *handle,
4739 struct inode *inode, 4820 struct ocfs2_extent_tree *et,
4740 u32 *logical_offset, 4821 u32 *logical_offset,
4741 u32 clusters_to_add, 4822 u32 clusters_to_add,
4742 int mark_unwritten, 4823 int mark_unwritten,
4743 struct ocfs2_extent_tree *et,
4744 handle_t *handle,
4745 struct ocfs2_alloc_context *data_ac, 4824 struct ocfs2_alloc_context *data_ac,
4746 struct ocfs2_alloc_context *meta_ac, 4825 struct ocfs2_alloc_context *meta_ac,
4747 enum ocfs2_alloc_restarted *reason_ret) 4826 enum ocfs2_alloc_restarted *reason_ret)
@@ -4752,13 +4831,15 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4752 u32 bit_off, num_bits; 4831 u32 bit_off, num_bits;
4753 u64 block; 4832 u64 block;
4754 u8 flags = 0; 4833 u8 flags = 0;
4834 struct ocfs2_super *osb =
4835 OCFS2_SB(ocfs2_metadata_cache_get_super(et->et_ci));
4755 4836
4756 BUG_ON(!clusters_to_add); 4837 BUG_ON(!clusters_to_add);
4757 4838
4758 if (mark_unwritten) 4839 if (mark_unwritten)
4759 flags = OCFS2_EXT_UNWRITTEN; 4840 flags = OCFS2_EXT_UNWRITTEN;
4760 4841
4761 free_extents = ocfs2_num_free_extents(osb, inode, et); 4842 free_extents = ocfs2_num_free_extents(osb, et);
4762 if (free_extents < 0) { 4843 if (free_extents < 0) {
4763 status = free_extents; 4844 status = free_extents;
4764 mlog_errno(status); 4845 mlog_errno(status);
@@ -4795,7 +4876,7 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4795 BUG_ON(num_bits > clusters_to_add); 4876 BUG_ON(num_bits > clusters_to_add);
4796 4877
4797 /* reserve our write early -- insert_extent may update the tree root */ 4878 /* reserve our write early -- insert_extent may update the tree root */
4798 status = ocfs2_et_root_journal_access(handle, inode, et, 4879 status = ocfs2_et_root_journal_access(handle, et,
4799 OCFS2_JOURNAL_ACCESS_WRITE); 4880 OCFS2_JOURNAL_ACCESS_WRITE);
4800 if (status < 0) { 4881 if (status < 0) {
4801 mlog_errno(status); 4882 mlog_errno(status);
@@ -4803,10 +4884,10 @@ int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb,
4803 } 4884 }
4804 4885
4805 block = ocfs2_clusters_to_blocks(osb->sb, bit_off); 4886 block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4806 mlog(0, "Allocating %u clusters at block %u for inode %llu\n", 4887 mlog(0, "Allocating %u clusters at block %u for owner %llu\n",
4807 num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); 4888 num_bits, bit_off,
4808 status = ocfs2_insert_extent(osb, handle, inode, et, 4889 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci));
4809 *logical_offset, block, 4890 status = ocfs2_insert_extent(handle, et, *logical_offset, block,
4810 num_bits, flags, meta_ac); 4891 num_bits, flags, meta_ac);
4811 if (status < 0) { 4892 if (status < 0) {
4812 mlog_errno(status); 4893 mlog_errno(status);
@@ -4856,10 +4937,9 @@ static void ocfs2_make_right_split_rec(struct super_block *sb,
4856 split_rec->e_flags = rec->e_flags; 4937 split_rec->e_flags = rec->e_flags;
4857} 4938}
4858 4939
4859static int ocfs2_split_and_insert(struct inode *inode, 4940static int ocfs2_split_and_insert(handle_t *handle,
4860 handle_t *handle,
4861 struct ocfs2_path *path,
4862 struct ocfs2_extent_tree *et, 4941 struct ocfs2_extent_tree *et,
4942 struct ocfs2_path *path,
4863 struct buffer_head **last_eb_bh, 4943 struct buffer_head **last_eb_bh,
4864 int split_index, 4944 int split_index,
4865 struct ocfs2_extent_rec *orig_split_rec, 4945 struct ocfs2_extent_rec *orig_split_rec,
@@ -4892,7 +4972,7 @@ leftright:
4892 4972
4893 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 4973 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
4894 le16_to_cpu(rightmost_el->l_count)) { 4974 le16_to_cpu(rightmost_el->l_count)) {
4895 ret = ocfs2_grow_tree(inode, handle, et, 4975 ret = ocfs2_grow_tree(handle, et,
4896 &depth, last_eb_bh, meta_ac); 4976 &depth, last_eb_bh, meta_ac);
4897 if (ret) { 4977 if (ret) {
4898 mlog_errno(ret); 4978 mlog_errno(ret);
@@ -4921,8 +5001,8 @@ leftright:
4921 */ 5001 */
4922 insert.ins_split = SPLIT_RIGHT; 5002 insert.ins_split = SPLIT_RIGHT;
4923 5003
4924 ocfs2_make_right_split_rec(inode->i_sb, &tmprec, insert_range, 5004 ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
4925 &rec); 5005 &tmprec, insert_range, &rec);
4926 5006
4927 split_rec = tmprec; 5007 split_rec = tmprec;
4928 5008
@@ -4930,7 +5010,7 @@ leftright:
4930 do_leftright = 1; 5010 do_leftright = 1;
4931 } 5011 }
4932 5012
4933 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); 5013 ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
4934 if (ret) { 5014 if (ret) {
4935 mlog_errno(ret); 5015 mlog_errno(ret);
4936 goto out; 5016 goto out;
@@ -4946,7 +5026,7 @@ leftright:
4946 ocfs2_reinit_path(path, 1); 5026 ocfs2_reinit_path(path, 1);
4947 5027
4948 cpos = le32_to_cpu(split_rec.e_cpos); 5028 cpos = le32_to_cpu(split_rec.e_cpos);
4949 ret = ocfs2_find_path(inode, path, cpos); 5029 ret = ocfs2_find_path(et->et_ci, path, cpos);
4950 if (ret) { 5030 if (ret) {
4951 mlog_errno(ret); 5031 mlog_errno(ret);
4952 goto out; 5032 goto out;
@@ -4961,8 +5041,8 @@ out:
4961 return ret; 5041 return ret;
4962} 5042}
4963 5043
4964static int ocfs2_replace_extent_rec(struct inode *inode, 5044static int ocfs2_replace_extent_rec(handle_t *handle,
4965 handle_t *handle, 5045 struct ocfs2_extent_tree *et,
4966 struct ocfs2_path *path, 5046 struct ocfs2_path *path,
4967 struct ocfs2_extent_list *el, 5047 struct ocfs2_extent_list *el,
4968 int split_index, 5048 int split_index,
@@ -4970,7 +5050,7 @@ static int ocfs2_replace_extent_rec(struct inode *inode,
4970{ 5050{
4971 int ret; 5051 int ret;
4972 5052
4973 ret = ocfs2_path_bh_journal_access(handle, inode, path, 5053 ret = ocfs2_path_bh_journal_access(handle, et->et_ci, path,
4974 path_num_items(path) - 1); 5054 path_num_items(path) - 1);
4975 if (ret) { 5055 if (ret) {
4976 mlog_errno(ret); 5056 mlog_errno(ret);
@@ -4985,9 +5065,8 @@ out:
4985} 5065}
4986 5066
4987/* 5067/*
4988 * Mark part or all of the extent record at split_index in the leaf 5068 * Split part or all of the extent record at split_index in the leaf
4989 * pointed to by path as written. This removes the unwritten 5069 * pointed to by path. Merge with the contiguous extent record if needed.
4990 * extent flag.
4991 * 5070 *
4992 * Care is taken to handle contiguousness so as to not grow the tree. 5071 * Care is taken to handle contiguousness so as to not grow the tree.
4993 * 5072 *
@@ -5004,14 +5083,13 @@ out:
5004 * have been brought into cache (and pinned via the journal), so the 5083 * have been brought into cache (and pinned via the journal), so the
5005 * extra overhead is not expressed in terms of disk reads. 5084 * extra overhead is not expressed in terms of disk reads.
5006 */ 5085 */
5007static int __ocfs2_mark_extent_written(struct inode *inode, 5086int ocfs2_split_extent(handle_t *handle,
5008 struct ocfs2_extent_tree *et, 5087 struct ocfs2_extent_tree *et,
5009 handle_t *handle, 5088 struct ocfs2_path *path,
5010 struct ocfs2_path *path, 5089 int split_index,
5011 int split_index, 5090 struct ocfs2_extent_rec *split_rec,
5012 struct ocfs2_extent_rec *split_rec, 5091 struct ocfs2_alloc_context *meta_ac,
5013 struct ocfs2_alloc_context *meta_ac, 5092 struct ocfs2_cached_dealloc_ctxt *dealloc)
5014 struct ocfs2_cached_dealloc_ctxt *dealloc)
5015{ 5093{
5016 int ret = 0; 5094 int ret = 0;
5017 struct ocfs2_extent_list *el = path_leaf_el(path); 5095 struct ocfs2_extent_list *el = path_leaf_el(path);
@@ -5020,12 +5098,6 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
5020 struct ocfs2_merge_ctxt ctxt; 5098 struct ocfs2_merge_ctxt ctxt;
5021 struct ocfs2_extent_list *rightmost_el; 5099 struct ocfs2_extent_list *rightmost_el;
5022 5100
5023 if (!(rec->e_flags & OCFS2_EXT_UNWRITTEN)) {
5024 ret = -EIO;
5025 mlog_errno(ret);
5026 goto out;
5027 }
5028
5029 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) || 5101 if (le32_to_cpu(rec->e_cpos) > le32_to_cpu(split_rec->e_cpos) ||
5030 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) < 5102 ((le32_to_cpu(rec->e_cpos) + le16_to_cpu(rec->e_leaf_clusters)) <
5031 (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) { 5103 (le32_to_cpu(split_rec->e_cpos) + le16_to_cpu(split_rec->e_leaf_clusters)))) {
@@ -5034,19 +5106,19 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
5034 goto out; 5106 goto out;
5035 } 5107 }
5036 5108
5037 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(inode, path, el, 5109 ctxt.c_contig_type = ocfs2_figure_merge_contig_type(et, path, el,
5038 split_index, 5110 split_index,
5039 split_rec); 5111 split_rec);
5040 5112
5041 /* 5113 /*
5042 * The core merge / split code wants to know how much room is 5114 * The core merge / split code wants to know how much room is
5043 * left in this inodes allocation tree, so we pass the 5115 * left in this allocation tree, so we pass the
5044 * rightmost extent list. 5116 * rightmost extent list.
5045 */ 5117 */
5046 if (path->p_tree_depth) { 5118 if (path->p_tree_depth) {
5047 struct ocfs2_extent_block *eb; 5119 struct ocfs2_extent_block *eb;
5048 5120
5049 ret = ocfs2_read_extent_block(inode, 5121 ret = ocfs2_read_extent_block(et->et_ci,
5050 ocfs2_et_get_last_eb_blk(et), 5122 ocfs2_et_get_last_eb_blk(et),
5051 &last_eb_bh); 5123 &last_eb_bh);
5052 if (ret) { 5124 if (ret) {
@@ -5073,19 +5145,18 @@ static int __ocfs2_mark_extent_written(struct inode *inode,
5073 5145
5074 if (ctxt.c_contig_type == CONTIG_NONE) { 5146 if (ctxt.c_contig_type == CONTIG_NONE) {
5075 if (ctxt.c_split_covers_rec) 5147 if (ctxt.c_split_covers_rec)
5076 ret = ocfs2_replace_extent_rec(inode, handle, 5148 ret = ocfs2_replace_extent_rec(handle, et, path, el,
5077 path, el,
5078 split_index, split_rec); 5149 split_index, split_rec);
5079 else 5150 else
5080 ret = ocfs2_split_and_insert(inode, handle, path, et, 5151 ret = ocfs2_split_and_insert(handle, et, path,
5081 &last_eb_bh, split_index, 5152 &last_eb_bh, split_index,
5082 split_rec, meta_ac); 5153 split_rec, meta_ac);
5083 if (ret) 5154 if (ret)
5084 mlog_errno(ret); 5155 mlog_errno(ret);
5085 } else { 5156 } else {
5086 ret = ocfs2_try_to_merge_extent(inode, handle, path, 5157 ret = ocfs2_try_to_merge_extent(handle, et, path,
5087 split_index, split_rec, 5158 split_index, split_rec,
5088 dealloc, &ctxt, et); 5159 dealloc, &ctxt);
5089 if (ret) 5160 if (ret)
5090 mlog_errno(ret); 5161 mlog_errno(ret);
5091 } 5162 }
@@ -5096,46 +5167,31 @@ out:
5096} 5167}
5097 5168
5098/* 5169/*
5099 * Mark the already-existing extent at cpos as written for len clusters. 5170 * Change the flags of the already-existing extent at cpos for len clusters.
5171 *
5172 * new_flags: the flags we want to set.
5173 * clear_flags: the flags we want to clear.
5174 * phys: the new physical offset we want this new extent starts from.
5100 * 5175 *
5101 * If the existing extent is larger than the request, initiate a 5176 * If the existing extent is larger than the request, initiate a
5102 * split. An attempt will be made at merging with adjacent extents. 5177 * split. An attempt will be made at merging with adjacent extents.
5103 * 5178 *
5104 * The caller is responsible for passing down meta_ac if we'll need it. 5179 * The caller is responsible for passing down meta_ac if we'll need it.
5105 */ 5180 */
5106int ocfs2_mark_extent_written(struct inode *inode, 5181int ocfs2_change_extent_flag(handle_t *handle,
5107 struct ocfs2_extent_tree *et, 5182 struct ocfs2_extent_tree *et,
5108 handle_t *handle, u32 cpos, u32 len, u32 phys, 5183 u32 cpos, u32 len, u32 phys,
5109 struct ocfs2_alloc_context *meta_ac, 5184 struct ocfs2_alloc_context *meta_ac,
5110 struct ocfs2_cached_dealloc_ctxt *dealloc) 5185 struct ocfs2_cached_dealloc_ctxt *dealloc,
5186 int new_flags, int clear_flags)
5111{ 5187{
5112 int ret, index; 5188 int ret, index;
5113 u64 start_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys); 5189 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
5190 u64 start_blkno = ocfs2_clusters_to_blocks(sb, phys);
5114 struct ocfs2_extent_rec split_rec; 5191 struct ocfs2_extent_rec split_rec;
5115 struct ocfs2_path *left_path = NULL; 5192 struct ocfs2_path *left_path = NULL;
5116 struct ocfs2_extent_list *el; 5193 struct ocfs2_extent_list *el;
5117 5194 struct ocfs2_extent_rec *rec;
5118 mlog(0, "Inode %lu cpos %u, len %u, phys %u (%llu)\n",
5119 inode->i_ino, cpos, len, phys, (unsigned long long)start_blkno);
5120
5121 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
5122 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
5123 "that are being written to, but the feature bit "
5124 "is not set in the super block.",
5125 (unsigned long long)OCFS2_I(inode)->ip_blkno);
5126 ret = -EROFS;
5127 goto out;
5128 }
5129
5130 /*
5131 * XXX: This should be fixed up so that we just re-insert the
5132 * next extent records.
5133 *
5134 * XXX: This is a hack on the extent tree, maybe it should be
5135 * an op?
5136 */
5137 if (et->et_ops == &ocfs2_dinode_et_ops)
5138 ocfs2_extent_map_trunc(inode, 0);
5139 5195
5140 left_path = ocfs2_new_path_from_et(et); 5196 left_path = ocfs2_new_path_from_et(et);
5141 if (!left_path) { 5197 if (!left_path) {
@@ -5144,7 +5200,7 @@ int ocfs2_mark_extent_written(struct inode *inode,
5144 goto out; 5200 goto out;
5145 } 5201 }
5146 5202
5147 ret = ocfs2_find_path(inode, left_path, cpos); 5203 ret = ocfs2_find_path(et->et_ci, left_path, cpos);
5148 if (ret) { 5204 if (ret) {
5149 mlog_errno(ret); 5205 mlog_errno(ret);
5150 goto out; 5206 goto out;
@@ -5153,34 +5209,102 @@ int ocfs2_mark_extent_written(struct inode *inode,
5153 5209
5154 index = ocfs2_search_extent_list(el, cpos); 5210 index = ocfs2_search_extent_list(el, cpos);
5155 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5211 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
5156 ocfs2_error(inode->i_sb, 5212 ocfs2_error(sb,
5157 "Inode %llu has an extent at cpos %u which can no " 5213 "Owner %llu has an extent at cpos %u which can no "
5158 "longer be found.\n", 5214 "longer be found.\n",
5159 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); 5215 (unsigned long long)
5216 ocfs2_metadata_cache_owner(et->et_ci), cpos);
5160 ret = -EROFS; 5217 ret = -EROFS;
5161 goto out; 5218 goto out;
5162 } 5219 }
5163 5220
5221 ret = -EIO;
5222 rec = &el->l_recs[index];
5223 if (new_flags && (rec->e_flags & new_flags)) {
5224 mlog(ML_ERROR, "Owner %llu tried to set %d flags on an "
5225 "extent that already had them",
5226 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5227 new_flags);
5228 goto out;
5229 }
5230
5231 if (clear_flags && !(rec->e_flags & clear_flags)) {
5232 mlog(ML_ERROR, "Owner %llu tried to clear %d flags on an "
5233 "extent that didn't have them",
5234 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5235 clear_flags);
5236 goto out;
5237 }
5238
5164 memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec)); 5239 memset(&split_rec, 0, sizeof(struct ocfs2_extent_rec));
5165 split_rec.e_cpos = cpu_to_le32(cpos); 5240 split_rec.e_cpos = cpu_to_le32(cpos);
5166 split_rec.e_leaf_clusters = cpu_to_le16(len); 5241 split_rec.e_leaf_clusters = cpu_to_le16(len);
5167 split_rec.e_blkno = cpu_to_le64(start_blkno); 5242 split_rec.e_blkno = cpu_to_le64(start_blkno);
5168 split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; 5243 split_rec.e_flags = rec->e_flags;
5169 split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; 5244 if (new_flags)
5170 5245 split_rec.e_flags |= new_flags;
5171 ret = __ocfs2_mark_extent_written(inode, et, handle, left_path, 5246 if (clear_flags)
5172 index, &split_rec, meta_ac, 5247 split_rec.e_flags &= ~clear_flags;
5173 dealloc); 5248
5249 ret = ocfs2_split_extent(handle, et, left_path,
5250 index, &split_rec, meta_ac,
5251 dealloc);
5174 if (ret) 5252 if (ret)
5175 mlog_errno(ret); 5253 mlog_errno(ret);
5176 5254
5177out: 5255out:
5178 ocfs2_free_path(left_path); 5256 ocfs2_free_path(left_path);
5179 return ret; 5257 return ret;
5258
5180} 5259}
5181 5260
5182static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, 5261/*
5183 handle_t *handle, struct ocfs2_path *path, 5262 * Mark the already-existing extent at cpos as written for len clusters.
5263 * This removes the unwritten extent flag.
5264 *
5265 * If the existing extent is larger than the request, initiate a
5266 * split. An attempt will be made at merging with adjacent extents.
5267 *
5268 * The caller is responsible for passing down meta_ac if we'll need it.
5269 */
5270int ocfs2_mark_extent_written(struct inode *inode,
5271 struct ocfs2_extent_tree *et,
5272 handle_t *handle, u32 cpos, u32 len, u32 phys,
5273 struct ocfs2_alloc_context *meta_ac,
5274 struct ocfs2_cached_dealloc_ctxt *dealloc)
5275{
5276 int ret;
5277
5278 mlog(0, "Inode %lu cpos %u, len %u, phys clusters %u\n",
5279 inode->i_ino, cpos, len, phys);
5280
5281 if (!ocfs2_writes_unwritten_extents(OCFS2_SB(inode->i_sb))) {
5282 ocfs2_error(inode->i_sb, "Inode %llu has unwritten extents "
5283 "that are being written to, but the feature bit "
5284 "is not set in the super block.",
5285 (unsigned long long)OCFS2_I(inode)->ip_blkno);
5286 ret = -EROFS;
5287 goto out;
5288 }
5289
5290 /*
5291 * XXX: This should be fixed up so that we just re-insert the
5292 * next extent records.
5293 */
5294 ocfs2_et_extent_map_truncate(et, 0);
5295
5296 ret = ocfs2_change_extent_flag(handle, et, cpos,
5297 len, phys, meta_ac, dealloc,
5298 0, OCFS2_EXT_UNWRITTEN);
5299 if (ret)
5300 mlog_errno(ret);
5301
5302out:
5303 return ret;
5304}
5305
5306static int ocfs2_split_tree(handle_t *handle, struct ocfs2_extent_tree *et,
5307 struct ocfs2_path *path,
5184 int index, u32 new_range, 5308 int index, u32 new_range,
5185 struct ocfs2_alloc_context *meta_ac) 5309 struct ocfs2_alloc_context *meta_ac)
5186{ 5310{
@@ -5197,11 +5321,12 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
5197 */ 5321 */
5198 el = path_leaf_el(path); 5322 el = path_leaf_el(path);
5199 rec = &el->l_recs[index]; 5323 rec = &el->l_recs[index];
5200 ocfs2_make_right_split_rec(inode->i_sb, &split_rec, new_range, rec); 5324 ocfs2_make_right_split_rec(ocfs2_metadata_cache_get_super(et->et_ci),
5325 &split_rec, new_range, rec);
5201 5326
5202 depth = path->p_tree_depth; 5327 depth = path->p_tree_depth;
5203 if (depth > 0) { 5328 if (depth > 0) {
5204 ret = ocfs2_read_extent_block(inode, 5329 ret = ocfs2_read_extent_block(et->et_ci,
5205 ocfs2_et_get_last_eb_blk(et), 5330 ocfs2_et_get_last_eb_blk(et),
5206 &last_eb_bh); 5331 &last_eb_bh);
5207 if (ret < 0) { 5332 if (ret < 0) {
@@ -5224,7 +5349,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
5224 5349
5225 if (le16_to_cpu(rightmost_el->l_next_free_rec) == 5350 if (le16_to_cpu(rightmost_el->l_next_free_rec) ==
5226 le16_to_cpu(rightmost_el->l_count)) { 5351 le16_to_cpu(rightmost_el->l_count)) {
5227 ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh, 5352 ret = ocfs2_grow_tree(handle, et, &depth, &last_eb_bh,
5228 meta_ac); 5353 meta_ac);
5229 if (ret) { 5354 if (ret) {
5230 mlog_errno(ret); 5355 mlog_errno(ret);
@@ -5238,7 +5363,7 @@ static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et,
5238 insert.ins_split = SPLIT_RIGHT; 5363 insert.ins_split = SPLIT_RIGHT;
5239 insert.ins_tree_depth = depth; 5364 insert.ins_tree_depth = depth;
5240 5365
5241 ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); 5366 ret = ocfs2_do_insert_extent(handle, et, &split_rec, &insert);
5242 if (ret) 5367 if (ret)
5243 mlog_errno(ret); 5368 mlog_errno(ret);
5244 5369
@@ -5247,23 +5372,23 @@ out:
5247 return ret; 5372 return ret;
5248} 5373}
5249 5374
5250static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, 5375static int ocfs2_truncate_rec(handle_t *handle,
5376 struct ocfs2_extent_tree *et,
5251 struct ocfs2_path *path, int index, 5377 struct ocfs2_path *path, int index,
5252 struct ocfs2_cached_dealloc_ctxt *dealloc, 5378 struct ocfs2_cached_dealloc_ctxt *dealloc,
5253 u32 cpos, u32 len, 5379 u32 cpos, u32 len)
5254 struct ocfs2_extent_tree *et)
5255{ 5380{
5256 int ret; 5381 int ret;
5257 u32 left_cpos, rec_range, trunc_range; 5382 u32 left_cpos, rec_range, trunc_range;
5258 int wants_rotate = 0, is_rightmost_tree_rec = 0; 5383 int wants_rotate = 0, is_rightmost_tree_rec = 0;
5259 struct super_block *sb = inode->i_sb; 5384 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
5260 struct ocfs2_path *left_path = NULL; 5385 struct ocfs2_path *left_path = NULL;
5261 struct ocfs2_extent_list *el = path_leaf_el(path); 5386 struct ocfs2_extent_list *el = path_leaf_el(path);
5262 struct ocfs2_extent_rec *rec; 5387 struct ocfs2_extent_rec *rec;
5263 struct ocfs2_extent_block *eb; 5388 struct ocfs2_extent_block *eb;
5264 5389
5265 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { 5390 if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
5266 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); 5391 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
5267 if (ret) { 5392 if (ret) {
5268 mlog_errno(ret); 5393 mlog_errno(ret);
5269 goto out; 5394 goto out;
@@ -5295,14 +5420,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5295 * by this leaf and the one to it's left. 5420 * by this leaf and the one to it's left.
5296 * 5421 *
5297 * There are two cases we can skip: 5422 * There are two cases we can skip:
5298 * 1) Path is the leftmost one in our inode tree. 5423 * 1) Path is the leftmost one in our btree.
5299 * 2) The leaf is rightmost and will be empty after 5424 * 2) The leaf is rightmost and will be empty after
5300 * we remove the extent record - the rotate code 5425 * we remove the extent record - the rotate code
5301 * knows how to update the newly formed edge. 5426 * knows how to update the newly formed edge.
5302 */ 5427 */
5303 5428
5304 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, path, 5429 ret = ocfs2_find_cpos_for_left_leaf(sb, path, &left_cpos);
5305 &left_cpos);
5306 if (ret) { 5430 if (ret) {
5307 mlog_errno(ret); 5431 mlog_errno(ret);
5308 goto out; 5432 goto out;
@@ -5316,7 +5440,8 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5316 goto out; 5440 goto out;
5317 } 5441 }
5318 5442
5319 ret = ocfs2_find_path(inode, left_path, left_cpos); 5443 ret = ocfs2_find_path(et->et_ci, left_path,
5444 left_cpos);
5320 if (ret) { 5445 if (ret) {
5321 mlog_errno(ret); 5446 mlog_errno(ret);
5322 goto out; 5447 goto out;
@@ -5332,13 +5457,13 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5332 goto out; 5457 goto out;
5333 } 5458 }
5334 5459
5335 ret = ocfs2_journal_access_path(inode, handle, path); 5460 ret = ocfs2_journal_access_path(et->et_ci, handle, path);
5336 if (ret) { 5461 if (ret) {
5337 mlog_errno(ret); 5462 mlog_errno(ret);
5338 goto out; 5463 goto out;
5339 } 5464 }
5340 5465
5341 ret = ocfs2_journal_access_path(inode, handle, left_path); 5466 ret = ocfs2_journal_access_path(et->et_ci, handle, left_path);
5342 if (ret) { 5467 if (ret) {
5343 mlog_errno(ret); 5468 mlog_errno(ret);
5344 goto out; 5469 goto out;
@@ -5361,7 +5486,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5361 * be deleted by the rotate code. 5486 * be deleted by the rotate code.
5362 */ 5487 */
5363 rec = &el->l_recs[next_free - 1]; 5488 rec = &el->l_recs[next_free - 1];
5364 ocfs2_adjust_rightmost_records(inode, handle, path, 5489 ocfs2_adjust_rightmost_records(handle, et, path,
5365 rec); 5490 rec);
5366 } 5491 }
5367 } else if (le32_to_cpu(rec->e_cpos) == cpos) { 5492 } else if (le32_to_cpu(rec->e_cpos) == cpos) {
@@ -5373,11 +5498,12 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5373 /* Remove rightmost portion of the record */ 5498 /* Remove rightmost portion of the record */
5374 le16_add_cpu(&rec->e_leaf_clusters, -len); 5499 le16_add_cpu(&rec->e_leaf_clusters, -len);
5375 if (is_rightmost_tree_rec) 5500 if (is_rightmost_tree_rec)
5376 ocfs2_adjust_rightmost_records(inode, handle, path, rec); 5501 ocfs2_adjust_rightmost_records(handle, et, path, rec);
5377 } else { 5502 } else {
5378 /* Caller should have trapped this. */ 5503 /* Caller should have trapped this. */
5379 mlog(ML_ERROR, "Inode %llu: Invalid record truncate: (%u, %u) " 5504 mlog(ML_ERROR, "Owner %llu: Invalid record truncate: (%u, %u) "
5380 "(%u, %u)\n", (unsigned long long)OCFS2_I(inode)->ip_blkno, 5505 "(%u, %u)\n",
5506 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5381 le32_to_cpu(rec->e_cpos), 5507 le32_to_cpu(rec->e_cpos),
5382 le16_to_cpu(rec->e_leaf_clusters), cpos, len); 5508 le16_to_cpu(rec->e_leaf_clusters), cpos, len);
5383 BUG(); 5509 BUG();
@@ -5386,14 +5512,14 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle,
5386 if (left_path) { 5512 if (left_path) {
5387 int subtree_index; 5513 int subtree_index;
5388 5514
5389 subtree_index = ocfs2_find_subtree_root(inode, left_path, path); 5515 subtree_index = ocfs2_find_subtree_root(et, left_path, path);
5390 ocfs2_complete_edge_insert(inode, handle, left_path, path, 5516 ocfs2_complete_edge_insert(handle, left_path, path,
5391 subtree_index); 5517 subtree_index);
5392 } 5518 }
5393 5519
5394 ocfs2_journal_dirty(handle, path_leaf_bh(path)); 5520 ocfs2_journal_dirty(handle, path_leaf_bh(path));
5395 5521
5396 ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); 5522 ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
5397 if (ret) { 5523 if (ret) {
5398 mlog_errno(ret); 5524 mlog_errno(ret);
5399 goto out; 5525 goto out;
@@ -5404,9 +5530,9 @@ out:
5404 return ret; 5530 return ret;
5405} 5531}
5406 5532
5407int ocfs2_remove_extent(struct inode *inode, 5533int ocfs2_remove_extent(handle_t *handle,
5408 struct ocfs2_extent_tree *et, 5534 struct ocfs2_extent_tree *et,
5409 u32 cpos, u32 len, handle_t *handle, 5535 u32 cpos, u32 len,
5410 struct ocfs2_alloc_context *meta_ac, 5536 struct ocfs2_alloc_context *meta_ac,
5411 struct ocfs2_cached_dealloc_ctxt *dealloc) 5537 struct ocfs2_cached_dealloc_ctxt *dealloc)
5412{ 5538{
@@ -5416,7 +5542,11 @@ int ocfs2_remove_extent(struct inode *inode,
5416 struct ocfs2_extent_list *el; 5542 struct ocfs2_extent_list *el;
5417 struct ocfs2_path *path = NULL; 5543 struct ocfs2_path *path = NULL;
5418 5544
5419 ocfs2_extent_map_trunc(inode, 0); 5545 /*
5546 * XXX: Why are we truncating to 0 instead of wherever this
5547 * affects us?
5548 */
5549 ocfs2_et_extent_map_truncate(et, 0);
5420 5550
5421 path = ocfs2_new_path_from_et(et); 5551 path = ocfs2_new_path_from_et(et);
5422 if (!path) { 5552 if (!path) {
@@ -5425,7 +5555,7 @@ int ocfs2_remove_extent(struct inode *inode,
5425 goto out; 5555 goto out;
5426 } 5556 }
5427 5557
5428 ret = ocfs2_find_path(inode, path, cpos); 5558 ret = ocfs2_find_path(et->et_ci, path, cpos);
5429 if (ret) { 5559 if (ret) {
5430 mlog_errno(ret); 5560 mlog_errno(ret);
5431 goto out; 5561 goto out;
@@ -5434,10 +5564,11 @@ int ocfs2_remove_extent(struct inode *inode,
5434 el = path_leaf_el(path); 5564 el = path_leaf_el(path);
5435 index = ocfs2_search_extent_list(el, cpos); 5565 index = ocfs2_search_extent_list(el, cpos);
5436 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5566 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
5437 ocfs2_error(inode->i_sb, 5567 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5438 "Inode %llu has an extent at cpos %u which can no " 5568 "Owner %llu has an extent at cpos %u which can no "
5439 "longer be found.\n", 5569 "longer be found.\n",
5440 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos); 5570 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5571 cpos);
5441 ret = -EROFS; 5572 ret = -EROFS;
5442 goto out; 5573 goto out;
5443 } 5574 }
@@ -5464,20 +5595,21 @@ int ocfs2_remove_extent(struct inode *inode,
5464 5595
5465 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range); 5596 BUG_ON(cpos < le32_to_cpu(rec->e_cpos) || trunc_range > rec_range);
5466 5597
5467 mlog(0, "Inode %llu, remove (cpos %u, len %u). Existing index %d " 5598 mlog(0, "Owner %llu, remove (cpos %u, len %u). Existing index %d "
5468 "(cpos %u, len %u)\n", 5599 "(cpos %u, len %u)\n",
5469 (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, len, index, 5600 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5601 cpos, len, index,
5470 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec)); 5602 le32_to_cpu(rec->e_cpos), ocfs2_rec_clusters(el, rec));
5471 5603
5472 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { 5604 if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) {
5473 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5605 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
5474 cpos, len, et); 5606 cpos, len);
5475 if (ret) { 5607 if (ret) {
5476 mlog_errno(ret); 5608 mlog_errno(ret);
5477 goto out; 5609 goto out;
5478 } 5610 }
5479 } else { 5611 } else {
5480 ret = ocfs2_split_tree(inode, et, handle, path, index, 5612 ret = ocfs2_split_tree(handle, et, path, index,
5481 trunc_range, meta_ac); 5613 trunc_range, meta_ac);
5482 if (ret) { 5614 if (ret) {
5483 mlog_errno(ret); 5615 mlog_errno(ret);
@@ -5490,7 +5622,7 @@ int ocfs2_remove_extent(struct inode *inode,
5490 */ 5622 */
5491 ocfs2_reinit_path(path, 1); 5623 ocfs2_reinit_path(path, 1);
5492 5624
5493 ret = ocfs2_find_path(inode, path, cpos); 5625 ret = ocfs2_find_path(et->et_ci, path, cpos);
5494 if (ret) { 5626 if (ret) {
5495 mlog_errno(ret); 5627 mlog_errno(ret);
5496 goto out; 5628 goto out;
@@ -5499,9 +5631,9 @@ int ocfs2_remove_extent(struct inode *inode,
5499 el = path_leaf_el(path); 5631 el = path_leaf_el(path);
5500 index = ocfs2_search_extent_list(el, cpos); 5632 index = ocfs2_search_extent_list(el, cpos);
5501 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) { 5633 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
5502 ocfs2_error(inode->i_sb, 5634 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5503 "Inode %llu: split at cpos %u lost record.", 5635 "Owner %llu: split at cpos %u lost record.",
5504 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5636 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5505 cpos); 5637 cpos);
5506 ret = -EROFS; 5638 ret = -EROFS;
5507 goto out; 5639 goto out;
@@ -5515,18 +5647,18 @@ int ocfs2_remove_extent(struct inode *inode,
5515 rec_range = le32_to_cpu(rec->e_cpos) + 5647 rec_range = le32_to_cpu(rec->e_cpos) +
5516 ocfs2_rec_clusters(el, rec); 5648 ocfs2_rec_clusters(el, rec);
5517 if (rec_range != trunc_range) { 5649 if (rec_range != trunc_range) {
5518 ocfs2_error(inode->i_sb, 5650 ocfs2_error(ocfs2_metadata_cache_get_super(et->et_ci),
5519 "Inode %llu: error after split at cpos %u" 5651 "Owner %llu: error after split at cpos %u"
5520 "trunc len %u, existing record is (%u,%u)", 5652 "trunc len %u, existing record is (%u,%u)",
5521 (unsigned long long)OCFS2_I(inode)->ip_blkno, 5653 (unsigned long long)ocfs2_metadata_cache_owner(et->et_ci),
5522 cpos, len, le32_to_cpu(rec->e_cpos), 5654 cpos, len, le32_to_cpu(rec->e_cpos),
5523 ocfs2_rec_clusters(el, rec)); 5655 ocfs2_rec_clusters(el, rec));
5524 ret = -EROFS; 5656 ret = -EROFS;
5525 goto out; 5657 goto out;
5526 } 5658 }
5527 5659
5528 ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, 5660 ret = ocfs2_truncate_rec(handle, et, path, index, dealloc,
5529 cpos, len, et); 5661 cpos, len);
5530 if (ret) { 5662 if (ret) {
5531 mlog_errno(ret); 5663 mlog_errno(ret);
5532 goto out; 5664 goto out;
@@ -5573,7 +5705,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
5573 goto out; 5705 goto out;
5574 } 5706 }
5575 5707
5576 ret = ocfs2_et_root_journal_access(handle, inode, et, 5708 ret = ocfs2_et_root_journal_access(handle, et,
5577 OCFS2_JOURNAL_ACCESS_WRITE); 5709 OCFS2_JOURNAL_ACCESS_WRITE);
5578 if (ret) { 5710 if (ret) {
5579 mlog_errno(ret); 5711 mlog_errno(ret);
@@ -5583,14 +5715,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
5583 vfs_dq_free_space_nodirty(inode, 5715 vfs_dq_free_space_nodirty(inode,
5584 ocfs2_clusters_to_bytes(inode->i_sb, len)); 5716 ocfs2_clusters_to_bytes(inode->i_sb, len));
5585 5717
5586 ret = ocfs2_remove_extent(inode, et, cpos, len, handle, meta_ac, 5718 ret = ocfs2_remove_extent(handle, et, cpos, len, meta_ac, dealloc);
5587 dealloc);
5588 if (ret) { 5719 if (ret) {
5589 mlog_errno(ret); 5720 mlog_errno(ret);
5590 goto out_commit; 5721 goto out_commit;
5591 } 5722 }
5592 5723
5593 ocfs2_et_update_clusters(inode, et, -len); 5724 ocfs2_et_update_clusters(et, -len);
5594 5725
5595 ret = ocfs2_journal_dirty(handle, et->et_root_bh); 5726 ret = ocfs2_journal_dirty(handle, et->et_root_bh);
5596 if (ret) { 5727 if (ret) {
@@ -5690,7 +5821,7 @@ int ocfs2_truncate_log_append(struct ocfs2_super *osb,
5690 goto bail; 5821 goto bail;
5691 } 5822 }
5692 5823
5693 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, 5824 status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
5694 OCFS2_JOURNAL_ACCESS_WRITE); 5825 OCFS2_JOURNAL_ACCESS_WRITE);
5695 if (status < 0) { 5826 if (status < 0) {
5696 mlog_errno(status); 5827 mlog_errno(status);
@@ -5752,7 +5883,7 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
5752 while (i >= 0) { 5883 while (i >= 0) {
5753 /* Caller has given us at least enough credits to 5884 /* Caller has given us at least enough credits to
5754 * update the truncate log dinode */ 5885 * update the truncate log dinode */
5755 status = ocfs2_journal_access_di(handle, tl_inode, tl_bh, 5886 status = ocfs2_journal_access_di(handle, INODE_CACHE(tl_inode), tl_bh,
5756 OCFS2_JOURNAL_ACCESS_WRITE); 5887 OCFS2_JOURNAL_ACCESS_WRITE);
5757 if (status < 0) { 5888 if (status < 0) {
5758 mlog_errno(status); 5889 mlog_errno(status);
@@ -6010,7 +6141,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb,
6010 tl->tl_used = 0; 6141 tl->tl_used = 0;
6011 6142
6012 ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check); 6143 ocfs2_compute_meta_ecc(osb->sb, tl_bh->b_data, &di->i_check);
6013 status = ocfs2_write_block(osb, tl_bh, tl_inode); 6144 status = ocfs2_write_block(osb, tl_bh, INODE_CACHE(tl_inode));
6014 if (status < 0) { 6145 if (status < 0) {
6015 mlog_errno(status); 6146 mlog_errno(status);
6016 goto bail; 6147 goto bail;
@@ -6400,9 +6531,9 @@ ocfs2_find_per_slot_free_list(int type,
6400 return fl; 6531 return fl;
6401} 6532}
6402 6533
6403static int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 6534int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
6404 int type, int slot, u64 blkno, 6535 int type, int slot, u64 blkno,
6405 unsigned int bit) 6536 unsigned int bit)
6406{ 6537{
6407 int ret; 6538 int ret;
6408 struct ocfs2_per_slot_free_list *fl; 6539 struct ocfs2_per_slot_free_list *fl;
@@ -6518,7 +6649,7 @@ static int ocfs2_find_new_last_ext_blk(struct inode *inode,
6518 goto out; 6649 goto out;
6519 } 6650 }
6520 6651
6521 ret = ocfs2_find_leaf(inode, path_root_el(path), cpos, &bh); 6652 ret = ocfs2_find_leaf(INODE_CACHE(inode), path_root_el(path), cpos, &bh);
6522 if (ret) { 6653 if (ret) {
6523 mlog_errno(ret); 6654 mlog_errno(ret);
6524 goto out; 6655 goto out;
@@ -6551,7 +6682,7 @@ out:
6551 */ 6682 */
6552static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path, 6683static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6553 handle_t *handle, struct ocfs2_truncate_context *tc, 6684 handle_t *handle, struct ocfs2_truncate_context *tc,
6554 u32 clusters_to_del, u64 *delete_start) 6685 u32 clusters_to_del, u64 *delete_start, u8 *flags)
6555{ 6686{
6556 int ret, i, index = path->p_tree_depth; 6687 int ret, i, index = path->p_tree_depth;
6557 u32 new_edge = 0; 6688 u32 new_edge = 0;
@@ -6561,6 +6692,7 @@ static int ocfs2_trim_tree(struct inode *inode, struct ocfs2_path *path,
6561 struct ocfs2_extent_rec *rec; 6692 struct ocfs2_extent_rec *rec;
6562 6693
6563 *delete_start = 0; 6694 *delete_start = 0;
6695 *flags = 0;
6564 6696
6565 while (index >= 0) { 6697 while (index >= 0) {
6566 bh = path->p_node[index].bh; 6698 bh = path->p_node[index].bh;
@@ -6648,6 +6780,7 @@ find_tail_record:
6648 *delete_start = le64_to_cpu(rec->e_blkno) 6780 *delete_start = le64_to_cpu(rec->e_blkno)
6649 + ocfs2_clusters_to_blocks(inode->i_sb, 6781 + ocfs2_clusters_to_blocks(inode->i_sb,
6650 le16_to_cpu(rec->e_leaf_clusters)); 6782 le16_to_cpu(rec->e_leaf_clusters));
6783 *flags = rec->e_flags;
6651 6784
6652 /* 6785 /*
6653 * If it's now empty, remove this record. 6786 * If it's now empty, remove this record.
@@ -6719,7 +6852,7 @@ delete:
6719 6852
6720 mlog(0, "deleting this extent block.\n"); 6853 mlog(0, "deleting this extent block.\n");
6721 6854
6722 ocfs2_remove_from_cache(inode, bh); 6855 ocfs2_remove_from_cache(INODE_CACHE(inode), bh);
6723 6856
6724 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0])); 6857 BUG_ON(ocfs2_rec_clusters(el, &el->l_recs[0]));
6725 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos)); 6858 BUG_ON(le32_to_cpu(el->l_recs[0].e_cpos));
@@ -6747,7 +6880,8 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6747 struct buffer_head *fe_bh, 6880 struct buffer_head *fe_bh,
6748 handle_t *handle, 6881 handle_t *handle,
6749 struct ocfs2_truncate_context *tc, 6882 struct ocfs2_truncate_context *tc,
6750 struct ocfs2_path *path) 6883 struct ocfs2_path *path,
6884 struct ocfs2_alloc_context *meta_ac)
6751{ 6885{
6752 int status; 6886 int status;
6753 struct ocfs2_dinode *fe; 6887 struct ocfs2_dinode *fe;
@@ -6755,6 +6889,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6755 struct ocfs2_extent_list *el; 6889 struct ocfs2_extent_list *el;
6756 struct buffer_head *last_eb_bh = NULL; 6890 struct buffer_head *last_eb_bh = NULL;
6757 u64 delete_blk = 0; 6891 u64 delete_blk = 0;
6892 u8 rec_flags;
6758 6893
6759 fe = (struct ocfs2_dinode *) fe_bh->b_data; 6894 fe = (struct ocfs2_dinode *) fe_bh->b_data;
6760 6895
@@ -6769,14 +6904,14 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6769 * Each component will be touched, so we might as well journal 6904 * Each component will be touched, so we might as well journal
6770 * here to avoid having to handle errors later. 6905 * here to avoid having to handle errors later.
6771 */ 6906 */
6772 status = ocfs2_journal_access_path(inode, handle, path); 6907 status = ocfs2_journal_access_path(INODE_CACHE(inode), handle, path);
6773 if (status < 0) { 6908 if (status < 0) {
6774 mlog_errno(status); 6909 mlog_errno(status);
6775 goto bail; 6910 goto bail;
6776 } 6911 }
6777 6912
6778 if (last_eb_bh) { 6913 if (last_eb_bh) {
6779 status = ocfs2_journal_access_eb(handle, inode, last_eb_bh, 6914 status = ocfs2_journal_access_eb(handle, INODE_CACHE(inode), last_eb_bh,
6780 OCFS2_JOURNAL_ACCESS_WRITE); 6915 OCFS2_JOURNAL_ACCESS_WRITE);
6781 if (status < 0) { 6916 if (status < 0) {
6782 mlog_errno(status); 6917 mlog_errno(status);
@@ -6810,7 +6945,7 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6810 inode->i_blocks = ocfs2_inode_sector_count(inode); 6945 inode->i_blocks = ocfs2_inode_sector_count(inode);
6811 6946
6812 status = ocfs2_trim_tree(inode, path, handle, tc, 6947 status = ocfs2_trim_tree(inode, path, handle, tc,
6813 clusters_to_del, &delete_blk); 6948 clusters_to_del, &delete_blk, &rec_flags);
6814 if (status) { 6949 if (status) {
6815 mlog_errno(status); 6950 mlog_errno(status);
6816 goto bail; 6951 goto bail;
@@ -6842,8 +6977,16 @@ static int ocfs2_do_truncate(struct ocfs2_super *osb,
6842 } 6977 }
6843 6978
6844 if (delete_blk) { 6979 if (delete_blk) {
6845 status = ocfs2_truncate_log_append(osb, handle, delete_blk, 6980 if (rec_flags & OCFS2_EXT_REFCOUNTED)
6846 clusters_to_del); 6981 status = ocfs2_decrease_refcount(inode, handle,
6982 ocfs2_blocks_to_clusters(osb->sb,
6983 delete_blk),
6984 clusters_to_del, meta_ac,
6985 &tc->tc_dealloc, 1);
6986 else
6987 status = ocfs2_truncate_log_append(osb, handle,
6988 delete_blk,
6989 clusters_to_del);
6847 if (status < 0) { 6990 if (status < 0) {
6848 mlog_errno(status); 6991 mlog_errno(status);
6849 goto bail; 6992 goto bail;
@@ -6863,9 +7006,9 @@ static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh)
6863 return 0; 7006 return 0;
6864} 7007}
6865 7008
6866static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, 7009void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
6867 unsigned int from, unsigned int to, 7010 unsigned int from, unsigned int to,
6868 struct page *page, int zero, u64 *phys) 7011 struct page *page, int zero, u64 *phys)
6869{ 7012{
6870 int ret, partial = 0; 7013 int ret, partial = 0;
6871 7014
@@ -6933,20 +7076,16 @@ out:
6933 ocfs2_unlock_and_free_pages(pages, numpages); 7076 ocfs2_unlock_and_free_pages(pages, numpages);
6934} 7077}
6935 7078
6936static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end, 7079int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
6937 struct page **pages, int *num) 7080 struct page **pages, int *num)
6938{ 7081{
6939 int numpages, ret = 0; 7082 int numpages, ret = 0;
6940 struct super_block *sb = inode->i_sb;
6941 struct address_space *mapping = inode->i_mapping; 7083 struct address_space *mapping = inode->i_mapping;
6942 unsigned long index; 7084 unsigned long index;
6943 loff_t last_page_bytes; 7085 loff_t last_page_bytes;
6944 7086
6945 BUG_ON(start > end); 7087 BUG_ON(start > end);
6946 7088
6947 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
6948 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
6949
6950 numpages = 0; 7089 numpages = 0;
6951 last_page_bytes = PAGE_ALIGN(end); 7090 last_page_bytes = PAGE_ALIGN(end);
6952 index = start >> PAGE_CACHE_SHIFT; 7091 index = start >> PAGE_CACHE_SHIFT;
@@ -6974,6 +7113,17 @@ out:
6974 return ret; 7113 return ret;
6975} 7114}
6976 7115
7116static int ocfs2_grab_eof_pages(struct inode *inode, loff_t start, loff_t end,
7117 struct page **pages, int *num)
7118{
7119 struct super_block *sb = inode->i_sb;
7120
7121 BUG_ON(start >> OCFS2_SB(sb)->s_clustersize_bits !=
7122 (end - 1) >> OCFS2_SB(sb)->s_clustersize_bits);
7123
7124 return ocfs2_grab_pages(inode, start, end, pages, num);
7125}
7126
6977/* 7127/*
6978 * Zero the area past i_size but still within an allocated 7128 * Zero the area past i_size but still within an allocated
6979 * cluster. This avoids exposing nonzero data on subsequent file 7129 * cluster. This avoids exposing nonzero data on subsequent file
@@ -7138,7 +7288,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7138 goto out_unlock; 7288 goto out_unlock;
7139 } 7289 }
7140 7290
7141 ret = ocfs2_journal_access_di(handle, inode, di_bh, 7291 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
7142 OCFS2_JOURNAL_ACCESS_WRITE); 7292 OCFS2_JOURNAL_ACCESS_WRITE);
7143 if (ret) { 7293 if (ret) {
7144 mlog_errno(ret); 7294 mlog_errno(ret);
@@ -7218,9 +7368,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
7218 * this proves to be false, we could always re-build 7368 * this proves to be false, we could always re-build
7219 * the in-inode data from our pages. 7369 * the in-inode data from our pages.
7220 */ 7370 */
7221 ocfs2_init_dinode_extent_tree(&et, inode, di_bh); 7371 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
7222 ret = ocfs2_insert_extent(osb, handle, inode, &et, 7372 ret = ocfs2_insert_extent(handle, &et, 0, block, 1, 0, NULL);
7223 0, block, 1, 0, NULL);
7224 if (ret) { 7373 if (ret) {
7225 mlog_errno(ret); 7374 mlog_errno(ret);
7226 goto out_commit; 7375 goto out_commit;
@@ -7262,11 +7411,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
7262{ 7411{
7263 int status, i, credits, tl_sem = 0; 7412 int status, i, credits, tl_sem = 0;
7264 u32 clusters_to_del, new_highest_cpos, range; 7413 u32 clusters_to_del, new_highest_cpos, range;
7414 u64 blkno = 0;
7265 struct ocfs2_extent_list *el; 7415 struct ocfs2_extent_list *el;
7266 handle_t *handle = NULL; 7416 handle_t *handle = NULL;
7267 struct inode *tl_inode = osb->osb_tl_inode; 7417 struct inode *tl_inode = osb->osb_tl_inode;
7268 struct ocfs2_path *path = NULL; 7418 struct ocfs2_path *path = NULL;
7269 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; 7419 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
7420 struct ocfs2_alloc_context *meta_ac = NULL;
7421 struct ocfs2_refcount_tree *ref_tree = NULL;
7270 7422
7271 mlog_entry_void(); 7423 mlog_entry_void();
7272 7424
@@ -7292,10 +7444,12 @@ start:
7292 goto bail; 7444 goto bail;
7293 } 7445 }
7294 7446
7447 credits = 0;
7448
7295 /* 7449 /*
7296 * Truncate always works against the rightmost tree branch. 7450 * Truncate always works against the rightmost tree branch.
7297 */ 7451 */
7298 status = ocfs2_find_path(inode, path, UINT_MAX); 7452 status = ocfs2_find_path(INODE_CACHE(inode), path, UINT_MAX);
7299 if (status) { 7453 if (status) {
7300 mlog_errno(status); 7454 mlog_errno(status);
7301 goto bail; 7455 goto bail;
@@ -7332,10 +7486,15 @@ start:
7332 clusters_to_del = 0; 7486 clusters_to_del = 0;
7333 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) { 7487 } else if (le32_to_cpu(el->l_recs[i].e_cpos) >= new_highest_cpos) {
7334 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]); 7488 clusters_to_del = ocfs2_rec_clusters(el, &el->l_recs[i]);
7489 blkno = le64_to_cpu(el->l_recs[i].e_blkno);
7335 } else if (range > new_highest_cpos) { 7490 } else if (range > new_highest_cpos) {
7336 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) + 7491 clusters_to_del = (ocfs2_rec_clusters(el, &el->l_recs[i]) +
7337 le32_to_cpu(el->l_recs[i].e_cpos)) - 7492 le32_to_cpu(el->l_recs[i].e_cpos)) -
7338 new_highest_cpos; 7493 new_highest_cpos;
7494 blkno = le64_to_cpu(el->l_recs[i].e_blkno) +
7495 ocfs2_clusters_to_blocks(inode->i_sb,
7496 ocfs2_rec_clusters(el, &el->l_recs[i]) -
7497 clusters_to_del);
7339 } else { 7498 } else {
7340 status = 0; 7499 status = 0;
7341 goto bail; 7500 goto bail;
@@ -7344,6 +7503,29 @@ start:
7344 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n", 7503 mlog(0, "clusters_to_del = %u in this pass, tail blk=%llu\n",
7345 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr); 7504 clusters_to_del, (unsigned long long)path_leaf_bh(path)->b_blocknr);
7346 7505
7506 if (el->l_recs[i].e_flags & OCFS2_EXT_REFCOUNTED && clusters_to_del) {
7507 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
7508 OCFS2_HAS_REFCOUNT_FL));
7509
7510 status = ocfs2_lock_refcount_tree(osb,
7511 le64_to_cpu(di->i_refcount_loc),
7512 1, &ref_tree, NULL);
7513 if (status) {
7514 mlog_errno(status);
7515 goto bail;
7516 }
7517
7518 status = ocfs2_prepare_refcount_change_for_del(inode, fe_bh,
7519 blkno,
7520 clusters_to_del,
7521 &credits,
7522 &meta_ac);
7523 if (status < 0) {
7524 mlog_errno(status);
7525 goto bail;
7526 }
7527 }
7528
7347 mutex_lock(&tl_inode->i_mutex); 7529 mutex_lock(&tl_inode->i_mutex);
7348 tl_sem = 1; 7530 tl_sem = 1;
7349 /* ocfs2_truncate_log_needs_flush guarantees us at least one 7531 /* ocfs2_truncate_log_needs_flush guarantees us at least one
@@ -7357,7 +7539,7 @@ start:
7357 } 7539 }
7358 } 7540 }
7359 7541
7360 credits = ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del, 7542 credits += ocfs2_calc_tree_trunc_credits(osb->sb, clusters_to_del,
7361 (struct ocfs2_dinode *)fe_bh->b_data, 7543 (struct ocfs2_dinode *)fe_bh->b_data,
7362 el); 7544 el);
7363 handle = ocfs2_start_trans(osb, credits); 7545 handle = ocfs2_start_trans(osb, credits);
@@ -7369,7 +7551,7 @@ start:
7369 } 7551 }
7370 7552
7371 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle, 7553 status = ocfs2_do_truncate(osb, clusters_to_del, inode, fe_bh, handle,
7372 tc, path); 7554 tc, path, meta_ac);
7373 if (status < 0) { 7555 if (status < 0) {
7374 mlog_errno(status); 7556 mlog_errno(status);
7375 goto bail; 7557 goto bail;
@@ -7383,6 +7565,16 @@ start:
7383 7565
7384 ocfs2_reinit_path(path, 1); 7566 ocfs2_reinit_path(path, 1);
7385 7567
7568 if (meta_ac) {
7569 ocfs2_free_alloc_context(meta_ac);
7570 meta_ac = NULL;
7571 }
7572
7573 if (ref_tree) {
7574 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7575 ref_tree = NULL;
7576 }
7577
7386 /* 7578 /*
7387 * The check above will catch the case where we've truncated 7579 * The check above will catch the case where we've truncated
7388 * away all allocation. 7580 * away all allocation.
@@ -7399,6 +7591,12 @@ bail:
7399 if (handle) 7591 if (handle)
7400 ocfs2_commit_trans(osb, handle); 7592 ocfs2_commit_trans(osb, handle);
7401 7593
7594 if (meta_ac)
7595 ocfs2_free_alloc_context(meta_ac);
7596
7597 if (ref_tree)
7598 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
7599
7402 ocfs2_run_deallocs(osb, &tc->tc_dealloc); 7600 ocfs2_run_deallocs(osb, &tc->tc_dealloc);
7403 7601
7404 ocfs2_free_path(path); 7602 ocfs2_free_path(path);
@@ -7445,7 +7643,7 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb,
7445 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); 7643 ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc);
7446 7644
7447 if (fe->id2.i_list.l_tree_depth) { 7645 if (fe->id2.i_list.l_tree_depth) {
7448 status = ocfs2_read_extent_block(inode, 7646 status = ocfs2_read_extent_block(INODE_CACHE(inode),
7449 le64_to_cpu(fe->i_last_eb_blk), 7647 le64_to_cpu(fe->i_last_eb_blk),
7450 &last_eb_bh); 7648 &last_eb_bh);
7451 if (status < 0) { 7649 if (status < 0) {
@@ -7507,7 +7705,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
7507 goto out; 7705 goto out;
7508 } 7706 }
7509 7707
7510 ret = ocfs2_journal_access_di(handle, inode, di_bh, 7708 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
7511 OCFS2_JOURNAL_ACCESS_WRITE); 7709 OCFS2_JOURNAL_ACCESS_WRITE);
7512 if (ret) { 7710 if (ret) {
7513 mlog_errno(ret); 7711 mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 353254ba29e1..9c122d574464 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -45,7 +45,8 @@
45 * 45 *
46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a 46 * ocfs2_extent_tree contains info for the root of the b-tree, it must have a
47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree 47 * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree
48 * functions. With metadata ecc, we now call different journal_access 48 * functions. It needs the ocfs2_caching_info structure associated with
49 * I/O on the tree. With metadata ecc, we now call different journal_access
49 * functions for each type of metadata, so it must have the 50 * functions for each type of metadata, so it must have the
50 * root_journal_access function. 51 * root_journal_access function.
51 * ocfs2_extent_tree_operations abstract the normal operations we do for 52 * ocfs2_extent_tree_operations abstract the normal operations we do for
@@ -56,6 +57,7 @@ struct ocfs2_extent_tree {
56 struct ocfs2_extent_tree_operations *et_ops; 57 struct ocfs2_extent_tree_operations *et_ops;
57 struct buffer_head *et_root_bh; 58 struct buffer_head *et_root_bh;
58 struct ocfs2_extent_list *et_root_el; 59 struct ocfs2_extent_list *et_root_el;
60 struct ocfs2_caching_info *et_ci;
59 ocfs2_journal_access_func et_root_journal_access; 61 ocfs2_journal_access_func et_root_journal_access;
60 void *et_object; 62 void *et_object;
61 unsigned int et_max_leaf_clusters; 63 unsigned int et_max_leaf_clusters;
@@ -66,31 +68,32 @@ struct ocfs2_extent_tree {
66 * specified object buffer. 68 * specified object buffer.
67 */ 69 */
68void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, 70void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et,
69 struct inode *inode, 71 struct ocfs2_caching_info *ci,
70 struct buffer_head *bh); 72 struct buffer_head *bh);
71void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, 73void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et,
72 struct inode *inode, 74 struct ocfs2_caching_info *ci,
73 struct buffer_head *bh); 75 struct buffer_head *bh);
74struct ocfs2_xattr_value_buf; 76struct ocfs2_xattr_value_buf;
75void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, 77void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et,
76 struct inode *inode, 78 struct ocfs2_caching_info *ci,
77 struct ocfs2_xattr_value_buf *vb); 79 struct ocfs2_xattr_value_buf *vb);
78void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et, 80void ocfs2_init_dx_root_extent_tree(struct ocfs2_extent_tree *et,
79 struct inode *inode, 81 struct ocfs2_caching_info *ci,
80 struct buffer_head *bh); 82 struct buffer_head *bh);
83void ocfs2_init_refcount_extent_tree(struct ocfs2_extent_tree *et,
84 struct ocfs2_caching_info *ci,
85 struct buffer_head *bh);
81 86
82/* 87/*
83 * Read an extent block into *bh. If *bh is NULL, a bh will be 88 * Read an extent block into *bh. If *bh is NULL, a bh will be
84 * allocated. This is a cached read. The extent block will be validated 89 * allocated. This is a cached read. The extent block will be validated
85 * with ocfs2_validate_extent_block(). 90 * with ocfs2_validate_extent_block().
86 */ 91 */
87int ocfs2_read_extent_block(struct inode *inode, u64 eb_blkno, 92int ocfs2_read_extent_block(struct ocfs2_caching_info *ci, u64 eb_blkno,
88 struct buffer_head **bh); 93 struct buffer_head **bh);
89 94
90struct ocfs2_alloc_context; 95struct ocfs2_alloc_context;
91int ocfs2_insert_extent(struct ocfs2_super *osb, 96int ocfs2_insert_extent(handle_t *handle,
92 handle_t *handle,
93 struct inode *inode,
94 struct ocfs2_extent_tree *et, 97 struct ocfs2_extent_tree *et,
95 u32 cpos, 98 u32 cpos,
96 u64 start_blk, 99 u64 start_blk,
@@ -103,25 +106,36 @@ enum ocfs2_alloc_restarted {
103 RESTART_TRANS, 106 RESTART_TRANS,
104 RESTART_META 107 RESTART_META
105}; 108};
106int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, 109int ocfs2_add_clusters_in_btree(handle_t *handle,
107 struct inode *inode, 110 struct ocfs2_extent_tree *et,
108 u32 *logical_offset, 111 u32 *logical_offset,
109 u32 clusters_to_add, 112 u32 clusters_to_add,
110 int mark_unwritten, 113 int mark_unwritten,
111 struct ocfs2_extent_tree *et,
112 handle_t *handle,
113 struct ocfs2_alloc_context *data_ac, 114 struct ocfs2_alloc_context *data_ac,
114 struct ocfs2_alloc_context *meta_ac, 115 struct ocfs2_alloc_context *meta_ac,
115 enum ocfs2_alloc_restarted *reason_ret); 116 enum ocfs2_alloc_restarted *reason_ret);
116struct ocfs2_cached_dealloc_ctxt; 117struct ocfs2_cached_dealloc_ctxt;
118struct ocfs2_path;
119int ocfs2_split_extent(handle_t *handle,
120 struct ocfs2_extent_tree *et,
121 struct ocfs2_path *path,
122 int split_index,
123 struct ocfs2_extent_rec *split_rec,
124 struct ocfs2_alloc_context *meta_ac,
125 struct ocfs2_cached_dealloc_ctxt *dealloc);
117int ocfs2_mark_extent_written(struct inode *inode, 126int ocfs2_mark_extent_written(struct inode *inode,
118 struct ocfs2_extent_tree *et, 127 struct ocfs2_extent_tree *et,
119 handle_t *handle, u32 cpos, u32 len, u32 phys, 128 handle_t *handle, u32 cpos, u32 len, u32 phys,
120 struct ocfs2_alloc_context *meta_ac, 129 struct ocfs2_alloc_context *meta_ac,
121 struct ocfs2_cached_dealloc_ctxt *dealloc); 130 struct ocfs2_cached_dealloc_ctxt *dealloc);
122int ocfs2_remove_extent(struct inode *inode, 131int ocfs2_change_extent_flag(handle_t *handle,
123 struct ocfs2_extent_tree *et, 132 struct ocfs2_extent_tree *et,
124 u32 cpos, u32 len, handle_t *handle, 133 u32 cpos, u32 len, u32 phys,
134 struct ocfs2_alloc_context *meta_ac,
135 struct ocfs2_cached_dealloc_ctxt *dealloc,
136 int new_flags, int clear_flags);
137int ocfs2_remove_extent(handle_t *handle, struct ocfs2_extent_tree *et,
138 u32 cpos, u32 len,
125 struct ocfs2_alloc_context *meta_ac, 139 struct ocfs2_alloc_context *meta_ac,
126 struct ocfs2_cached_dealloc_ctxt *dealloc); 140 struct ocfs2_cached_dealloc_ctxt *dealloc);
127int ocfs2_remove_btree_range(struct inode *inode, 141int ocfs2_remove_btree_range(struct inode *inode,
@@ -130,7 +144,6 @@ int ocfs2_remove_btree_range(struct inode *inode,
130 struct ocfs2_cached_dealloc_ctxt *dealloc); 144 struct ocfs2_cached_dealloc_ctxt *dealloc);
131 145
132int ocfs2_num_free_extents(struct ocfs2_super *osb, 146int ocfs2_num_free_extents(struct ocfs2_super *osb,
133 struct inode *inode,
134 struct ocfs2_extent_tree *et); 147 struct ocfs2_extent_tree *et);
135 148
136/* 149/*
@@ -195,6 +208,9 @@ static inline void ocfs2_init_dealloc_ctxt(struct ocfs2_cached_dealloc_ctxt *c)
195} 208}
196int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt, 209int ocfs2_cache_cluster_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
197 u64 blkno, unsigned int bit); 210 u64 blkno, unsigned int bit);
211int ocfs2_cache_block_dealloc(struct ocfs2_cached_dealloc_ctxt *ctxt,
212 int type, int slot, u64 blkno,
213 unsigned int bit);
198static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c) 214static inline int ocfs2_dealloc_has_cluster(struct ocfs2_cached_dealloc_ctxt *c)
199{ 215{
200 return c->c_global_allocator != NULL; 216 return c->c_global_allocator != NULL;
@@ -222,8 +238,9 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
222int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, 238int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh,
223 unsigned int start, unsigned int end, int trunc); 239 unsigned int start, unsigned int end, int trunc);
224 240
225int ocfs2_find_leaf(struct inode *inode, struct ocfs2_extent_list *root_el, 241int ocfs2_find_leaf(struct ocfs2_caching_info *ci,
226 u32 cpos, struct buffer_head **leaf_bh); 242 struct ocfs2_extent_list *root_el, u32 cpos,
243 struct buffer_head **leaf_bh);
227int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster); 244int ocfs2_search_extent_list(struct ocfs2_extent_list *el, u32 v_cluster);
228 245
229/* 246/*
@@ -254,4 +271,50 @@ static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec)
254 return !rec->e_leaf_clusters; 271 return !rec->e_leaf_clusters;
255} 272}
256 273
274int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
275 struct page **pages, int *num);
276void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle,
277 unsigned int from, unsigned int to,
278 struct page *page, int zero, u64 *phys);
279/*
280 * Structures which describe a path through a btree, and functions to
281 * manipulate them.
282 *
283 * The idea here is to be as generic as possible with the tree
284 * manipulation code.
285 */
286struct ocfs2_path_item {
287 struct buffer_head *bh;
288 struct ocfs2_extent_list *el;
289};
290
291#define OCFS2_MAX_PATH_DEPTH 5
292
293struct ocfs2_path {
294 int p_tree_depth;
295 ocfs2_journal_access_func p_root_access;
296 struct ocfs2_path_item p_node[OCFS2_MAX_PATH_DEPTH];
297};
298
299#define path_root_bh(_path) ((_path)->p_node[0].bh)
300#define path_root_el(_path) ((_path)->p_node[0].el)
301#define path_root_access(_path)((_path)->p_root_access)
302#define path_leaf_bh(_path) ((_path)->p_node[(_path)->p_tree_depth].bh)
303#define path_leaf_el(_path) ((_path)->p_node[(_path)->p_tree_depth].el)
304#define path_num_items(_path) ((_path)->p_tree_depth + 1)
305
306void ocfs2_reinit_path(struct ocfs2_path *path, int keep_root);
307void ocfs2_free_path(struct ocfs2_path *path);
308int ocfs2_find_path(struct ocfs2_caching_info *ci,
309 struct ocfs2_path *path,
310 u32 cpos);
311struct ocfs2_path *ocfs2_new_path_from_path(struct ocfs2_path *path);
312struct ocfs2_path *ocfs2_new_path_from_et(struct ocfs2_extent_tree *et);
313int ocfs2_path_bh_journal_access(handle_t *handle,
314 struct ocfs2_caching_info *ci,
315 struct ocfs2_path *path,
316 int idx);
317int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
318 handle_t *handle,
319 struct ocfs2_path *path);
257#endif /* OCFS2_ALLOC_H */ 320#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 8a1e61545f41..72e76062a900 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -44,6 +44,7 @@
44#include "suballoc.h" 44#include "suballoc.h"
45#include "super.h" 45#include "super.h"
46#include "symlink.h" 46#include "symlink.h"
47#include "refcounttree.h"
47 48
48#include "buffer_head_io.h" 49#include "buffer_head_io.h"
49 50
@@ -126,8 +127,8 @@ bail:
126 return err; 127 return err;
127} 128}
128 129
129static int ocfs2_get_block(struct inode *inode, sector_t iblock, 130int ocfs2_get_block(struct inode *inode, sector_t iblock,
130 struct buffer_head *bh_result, int create) 131 struct buffer_head *bh_result, int create)
131{ 132{
132 int err = 0; 133 int err = 0;
133 unsigned int ext_flags; 134 unsigned int ext_flags;
@@ -590,6 +591,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
590 goto bail; 591 goto bail;
591 } 592 }
592 593
594 /* We should already CoW the refcounted extent. */
595 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
593 /* 596 /*
594 * get_more_blocks() expects us to describe a hole by clearing 597 * get_more_blocks() expects us to describe a hole by clearing
595 * the mapped bit on bh_result(). 598 * the mapped bit on bh_result().
@@ -687,6 +690,10 @@ static ssize_t ocfs2_direct_IO(int rw,
687 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 690 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
688 return 0; 691 return 0;
689 692
693 /* Fallback to buffered I/O if we are appending. */
694 if (i_size_read(inode) <= offset)
695 return 0;
696
690 ret = blockdev_direct_IO_no_locking(rw, iocb, inode, 697 ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
691 inode->i_sb->s_bdev, iov, offset, 698 inode->i_sb->s_bdev, iov, offset,
692 nr_segs, 699 nr_segs,
@@ -1259,7 +1266,8 @@ static int ocfs2_write_cluster(struct address_space *mapping,
1259 goto out; 1266 goto out;
1260 } 1267 }
1261 } else if (unwritten) { 1268 } else if (unwritten) {
1262 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); 1269 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
1270 wc->w_di_bh);
1263 ret = ocfs2_mark_extent_written(inode, &et, 1271 ret = ocfs2_mark_extent_written(inode, &et,
1264 wc->w_handle, cpos, 1, phys, 1272 wc->w_handle, cpos, 1, phys,
1265 meta_ac, &wc->w_dealloc); 1273 meta_ac, &wc->w_dealloc);
@@ -1448,6 +1456,9 @@ static int ocfs2_populate_write_desc(struct inode *inode,
1448 goto out; 1456 goto out;
1449 } 1457 }
1450 1458
1459 /* We should already CoW the refcountd extent. */
1460 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1461
1451 /* 1462 /*
1452 * Assume worst case - that we're writing in 1463 * Assume worst case - that we're writing in
1453 * the middle of the extent. 1464 * the middle of the extent.
@@ -1528,7 +1539,7 @@ static int ocfs2_write_begin_inline(struct address_space *mapping,
1528 goto out; 1539 goto out;
1529 } 1540 }
1530 1541
1531 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, 1542 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
1532 OCFS2_JOURNAL_ACCESS_WRITE); 1543 OCFS2_JOURNAL_ACCESS_WRITE);
1533 if (ret) { 1544 if (ret) {
1534 ocfs2_commit_trans(osb, handle); 1545 ocfs2_commit_trans(osb, handle);
@@ -1699,6 +1710,19 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1699 goto out; 1710 goto out;
1700 } 1711 }
1701 1712
1713 ret = ocfs2_check_range_for_refcount(inode, pos, len);
1714 if (ret < 0) {
1715 mlog_errno(ret);
1716 goto out;
1717 } else if (ret == 1) {
1718 ret = ocfs2_refcount_cow(inode, di_bh,
1719 wc->w_cpos, wc->w_clen, UINT_MAX);
1720 if (ret) {
1721 mlog_errno(ret);
1722 goto out;
1723 }
1724 }
1725
1702 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc, 1726 ret = ocfs2_populate_write_desc(inode, wc, &clusters_to_alloc,
1703 &extents_to_split); 1727 &extents_to_split);
1704 if (ret) { 1728 if (ret) {
@@ -1726,7 +1750,8 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1726 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), 1750 (long long)i_size_read(inode), le32_to_cpu(di->i_clusters),
1727 clusters_to_alloc, extents_to_split); 1751 clusters_to_alloc, extents_to_split);
1728 1752
1729 ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); 1753 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
1754 wc->w_di_bh);
1730 ret = ocfs2_lock_allocators(inode, &et, 1755 ret = ocfs2_lock_allocators(inode, &et,
1731 clusters_to_alloc, extents_to_split, 1756 clusters_to_alloc, extents_to_split,
1732 &data_ac, &meta_ac); 1757 &data_ac, &meta_ac);
@@ -1773,7 +1798,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping,
1773 * We don't want this to fail in ocfs2_write_end(), so do it 1798 * We don't want this to fail in ocfs2_write_end(), so do it
1774 * here. 1799 * here.
1775 */ 1800 */
1776 ret = ocfs2_journal_access_di(handle, inode, wc->w_di_bh, 1801 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
1777 OCFS2_JOURNAL_ACCESS_WRITE); 1802 OCFS2_JOURNAL_ACCESS_WRITE);
1778 if (ret) { 1803 if (ret) {
1779 mlog_errno(ret); 1804 mlog_errno(ret);
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 503e49232e11..c48e93ffc513 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -57,6 +57,8 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
57 struct buffer_head *di_bh); 57 struct buffer_head *di_bh);
58int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size); 58int ocfs2_size_fits_inline_data(struct buffer_head *di_bh, u64 new_size);
59 59
60int ocfs2_get_block(struct inode *inode, sector_t iblock,
61 struct buffer_head *bh_result, int create);
60/* all ocfs2_dio_end_io()'s fault */ 62/* all ocfs2_dio_end_io()'s fault */
61#define ocfs2_iocb_is_rw_locked(iocb) \ 63#define ocfs2_iocb_is_rw_locked(iocb) \
62 test_bit(0, (unsigned long *)&iocb->private) 64 test_bit(0, (unsigned long *)&iocb->private)
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 15c8e6deee2e..d43d34a1dd31 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -52,12 +52,12 @@ enum ocfs2_state_bits {
52BUFFER_FNS(NeedsValidate, needs_validate); 52BUFFER_FNS(NeedsValidate, needs_validate);
53 53
54int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, 54int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
55 struct inode *inode) 55 struct ocfs2_caching_info *ci)
56{ 56{
57 int ret = 0; 57 int ret = 0;
58 58
59 mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n", 59 mlog_entry("(bh->b_blocknr = %llu, ci=%p)\n",
60 (unsigned long long)bh->b_blocknr, inode); 60 (unsigned long long)bh->b_blocknr, ci);
61 61
62 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); 62 BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
63 BUG_ON(buffer_jbd(bh)); 63 BUG_ON(buffer_jbd(bh));
@@ -70,7 +70,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
70 goto out; 70 goto out;
71 } 71 }
72 72
73 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 73 ocfs2_metadata_cache_io_lock(ci);
74 74
75 lock_buffer(bh); 75 lock_buffer(bh);
76 set_buffer_uptodate(bh); 76 set_buffer_uptodate(bh);
@@ -85,7 +85,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
85 wait_on_buffer(bh); 85 wait_on_buffer(bh);
86 86
87 if (buffer_uptodate(bh)) { 87 if (buffer_uptodate(bh)) {
88 ocfs2_set_buffer_uptodate(inode, bh); 88 ocfs2_set_buffer_uptodate(ci, bh);
89 } else { 89 } else {
90 /* We don't need to remove the clustered uptodate 90 /* We don't need to remove the clustered uptodate
91 * information for this bh as it's not marked locally 91 * information for this bh as it's not marked locally
@@ -94,7 +94,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
94 put_bh(bh); 94 put_bh(bh);
95 } 95 }
96 96
97 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 97 ocfs2_metadata_cache_io_unlock(ci);
98out: 98out:
99 mlog_exit(ret); 99 mlog_exit(ret);
100 return ret; 100 return ret;
@@ -177,7 +177,7 @@ bail:
177 return status; 177 return status;
178} 178}
179 179
180int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, 180int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
181 struct buffer_head *bhs[], int flags, 181 struct buffer_head *bhs[], int flags,
182 int (*validate)(struct super_block *sb, 182 int (*validate)(struct super_block *sb,
183 struct buffer_head *bh)) 183 struct buffer_head *bh))
@@ -185,11 +185,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
185 int status = 0; 185 int status = 0;
186 int i, ignore_cache = 0; 186 int i, ignore_cache = 0;
187 struct buffer_head *bh; 187 struct buffer_head *bh;
188 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
188 189
189 mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n", 190 mlog_entry("(ci=%p, block=(%llu), nr=(%d), flags=%d)\n",
190 inode, (unsigned long long)block, nr, flags); 191 ci, (unsigned long long)block, nr, flags);
191 192
192 BUG_ON(!inode); 193 BUG_ON(!ci);
193 BUG_ON((flags & OCFS2_BH_READAHEAD) && 194 BUG_ON((flags & OCFS2_BH_READAHEAD) &&
194 (flags & OCFS2_BH_IGNORE_CACHE)); 195 (flags & OCFS2_BH_IGNORE_CACHE));
195 196
@@ -212,12 +213,12 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
212 goto bail; 213 goto bail;
213 } 214 }
214 215
215 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 216 ocfs2_metadata_cache_io_lock(ci);
216 for (i = 0 ; i < nr ; i++) { 217 for (i = 0 ; i < nr ; i++) {
217 if (bhs[i] == NULL) { 218 if (bhs[i] == NULL) {
218 bhs[i] = sb_getblk(inode->i_sb, block++); 219 bhs[i] = sb_getblk(sb, block++);
219 if (bhs[i] == NULL) { 220 if (bhs[i] == NULL) {
220 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 221 ocfs2_metadata_cache_io_unlock(ci);
221 status = -EIO; 222 status = -EIO;
222 mlog_errno(status); 223 mlog_errno(status);
223 goto bail; 224 goto bail;
@@ -250,11 +251,11 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
250 * before our is-it-in-flight check. 251 * before our is-it-in-flight check.
251 */ 252 */
252 253
253 if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) { 254 if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) {
254 mlog(ML_UPTODATE, 255 mlog(ML_UPTODATE,
255 "bh (%llu), inode %llu not uptodate\n", 256 "bh (%llu), owner %llu not uptodate\n",
256 (unsigned long long)bh->b_blocknr, 257 (unsigned long long)bh->b_blocknr,
257 (unsigned long long)OCFS2_I(inode)->ip_blkno); 258 (unsigned long long)ocfs2_metadata_cache_owner(ci));
258 /* We're using ignore_cache here to say 259 /* We're using ignore_cache here to say
259 * "go to disk" */ 260 * "go to disk" */
260 ignore_cache = 1; 261 ignore_cache = 1;
@@ -283,7 +284,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
283 * previously submitted request than we are 284 * previously submitted request than we are
284 * done here. */ 285 * done here. */
285 if ((flags & OCFS2_BH_READAHEAD) 286 if ((flags & OCFS2_BH_READAHEAD)
286 && ocfs2_buffer_read_ahead(inode, bh)) 287 && ocfs2_buffer_read_ahead(ci, bh))
287 continue; 288 continue;
288 289
289 lock_buffer(bh); 290 lock_buffer(bh);
@@ -305,7 +306,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
305 * buffer lock. */ 306 * buffer lock. */
306 if (!(flags & OCFS2_BH_IGNORE_CACHE) 307 if (!(flags & OCFS2_BH_IGNORE_CACHE)
307 && !(flags & OCFS2_BH_READAHEAD) 308 && !(flags & OCFS2_BH_READAHEAD)
308 && ocfs2_buffer_uptodate(inode, bh)) { 309 && ocfs2_buffer_uptodate(ci, bh)) {
309 unlock_buffer(bh); 310 unlock_buffer(bh);
310 continue; 311 continue;
311 } 312 }
@@ -327,7 +328,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
327 328
328 if (!(flags & OCFS2_BH_READAHEAD)) { 329 if (!(flags & OCFS2_BH_READAHEAD)) {
329 /* We know this can't have changed as we hold the 330 /* We know this can't have changed as we hold the
330 * inode sem. Avoid doing any work on the bh if the 331 * owner sem. Avoid doing any work on the bh if the
331 * journal has it. */ 332 * journal has it. */
332 if (!buffer_jbd(bh)) 333 if (!buffer_jbd(bh))
333 wait_on_buffer(bh); 334 wait_on_buffer(bh);
@@ -351,7 +352,7 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
351 * that better not have changed */ 352 * that better not have changed */
352 BUG_ON(buffer_jbd(bh)); 353 BUG_ON(buffer_jbd(bh));
353 clear_buffer_needs_validate(bh); 354 clear_buffer_needs_validate(bh);
354 status = validate(inode->i_sb, bh); 355 status = validate(sb, bh);
355 if (status) { 356 if (status) {
356 put_bh(bh); 357 put_bh(bh);
357 bhs[i] = NULL; 358 bhs[i] = NULL;
@@ -363,9 +364,9 @@ int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
363 /* Always set the buffer in the cache, even if it was 364 /* Always set the buffer in the cache, even if it was
364 * a forced read, or read-ahead which hasn't yet 365 * a forced read, or read-ahead which hasn't yet
365 * completed. */ 366 * completed. */
366 ocfs2_set_buffer_uptodate(inode, bh); 367 ocfs2_set_buffer_uptodate(ci, bh);
367 } 368 }
368 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 369 ocfs2_metadata_cache_io_unlock(ci);
369 370
370 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", 371 mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
371 (unsigned long long)block, nr, 372 (unsigned long long)block, nr,
@@ -399,7 +400,7 @@ static void ocfs2_check_super_or_backup(struct super_block *sb,
399 400
400/* 401/*
401 * Write super block and backups doesn't need to collaborate with journal, 402 * Write super block and backups doesn't need to collaborate with journal,
402 * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed 403 * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed
403 * into this function. 404 * into this function.
404 */ 405 */
405int ocfs2_write_super_or_backup(struct ocfs2_super *osb, 406int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h
index c75d682dadd8..b97bcc6dde7c 100644
--- a/fs/ocfs2/buffer_head_io.h
+++ b/fs/ocfs2/buffer_head_io.h
@@ -33,7 +33,7 @@ void ocfs2_end_buffer_io_sync(struct buffer_head *bh,
33 33
34int ocfs2_write_block(struct ocfs2_super *osb, 34int ocfs2_write_block(struct ocfs2_super *osb,
35 struct buffer_head *bh, 35 struct buffer_head *bh,
36 struct inode *inode); 36 struct ocfs2_caching_info *ci);
37int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, 37int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
38 unsigned int nr, struct buffer_head *bhs[]); 38 unsigned int nr, struct buffer_head *bhs[]);
39 39
@@ -44,7 +44,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
44 * be set even for a READAHEAD call, as it marks the buffer for later 44 * be set even for a READAHEAD call, as it marks the buffer for later
45 * validation. 45 * validation.
46 */ 46 */
47int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, 47int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
48 struct buffer_head *bhs[], int flags, 48 struct buffer_head *bhs[], int flags,
49 int (*validate)(struct super_block *sb, 49 int (*validate)(struct super_block *sb,
50 struct buffer_head *bh)); 50 struct buffer_head *bh));
@@ -55,7 +55,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
55#define OCFS2_BH_IGNORE_CACHE 1 55#define OCFS2_BH_IGNORE_CACHE 1
56#define OCFS2_BH_READAHEAD 8 56#define OCFS2_BH_READAHEAD 8
57 57
58static inline int ocfs2_read_block(struct inode *inode, u64 off, 58static inline int ocfs2_read_block(struct ocfs2_caching_info *ci, u64 off,
59 struct buffer_head **bh, 59 struct buffer_head **bh,
60 int (*validate)(struct super_block *sb, 60 int (*validate)(struct super_block *sb,
61 struct buffer_head *bh)) 61 struct buffer_head *bh))
@@ -68,7 +68,7 @@ static inline int ocfs2_read_block(struct inode *inode, u64 off,
68 goto bail; 68 goto bail;
69 } 69 }
70 70
71 status = ocfs2_read_blocks(inode, off, 1, bh, 0, validate); 71 status = ocfs2_read_blocks(ci, off, 1, bh, 0, validate);
72 72
73bail: 73bail:
74 return status; 74 return status;
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index 96df5416993e..1cd2934de615 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -111,6 +111,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
111 define_mask(EXPORT), 111 define_mask(EXPORT),
112 define_mask(XATTR), 112 define_mask(XATTR),
113 define_mask(QUOTA), 113 define_mask(QUOTA),
114 define_mask(REFCOUNT),
114 define_mask(ERROR), 115 define_mask(ERROR),
115 define_mask(NOTICE), 116 define_mask(NOTICE),
116 define_mask(KTHREAD), 117 define_mask(KTHREAD),
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index 696c32e50716..9b4d11726cf2 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -113,6 +113,7 @@
113#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ 113#define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */
114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ 114#define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */
115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */ 115#define ML_QUOTA 0x0000000040000000ULL /* ocfs2 quota operations */
116#define ML_REFCOUNT 0x0000000080000000ULL /* refcount tree operations */
116/* bits that are infrequently given and frequently matched in the high word */ 117/* bits that are infrequently given and frequently matched in the high word */
117#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ 118#define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */
118#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ 119#define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index b358f3bf896d..28c3ec238796 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -176,7 +176,7 @@ static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
176 struct ocfs2_dx_root_block *dx_root; 176 struct ocfs2_dx_root_block *dx_root;
177 struct ocfs2_dir_block_trailer *trailer; 177 struct ocfs2_dir_block_trailer *trailer;
178 178
179 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 179 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
180 OCFS2_JOURNAL_ACCESS_WRITE); 180 OCFS2_JOURNAL_ACCESS_WRITE);
181 if (ret) { 181 if (ret) {
182 mlog_errno(ret); 182 mlog_errno(ret);
@@ -564,7 +564,8 @@ static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
564 int ret; 564 int ret;
565 struct buffer_head *tmp = *bh; 565 struct buffer_head *tmp = *bh;
566 566
567 ret = ocfs2_read_block(dir, phys, &tmp, ocfs2_validate_dir_block); 567 ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp,
568 ocfs2_validate_dir_block);
568 if (ret) { 569 if (ret) {
569 mlog_errno(ret); 570 mlog_errno(ret);
570 goto out; 571 goto out;
@@ -622,7 +623,8 @@ static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
622 u64 blkno = le64_to_cpu(di->i_dx_root); 623 u64 blkno = le64_to_cpu(di->i_dx_root);
623 struct buffer_head *tmp = *dx_root_bh; 624 struct buffer_head *tmp = *dx_root_bh;
624 625
625 ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_root); 626 ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
627 ocfs2_validate_dx_root);
626 628
627 /* If ocfs2_read_block() got us a new bh, pass it up. */ 629 /* If ocfs2_read_block() got us a new bh, pass it up. */
628 if (!ret && !*dx_root_bh) 630 if (!ret && !*dx_root_bh)
@@ -662,7 +664,8 @@ static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
662 int ret; 664 int ret;
663 struct buffer_head *tmp = *dx_leaf_bh; 665 struct buffer_head *tmp = *dx_leaf_bh;
664 666
665 ret = ocfs2_read_block(dir, blkno, &tmp, ocfs2_validate_dx_leaf); 667 ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
668 ocfs2_validate_dx_leaf);
666 669
667 /* If ocfs2_read_block() got us a new bh, pass it up. */ 670 /* If ocfs2_read_block() got us a new bh, pass it up. */
668 if (!ret && !*dx_leaf_bh) 671 if (!ret && !*dx_leaf_bh)
@@ -680,7 +683,7 @@ static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
680{ 683{
681 int ret; 684 int ret;
682 685
683 ret = ocfs2_read_blocks(dir, start, num, dx_leaf_bhs, 0, 686 ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0,
684 ocfs2_validate_dx_leaf); 687 ocfs2_validate_dx_leaf);
685 if (ret) 688 if (ret)
686 mlog_errno(ret); 689 mlog_errno(ret);
@@ -802,7 +805,8 @@ static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
802 struct ocfs2_extent_rec *rec = NULL; 805 struct ocfs2_extent_rec *rec = NULL;
803 806
804 if (el->l_tree_depth) { 807 if (el->l_tree_depth) {
805 ret = ocfs2_find_leaf(inode, el, major_hash, &eb_bh); 808 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
809 &eb_bh);
806 if (ret) { 810 if (ret) {
807 mlog_errno(ret); 811 mlog_errno(ret);
808 goto out; 812 goto out;
@@ -1133,7 +1137,8 @@ int ocfs2_update_entry(struct inode *dir, handle_t *handle,
1133 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) 1137 if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
1134 access = ocfs2_journal_access_di; 1138 access = ocfs2_journal_access_di;
1135 1139
1136 ret = access(handle, dir, de_bh, OCFS2_JOURNAL_ACCESS_WRITE); 1140 ret = access(handle, INODE_CACHE(dir), de_bh,
1141 OCFS2_JOURNAL_ACCESS_WRITE);
1137 if (ret) { 1142 if (ret) {
1138 mlog_errno(ret); 1143 mlog_errno(ret);
1139 goto out; 1144 goto out;
@@ -1176,7 +1181,7 @@ static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1176 goto bail; 1181 goto bail;
1177 } 1182 }
1178 if (de == de_del) { 1183 if (de == de_del) {
1179 status = access(handle, dir, bh, 1184 status = access(handle, INODE_CACHE(dir), bh,
1180 OCFS2_JOURNAL_ACCESS_WRITE); 1185 OCFS2_JOURNAL_ACCESS_WRITE);
1181 if (status < 0) { 1186 if (status < 0) {
1182 status = -EIO; 1187 status = -EIO;
@@ -1326,7 +1331,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1326 * the entry count needs to be updated. Also, we might be 1331 * the entry count needs to be updated. Also, we might be
1327 * adding to the start of the free list. 1332 * adding to the start of the free list.
1328 */ 1333 */
1329 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 1334 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
1330 OCFS2_JOURNAL_ACCESS_WRITE); 1335 OCFS2_JOURNAL_ACCESS_WRITE);
1331 if (ret) { 1336 if (ret) {
1332 mlog_errno(ret); 1337 mlog_errno(ret);
@@ -1334,7 +1339,7 @@ static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
1334 } 1339 }
1335 1340
1336 if (!ocfs2_dx_root_inline(dx_root)) { 1341 if (!ocfs2_dx_root_inline(dx_root)) {
1337 ret = ocfs2_journal_access_dl(handle, dir, 1342 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
1338 lookup->dl_dx_leaf_bh, 1343 lookup->dl_dx_leaf_bh,
1339 OCFS2_JOURNAL_ACCESS_WRITE); 1344 OCFS2_JOURNAL_ACCESS_WRITE);
1340 if (ret) { 1345 if (ret) {
@@ -1493,7 +1498,7 @@ static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
1493 int ret; 1498 int ret;
1494 struct ocfs2_dx_leaf *dx_leaf; 1499 struct ocfs2_dx_leaf *dx_leaf;
1495 1500
1496 ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, 1501 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
1497 OCFS2_JOURNAL_ACCESS_WRITE); 1502 OCFS2_JOURNAL_ACCESS_WRITE);
1498 if (ret) { 1503 if (ret) {
1499 mlog_errno(ret); 1504 mlog_errno(ret);
@@ -1523,7 +1528,7 @@ static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
1523 struct ocfs2_dx_root_block *dx_root; 1528 struct ocfs2_dx_root_block *dx_root;
1524 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh; 1529 struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
1525 1530
1526 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 1531 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
1527 OCFS2_JOURNAL_ACCESS_WRITE); 1532 OCFS2_JOURNAL_ACCESS_WRITE);
1528 if (ret) { 1533 if (ret) {
1529 mlog_errno(ret); 1534 mlog_errno(ret);
@@ -1645,11 +1650,13 @@ int __ocfs2_add_entry(handle_t *handle,
1645 */ 1650 */
1646 if (ocfs2_free_list_at_root(lookup)) { 1651 if (ocfs2_free_list_at_root(lookup)) {
1647 bh = lookup->dl_dx_root_bh; 1652 bh = lookup->dl_dx_root_bh;
1648 retval = ocfs2_journal_access_dr(handle, dir, bh, 1653 retval = ocfs2_journal_access_dr(handle,
1654 INODE_CACHE(dir), bh,
1649 OCFS2_JOURNAL_ACCESS_WRITE); 1655 OCFS2_JOURNAL_ACCESS_WRITE);
1650 } else { 1656 } else {
1651 bh = lookup->dl_prev_leaf_bh; 1657 bh = lookup->dl_prev_leaf_bh;
1652 retval = ocfs2_journal_access_db(handle, dir, bh, 1658 retval = ocfs2_journal_access_db(handle,
1659 INODE_CACHE(dir), bh,
1653 OCFS2_JOURNAL_ACCESS_WRITE); 1660 OCFS2_JOURNAL_ACCESS_WRITE);
1654 } 1661 }
1655 if (retval) { 1662 if (retval) {
@@ -1700,11 +1707,13 @@ int __ocfs2_add_entry(handle_t *handle,
1700 } 1707 }
1701 1708
1702 if (insert_bh == parent_fe_bh) 1709 if (insert_bh == parent_fe_bh)
1703 status = ocfs2_journal_access_di(handle, dir, 1710 status = ocfs2_journal_access_di(handle,
1711 INODE_CACHE(dir),
1704 insert_bh, 1712 insert_bh,
1705 OCFS2_JOURNAL_ACCESS_WRITE); 1713 OCFS2_JOURNAL_ACCESS_WRITE);
1706 else { 1714 else {
1707 status = ocfs2_journal_access_db(handle, dir, 1715 status = ocfs2_journal_access_db(handle,
1716 INODE_CACHE(dir),
1708 insert_bh, 1717 insert_bh,
1709 OCFS2_JOURNAL_ACCESS_WRITE); 1718 OCFS2_JOURNAL_ACCESS_WRITE);
1710 1719
@@ -2280,7 +2289,7 @@ static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
2280 struct ocfs2_inline_data *data = &di->id2.i_data; 2289 struct ocfs2_inline_data *data = &di->id2.i_data;
2281 unsigned int size = le16_to_cpu(data->id_count); 2290 unsigned int size = le16_to_cpu(data->id_count);
2282 2291
2283 ret = ocfs2_journal_access_di(handle, inode, di_bh, 2292 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2284 OCFS2_JOURNAL_ACCESS_WRITE); 2293 OCFS2_JOURNAL_ACCESS_WRITE);
2285 if (ret) { 2294 if (ret) {
2286 mlog_errno(ret); 2295 mlog_errno(ret);
@@ -2332,9 +2341,9 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
2332 goto bail; 2341 goto bail;
2333 } 2342 }
2334 2343
2335 ocfs2_set_new_buffer_uptodate(inode, new_bh); 2344 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2336 2345
2337 status = ocfs2_journal_access_db(handle, inode, new_bh, 2346 status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), new_bh,
2338 OCFS2_JOURNAL_ACCESS_CREATE); 2347 OCFS2_JOURNAL_ACCESS_CREATE);
2339 if (status < 0) { 2348 if (status < 0) {
2340 mlog_errno(status); 2349 mlog_errno(status);
@@ -2418,9 +2427,9 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2418 ret = -EIO; 2427 ret = -EIO;
2419 goto out; 2428 goto out;
2420 } 2429 }
2421 ocfs2_set_new_buffer_uptodate(dir, dx_root_bh); 2430 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dx_root_bh);
2422 2431
2423 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 2432 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
2424 OCFS2_JOURNAL_ACCESS_CREATE); 2433 OCFS2_JOURNAL_ACCESS_CREATE);
2425 if (ret < 0) { 2434 if (ret < 0) {
2426 mlog_errno(ret); 2435 mlog_errno(ret);
@@ -2454,7 +2463,7 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
2454 if (ret) 2463 if (ret)
2455 mlog_errno(ret); 2464 mlog_errno(ret);
2456 2465
2457 ret = ocfs2_journal_access_di(handle, dir, di_bh, 2466 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
2458 OCFS2_JOURNAL_ACCESS_CREATE); 2467 OCFS2_JOURNAL_ACCESS_CREATE);
2459 if (ret) { 2468 if (ret) {
2460 mlog_errno(ret); 2469 mlog_errno(ret);
@@ -2495,9 +2504,9 @@ static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
2495 } 2504 }
2496 dx_leaves[i] = bh; 2505 dx_leaves[i] = bh;
2497 2506
2498 ocfs2_set_new_buffer_uptodate(dir, bh); 2507 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), bh);
2499 2508
2500 ret = ocfs2_journal_access_dl(handle, dir, bh, 2509 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), bh,
2501 OCFS2_JOURNAL_ACCESS_CREATE); 2510 OCFS2_JOURNAL_ACCESS_CREATE);
2502 if (ret < 0) { 2511 if (ret < 0) {
2503 mlog_errno(ret); 2512 mlog_errno(ret);
@@ -2582,7 +2591,6 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
2582{ 2591{
2583 int ret; 2592 int ret;
2584 u64 phys_blkno; 2593 u64 phys_blkno;
2585 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2586 2594
2587 ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves, 2595 ret = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac, dx_leaves,
2588 num_dx_leaves, &phys_blkno); 2596 num_dx_leaves, &phys_blkno);
@@ -2591,7 +2599,7 @@ static int ocfs2_dx_dir_new_cluster(struct inode *dir,
2591 goto out; 2599 goto out;
2592 } 2600 }
2593 2601
2594 ret = ocfs2_insert_extent(osb, handle, dir, et, cpos, phys_blkno, 1, 0, 2602 ret = ocfs2_insert_extent(handle, et, cpos, phys_blkno, 1, 0,
2595 meta_ac); 2603 meta_ac);
2596 if (ret) 2604 if (ret)
2597 mlog_errno(ret); 2605 mlog_errno(ret);
@@ -2895,7 +2903,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
2895 struct ocfs2_extent_tree dx_et; 2903 struct ocfs2_extent_tree dx_et;
2896 int did_quota = 0, bytes_allocated = 0; 2904 int did_quota = 0, bytes_allocated = 0;
2897 2905
2898 ocfs2_init_dinode_extent_tree(&et, dir, di_bh); 2906 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh);
2899 2907
2900 alloc = ocfs2_clusters_for_bytes(sb, bytes); 2908 alloc = ocfs2_clusters_for_bytes(sb, bytes);
2901 dx_alloc = 0; 2909 dx_alloc = 0;
@@ -3005,9 +3013,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3005 goto out_commit; 3013 goto out_commit;
3006 } 3014 }
3007 3015
3008 ocfs2_set_new_buffer_uptodate(dir, dirdata_bh); 3016 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
3009 3017
3010 ret = ocfs2_journal_access_db(handle, dir, dirdata_bh, 3018 ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh,
3011 OCFS2_JOURNAL_ACCESS_CREATE); 3019 OCFS2_JOURNAL_ACCESS_CREATE);
3012 if (ret) { 3020 if (ret) {
3013 mlog_errno(ret); 3021 mlog_errno(ret);
@@ -3060,7 +3068,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3060 * We let the later dirent insert modify c/mtime - to the user 3068 * We let the later dirent insert modify c/mtime - to the user
3061 * the data hasn't changed. 3069 * the data hasn't changed.
3062 */ 3070 */
3063 ret = ocfs2_journal_access_di(handle, dir, di_bh, 3071 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
3064 OCFS2_JOURNAL_ACCESS_CREATE); 3072 OCFS2_JOURNAL_ACCESS_CREATE);
3065 if (ret) { 3073 if (ret) {
3066 mlog_errno(ret); 3074 mlog_errno(ret);
@@ -3085,7 +3093,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3085 * This should never fail as our extent list is empty and all 3093 * This should never fail as our extent list is empty and all
3086 * related blocks have been journaled already. 3094 * related blocks have been journaled already.
3087 */ 3095 */
3088 ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len, 3096 ret = ocfs2_insert_extent(handle, &et, 0, blkno, len,
3089 0, NULL); 3097 0, NULL);
3090 if (ret) { 3098 if (ret) {
3091 mlog_errno(ret); 3099 mlog_errno(ret);
@@ -3117,8 +3125,10 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3117 ocfs2_dx_dir_index_root_block(dir, dx_root_bh, 3125 ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
3118 dirdata_bh); 3126 dirdata_bh);
3119 } else { 3127 } else {
3120 ocfs2_init_dx_root_extent_tree(&dx_et, dir, dx_root_bh); 3128 ocfs2_init_dx_root_extent_tree(&dx_et,
3121 ret = ocfs2_insert_extent(osb, handle, dir, &dx_et, 0, 3129 INODE_CACHE(dir),
3130 dx_root_bh);
3131 ret = ocfs2_insert_extent(handle, &dx_et, 0,
3122 dx_insert_blkno, 1, 0, NULL); 3132 dx_insert_blkno, 1, 0, NULL);
3123 if (ret) 3133 if (ret)
3124 mlog_errno(ret); 3134 mlog_errno(ret);
@@ -3138,7 +3148,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
3138 } 3148 }
3139 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); 3149 blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
3140 3150
3141 ret = ocfs2_insert_extent(osb, handle, dir, &et, 1, 3151 ret = ocfs2_insert_extent(handle, &et, 1,
3142 blkno, len, 0, NULL); 3152 blkno, len, 0, NULL);
3143 if (ret) { 3153 if (ret) {
3144 mlog_errno(ret); 3154 mlog_errno(ret);
@@ -3337,8 +3347,9 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb,
3337 spin_lock(&OCFS2_I(dir)->ip_lock); 3347 spin_lock(&OCFS2_I(dir)->ip_lock);
3338 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { 3348 if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
3339 spin_unlock(&OCFS2_I(dir)->ip_lock); 3349 spin_unlock(&OCFS2_I(dir)->ip_lock);
3340 ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh); 3350 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
3341 num_free_extents = ocfs2_num_free_extents(osb, dir, &et); 3351 parent_fe_bh);
3352 num_free_extents = ocfs2_num_free_extents(osb, &et);
3342 if (num_free_extents < 0) { 3353 if (num_free_extents < 0) {
3343 status = num_free_extents; 3354 status = num_free_extents;
3344 mlog_errno(status); 3355 mlog_errno(status);
@@ -3387,9 +3398,9 @@ do_extend:
3387 goto bail; 3398 goto bail;
3388 } 3399 }
3389 3400
3390 ocfs2_set_new_buffer_uptodate(dir, new_bh); 3401 ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
3391 3402
3392 status = ocfs2_journal_access_db(handle, dir, new_bh, 3403 status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh,
3393 OCFS2_JOURNAL_ACCESS_CREATE); 3404 OCFS2_JOURNAL_ACCESS_CREATE);
3394 if (status < 0) { 3405 if (status < 0) {
3395 mlog_errno(status); 3406 mlog_errno(status);
@@ -3829,7 +3840,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3829 (unsigned long long)OCFS2_I(dir)->ip_blkno, 3840 (unsigned long long)OCFS2_I(dir)->ip_blkno,
3830 (unsigned long long)leaf_blkno, insert_hash); 3841 (unsigned long long)leaf_blkno, insert_hash);
3831 3842
3832 ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); 3843 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
3833 3844
3834 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data; 3845 dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3835 /* 3846 /*
@@ -3885,7 +3896,7 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3885 } 3896 }
3886 did_quota = 1; 3897 did_quota = 1;
3887 3898
3888 ret = ocfs2_journal_access_dl(handle, dir, dx_leaf_bh, 3899 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
3889 OCFS2_JOURNAL_ACCESS_WRITE); 3900 OCFS2_JOURNAL_ACCESS_WRITE);
3890 if (ret) { 3901 if (ret) {
3891 mlog_errno(ret); 3902 mlog_errno(ret);
@@ -3949,7 +3960,8 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
3949 } 3960 }
3950 3961
3951 for (i = 0; i < num_dx_leaves; i++) { 3962 for (i = 0; i < num_dx_leaves; i++) {
3952 ret = ocfs2_journal_access_dl(handle, dir, orig_dx_leaves[i], 3963 ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
3964 orig_dx_leaves[i],
3953 OCFS2_JOURNAL_ACCESS_WRITE); 3965 OCFS2_JOURNAL_ACCESS_WRITE);
3954 if (ret) { 3966 if (ret) {
3955 mlog_errno(ret); 3967 mlog_errno(ret);
@@ -4165,7 +4177,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
4165 * failure to add the dx_root_bh to the journal won't result 4177 * failure to add the dx_root_bh to the journal won't result
4166 * us losing clusters. 4178 * us losing clusters.
4167 */ 4179 */
4168 ret = ocfs2_journal_access_dr(handle, dir, dx_root_bh, 4180 ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
4169 OCFS2_JOURNAL_ACCESS_WRITE); 4181 OCFS2_JOURNAL_ACCESS_WRITE);
4170 if (ret) { 4182 if (ret) {
4171 mlog_errno(ret); 4183 mlog_errno(ret);
@@ -4207,9 +4219,8 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir,
4207 4219
4208 /* This should never fail considering we start with an empty 4220 /* This should never fail considering we start with an empty
4209 * dx_root. */ 4221 * dx_root. */
4210 ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); 4222 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
4211 ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, 4223 ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
4212 insert_blkno, 1, 0, NULL);
4213 if (ret) 4224 if (ret)
4214 mlog_errno(ret); 4225 mlog_errno(ret);
4215 did_quota = 0; 4226 did_quota = 0;
@@ -4469,7 +4480,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
4469 goto out_unlock; 4480 goto out_unlock;
4470 } 4481 }
4471 4482
4472 ret = ocfs2_journal_access_di(handle, dir, di_bh, 4483 ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
4473 OCFS2_JOURNAL_ACCESS_WRITE); 4484 OCFS2_JOURNAL_ACCESS_WRITE);
4474 if (ret) { 4485 if (ret) {
4475 mlog_errno(ret); 4486 mlog_errno(ret);
@@ -4532,7 +4543,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
4532 if (ocfs2_dx_root_inline(dx_root)) 4543 if (ocfs2_dx_root_inline(dx_root))
4533 goto remove_index; 4544 goto remove_index;
4534 4545
4535 ocfs2_init_dx_root_extent_tree(&et, dir, dx_root_bh); 4546 ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
4536 4547
4537 /* XXX: What if dr_clusters is too large? */ 4548 /* XXX: What if dr_clusters is too large? */
4538 while (le32_to_cpu(dx_root->dr_clusters)) { 4549 while (le32_to_cpu(dx_root->dr_clusters)) {
@@ -4565,7 +4576,7 @@ remove_index:
4565 goto out; 4576 goto out;
4566 } 4577 }
4567 4578
4568 ocfs2_remove_from_cache(dir, dx_root_bh); 4579 ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh);
4569out: 4580out:
4570 ocfs2_schedule_truncate_log_flush(osb, 1); 4581 ocfs2_schedule_truncate_log_flush(osb, 1);
4571 ocfs2_run_deallocs(osb, &dealloc); 4582 ocfs2_run_deallocs(osb, &dealloc);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index d490b66ad9d7..98569e86c613 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -212,14 +212,18 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm,
212 spin_lock(&dlm->spinlock); 212 spin_lock(&dlm->spinlock);
213 } 213 }
214 214
215 spin_lock(&res->spinlock);
215 if (!list_empty(&res->purge)) { 216 if (!list_empty(&res->purge)) {
216 mlog(0, "removing lockres %.*s:%p from purgelist, " 217 mlog(0, "removing lockres %.*s:%p from purgelist, "
217 "master = %d\n", res->lockname.len, res->lockname.name, 218 "master = %d\n", res->lockname.len, res->lockname.name,
218 res, master); 219 res, master);
219 list_del_init(&res->purge); 220 list_del_init(&res->purge);
221 spin_unlock(&res->spinlock);
220 dlm_lockres_put(res); 222 dlm_lockres_put(res);
221 dlm->purge_count--; 223 dlm->purge_count--;
222 } 224 } else
225 spin_unlock(&res->spinlock);
226
223 __dlm_unhash_lockres(res); 227 __dlm_unhash_lockres(res);
224 228
225 /* lockres is not in the hash now. drop the flag and wake up 229 /* lockres is not in the hash now. drop the flag and wake up
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 110bb57c46ab..0d38d67194cb 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -53,6 +53,7 @@
53#include "super.h" 53#include "super.h"
54#include "uptodate.h" 54#include "uptodate.h"
55#include "quota.h" 55#include "quota.h"
56#include "refcounttree.h"
56 57
57#include "buffer_head_io.h" 58#include "buffer_head_io.h"
58 59
@@ -110,6 +111,11 @@ static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb,
110 111
111static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres); 112static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres);
112 113
114static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
115 int new_level);
116static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
117 int blocking);
118
113#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres) 119#define mlog_meta_lvb(__level, __lockres) ocfs2_dump_meta_lvb_info(__level, __PRETTY_FUNCTION__, __LINE__, __lockres)
114 120
115/* This aids in debugging situations where a bad LVB might be involved. */ 121/* This aids in debugging situations where a bad LVB might be involved. */
@@ -278,6 +284,12 @@ static struct ocfs2_lock_res_ops ocfs2_qinfo_lops = {
278 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB, 284 .flags = LOCK_TYPE_REQUIRES_REFRESH | LOCK_TYPE_USES_LVB,
279}; 285};
280 286
287static struct ocfs2_lock_res_ops ocfs2_refcount_block_lops = {
288 .check_downconvert = ocfs2_check_refcount_downconvert,
289 .downconvert_worker = ocfs2_refcount_convert_worker,
290 .flags = 0,
291};
292
281static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) 293static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres)
282{ 294{
283 return lockres->l_type == OCFS2_LOCK_TYPE_META || 295 return lockres->l_type == OCFS2_LOCK_TYPE_META ||
@@ -306,6 +318,12 @@ static inline struct ocfs2_mem_dqinfo *ocfs2_lock_res_qinfo(struct ocfs2_lock_re
306 return (struct ocfs2_mem_dqinfo *)lockres->l_priv; 318 return (struct ocfs2_mem_dqinfo *)lockres->l_priv;
307} 319}
308 320
321static inline struct ocfs2_refcount_tree *
322ocfs2_lock_res_refcount_tree(struct ocfs2_lock_res *res)
323{
324 return container_of(res, struct ocfs2_refcount_tree, rf_lockres);
325}
326
309static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres) 327static inline struct ocfs2_super *ocfs2_get_lockres_osb(struct ocfs2_lock_res *lockres)
310{ 328{
311 if (lockres->l_ops->get_osb) 329 if (lockres->l_ops->get_osb)
@@ -693,6 +711,17 @@ void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
693 info); 711 info);
694} 712}
695 713
714void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
715 struct ocfs2_super *osb, u64 ref_blkno,
716 unsigned int generation)
717{
718 ocfs2_lock_res_init_once(lockres);
719 ocfs2_build_lock_name(OCFS2_LOCK_TYPE_REFCOUNT, ref_blkno,
720 generation, lockres->l_name);
721 ocfs2_lock_res_init_common(osb, lockres, OCFS2_LOCK_TYPE_REFCOUNT,
722 &ocfs2_refcount_block_lops, osb);
723}
724
696void ocfs2_lock_res_free(struct ocfs2_lock_res *res) 725void ocfs2_lock_res_free(struct ocfs2_lock_res *res)
697{ 726{
698 mlog_entry_void(); 727 mlog_entry_void();
@@ -1548,8 +1577,10 @@ int ocfs2_rw_lock(struct inode *inode, int write)
1548 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1577 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1549 write ? "EXMODE" : "PRMODE"); 1578 write ? "EXMODE" : "PRMODE");
1550 1579
1551 if (ocfs2_mount_local(osb)) 1580 if (ocfs2_mount_local(osb)) {
1581 mlog_exit(0);
1552 return 0; 1582 return 0;
1583 }
1553 1584
1554 lockres = &OCFS2_I(inode)->ip_rw_lockres; 1585 lockres = &OCFS2_I(inode)->ip_rw_lockres;
1555 1586
@@ -2127,7 +2158,7 @@ static int ocfs2_inode_lock_update(struct inode *inode,
2127 2158
2128 /* This will discard any caching information we might have had 2159 /* This will discard any caching information we might have had
2129 * for the inode metadata. */ 2160 * for the inode metadata. */
2130 ocfs2_metadata_cache_purge(inode); 2161 ocfs2_metadata_cache_purge(INODE_CACHE(inode));
2131 2162
2132 ocfs2_extent_map_trunc(inode, 0); 2163 ocfs2_extent_map_trunc(inode, 0);
2133 2164
@@ -3009,6 +3040,7 @@ static void ocfs2_unlock_ast(void *opaque, int error)
3009 "unlock_action %d\n", error, lockres->l_name, 3040 "unlock_action %d\n", error, lockres->l_name,
3010 lockres->l_unlock_action); 3041 lockres->l_unlock_action);
3011 spin_unlock_irqrestore(&lockres->l_lock, flags); 3042 spin_unlock_irqrestore(&lockres->l_lock, flags);
3043 mlog_exit_void();
3012 return; 3044 return;
3013 } 3045 }
3014 3046
@@ -3495,11 +3527,11 @@ out:
3495 return UNBLOCK_CONTINUE; 3527 return UNBLOCK_CONTINUE;
3496} 3528}
3497 3529
3498static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres, 3530static int ocfs2_ci_checkpointed(struct ocfs2_caching_info *ci,
3499 int new_level) 3531 struct ocfs2_lock_res *lockres,
3532 int new_level)
3500{ 3533{
3501 struct inode *inode = ocfs2_lock_res_inode(lockres); 3534 int checkpointed = ocfs2_ci_fully_checkpointed(ci);
3502 int checkpointed = ocfs2_inode_fully_checkpointed(inode);
3503 3535
3504 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR); 3536 BUG_ON(new_level != DLM_LOCK_NL && new_level != DLM_LOCK_PR);
3505 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed); 3537 BUG_ON(lockres->l_level != DLM_LOCK_EX && !checkpointed);
@@ -3507,10 +3539,18 @@ static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
3507 if (checkpointed) 3539 if (checkpointed)
3508 return 1; 3540 return 1;
3509 3541
3510 ocfs2_start_checkpoint(OCFS2_SB(inode->i_sb)); 3542 ocfs2_start_checkpoint(OCFS2_SB(ocfs2_metadata_cache_get_super(ci)));
3511 return 0; 3543 return 0;
3512} 3544}
3513 3545
3546static int ocfs2_check_meta_downconvert(struct ocfs2_lock_res *lockres,
3547 int new_level)
3548{
3549 struct inode *inode = ocfs2_lock_res_inode(lockres);
3550
3551 return ocfs2_ci_checkpointed(INODE_CACHE(inode), lockres, new_level);
3552}
3553
3514static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) 3554static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres)
3515{ 3555{
3516 struct inode *inode = ocfs2_lock_res_inode(lockres); 3556 struct inode *inode = ocfs2_lock_res_inode(lockres);
@@ -3640,6 +3680,26 @@ static int ocfs2_dentry_convert_worker(struct ocfs2_lock_res *lockres,
3640 return UNBLOCK_CONTINUE_POST; 3680 return UNBLOCK_CONTINUE_POST;
3641} 3681}
3642 3682
3683static int ocfs2_check_refcount_downconvert(struct ocfs2_lock_res *lockres,
3684 int new_level)
3685{
3686 struct ocfs2_refcount_tree *tree =
3687 ocfs2_lock_res_refcount_tree(lockres);
3688
3689 return ocfs2_ci_checkpointed(&tree->rf_ci, lockres, new_level);
3690}
3691
3692static int ocfs2_refcount_convert_worker(struct ocfs2_lock_res *lockres,
3693 int blocking)
3694{
3695 struct ocfs2_refcount_tree *tree =
3696 ocfs2_lock_res_refcount_tree(lockres);
3697
3698 ocfs2_metadata_cache_purge(&tree->rf_ci);
3699
3700 return UNBLOCK_CONTINUE;
3701}
3702
3643static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres) 3703static void ocfs2_set_qinfo_lvb(struct ocfs2_lock_res *lockres)
3644{ 3704{
3645 struct ocfs2_qinfo_lvb *lvb; 3705 struct ocfs2_qinfo_lvb *lvb;
@@ -3752,6 +3812,37 @@ bail:
3752 return status; 3812 return status;
3753} 3813}
3754 3814
3815int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex)
3816{
3817 int status;
3818 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3819 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3820 struct ocfs2_super *osb = lockres->l_priv;
3821
3822
3823 if (ocfs2_is_hard_readonly(osb))
3824 return -EROFS;
3825
3826 if (ocfs2_mount_local(osb))
3827 return 0;
3828
3829 status = ocfs2_cluster_lock(osb, lockres, level, 0, 0);
3830 if (status < 0)
3831 mlog_errno(status);
3832
3833 return status;
3834}
3835
3836void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex)
3837{
3838 int level = ex ? DLM_LOCK_EX : DLM_LOCK_PR;
3839 struct ocfs2_lock_res *lockres = &ref_tree->rf_lockres;
3840 struct ocfs2_super *osb = lockres->l_priv;
3841
3842 if (!ocfs2_mount_local(osb))
3843 ocfs2_cluster_unlock(osb, lockres, level);
3844}
3845
3755/* 3846/*
3756 * This is the filesystem locking protocol. It provides the lock handling 3847 * This is the filesystem locking protocol. It provides the lock handling
3757 * hooks for the underlying DLM. It has a maximum version number. 3848 * hooks for the underlying DLM. It has a maximum version number.
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h
index 7553836931de..d1ce48e1b3d6 100644
--- a/fs/ocfs2/dlmglue.h
+++ b/fs/ocfs2/dlmglue.h
@@ -101,6 +101,9 @@ void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres,
101struct ocfs2_mem_dqinfo; 101struct ocfs2_mem_dqinfo;
102void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres, 102void ocfs2_qinfo_lock_res_init(struct ocfs2_lock_res *lockres,
103 struct ocfs2_mem_dqinfo *info); 103 struct ocfs2_mem_dqinfo *info);
104void ocfs2_refcount_lock_res_init(struct ocfs2_lock_res *lockres,
105 struct ocfs2_super *osb, u64 ref_blkno,
106 unsigned int generation);
104void ocfs2_lock_res_free(struct ocfs2_lock_res *res); 107void ocfs2_lock_res_free(struct ocfs2_lock_res *res);
105int ocfs2_create_new_inode_locks(struct inode *inode); 108int ocfs2_create_new_inode_locks(struct inode *inode);
106int ocfs2_drop_inode_locks(struct inode *inode); 109int ocfs2_drop_inode_locks(struct inode *inode);
@@ -148,6 +151,9 @@ int ocfs2_file_lock(struct file *file, int ex, int trylock);
148void ocfs2_file_unlock(struct file *file); 151void ocfs2_file_unlock(struct file *file);
149int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex); 152int ocfs2_qinfo_lock(struct ocfs2_mem_dqinfo *oinfo, int ex);
150void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex); 153void ocfs2_qinfo_unlock(struct ocfs2_mem_dqinfo *oinfo, int ex);
154struct ocfs2_refcount_tree;
155int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex);
156void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex);
151 157
152 158
153void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); 159void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres);
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index f2bb1a04d253..843db64e9d4a 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -293,7 +293,7 @@ static int ocfs2_last_eb_is_empty(struct inode *inode,
293 struct ocfs2_extent_block *eb; 293 struct ocfs2_extent_block *eb;
294 struct ocfs2_extent_list *el; 294 struct ocfs2_extent_list *el;
295 295
296 ret = ocfs2_read_extent_block(inode, last_eb_blk, &eb_bh); 296 ret = ocfs2_read_extent_block(INODE_CACHE(inode), last_eb_blk, &eb_bh);
297 if (ret) { 297 if (ret) {
298 mlog_errno(ret); 298 mlog_errno(ret);
299 goto out; 299 goto out;
@@ -353,11 +353,11 @@ static int ocfs2_search_for_hole_index(struct ocfs2_extent_list *el,
353 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block 353 * eb_bh is NULL. Otherwise, eb_bh should point to the extent block
354 * containing el. 354 * containing el.
355 */ 355 */
356static int ocfs2_figure_hole_clusters(struct inode *inode, 356int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
357 struct ocfs2_extent_list *el, 357 struct ocfs2_extent_list *el,
358 struct buffer_head *eb_bh, 358 struct buffer_head *eb_bh,
359 u32 v_cluster, 359 u32 v_cluster,
360 u32 *num_clusters) 360 u32 *num_clusters)
361{ 361{
362 int ret, i; 362 int ret, i;
363 struct buffer_head *next_eb_bh = NULL; 363 struct buffer_head *next_eb_bh = NULL;
@@ -375,7 +375,7 @@ static int ocfs2_figure_hole_clusters(struct inode *inode,
375 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) 375 if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL)
376 goto no_more_extents; 376 goto no_more_extents;
377 377
378 ret = ocfs2_read_extent_block(inode, 378 ret = ocfs2_read_extent_block(ci,
379 le64_to_cpu(eb->h_next_leaf_blk), 379 le64_to_cpu(eb->h_next_leaf_blk),
380 &next_eb_bh); 380 &next_eb_bh);
381 if (ret) { 381 if (ret) {
@@ -428,7 +428,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
428 tree_height = le16_to_cpu(el->l_tree_depth); 428 tree_height = le16_to_cpu(el->l_tree_depth);
429 429
430 if (tree_height > 0) { 430 if (tree_height > 0) {
431 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); 431 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
432 &eb_bh);
432 if (ret) { 433 if (ret) {
433 mlog_errno(ret); 434 mlog_errno(ret);
434 goto out; 435 goto out;
@@ -455,7 +456,8 @@ static int ocfs2_get_clusters_nocache(struct inode *inode,
455 * field. 456 * field.
456 */ 457 */
457 if (hole_len) { 458 if (hole_len) {
458 ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, 459 ret = ocfs2_figure_hole_clusters(INODE_CACHE(inode),
460 el, eb_bh,
459 v_cluster, &len); 461 v_cluster, &len);
460 if (ret) { 462 if (ret) {
461 mlog_errno(ret); 463 mlog_errno(ret);
@@ -539,7 +541,8 @@ static void ocfs2_relative_extent_offsets(struct super_block *sb,
539 541
540int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 542int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
541 u32 *p_cluster, u32 *num_clusters, 543 u32 *p_cluster, u32 *num_clusters,
542 struct ocfs2_extent_list *el) 544 struct ocfs2_extent_list *el,
545 unsigned int *extent_flags)
543{ 546{
544 int ret = 0, i; 547 int ret = 0, i;
545 struct buffer_head *eb_bh = NULL; 548 struct buffer_head *eb_bh = NULL;
@@ -548,7 +551,8 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
548 u32 coff; 551 u32 coff;
549 552
550 if (el->l_tree_depth) { 553 if (el->l_tree_depth) {
551 ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); 554 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, v_cluster,
555 &eb_bh);
552 if (ret) { 556 if (ret) {
553 mlog_errno(ret); 557 mlog_errno(ret);
554 goto out; 558 goto out;
@@ -590,6 +594,9 @@ int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
590 *p_cluster = *p_cluster + coff; 594 *p_cluster = *p_cluster + coff;
591 if (num_clusters) 595 if (num_clusters)
592 *num_clusters = ocfs2_rec_clusters(el, rec) - coff; 596 *num_clusters = ocfs2_rec_clusters(el, rec) - coff;
597
598 if (extent_flags)
599 *extent_flags = rec->e_flags;
593 } 600 }
594out: 601out:
595 if (eb_bh) 602 if (eb_bh)
@@ -862,8 +869,8 @@ int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
862 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i)); 869 BUG_ON(bhs[done + i]->b_blocknr != (p_block + i));
863 } 870 }
864 871
865 rc = ocfs2_read_blocks(inode, p_block, count, bhs + done, 872 rc = ocfs2_read_blocks(INODE_CACHE(inode), p_block, count,
866 flags, validate); 873 bhs + done, flags, validate);
867 if (rc) { 874 if (rc) {
868 mlog_errno(rc); 875 mlog_errno(rc);
869 break; 876 break;
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h
index b7dd9731b462..e79d41c2c909 100644
--- a/fs/ocfs2/extent_map.h
+++ b/fs/ocfs2/extent_map.h
@@ -55,12 +55,18 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
55 55
56int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, 56int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster,
57 u32 *p_cluster, u32 *num_clusters, 57 u32 *p_cluster, u32 *num_clusters,
58 struct ocfs2_extent_list *el); 58 struct ocfs2_extent_list *el,
59 unsigned int *extent_flags);
59 60
60int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr, 61int ocfs2_read_virt_blocks(struct inode *inode, u64 v_block, int nr,
61 struct buffer_head *bhs[], int flags, 62 struct buffer_head *bhs[], int flags,
62 int (*validate)(struct super_block *sb, 63 int (*validate)(struct super_block *sb,
63 struct buffer_head *bh)); 64 struct buffer_head *bh));
65int ocfs2_figure_hole_clusters(struct ocfs2_caching_info *ci,
66 struct ocfs2_extent_list *el,
67 struct buffer_head *eb_bh,
68 u32 v_cluster,
69 u32 *num_clusters);
64static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block, 70static inline int ocfs2_read_virt_block(struct inode *inode, u64 v_block,
65 struct buffer_head **bh, 71 struct buffer_head **bh,
66 int (*validate)(struct super_block *sb, 72 int (*validate)(struct super_block *sb,
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 221c5e98957b..89fc8ee1f5a5 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -59,6 +59,7 @@
59#include "xattr.h" 59#include "xattr.h"
60#include "acl.h" 60#include "acl.h"
61#include "quota.h" 61#include "quota.h"
62#include "refcounttree.h"
62 63
63#include "buffer_head_io.h" 64#include "buffer_head_io.h"
64 65
@@ -259,7 +260,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
259 goto out; 260 goto out;
260 } 261 }
261 262
262 ret = ocfs2_journal_access_di(handle, inode, bh, 263 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
263 OCFS2_JOURNAL_ACCESS_WRITE); 264 OCFS2_JOURNAL_ACCESS_WRITE);
264 if (ret) { 265 if (ret) {
265 mlog_errno(ret); 266 mlog_errno(ret);
@@ -334,6 +335,39 @@ out:
334 return ret; 335 return ret;
335} 336}
336 337
338static int ocfs2_cow_file_pos(struct inode *inode,
339 struct buffer_head *fe_bh,
340 u64 offset)
341{
342 int status;
343 u32 phys, cpos = offset >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
344 unsigned int num_clusters = 0;
345 unsigned int ext_flags = 0;
346
347 /*
348 * If the new offset is aligned to the range of the cluster, there is
349 * no space for ocfs2_zero_range_for_truncate to fill, so no need to
350 * CoW either.
351 */
352 if ((offset & (OCFS2_SB(inode->i_sb)->s_clustersize - 1)) == 0)
353 return 0;
354
355 status = ocfs2_get_clusters(inode, cpos, &phys,
356 &num_clusters, &ext_flags);
357 if (status) {
358 mlog_errno(status);
359 goto out;
360 }
361
362 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
363 goto out;
364
365 return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1);
366
367out:
368 return status;
369}
370
337static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, 371static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
338 struct inode *inode, 372 struct inode *inode,
339 struct buffer_head *fe_bh, 373 struct buffer_head *fe_bh,
@@ -346,6 +380,17 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
346 380
347 mlog_entry_void(); 381 mlog_entry_void();
348 382
383 /*
384 * We need to CoW the cluster contains the offset if it is reflinked
385 * since we will call ocfs2_zero_range_for_truncate later which will
386 * write "0" from offset to the end of the cluster.
387 */
388 status = ocfs2_cow_file_pos(inode, fe_bh, new_i_size);
389 if (status) {
390 mlog_errno(status);
391 return status;
392 }
393
349 /* TODO: This needs to actually orphan the inode in this 394 /* TODO: This needs to actually orphan the inode in this
350 * transaction. */ 395 * transaction. */
351 396
@@ -356,7 +401,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
356 goto out; 401 goto out;
357 } 402 }
358 403
359 status = ocfs2_journal_access_di(handle, inode, fe_bh, 404 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
360 OCFS2_JOURNAL_ACCESS_WRITE); 405 OCFS2_JOURNAL_ACCESS_WRITE);
361 if (status < 0) { 406 if (status < 0) {
362 mlog_errno(status); 407 mlog_errno(status);
@@ -486,6 +531,8 @@ bail_unlock_sem:
486 up_write(&OCFS2_I(inode)->ip_alloc_sem); 531 up_write(&OCFS2_I(inode)->ip_alloc_sem);
487 532
488bail: 533bail:
534 if (!status && OCFS2_I(inode)->ip_clusters == 0)
535 status = ocfs2_try_remove_refcount_tree(inode, di_bh);
489 536
490 mlog_exit(status); 537 mlog_exit(status);
491 return status; 538 return status;
@@ -515,11 +562,10 @@ int ocfs2_add_inode_data(struct ocfs2_super *osb,
515 int ret; 562 int ret;
516 struct ocfs2_extent_tree et; 563 struct ocfs2_extent_tree et;
517 564
518 ocfs2_init_dinode_extent_tree(&et, inode, fe_bh); 565 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), fe_bh);
519 ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset, 566 ret = ocfs2_add_clusters_in_btree(handle, &et, logical_offset,
520 clusters_to_add, mark_unwritten, 567 clusters_to_add, mark_unwritten,
521 &et, handle, 568 data_ac, meta_ac, reason_ret);
522 data_ac, meta_ac, reason_ret);
523 569
524 return ret; 570 return ret;
525} 571}
@@ -564,7 +610,7 @@ restart_all:
564 (unsigned long long)OCFS2_I(inode)->ip_blkno, 610 (unsigned long long)OCFS2_I(inode)->ip_blkno,
565 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), 611 (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters),
566 clusters_to_add); 612 clusters_to_add);
567 ocfs2_init_dinode_extent_tree(&et, inode, bh); 613 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), bh);
568 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, 614 status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
569 &data_ac, &meta_ac); 615 &data_ac, &meta_ac);
570 if (status) { 616 if (status) {
@@ -593,7 +639,7 @@ restarted_transaction:
593 /* reserve a write to the file entry early on - that we if we 639 /* reserve a write to the file entry early on - that we if we
594 * run out of credits in the allocation path, we can still 640 * run out of credits in the allocation path, we can still
595 * update i_size. */ 641 * update i_size. */
596 status = ocfs2_journal_access_di(handle, inode, bh, 642 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
597 OCFS2_JOURNAL_ACCESS_WRITE); 643 OCFS2_JOURNAL_ACCESS_WRITE);
598 if (status < 0) { 644 if (status < 0) {
599 mlog_errno(status); 645 mlog_errno(status);
@@ -1131,7 +1177,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1131 goto out; 1177 goto out;
1132 } 1178 }
1133 1179
1134 ret = ocfs2_journal_access_di(handle, inode, bh, 1180 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1135 OCFS2_JOURNAL_ACCESS_WRITE); 1181 OCFS2_JOURNAL_ACCESS_WRITE);
1136 if (ret < 0) { 1182 if (ret < 0) {
1137 mlog_errno(ret); 1183 mlog_errno(ret);
@@ -1395,7 +1441,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1395 struct address_space *mapping = inode->i_mapping; 1441 struct address_space *mapping = inode->i_mapping;
1396 struct ocfs2_extent_tree et; 1442 struct ocfs2_extent_tree et;
1397 1443
1398 ocfs2_init_dinode_extent_tree(&et, inode, di_bh); 1444 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1399 ocfs2_init_dealloc_ctxt(&dealloc); 1445 ocfs2_init_dealloc_ctxt(&dealloc);
1400 1446
1401 if (byte_len == 0) 1447 if (byte_len == 0)
@@ -1657,6 +1703,70 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
1657 OCFS2_IOC_RESVSP64, &sr, change_size); 1703 OCFS2_IOC_RESVSP64, &sr, change_size);
1658} 1704}
1659 1705
1706int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
1707 size_t count)
1708{
1709 int ret = 0;
1710 unsigned int extent_flags;
1711 u32 cpos, clusters, extent_len, phys_cpos;
1712 struct super_block *sb = inode->i_sb;
1713
1714 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)) ||
1715 !(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
1716 return 0;
1717
1718 cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
1719 clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
1720
1721 while (clusters) {
1722 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
1723 &extent_flags);
1724 if (ret < 0) {
1725 mlog_errno(ret);
1726 goto out;
1727 }
1728
1729 if (phys_cpos && (extent_flags & OCFS2_EXT_REFCOUNTED)) {
1730 ret = 1;
1731 break;
1732 }
1733
1734 if (extent_len > clusters)
1735 extent_len = clusters;
1736
1737 clusters -= extent_len;
1738 cpos += extent_len;
1739 }
1740out:
1741 return ret;
1742}
1743
1744static int ocfs2_prepare_inode_for_refcount(struct inode *inode,
1745 loff_t pos, size_t count,
1746 int *meta_level)
1747{
1748 int ret;
1749 struct buffer_head *di_bh = NULL;
1750 u32 cpos = pos >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
1751 u32 clusters =
1752 ocfs2_clusters_for_bytes(inode->i_sb, pos + count) - cpos;
1753
1754 ret = ocfs2_inode_lock(inode, &di_bh, 1);
1755 if (ret) {
1756 mlog_errno(ret);
1757 goto out;
1758 }
1759
1760 *meta_level = 1;
1761
1762 ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX);
1763 if (ret)
1764 mlog_errno(ret);
1765out:
1766 brelse(di_bh);
1767 return ret;
1768}
1769
1660static int ocfs2_prepare_inode_for_write(struct dentry *dentry, 1770static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1661 loff_t *ppos, 1771 loff_t *ppos,
1662 size_t count, 1772 size_t count,
@@ -1713,6 +1823,22 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1713 1823
1714 end = saved_pos + count; 1824 end = saved_pos + count;
1715 1825
1826 ret = ocfs2_check_range_for_refcount(inode, saved_pos, count);
1827 if (ret == 1) {
1828 ocfs2_inode_unlock(inode, meta_level);
1829 meta_level = -1;
1830
1831 ret = ocfs2_prepare_inode_for_refcount(inode,
1832 saved_pos,
1833 count,
1834 &meta_level);
1835 }
1836
1837 if (ret < 0) {
1838 mlog_errno(ret);
1839 goto out_unlock;
1840 }
1841
1716 /* 1842 /*
1717 * Skip the O_DIRECT checks if we don't need 1843 * Skip the O_DIRECT checks if we don't need
1718 * them. 1844 * them.
@@ -1759,7 +1885,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
1759 *ppos = saved_pos; 1885 *ppos = saved_pos;
1760 1886
1761out_unlock: 1887out_unlock:
1762 ocfs2_inode_unlock(inode, meta_level); 1888 if (meta_level >= 0)
1889 ocfs2_inode_unlock(inode, meta_level);
1763 1890
1764out: 1891out:
1765 return ret; 1892 return ret;
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h
index 172f9fbc9fc7..d66cf4f7c70e 100644
--- a/fs/ocfs2/file.h
+++ b/fs/ocfs2/file.h
@@ -69,4 +69,6 @@ int ocfs2_update_inode_atime(struct inode *inode,
69int ocfs2_change_file_space(struct file *file, unsigned int cmd, 69int ocfs2_change_file_space(struct file *file, unsigned int cmd,
70 struct ocfs2_space_resv *sr); 70 struct ocfs2_space_resv *sr);
71 71
72int ocfs2_check_range_for_refcount(struct inode *inode, loff_t pos,
73 size_t count);
72#endif /* OCFS2_FILE_H */ 74#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 4dc8890ba316..0297fb8982b8 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -53,6 +53,7 @@
53#include "sysfile.h" 53#include "sysfile.h"
54#include "uptodate.h" 54#include "uptodate.h"
55#include "xattr.h" 55#include "xattr.h"
56#include "refcounttree.h"
56 57
57#include "buffer_head_io.h" 58#include "buffer_head_io.h"
58 59
@@ -562,7 +563,8 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
562 goto out; 563 goto out;
563 } 564 }
564 565
565 status = ocfs2_journal_access_di(handle, inode, fe_bh, 566 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
567 fe_bh,
566 OCFS2_JOURNAL_ACCESS_WRITE); 568 OCFS2_JOURNAL_ACCESS_WRITE);
567 if (status < 0) { 569 if (status < 0) {
568 mlog_errno(status); 570 mlog_errno(status);
@@ -646,7 +648,7 @@ static int ocfs2_remove_inode(struct inode *inode,
646 } 648 }
647 649
648 /* set the inodes dtime */ 650 /* set the inodes dtime */
649 status = ocfs2_journal_access_di(handle, inode, di_bh, 651 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
650 OCFS2_JOURNAL_ACCESS_WRITE); 652 OCFS2_JOURNAL_ACCESS_WRITE);
651 if (status < 0) { 653 if (status < 0) {
652 mlog_errno(status); 654 mlog_errno(status);
@@ -662,7 +664,7 @@ static int ocfs2_remove_inode(struct inode *inode,
662 goto bail_commit; 664 goto bail_commit;
663 } 665 }
664 666
665 ocfs2_remove_from_cache(inode, di_bh); 667 ocfs2_remove_from_cache(INODE_CACHE(inode), di_bh);
666 vfs_dq_free_inode(inode); 668 vfs_dq_free_inode(inode);
667 669
668 status = ocfs2_free_dinode(handle, inode_alloc_inode, 670 status = ocfs2_free_dinode(handle, inode_alloc_inode,
@@ -781,6 +783,12 @@ static int ocfs2_wipe_inode(struct inode *inode,
781 goto bail_unlock_dir; 783 goto bail_unlock_dir;
782 } 784 }
783 785
786 status = ocfs2_remove_refcount_tree(inode, di_bh);
787 if (status < 0) {
788 mlog_errno(status);
789 goto bail_unlock_dir;
790 }
791
784 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, 792 status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode,
785 orphan_dir_bh); 793 orphan_dir_bh);
786 if (status < 0) 794 if (status < 0)
@@ -1112,13 +1120,14 @@ void ocfs2_clear_inode(struct inode *inode)
1112 ocfs2_lock_res_free(&oi->ip_inode_lockres); 1120 ocfs2_lock_res_free(&oi->ip_inode_lockres);
1113 ocfs2_lock_res_free(&oi->ip_open_lockres); 1121 ocfs2_lock_res_free(&oi->ip_open_lockres);
1114 1122
1115 ocfs2_metadata_cache_purge(inode); 1123 ocfs2_metadata_cache_exit(INODE_CACHE(inode));
1116 1124
1117 mlog_bug_on_msg(oi->ip_metadata_cache.ci_num_cached, 1125 mlog_bug_on_msg(INODE_CACHE(inode)->ci_num_cached,
1118 "Clear inode of %llu, inode has %u cache items\n", 1126 "Clear inode of %llu, inode has %u cache items\n",
1119 (unsigned long long)oi->ip_blkno, oi->ip_metadata_cache.ci_num_cached); 1127 (unsigned long long)oi->ip_blkno,
1128 INODE_CACHE(inode)->ci_num_cached);
1120 1129
1121 mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), 1130 mlog_bug_on_msg(!(INODE_CACHE(inode)->ci_flags & OCFS2_CACHE_FL_INLINE),
1122 "Clear inode of %llu, inode has a bad flag\n", 1131 "Clear inode of %llu, inode has a bad flag\n",
1123 (unsigned long long)oi->ip_blkno); 1132 (unsigned long long)oi->ip_blkno);
1124 1133
@@ -1145,9 +1154,7 @@ void ocfs2_clear_inode(struct inode *inode)
1145 (unsigned long long)oi->ip_blkno, oi->ip_open_count); 1154 (unsigned long long)oi->ip_blkno, oi->ip_open_count);
1146 1155
1147 /* Clear all other flags. */ 1156 /* Clear all other flags. */
1148 oi->ip_flags = OCFS2_INODE_CACHE_INLINE; 1157 oi->ip_flags = 0;
1149 oi->ip_created_trans = 0;
1150 oi->ip_last_trans = 0;
1151 oi->ip_dir_start_lookup = 0; 1158 oi->ip_dir_start_lookup = 0;
1152 oi->ip_blkno = 0ULL; 1159 oi->ip_blkno = 0ULL;
1153 1160
@@ -1239,7 +1246,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle,
1239 mlog_entry("(inode %llu)\n", 1246 mlog_entry("(inode %llu)\n",
1240 (unsigned long long)OCFS2_I(inode)->ip_blkno); 1247 (unsigned long long)OCFS2_I(inode)->ip_blkno);
1241 1248
1242 status = ocfs2_journal_access_di(handle, inode, bh, 1249 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), bh,
1243 OCFS2_JOURNAL_ACCESS_WRITE); 1250 OCFS2_JOURNAL_ACCESS_WRITE);
1244 if (status < 0) { 1251 if (status < 0) {
1245 mlog_errno(status); 1252 mlog_errno(status);
@@ -1380,8 +1387,8 @@ int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
1380 int rc; 1387 int rc;
1381 struct buffer_head *tmp = *bh; 1388 struct buffer_head *tmp = *bh;
1382 1389
1383 rc = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, &tmp, 1390 rc = ocfs2_read_blocks(INODE_CACHE(inode), OCFS2_I(inode)->ip_blkno,
1384 flags, ocfs2_validate_inode_block); 1391 1, &tmp, flags, ocfs2_validate_inode_block);
1385 1392
1386 /* If ocfs2_read_blocks() got us a new bh, pass it up. */ 1393 /* If ocfs2_read_blocks() got us a new bh, pass it up. */
1387 if (!rc && !*bh) 1394 if (!rc && !*bh)
@@ -1394,3 +1401,56 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh)
1394{ 1401{
1395 return ocfs2_read_inode_block_full(inode, bh, 0); 1402 return ocfs2_read_inode_block_full(inode, bh, 0);
1396} 1403}
1404
1405
1406static u64 ocfs2_inode_cache_owner(struct ocfs2_caching_info *ci)
1407{
1408 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1409
1410 return oi->ip_blkno;
1411}
1412
1413static struct super_block *ocfs2_inode_cache_get_super(struct ocfs2_caching_info *ci)
1414{
1415 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1416
1417 return oi->vfs_inode.i_sb;
1418}
1419
1420static void ocfs2_inode_cache_lock(struct ocfs2_caching_info *ci)
1421{
1422 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1423
1424 spin_lock(&oi->ip_lock);
1425}
1426
1427static void ocfs2_inode_cache_unlock(struct ocfs2_caching_info *ci)
1428{
1429 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1430
1431 spin_unlock(&oi->ip_lock);
1432}
1433
1434static void ocfs2_inode_cache_io_lock(struct ocfs2_caching_info *ci)
1435{
1436 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1437
1438 mutex_lock(&oi->ip_io_mutex);
1439}
1440
1441static void ocfs2_inode_cache_io_unlock(struct ocfs2_caching_info *ci)
1442{
1443 struct ocfs2_inode_info *oi = cache_info_to_inode(ci);
1444
1445 mutex_unlock(&oi->ip_io_mutex);
1446}
1447
1448const struct ocfs2_caching_operations ocfs2_inode_caching_ops = {
1449 .co_owner = ocfs2_inode_cache_owner,
1450 .co_get_super = ocfs2_inode_cache_get_super,
1451 .co_cache_lock = ocfs2_inode_cache_lock,
1452 .co_cache_unlock = ocfs2_inode_cache_unlock,
1453 .co_io_lock = ocfs2_inode_cache_io_lock,
1454 .co_io_unlock = ocfs2_inode_cache_io_unlock,
1455};
1456
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index ea71525aad41..ba4fe07b293c 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -60,12 +60,6 @@ struct ocfs2_inode_info
60 60
61 u32 ip_dir_start_lookup; 61 u32 ip_dir_start_lookup;
62 62
63 /* next two are protected by trans_inc_lock */
64 /* which transaction were we created on? Zero if none. */
65 unsigned long ip_created_trans;
66 /* last transaction we were a part of. */
67 unsigned long ip_last_trans;
68
69 struct ocfs2_caching_info ip_metadata_cache; 63 struct ocfs2_caching_info ip_metadata_cache;
70 64
71 struct ocfs2_extent_map ip_extent_map; 65 struct ocfs2_extent_map ip_extent_map;
@@ -106,8 +100,6 @@ struct ocfs2_inode_info
106#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 100#define OCFS2_INODE_MAYBE_ORPHANED 0x00000020
107/* Does someone have the file open O_DIRECT */ 101/* Does someone have the file open O_DIRECT */
108#define OCFS2_INODE_OPEN_DIRECT 0x00000040 102#define OCFS2_INODE_OPEN_DIRECT 0x00000040
109/* Indicates that the metadata cache should be used as an array. */
110#define OCFS2_INODE_CACHE_INLINE 0x00000080
111 103
112static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) 104static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
113{ 105{
@@ -120,6 +112,12 @@ static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode)
120extern struct kmem_cache *ocfs2_inode_cache; 112extern struct kmem_cache *ocfs2_inode_cache;
121 113
122extern const struct address_space_operations ocfs2_aops; 114extern const struct address_space_operations ocfs2_aops;
115extern const struct ocfs2_caching_operations ocfs2_inode_caching_ops;
116
117static inline struct ocfs2_caching_info *INODE_CACHE(struct inode *inode)
118{
119 return &OCFS2_I(inode)->ip_metadata_cache;
120}
123 121
124void ocfs2_clear_inode(struct inode *inode); 122void ocfs2_clear_inode(struct inode *inode);
125void ocfs2_delete_inode(struct inode *inode); 123void ocfs2_delete_inode(struct inode *inode);
@@ -172,4 +170,10 @@ int ocfs2_read_inode_block(struct inode *inode, struct buffer_head **bh);
172/* The same, but can be passed OCFS2_BH_* flags */ 170/* The same, but can be passed OCFS2_BH_* flags */
173int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh, 171int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
174 int flags); 172 int flags);
173
174static inline struct ocfs2_inode_info *cache_info_to_inode(struct ocfs2_caching_info *ci)
175{
176 return container_of(ci, struct ocfs2_inode_info, ip_metadata_cache);
177}
178
175#endif /* OCFS2_INODE_H */ 179#endif /* OCFS2_INODE_H */
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 467b413bec21..31fbb0619510 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -21,6 +21,7 @@
21#include "ocfs2_fs.h" 21#include "ocfs2_fs.h"
22#include "ioctl.h" 22#include "ioctl.h"
23#include "resize.h" 23#include "resize.h"
24#include "refcounttree.h"
24 25
25#include <linux/ext2_fs.h> 26#include <linux/ext2_fs.h>
26 27
@@ -115,6 +116,9 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
115 int status; 116 int status;
116 struct ocfs2_space_resv sr; 117 struct ocfs2_space_resv sr;
117 struct ocfs2_new_group_input input; 118 struct ocfs2_new_group_input input;
119 struct reflink_arguments args;
120 const char *old_path, *new_path;
121 bool preserve;
118 122
119 switch (cmd) { 123 switch (cmd) {
120 case OCFS2_IOC_GETFLAGS: 124 case OCFS2_IOC_GETFLAGS:
@@ -160,6 +164,15 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
160 return -EFAULT; 164 return -EFAULT;
161 165
162 return ocfs2_group_add(inode, &input); 166 return ocfs2_group_add(inode, &input);
167 case OCFS2_IOC_REFLINK:
168 if (copy_from_user(&args, (struct reflink_arguments *)arg,
169 sizeof(args)))
170 return -EFAULT;
171 old_path = (const char *)(unsigned long)args.old_path;
172 new_path = (const char *)(unsigned long)args.new_path;
173 preserve = (args.preserve != 0);
174
175 return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve);
163 default: 176 default:
164 return -ENOTTY; 177 return -ENOTTY;
165 } 178 }
@@ -182,6 +195,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
182 case OCFS2_IOC_GROUP_EXTEND: 195 case OCFS2_IOC_GROUP_EXTEND:
183 case OCFS2_IOC_GROUP_ADD: 196 case OCFS2_IOC_GROUP_ADD:
184 case OCFS2_IOC_GROUP_ADD64: 197 case OCFS2_IOC_GROUP_ADD64:
198 case OCFS2_IOC_REFLINK:
185 break; 199 break;
186 default: 200 default:
187 return -ENOIOCTLCMD; 201 return -ENOIOCTLCMD;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index c48b93ac6b65..54c16b66327e 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -48,6 +48,7 @@
48#include "slot_map.h" 48#include "slot_map.h"
49#include "super.h" 49#include "super.h"
50#include "sysfile.h" 50#include "sysfile.h"
51#include "uptodate.h"
51#include "quota.h" 52#include "quota.h"
52 53
53#include "buffer_head_io.h" 54#include "buffer_head_io.h"
@@ -554,6 +555,14 @@ static struct ocfs2_triggers eb_triggers = {
554 .ot_offset = offsetof(struct ocfs2_extent_block, h_check), 555 .ot_offset = offsetof(struct ocfs2_extent_block, h_check),
555}; 556};
556 557
558static struct ocfs2_triggers rb_triggers = {
559 .ot_triggers = {
560 .t_commit = ocfs2_commit_trigger,
561 .t_abort = ocfs2_abort_trigger,
562 },
563 .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check),
564};
565
557static struct ocfs2_triggers gd_triggers = { 566static struct ocfs2_triggers gd_triggers = {
558 .ot_triggers = { 567 .ot_triggers = {
559 .t_commit = ocfs2_commit_trigger, 568 .t_commit = ocfs2_commit_trigger,
@@ -601,14 +610,16 @@ static struct ocfs2_triggers dl_triggers = {
601}; 610};
602 611
603static int __ocfs2_journal_access(handle_t *handle, 612static int __ocfs2_journal_access(handle_t *handle,
604 struct inode *inode, 613 struct ocfs2_caching_info *ci,
605 struct buffer_head *bh, 614 struct buffer_head *bh,
606 struct ocfs2_triggers *triggers, 615 struct ocfs2_triggers *triggers,
607 int type) 616 int type)
608{ 617{
609 int status; 618 int status;
619 struct ocfs2_super *osb =
620 OCFS2_SB(ocfs2_metadata_cache_get_super(ci));
610 621
611 BUG_ON(!inode); 622 BUG_ON(!ci || !ci->ci_ops);
612 BUG_ON(!handle); 623 BUG_ON(!handle);
613 BUG_ON(!bh); 624 BUG_ON(!bh);
614 625
@@ -627,15 +638,15 @@ static int __ocfs2_journal_access(handle_t *handle,
627 BUG(); 638 BUG();
628 } 639 }
629 640
630 /* Set the current transaction information on the inode so 641 /* Set the current transaction information on the ci so
631 * that the locking code knows whether it can drop it's locks 642 * that the locking code knows whether it can drop it's locks
632 * on this inode or not. We're protected from the commit 643 * on this ci or not. We're protected from the commit
633 * thread updating the current transaction id until 644 * thread updating the current transaction id until
634 * ocfs2_commit_trans() because ocfs2_start_trans() took 645 * ocfs2_commit_trans() because ocfs2_start_trans() took
635 * j_trans_barrier for us. */ 646 * j_trans_barrier for us. */
636 ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); 647 ocfs2_set_ci_lock_trans(osb->journal, ci);
637 648
638 mutex_lock(&OCFS2_I(inode)->ip_io_mutex); 649 ocfs2_metadata_cache_io_lock(ci);
639 switch (type) { 650 switch (type) {
640 case OCFS2_JOURNAL_ACCESS_CREATE: 651 case OCFS2_JOURNAL_ACCESS_CREATE:
641 case OCFS2_JOURNAL_ACCESS_WRITE: 652 case OCFS2_JOURNAL_ACCESS_WRITE:
@@ -650,9 +661,9 @@ static int __ocfs2_journal_access(handle_t *handle,
650 status = -EINVAL; 661 status = -EINVAL;
651 mlog(ML_ERROR, "Uknown access type!\n"); 662 mlog(ML_ERROR, "Uknown access type!\n");
652 } 663 }
653 if (!status && ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)) && triggers) 664 if (!status && ocfs2_meta_ecc(osb) && triggers)
654 jbd2_journal_set_triggers(bh, &triggers->ot_triggers); 665 jbd2_journal_set_triggers(bh, &triggers->ot_triggers);
655 mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); 666 ocfs2_metadata_cache_io_unlock(ci);
656 667
657 if (status < 0) 668 if (status < 0)
658 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", 669 mlog(ML_ERROR, "Error %d getting %d access to buffer!\n",
@@ -662,66 +673,65 @@ static int __ocfs2_journal_access(handle_t *handle,
662 return status; 673 return status;
663} 674}
664 675
665int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, 676int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
666 struct buffer_head *bh, int type) 677 struct buffer_head *bh, int type)
667{ 678{
668 return __ocfs2_journal_access(handle, inode, bh, &di_triggers, 679 return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type);
669 type);
670} 680}
671 681
672int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, 682int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
673 struct buffer_head *bh, int type) 683 struct buffer_head *bh, int type)
674{ 684{
675 return __ocfs2_journal_access(handle, inode, bh, &eb_triggers, 685 return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type);
676 type);
677} 686}
678 687
679int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, 688int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
680 struct buffer_head *bh, int type) 689 struct buffer_head *bh, int type)
681{ 690{
682 return __ocfs2_journal_access(handle, inode, bh, &gd_triggers, 691 return __ocfs2_journal_access(handle, ci, bh, &rb_triggers,
683 type); 692 type);
684} 693}
685 694
686int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, 695int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
687 struct buffer_head *bh, int type) 696 struct buffer_head *bh, int type)
688{ 697{
689 return __ocfs2_journal_access(handle, inode, bh, &db_triggers, 698 return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type);
690 type);
691} 699}
692 700
693int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, 701int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
694 struct buffer_head *bh, int type) 702 struct buffer_head *bh, int type)
695{ 703{
696 return __ocfs2_journal_access(handle, inode, bh, &xb_triggers, 704 return __ocfs2_journal_access(handle, ci, bh, &db_triggers, type);
697 type);
698} 705}
699 706
700int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, 707int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
701 struct buffer_head *bh, int type) 708 struct buffer_head *bh, int type)
702{ 709{
703 return __ocfs2_journal_access(handle, inode, bh, &dq_triggers, 710 return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type);
704 type);
705} 711}
706 712
707int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, 713int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
708 struct buffer_head *bh, int type) 714 struct buffer_head *bh, int type)
709{ 715{
710 return __ocfs2_journal_access(handle, inode, bh, &dr_triggers, 716 return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type);
711 type);
712} 717}
713 718
714int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, 719int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
715 struct buffer_head *bh, int type) 720 struct buffer_head *bh, int type)
716{ 721{
717 return __ocfs2_journal_access(handle, inode, bh, &dl_triggers, 722 return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type);
718 type); 723}
724
725int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
726 struct buffer_head *bh, int type)
727{
728 return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type);
719} 729}
720 730
721int ocfs2_journal_access(handle_t *handle, struct inode *inode, 731int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
722 struct buffer_head *bh, int type) 732 struct buffer_head *bh, int type)
723{ 733{
724 return __ocfs2_journal_access(handle, inode, bh, NULL, type); 734 return __ocfs2_journal_access(handle, ci, bh, NULL, type);
725} 735}
726 736
727int ocfs2_journal_dirty(handle_t *handle, 737int ocfs2_journal_dirty(handle_t *handle,
@@ -898,7 +908,7 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb,
898 ocfs2_bump_recovery_generation(fe); 908 ocfs2_bump_recovery_generation(fe);
899 909
900 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); 910 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
901 status = ocfs2_write_block(osb, bh, journal->j_inode); 911 status = ocfs2_write_block(osb, bh, INODE_CACHE(journal->j_inode));
902 if (status < 0) 912 if (status < 0)
903 mlog_errno(status); 913 mlog_errno(status);
904 914
@@ -1642,7 +1652,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb,
1642 ocfs2_get_recovery_generation(fe); 1652 ocfs2_get_recovery_generation(fe);
1643 1653
1644 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check); 1654 ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &fe->i_check);
1645 status = ocfs2_write_block(osb, bh, inode); 1655 status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
1646 if (status < 0) 1656 if (status < 0)
1647 mlog_errno(status); 1657 mlog_errno(status);
1648 1658
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h
index 2c3222aec622..3f74e09b0d80 100644
--- a/fs/ocfs2/journal.h
+++ b/fs/ocfs2/journal.h
@@ -90,56 +90,66 @@ static inline unsigned long ocfs2_inc_trans_id(struct ocfs2_journal *j)
90 return old_id; 90 return old_id;
91} 91}
92 92
93static inline void ocfs2_set_inode_lock_trans(struct ocfs2_journal *journal, 93static inline void ocfs2_set_ci_lock_trans(struct ocfs2_journal *journal,
94 struct inode *inode) 94 struct ocfs2_caching_info *ci)
95{ 95{
96 spin_lock(&trans_inc_lock); 96 spin_lock(&trans_inc_lock);
97 OCFS2_I(inode)->ip_last_trans = journal->j_trans_id; 97 ci->ci_last_trans = journal->j_trans_id;
98 spin_unlock(&trans_inc_lock); 98 spin_unlock(&trans_inc_lock);
99} 99}
100 100
101/* Used to figure out whether it's safe to drop a metadata lock on an 101/* Used to figure out whether it's safe to drop a metadata lock on an
102 * inode. Returns true if all the inodes changes have been 102 * cached object. Returns true if all the object's changes have been
103 * checkpointed to disk. You should be holding the spinlock on the 103 * checkpointed to disk. You should be holding the spinlock on the
104 * metadata lock while calling this to be sure that nobody can take 104 * metadata lock while calling this to be sure that nobody can take
105 * the lock and put it on another transaction. */ 105 * the lock and put it on another transaction. */
106static inline int ocfs2_inode_fully_checkpointed(struct inode *inode) 106static inline int ocfs2_ci_fully_checkpointed(struct ocfs2_caching_info *ci)
107{ 107{
108 int ret; 108 int ret;
109 struct ocfs2_journal *journal = OCFS2_SB(inode->i_sb)->journal; 109 struct ocfs2_journal *journal =
110 OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
110 111
111 spin_lock(&trans_inc_lock); 112 spin_lock(&trans_inc_lock);
112 ret = time_after(journal->j_trans_id, OCFS2_I(inode)->ip_last_trans); 113 ret = time_after(journal->j_trans_id, ci->ci_last_trans);
113 spin_unlock(&trans_inc_lock); 114 spin_unlock(&trans_inc_lock);
114 return ret; 115 return ret;
115} 116}
116 117
117/* convenience function to check if an inode is still new (has never 118/* convenience function to check if an object backed by struct
118 * hit disk) Will do you a favor and set created_trans = 0 when you've 119 * ocfs2_caching_info is still new (has never hit disk) Will do you a
119 * been checkpointed. returns '1' if the inode is still new. */ 120 * favor and set created_trans = 0 when you've
120static inline int ocfs2_inode_is_new(struct inode *inode) 121 * been checkpointed. returns '1' if the ci is still new. */
122static inline int ocfs2_ci_is_new(struct ocfs2_caching_info *ci)
121{ 123{
122 int ret; 124 int ret;
125 struct ocfs2_journal *journal =
126 OCFS2_SB(ocfs2_metadata_cache_get_super(ci))->journal;
123 127
128 spin_lock(&trans_inc_lock);
129 ret = !(time_after(journal->j_trans_id, ci->ci_created_trans));
130 if (!ret)
131 ci->ci_created_trans = 0;
132 spin_unlock(&trans_inc_lock);
133 return ret;
134}
135
136/* Wrapper for inodes so we can check system files */
137static inline int ocfs2_inode_is_new(struct inode *inode)
138{
124 /* System files are never "new" as they're written out by 139 /* System files are never "new" as they're written out by
125 * mkfs. This helps us early during mount, before we have the 140 * mkfs. This helps us early during mount, before we have the
126 * journal open and j_trans_id could be junk. */ 141 * journal open and j_trans_id could be junk. */
127 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE) 142 if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_SYSTEM_FILE)
128 return 0; 143 return 0;
129 spin_lock(&trans_inc_lock); 144
130 ret = !(time_after(OCFS2_SB(inode->i_sb)->journal->j_trans_id, 145 return ocfs2_ci_is_new(INODE_CACHE(inode));
131 OCFS2_I(inode)->ip_created_trans));
132 if (!ret)
133 OCFS2_I(inode)->ip_created_trans = 0;
134 spin_unlock(&trans_inc_lock);
135 return ret;
136} 146}
137 147
138static inline void ocfs2_inode_set_new(struct ocfs2_super *osb, 148static inline void ocfs2_ci_set_new(struct ocfs2_super *osb,
139 struct inode *inode) 149 struct ocfs2_caching_info *ci)
140{ 150{
141 spin_lock(&trans_inc_lock); 151 spin_lock(&trans_inc_lock);
142 OCFS2_I(inode)->ip_created_trans = osb->journal->j_trans_id; 152 ci->ci_created_trans = osb->journal->j_trans_id;
143 spin_unlock(&trans_inc_lock); 153 spin_unlock(&trans_inc_lock);
144} 154}
145 155
@@ -200,7 +210,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
200 if (ocfs2_mount_local(osb)) 210 if (ocfs2_mount_local(osb))
201 return; 211 return;
202 212
203 if (!ocfs2_inode_fully_checkpointed(inode)) { 213 if (!ocfs2_ci_fully_checkpointed(INODE_CACHE(inode))) {
204 /* WARNING: This only kicks off a single 214 /* WARNING: This only kicks off a single
205 * checkpoint. If someone races you and adds more 215 * checkpoint. If someone races you and adds more
206 * metadata to the journal, you won't know, and will 216 * metadata to the journal, you won't know, and will
@@ -210,7 +220,7 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode)
210 ocfs2_start_checkpoint(osb); 220 ocfs2_start_checkpoint(osb);
211 221
212 wait_event(osb->journal->j_checkpointed, 222 wait_event(osb->journal->j_checkpointed,
213 ocfs2_inode_fully_checkpointed(inode)); 223 ocfs2_ci_fully_checkpointed(INODE_CACHE(inode)));
214 } 224 }
215} 225}
216 226
@@ -266,31 +276,34 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks);
266 276
267 277
268/* ocfs2_inode */ 278/* ocfs2_inode */
269int ocfs2_journal_access_di(handle_t *handle, struct inode *inode, 279int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci,
270 struct buffer_head *bh, int type); 280 struct buffer_head *bh, int type);
271/* ocfs2_extent_block */ 281/* ocfs2_extent_block */
272int ocfs2_journal_access_eb(handle_t *handle, struct inode *inode, 282int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci,
283 struct buffer_head *bh, int type);
284/* ocfs2_refcount_block */
285int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci,
273 struct buffer_head *bh, int type); 286 struct buffer_head *bh, int type);
274/* ocfs2_group_desc */ 287/* ocfs2_group_desc */
275int ocfs2_journal_access_gd(handle_t *handle, struct inode *inode, 288int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci,
276 struct buffer_head *bh, int type); 289 struct buffer_head *bh, int type);
277/* ocfs2_xattr_block */ 290/* ocfs2_xattr_block */
278int ocfs2_journal_access_xb(handle_t *handle, struct inode *inode, 291int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci,
279 struct buffer_head *bh, int type); 292 struct buffer_head *bh, int type);
280/* quota blocks */ 293/* quota blocks */
281int ocfs2_journal_access_dq(handle_t *handle, struct inode *inode, 294int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci,
282 struct buffer_head *bh, int type); 295 struct buffer_head *bh, int type);
283/* dirblock */ 296/* dirblock */
284int ocfs2_journal_access_db(handle_t *handle, struct inode *inode, 297int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci,
285 struct buffer_head *bh, int type); 298 struct buffer_head *bh, int type);
286/* ocfs2_dx_root_block */ 299/* ocfs2_dx_root_block */
287int ocfs2_journal_access_dr(handle_t *handle, struct inode *inode, 300int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci,
288 struct buffer_head *bh, int type); 301 struct buffer_head *bh, int type);
289/* ocfs2_dx_leaf */ 302/* ocfs2_dx_leaf */
290int ocfs2_journal_access_dl(handle_t *handle, struct inode *inode, 303int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci,
291 struct buffer_head *bh, int type); 304 struct buffer_head *bh, int type);
292/* Anything that has no ecc */ 305/* Anything that has no ecc */
293int ocfs2_journal_access(handle_t *handle, struct inode *inode, 306int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci,
294 struct buffer_head *bh, int type); 307 struct buffer_head *bh, int type);
295 308
296/* 309/*
@@ -477,6 +490,23 @@ static inline int ocfs2_calc_dxi_expand_credits(struct super_block *sb)
477 return credits; 490 return credits;
478} 491}
479 492
493/* inode update, new refcount block and its allocation credits. */
494#define OCFS2_REFCOUNT_TREE_CREATE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1 \
495 + OCFS2_SUBALLOC_ALLOC)
496
497/* inode and the refcount block update. */
498#define OCFS2_REFCOUNT_TREE_SET_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
499
500/*
501 * inode and the refcount block update.
502 * It doesn't include the credits for sub alloc change.
503 * So if we need to free the bit, OCFS2_SUBALLOC_FREE needs to be added.
504 */
505#define OCFS2_REFCOUNT_TREE_REMOVE_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1)
506
507/* 2 metadata alloc, 2 new blocks and root refcount block */
508#define OCFS2_EXPAND_REFCOUNT_TREE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + 3)
509
480/* 510/*
481 * Please note that the caller must make sure that root_el is the root 511 * Please note that the caller must make sure that root_el is the root
482 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise 512 * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index bac7e6abaf47..ac10f83edb95 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -297,8 +297,8 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
297 } 297 }
298 memcpy(alloc_copy, alloc, bh->b_size); 298 memcpy(alloc_copy, alloc, bh->b_size);
299 299
300 status = ocfs2_journal_access_di(handle, local_alloc_inode, bh, 300 status = ocfs2_journal_access_di(handle, INODE_CACHE(local_alloc_inode),
301 OCFS2_JOURNAL_ACCESS_WRITE); 301 bh, OCFS2_JOURNAL_ACCESS_WRITE);
302 if (status < 0) { 302 if (status < 0) {
303 mlog_errno(status); 303 mlog_errno(status);
304 goto out_commit; 304 goto out_commit;
@@ -392,7 +392,7 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb,
392 ocfs2_clear_local_alloc(alloc); 392 ocfs2_clear_local_alloc(alloc);
393 393
394 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check); 394 ocfs2_compute_meta_ecc(osb->sb, alloc_bh->b_data, &alloc->i_check);
395 status = ocfs2_write_block(osb, alloc_bh, inode); 395 status = ocfs2_write_block(osb, alloc_bh, INODE_CACHE(inode));
396 if (status < 0) 396 if (status < 0)
397 mlog_errno(status); 397 mlog_errno(status);
398 398
@@ -678,7 +678,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb,
678 * delete bits from it! */ 678 * delete bits from it! */
679 *num_bits = bits_wanted; 679 *num_bits = bits_wanted;
680 680
681 status = ocfs2_journal_access_di(handle, local_alloc_inode, 681 status = ocfs2_journal_access_di(handle,
682 INODE_CACHE(local_alloc_inode),
682 osb->local_alloc_bh, 683 osb->local_alloc_bh,
683 OCFS2_JOURNAL_ACCESS_WRITE); 684 OCFS2_JOURNAL_ACCESS_WRITE);
684 if (status < 0) { 685 if (status < 0) {
@@ -1156,7 +1157,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb,
1156 } 1157 }
1157 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size); 1158 memcpy(alloc_copy, alloc, osb->local_alloc_bh->b_size);
1158 1159
1159 status = ocfs2_journal_access_di(handle, local_alloc_inode, 1160 status = ocfs2_journal_access_di(handle,
1161 INODE_CACHE(local_alloc_inode),
1160 osb->local_alloc_bh, 1162 osb->local_alloc_bh,
1161 OCFS2_JOURNAL_ACCESS_WRITE); 1163 OCFS2_JOURNAL_ACCESS_WRITE);
1162 if (status < 0) { 1164 if (status < 0) {
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 8601f934010b..f010b22b1c44 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -69,7 +69,6 @@
69static int ocfs2_mknod_locked(struct ocfs2_super *osb, 69static int ocfs2_mknod_locked(struct ocfs2_super *osb,
70 struct inode *dir, 70 struct inode *dir,
71 struct inode *inode, 71 struct inode *inode,
72 struct dentry *dentry,
73 dev_t dev, 72 dev_t dev,
74 struct buffer_head **new_fe_bh, 73 struct buffer_head **new_fe_bh,
75 struct buffer_head *parent_fe_bh, 74 struct buffer_head *parent_fe_bh,
@@ -78,7 +77,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
78 77
79static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 78static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
80 struct inode **ret_orphan_dir, 79 struct inode **ret_orphan_dir,
81 struct inode *inode, 80 u64 blkno,
82 char *name, 81 char *name,
83 struct ocfs2_dir_lookup_result *lookup); 82 struct ocfs2_dir_lookup_result *lookup);
84 83
@@ -358,8 +357,12 @@ static int ocfs2_mknod(struct inode *dir,
358 } 357 }
359 did_quota_inode = 1; 358 did_quota_inode = 1;
360 359
360 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
361 inode->i_mode, (unsigned long)dev, dentry->d_name.len,
362 dentry->d_name.name);
363
361 /* do the real work now. */ 364 /* do the real work now. */
362 status = ocfs2_mknod_locked(osb, dir, inode, dentry, dev, 365 status = ocfs2_mknod_locked(osb, dir, inode, dev,
363 &new_fe_bh, parent_fe_bh, handle, 366 &new_fe_bh, parent_fe_bh, handle,
364 inode_ac); 367 inode_ac);
365 if (status < 0) { 368 if (status < 0) {
@@ -375,7 +378,8 @@ static int ocfs2_mknod(struct inode *dir,
375 goto leave; 378 goto leave;
376 } 379 }
377 380
378 status = ocfs2_journal_access_di(handle, dir, parent_fe_bh, 381 status = ocfs2_journal_access_di(handle, INODE_CACHE(dir),
382 parent_fe_bh,
379 OCFS2_JOURNAL_ACCESS_WRITE); 383 OCFS2_JOURNAL_ACCESS_WRITE);
380 if (status < 0) { 384 if (status < 0) {
381 mlog_errno(status); 385 mlog_errno(status);
@@ -465,7 +469,6 @@ leave:
465static int ocfs2_mknod_locked(struct ocfs2_super *osb, 469static int ocfs2_mknod_locked(struct ocfs2_super *osb,
466 struct inode *dir, 470 struct inode *dir,
467 struct inode *inode, 471 struct inode *inode,
468 struct dentry *dentry,
469 dev_t dev, 472 dev_t dev,
470 struct buffer_head **new_fe_bh, 473 struct buffer_head **new_fe_bh,
471 struct buffer_head *parent_fe_bh, 474 struct buffer_head *parent_fe_bh,
@@ -479,10 +482,6 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
479 u16 suballoc_bit; 482 u16 suballoc_bit;
480 u16 feat; 483 u16 feat;
481 484
482 mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry,
483 inode->i_mode, (unsigned long)dev, dentry->d_name.len,
484 dentry->d_name.name);
485
486 *new_fe_bh = NULL; 485 *new_fe_bh = NULL;
487 486
488 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh, 487 status = ocfs2_claim_new_inode(osb, handle, dir, parent_fe_bh,
@@ -507,9 +506,10 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
507 mlog_errno(status); 506 mlog_errno(status);
508 goto leave; 507 goto leave;
509 } 508 }
510 ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh); 509 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), *new_fe_bh);
511 510
512 status = ocfs2_journal_access_di(handle, inode, *new_fe_bh, 511 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
512 *new_fe_bh,
513 OCFS2_JOURNAL_ACCESS_CREATE); 513 OCFS2_JOURNAL_ACCESS_CREATE);
514 if (status < 0) { 514 if (status < 0) {
515 mlog_errno(status); 515 mlog_errno(status);
@@ -565,7 +565,7 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
565 } 565 }
566 566
567 ocfs2_populate_inode(inode, fe, 1); 567 ocfs2_populate_inode(inode, fe, 1);
568 ocfs2_inode_set_new(osb, inode); 568 ocfs2_ci_set_new(osb, INODE_CACHE(inode));
569 if (!ocfs2_mount_local(osb)) { 569 if (!ocfs2_mount_local(osb)) {
570 status = ocfs2_create_new_inode_locks(inode); 570 status = ocfs2_create_new_inode_locks(inode);
571 if (status < 0) 571 if (status < 0)
@@ -682,7 +682,7 @@ static int ocfs2_link(struct dentry *old_dentry,
682 goto out_unlock_inode; 682 goto out_unlock_inode;
683 } 683 }
684 684
685 err = ocfs2_journal_access_di(handle, inode, fe_bh, 685 err = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
686 OCFS2_JOURNAL_ACCESS_WRITE); 686 OCFS2_JOURNAL_ACCESS_WRITE);
687 if (err < 0) { 687 if (err < 0) {
688 mlog_errno(err); 688 mlog_errno(err);
@@ -850,7 +850,8 @@ static int ocfs2_unlink(struct inode *dir,
850 } 850 }
851 851
852 if (inode_is_unlinkable(inode)) { 852 if (inode_is_unlinkable(inode)) {
853 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode, 853 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
854 OCFS2_I(inode)->ip_blkno,
854 orphan_name, &orphan_insert); 855 orphan_name, &orphan_insert);
855 if (status < 0) { 856 if (status < 0) {
856 mlog_errno(status); 857 mlog_errno(status);
@@ -866,7 +867,7 @@ static int ocfs2_unlink(struct inode *dir,
866 goto leave; 867 goto leave;
867 } 868 }
868 869
869 status = ocfs2_journal_access_di(handle, inode, fe_bh, 870 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode), fe_bh,
870 OCFS2_JOURNAL_ACCESS_WRITE); 871 OCFS2_JOURNAL_ACCESS_WRITE);
871 if (status < 0) { 872 if (status < 0) {
872 mlog_errno(status); 873 mlog_errno(status);
@@ -1241,9 +1242,8 @@ static int ocfs2_rename(struct inode *old_dir,
1241 1242
1242 if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) { 1243 if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1243 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, 1244 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
1244 new_inode, 1245 OCFS2_I(new_inode)->ip_blkno,
1245 orphan_name, 1246 orphan_name, &orphan_insert);
1246 &orphan_insert);
1247 if (status < 0) { 1247 if (status < 0) {
1248 mlog_errno(status); 1248 mlog_errno(status);
1249 goto bail; 1249 goto bail;
@@ -1284,7 +1284,8 @@ static int ocfs2_rename(struct inode *old_dir,
1284 goto bail; 1284 goto bail;
1285 } 1285 }
1286 } 1286 }
1287 status = ocfs2_journal_access_di(handle, new_inode, newfe_bh, 1287 status = ocfs2_journal_access_di(handle, INODE_CACHE(new_inode),
1288 newfe_bh,
1288 OCFS2_JOURNAL_ACCESS_WRITE); 1289 OCFS2_JOURNAL_ACCESS_WRITE);
1289 if (status < 0) { 1290 if (status < 0) {
1290 mlog_errno(status); 1291 mlog_errno(status);
@@ -1331,7 +1332,8 @@ static int ocfs2_rename(struct inode *old_dir,
1331 old_inode->i_ctime = CURRENT_TIME; 1332 old_inode->i_ctime = CURRENT_TIME;
1332 mark_inode_dirty(old_inode); 1333 mark_inode_dirty(old_inode);
1333 1334
1334 status = ocfs2_journal_access_di(handle, old_inode, old_inode_bh, 1335 status = ocfs2_journal_access_di(handle, INODE_CACHE(old_inode),
1336 old_inode_bh,
1335 OCFS2_JOURNAL_ACCESS_WRITE); 1337 OCFS2_JOURNAL_ACCESS_WRITE);
1336 if (status >= 0) { 1338 if (status >= 0) {
1337 old_di = (struct ocfs2_dinode *) old_inode_bh->b_data; 1339 old_di = (struct ocfs2_dinode *) old_inode_bh->b_data;
@@ -1407,9 +1409,10 @@ static int ocfs2_rename(struct inode *old_dir,
1407 (int)old_dir_nlink, old_dir->i_nlink); 1409 (int)old_dir_nlink, old_dir->i_nlink);
1408 } else { 1410 } else {
1409 struct ocfs2_dinode *fe; 1411 struct ocfs2_dinode *fe;
1410 status = ocfs2_journal_access_di(handle, old_dir, 1412 status = ocfs2_journal_access_di(handle,
1411 old_dir_bh, 1413 INODE_CACHE(old_dir),
1412 OCFS2_JOURNAL_ACCESS_WRITE); 1414 old_dir_bh,
1415 OCFS2_JOURNAL_ACCESS_WRITE);
1413 fe = (struct ocfs2_dinode *) old_dir_bh->b_data; 1416 fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1414 ocfs2_set_links_count(fe, old_dir->i_nlink); 1417 ocfs2_set_links_count(fe, old_dir->i_nlink);
1415 status = ocfs2_journal_dirty(handle, old_dir_bh); 1418 status = ocfs2_journal_dirty(handle, old_dir_bh);
@@ -1527,9 +1530,11 @@ static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1527 mlog_errno(status); 1530 mlog_errno(status);
1528 goto bail; 1531 goto bail;
1529 } 1532 }
1530 ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]); 1533 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode),
1534 bhs[virtual]);
1531 1535
1532 status = ocfs2_journal_access(handle, inode, bhs[virtual], 1536 status = ocfs2_journal_access(handle, INODE_CACHE(inode),
1537 bhs[virtual],
1533 OCFS2_JOURNAL_ACCESS_CREATE); 1538 OCFS2_JOURNAL_ACCESS_CREATE);
1534 if (status < 0) { 1539 if (status < 0) {
1535 mlog_errno(status); 1540 mlog_errno(status);
@@ -1692,7 +1697,11 @@ static int ocfs2_symlink(struct inode *dir,
1692 } 1697 }
1693 did_quota_inode = 1; 1698 did_quota_inode = 1;
1694 1699
1695 status = ocfs2_mknod_locked(osb, dir, inode, dentry, 1700 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry,
1701 inode->i_mode, dentry->d_name.len,
1702 dentry->d_name.name);
1703
1704 status = ocfs2_mknod_locked(osb, dir, inode,
1696 0, &new_fe_bh, parent_fe_bh, handle, 1705 0, &new_fe_bh, parent_fe_bh, handle,
1697 inode_ac); 1706 inode_ac);
1698 if (status < 0) { 1707 if (status < 0) {
@@ -1842,7 +1851,7 @@ bail:
1842 1851
1843static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb, 1852static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1844 struct inode **ret_orphan_dir, 1853 struct inode **ret_orphan_dir,
1845 struct inode *inode, 1854 u64 blkno,
1846 char *name, 1855 char *name,
1847 struct ocfs2_dir_lookup_result *lookup) 1856 struct ocfs2_dir_lookup_result *lookup)
1848{ 1857{
@@ -1850,7 +1859,7 @@ static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
1850 struct buffer_head *orphan_dir_bh = NULL; 1859 struct buffer_head *orphan_dir_bh = NULL;
1851 int status = 0; 1860 int status = 0;
1852 1861
1853 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name); 1862 status = ocfs2_blkno_stringify(blkno, name);
1854 if (status < 0) { 1863 if (status < 0) {
1855 mlog_errno(status); 1864 mlog_errno(status);
1856 return status; 1865 return status;
@@ -1917,7 +1926,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb,
1917 goto leave; 1926 goto leave;
1918 } 1927 }
1919 1928
1920 status = ocfs2_journal_access_di(handle, orphan_dir_inode, orphan_dir_bh, 1929 status = ocfs2_journal_access_di(handle,
1930 INODE_CACHE(orphan_dir_inode),
1931 orphan_dir_bh,
1921 OCFS2_JOURNAL_ACCESS_WRITE); 1932 OCFS2_JOURNAL_ACCESS_WRITE);
1922 if (status < 0) { 1933 if (status < 0) {
1923 mlog_errno(status); 1934 mlog_errno(status);
@@ -2002,7 +2013,9 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
2002 goto leave; 2013 goto leave;
2003 } 2014 }
2004 2015
2005 status = ocfs2_journal_access_di(handle,orphan_dir_inode, orphan_dir_bh, 2016 status = ocfs2_journal_access_di(handle,
2017 INODE_CACHE(orphan_dir_inode),
2018 orphan_dir_bh,
2006 OCFS2_JOURNAL_ACCESS_WRITE); 2019 OCFS2_JOURNAL_ACCESS_WRITE);
2007 if (status < 0) { 2020 if (status < 0) {
2008 mlog_errno(status); 2021 mlog_errno(status);
@@ -2028,6 +2041,274 @@ leave:
2028 return status; 2041 return status;
2029} 2042}
2030 2043
/*
 * Create a new inode with the given mode and insert it into the orphan
 * directory instead of giving it a name under @dir.
 *
 * Sequence: lock @dir with ocfs2_inode_lock(), prepare the orphan directory,
 * reserve an inode, build the in-memory inode, start a transaction, run the
 * quota alloc_inode check, call ocfs2_mknod_locked(), add the inode to the
 * orphan directory under a name derived from its block number, and finally
 * take the open lock on it.
 *
 * @dir:       directory passed to ocfs2_get_init_inode()/ocfs2_mknod_locked()
 * @mode:      mode for the new inode
 * @new_inode: set to the new inode only when the final status is 0
 *
 * Returns the final status: 0 on success, otherwise the error from the step
 * that failed. On a negative status the inode (if one was created) is
 * dropped with clear_nlink() + iput().
 */
2044int ocfs2_create_inode_in_orphan(struct inode *dir,
2045 int mode,
2046 struct inode **new_inode)
2047{
2048 int status, did_quota_inode = 0;
2049 struct inode *inode = NULL;
2050 struct inode *orphan_dir = NULL;
2051 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2052 struct ocfs2_dinode *di = NULL;
2053 handle_t *handle = NULL;
2054 char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
2055 struct buffer_head *parent_di_bh = NULL;
2056 struct buffer_head *new_di_bh = NULL;
2057 struct ocfs2_alloc_context *inode_ac = NULL;
2058 struct ocfs2_dir_lookup_result orphan_insert = { NULL, };
2059
2060 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2061 if (status < 0) {
2062 if (status != -ENOENT)
2063 mlog_errno(status);
2064 return status;
2065 }
2066
2067 /*
2068 * We give the orphan dir the root blkno to fake an orphan name,
2069 * and allocate enough space for our insertion.
2070 */
2071 status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
2072 osb->root_blkno,
2073 orphan_name, &orphan_insert);
2074 if (status < 0) {
2075 mlog_errno(status);
2076 goto leave;
2077 }
2078
2079 /* reserve an inode spot */
2080 status = ocfs2_reserve_new_inode(osb, &inode_ac);
2081 if (status < 0) {
2082 if (status != -ENOSPC)
2083 mlog_errno(status);
2084 goto leave;
2085 }
2086
2087 inode = ocfs2_get_init_inode(dir, mode);
2088 if (!inode) {
2089 status = -ENOMEM;
2090 mlog_errno(status);
2091 goto leave;
2092 }
2093
2094 handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, 0, 0));
2095 if (IS_ERR(handle)) {
2096 status = PTR_ERR(handle);
2097 handle = NULL;
2098 mlog_errno(status);
2099 goto leave;
2100 }
2101
2102 /* We don't use standard VFS wrapper because we don't want vfs_dq_init
2103 * to be called. */
2104 if (sb_any_quota_active(osb->sb) &&
2105 osb->sb->dq_op->alloc_inode(inode, 1) == NO_QUOTA) {
2106 status = -EDQUOT;
2107 goto leave;
2108 }
2109 did_quota_inode = 1;
2110
2111 /* do the real work now. */
2112 status = ocfs2_mknod_locked(osb, dir, inode,
2113 0, &new_di_bh, parent_di_bh, handle,
2114 inode_ac);
2115 if (status < 0) {
2116 mlog_errno(status);
2117 goto leave;
2118 }
2119
/* Replace the placeholder name (built from the root blkno above) with the
 * name derived from the new inode's own block number. */
2120 status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, orphan_name);
2121 if (status < 0) {
2122 mlog_errno(status);
2123 goto leave;
2124 }
2125
2126 di = (struct ocfs2_dinode *)new_di_bh->b_data;
2127 status = ocfs2_orphan_add(osb, handle, inode, di, orphan_name,
2128 &orphan_insert, orphan_dir);
2129 if (status < 0) {
2130 mlog_errno(status);
2131 goto leave;
2132 }
2133
2134 /* get open lock so that other nodes can't remove it from orphan dir. */
2135 status = ocfs2_open_lock(inode);
2136 if (status < 0)
2137 mlog_errno(status);
2138
2139leave:
/* Undo the quota alloc_inode step only if it ran and we are failing. */
2140 if (status < 0 && did_quota_inode)
2141 vfs_dq_free_inode(inode);
2142 if (handle)
2143 ocfs2_commit_trans(osb, handle);
2144
2145 if (orphan_dir) {
2146 /* This was locked for us in ocfs2_prepare_orphan_dir() */
2147 ocfs2_inode_unlock(orphan_dir, 1);
2148 mutex_unlock(&orphan_dir->i_mutex);
2149 iput(orphan_dir);
2150 }
2151
2152 if (status == -ENOSPC)
2153 mlog(0, "Disk is full\n");
2154
2155 if ((status < 0) && inode) {
2156 clear_nlink(inode);
2157 iput(inode);
2158 }
2159
2160 if (inode_ac)
2161 ocfs2_free_alloc_context(inode_ac);
2162
2163 brelse(new_di_bh);
2164
/* The caller receives the inode only on full success. */
2165 if (!status)
2166 *new_inode = inode;
2167
2168 ocfs2_free_dir_lookup_result(&orphan_insert);
2169
2170 ocfs2_inode_unlock(dir, 1);
2171 brelse(parent_di_bh);
2172 return status;
2173}
2174
/*
 * Remove @inode from the orphan directory of osb->slot_num and link it into
 * @dir under the name carried by @dentry.
 *
 * Sequence: lock @dir, reject a directory whose on-disk link count is zero,
 * run ocfs2_check_dir_for_entry() on the name, reserve a directory slot,
 * lock the orphan directory (i_mutex, then ocfs2_inode_lock()), read the
 * inode block, and then inside one transaction: delete the orphan entry,
 * clear the orphan flag and slot in the dinode, add the directory entry and
 * attach the dentry lock. On success the inode is hashed and the dentry
 * instantiated.
 *
 * Returns 0 on success, otherwise the status of the step that failed.
 */
2175int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
2176 struct inode *inode,
2177 struct dentry *dentry)
2178{
2179 int status = 0;
2180 struct buffer_head *parent_di_bh = NULL;
2181 handle_t *handle = NULL;
2182 struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2183 struct ocfs2_dinode *dir_di, *di;
2184 struct inode *orphan_dir_inode = NULL;
2185 struct buffer_head *orphan_dir_bh = NULL;
2186 struct buffer_head *di_bh = NULL;
2187 struct ocfs2_dir_lookup_result lookup = { NULL, };
2188
/* NOTE(review): the format string closes a ' that it never opens. */
2189 mlog_entry("(0x%p, 0x%p, %.*s')\n", dir, dentry,
2190 dentry->d_name.len, dentry->d_name.name);
2191
2192 status = ocfs2_inode_lock(dir, &parent_di_bh, 1);
2193 if (status < 0) {
2194 if (status != -ENOENT)
2195 mlog_errno(status);
2196 return status;
2197 }
2198
2199 dir_di = (struct ocfs2_dinode *) parent_di_bh->b_data;
2200 if (!dir_di->i_links_count) {
2201 /* can't make a file in a deleted directory. */
2202 status = -ENOENT;
2203 goto leave;
2204 }
2205
2206 status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
2207 dentry->d_name.len);
2208 if (status)
2209 goto leave;
2210
2211 /* get a spot inside the dir. */
2212 status = ocfs2_prepare_dir_for_insert(osb, dir, parent_di_bh,
2213 dentry->d_name.name,
2214 dentry->d_name.len, &lookup);
2215 if (status < 0) {
2216 mlog_errno(status);
2217 goto leave;
2218 }
2219
2220 orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2221 ORPHAN_DIR_SYSTEM_INODE,
2222 osb->slot_num);
2223 if (!orphan_dir_inode) {
2224 status = -EEXIST;
2225 mlog_errno(status);
2226 goto leave;
2227 }
2228
2229 mutex_lock(&orphan_dir_inode->i_mutex);
2230
2231 status = ocfs2_inode_lock(orphan_dir_inode, &orphan_dir_bh, 1);
2232 if (status < 0) {
2233 mlog_errno(status);
/* The inode lock was not obtained, so release only the mutex and the
 * reference here and bypass the orphan_unlock label, which would also
 * call ocfs2_inode_unlock(). */
2234 mutex_unlock(&orphan_dir_inode->i_mutex);
2235 iput(orphan_dir_inode);
2236 goto leave;
2237 }
2238
2239 status = ocfs2_read_inode_block(inode, &di_bh);
2240 if (status < 0) {
2241 mlog_errno(status);
2242 goto orphan_unlock;
2243 }
2244
2245 handle = ocfs2_start_trans(osb, ocfs2_rename_credits(osb->sb));
2246 if (IS_ERR(handle)) {
2247 status = PTR_ERR(handle);
2248 handle = NULL;
2249 mlog_errno(status);
2250 goto orphan_unlock;
2251 }
2252
2253 status = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
2254 di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
2255 if (status < 0) {
2256 mlog_errno(status);
2257 goto out_commit;
2258 }
2259
2260 status = ocfs2_orphan_del(osb, handle, orphan_dir_inode, inode,
2261 orphan_dir_bh);
2262 if (status < 0) {
2263 mlog_errno(status);
2264 goto out_commit;
2265 }
2266
2267 di = (struct ocfs2_dinode *)di_bh->b_data;
/* NOTE(review): the flag is cleared by subtracting its value rather than
 * masking it, which is only correct if OCFS2_ORPHANED_FL is currently set
 * in i_flags -- confirm that this always holds here. */
2268 le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
2269 di->i_orphaned_slot = 0;
/* NOTE(review): the result of ocfs2_journal_dirty() is not checked here. */
2270 ocfs2_journal_dirty(handle, di_bh);
2271
2272 status = ocfs2_add_entry(handle, dentry, inode,
2273 OCFS2_I(inode)->ip_blkno, parent_di_bh,
2274 &lookup);
2275 if (status < 0) {
2276 mlog_errno(status);
2277 goto out_commit;
2278 }
2279
2280 status = ocfs2_dentry_attach_lock(dentry, inode,
2281 OCFS2_I(dir)->ip_blkno);
2282 if (status) {
2283 mlog_errno(status);
2284 goto out_commit;
2285 }
2286
2287 insert_inode_hash(inode);
2288 dentry->d_op = &ocfs2_dentry_ops;
2289 d_instantiate(dentry, inode);
2290 status = 0;
2291out_commit:
2292 ocfs2_commit_trans(osb, handle);
2293orphan_unlock:
2294 ocfs2_inode_unlock(orphan_dir_inode, 1);
2295 mutex_unlock(&orphan_dir_inode->i_mutex);
2296 iput(orphan_dir_inode);
2297leave:
2298
2299 ocfs2_inode_unlock(dir, 1);
2300
2301 brelse(di_bh);
2302 brelse(parent_di_bh);
2303 brelse(orphan_dir_bh);
2304
2305 ocfs2_free_dir_lookup_result(&lookup);
2306
2307 mlog_exit(status);
2308
2309 return status;
2310}
2311
2031const struct inode_operations ocfs2_dir_iops = { 2312const struct inode_operations ocfs2_dir_iops = {
2032 .create = ocfs2_create, 2313 .create = ocfs2_create,
2033 .lookup = ocfs2_lookup, 2314 .lookup = ocfs2_lookup,
diff --git a/fs/ocfs2/namei.h b/fs/ocfs2/namei.h
index 688aef64c879..e5d059d4f115 100644
--- a/fs/ocfs2/namei.h
+++ b/fs/ocfs2/namei.h
@@ -35,5 +35,11 @@ int ocfs2_orphan_del(struct ocfs2_super *osb,
35 struct inode *orphan_dir_inode, 35 struct inode *orphan_dir_inode,
36 struct inode *inode, 36 struct inode *inode,
37 struct buffer_head *orphan_dir_bh); 37 struct buffer_head *orphan_dir_bh);
38int ocfs2_create_inode_in_orphan(struct inode *dir,
39 int mode,
40 struct inode **new_inode);
41int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
42 struct inode *new_inode,
43 struct dentry *new_dentry);
38 44
39#endif /* OCFS2_NAMEI_H */ 45#endif /* OCFS2_NAMEI_H */
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 39e1d5a39505..eae404602424 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -51,20 +51,51 @@
51/* For struct ocfs2_blockcheck_stats */ 51/* For struct ocfs2_blockcheck_stats */
52#include "blockcheck.h" 52#include "blockcheck.h"
53 53
54
55/* Caching of metadata buffers */
56
54/* Most user visible OCFS2 inodes will have very few pieces of 57/* Most user visible OCFS2 inodes will have very few pieces of
55 * metadata, but larger files (including bitmaps, etc) must be taken 58 * metadata, but larger files (including bitmaps, etc) must be taken
56 * into account when designing an access scheme. We allow a small 59 * into account when designing an access scheme. We allow a small
57 * amount of inlined blocks to be stored on an array and grow the 60 * amount of inlined blocks to be stored on an array and grow the
58 * structure into a rb tree when necessary. */ 61 * structure into a rb tree when necessary. */
59#define OCFS2_INODE_MAX_CACHE_ARRAY 2 62#define OCFS2_CACHE_INFO_MAX_ARRAY 2
63
64/* Flags for ocfs2_caching_info */
65
/* Bit flags for struct ocfs2_caching_info (presumably kept in its ci_flags
 * member -- confirm against uptodate.c). */
66enum ocfs2_caching_info_flags {
67 /* Indicates that the metadata cache is using the inline array */
/* Note: the value is bit 1 (0x2); this enum defines nothing for bit 0. */
68 OCFS2_CACHE_FL_INLINE = 1<<1,
69};
60 70
71struct ocfs2_caching_operations;
61struct ocfs2_caching_info { 72struct ocfs2_caching_info {
73 /*
74 * The parent structure provides the locks, but because the
75 * parent structure can differ, it provides locking operations
76 * to struct ocfs2_caching_info.
77 */
78 const struct ocfs2_caching_operations *ci_ops;
79
80 /* next two are protected by trans_inc_lock */
81 /* which transaction were we created on? Zero if none. */
82 unsigned long ci_created_trans;
83 /* last transaction we were a part of. */
84 unsigned long ci_last_trans;
85
86 /* Cache structures */
87 unsigned int ci_flags;
62 unsigned int ci_num_cached; 88 unsigned int ci_num_cached;
63 union { 89 union {
64 sector_t ci_array[OCFS2_INODE_MAX_CACHE_ARRAY]; 90 sector_t ci_array[OCFS2_CACHE_INFO_MAX_ARRAY];
65 struct rb_root ci_tree; 91 struct rb_root ci_tree;
66 } ci_cache; 92 } ci_cache;
67}; 93};
94/*
95 * Need this prototype here instead of in uptodate.h because journal.h
96 * uses it.
97 */
98struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci);
68 99
69/* this limits us to 256 nodes 100/* this limits us to 256 nodes
70 * if we need more, we can do a kmalloc for the map */ 101 * if we need more, we can do a kmalloc for the map */
@@ -377,12 +408,17 @@ struct ocfs2_super
377 408
378 /* the group we used to allocate inodes. */ 409 /* the group we used to allocate inodes. */
379 u64 osb_inode_alloc_group; 410 u64 osb_inode_alloc_group;
411
412 /* rb tree root for refcount lock. */
413 struct rb_root osb_rf_lock_tree;
414 struct ocfs2_refcount_tree *osb_ref_tree_lru;
380}; 415};
381 416
382#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 417#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
383 418
384/* Useful typedef for passing around journal access functions */ 419/* Useful typedef for passing around journal access functions */
385typedef int (*ocfs2_journal_access_func)(handle_t *handle, struct inode *inode, 420typedef int (*ocfs2_journal_access_func)(handle_t *handle,
421 struct ocfs2_caching_info *ci,
386 struct buffer_head *bh, int type); 422 struct buffer_head *bh, int type);
387 423
388static inline int ocfs2_should_order_data(struct inode *inode) 424static inline int ocfs2_should_order_data(struct inode *inode)
@@ -480,6 +516,13 @@ static inline void ocfs2_add_links_count(struct ocfs2_dinode *di, int n)
480 ocfs2_set_links_count(di, links); 516 ocfs2_set_links_count(di, links);
481} 517}
482 518
/* Return 1 if OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE is set in
 * osb->s_feature_incompat, 0 otherwise. */
519static inline int ocfs2_refcount_tree(struct ocfs2_super *osb)
520{
521 if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
522 return 1;
523 return 0;
524}
525
483/* set / clear functions because cluster events can make these happen 526/* set / clear functions because cluster events can make these happen
484 * in parallel so we want the transitions to be atomic. this also 527 * in parallel so we want the transitions to be atomic. this also
485 * means that any future flags osb_flags must be protected by spinlock 528 * means that any future flags osb_flags must be protected by spinlock
@@ -578,6 +621,9 @@ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
578#define OCFS2_IS_VALID_DX_LEAF(ptr) \ 621#define OCFS2_IS_VALID_DX_LEAF(ptr) \
579 (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE)) 622 (!strcmp((ptr)->dl_signature, OCFS2_DX_LEAF_SIGNATURE))
580 623
/* Non-zero when (ptr)->rf_signature compares equal, via strcmp(), to
 * OCFS2_REFCOUNT_BLOCK_SIGNATURE. The comparison relies on rf_signature
 * being NUL-terminated. */
624#define OCFS2_IS_VALID_REFCOUNT_BLOCK(ptr) \
625 (!strcmp((ptr)->rf_signature, OCFS2_REFCOUNT_BLOCK_SIGNATURE))
626
581static inline unsigned long ino_from_blkno(struct super_block *sb, 627static inline unsigned long ino_from_blkno(struct super_block *sb,
582 u64 blkno) 628 u64 blkno)
583{ 629{
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index 7ab6e9e5e77c..e9431e4a5e7c 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -68,6 +68,7 @@
68#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1" 68#define OCFS2_DIR_TRAILER_SIGNATURE "DIRTRL1"
69#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01" 69#define OCFS2_DX_ROOT_SIGNATURE "DXDIR01"
70#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1" 70#define OCFS2_DX_LEAF_SIGNATURE "DXLEAF1"
71#define OCFS2_REFCOUNT_BLOCK_SIGNATURE "REFCNT1"
71 72
72/* Compatibility flags */ 73/* Compatibility flags */
73#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ 74#define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \
@@ -98,7 +99,8 @@
98 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ 99 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \
99 | OCFS2_FEATURE_INCOMPAT_XATTR \ 100 | OCFS2_FEATURE_INCOMPAT_XATTR \
100 | OCFS2_FEATURE_INCOMPAT_META_ECC \ 101 | OCFS2_FEATURE_INCOMPAT_META_ECC \
101 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS) 102 | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \
103 | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE)
102#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ 104#define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \
103 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ 105 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \
104 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) 106 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)
@@ -160,6 +162,9 @@
160/* Metadata checksum and error correction */ 162/* Metadata checksum and error correction */
161#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800 163#define OCFS2_FEATURE_INCOMPAT_META_ECC 0x0800
162 164
165/* Refcount tree support */
166#define OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE 0x1000
167
163/* 168/*
164 * backup superblock flag is used to indicate that this volume 169 * backup superblock flag is used to indicate that this volume
165 * has backup superblocks. 170 * has backup superblocks.
@@ -223,6 +228,7 @@
223#define OCFS2_HAS_XATTR_FL (0x0002) 228#define OCFS2_HAS_XATTR_FL (0x0002)
224#define OCFS2_INLINE_XATTR_FL (0x0004) 229#define OCFS2_INLINE_XATTR_FL (0x0004)
225#define OCFS2_INDEXED_DIR_FL (0x0008) 230#define OCFS2_INDEXED_DIR_FL (0x0008)
231#define OCFS2_HAS_REFCOUNT_FL (0x0010)
226 232
227/* Inode attributes, keep in sync with EXT2 */ 233/* Inode attributes, keep in sync with EXT2 */
228#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ 234#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */
@@ -241,8 +247,11 @@
241/* 247/*
242 * Extent record flags (e_node.leaf.flags) 248 * Extent record flags (e_node.leaf.flags)
243 */ 249 */
244#define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but 250#define OCFS2_EXT_UNWRITTEN (0x01) /* Extent is allocated but
245 * unwritten */ 251 * unwritten */
252#define OCFS2_EXT_REFCOUNTED (0x02) /* Extent is reference
253 * counted in an associated
254 * refcount tree */
246 255
247/* 256/*
248 * ioctl commands 257 * ioctl commands
@@ -292,6 +301,15 @@ struct ocfs2_new_group_input {
292#define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input) 301#define OCFS2_IOC_GROUP_ADD _IOW('o', 2,struct ocfs2_new_group_input)
293#define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input) 302#define OCFS2_IOC_GROUP_ADD64 _IOW('o', 3,struct ocfs2_new_group_input)
294 303
304/* Used to pass 2 file names to reflink. */
305struct reflink_arguments {
/* Every member is a fixed-width __u64. Presumably old_path and new_path
 * carry user-space string pointers -- confirm against the ioctl handler. */
306 __u64 old_path;
307 __u64 new_path;
/* NOTE(review): the meaning of preserve is not visible in this header. */
308 __u64 preserve;
309};
/* ioctl number 4 in the 'o' group; the argument is a struct reflink_arguments. */
310#define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments)
311
312
295/* 313/*
296 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags) 314 * Journal Flags (ocfs2_dinode.id1.journal1.i_flags)
297 */ 315 */
@@ -717,7 +735,8 @@ struct ocfs2_dinode {
717 __le64 i_xattr_loc; 735 __le64 i_xattr_loc;
718/*80*/ struct ocfs2_block_check i_check; /* Error checking */ 736/*80*/ struct ocfs2_block_check i_check; /* Error checking */
719/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */ 737/*88*/ __le64 i_dx_root; /* Pointer to dir index root block */
720 __le64 i_reserved2[5]; 738/*90*/ __le64 i_refcount_loc;
739 __le64 i_reserved2[4];
721/*B8*/ union { 740/*B8*/ union {
722 __le64 i_pad1; /* Generic way to refer to this 741 __le64 i_pad1; /* Generic way to refer to this
723 64bit union */ 742 64bit union */
@@ -901,6 +920,60 @@ struct ocfs2_group_desc
901/*40*/ __u8 bg_bitmap[0]; 920/*40*/ __u8 bg_bitmap[0];
902}; 921};
903 922
/* One refcount record: an extent of r_clusters clusters starting at r_cpos,
 * together with its reference count. All fields are little-endian; the
 * offset markers put the record size at 0x10 bytes. */
923struct ocfs2_refcount_rec {
924/*00*/ __le64 r_cpos; /* Physical offset, in clusters */
925 __le32 r_clusters; /* Clusters covered by this extent */
926 __le32 r_refcount; /* Reference count of this extent */
927/*10*/
928};
929#define OCFS2_32BIT_POS_MASK (0xffffffffULL)
930
931#define OCFS2_REFCOUNT_LEAF_FL (0x00000001)
932#define OCFS2_REFCOUNT_TREE_FL (0x00000002)
933
/* Header followed by a variable-length array of refcount records. */
934struct ocfs2_refcount_list {
935/*00*/ __le16 rl_count; /* Maximum number of entries possible
936 in rl_records */
937 __le16 rl_used; /* Current number of used records */
938 __le32 rl_reserved2;
939 __le64 rl_reserved1; /* Pad to sizeof(struct ocfs2_refcount_rec) */
940/*10*/ struct ocfs2_refcount_rec rl_recs[0]; /* Refcount records */
941};
942
943
/* Layout of a refcount block. After a fixed header (0x80 bytes per the
 * offset markers) comes a union that holds either a list of refcount
 * records or an extent list; see the member comments for which rf_flags
 * bit selects the extent list. */
944struct ocfs2_refcount_block {
945/*00*/ __u8 rf_signature[8]; /* Signature for verification */
946 __le16 rf_suballoc_slot; /* Slot suballocator this block
947 belongs to */
948 __le16 rf_suballoc_bit; /* Bit offset in suballocator
949 block group */
950 __le32 rf_fs_generation; /* Must match superblock */
951/*10*/ __le64 rf_blkno; /* Offset on disk, in blocks */
952 __le64 rf_parent; /* Parent block, only valid if
953 OCFS2_REFCOUNT_LEAF_FL is set in
954 rf_flags */
955/*20*/ struct ocfs2_block_check rf_check; /* Error checking */
956 __le64 rf_last_eb_blk; /* Pointer to last extent block */
957/*30*/ __le32 rf_count; /* Number of inodes sharing this
958 refcount tree */
959 __le32 rf_flags; /* See the flags above */
960 __le32 rf_clusters; /* clusters covered by refcount tree. */
961 __le32 rf_cpos; /* cluster offset in refcount tree.*/
962/*40*/ __le32 rf_generation; /* generation number. all be the same
963 * for the same refcount tree. */
964 __le32 rf_reserved0;
965 __le64 rf_reserved1[7];
966/*80*/ union {
967 struct ocfs2_refcount_list rf_records; /* List of refcount
968 records */
969 struct ocfs2_extent_list rf_list; /* Extent record list,
970 only valid if
971 OCFS2_REFCOUNT_TREE_FL
972 is set in rf_flags */
973 };
974/* Actual on-disk size is one block */
975};
976
904/* 977/*
905 * On disk extended attribute structure for OCFS2. 978 * On disk extended attribute structure for OCFS2.
906 */ 979 */
@@ -1312,6 +1385,32 @@ static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb)
1312 1385
1313 return size / sizeof(struct ocfs2_extent_rec); 1386 return size / sizeof(struct ocfs2_extent_rec);
1314} 1387}
1388
/* Number of struct ocfs2_extent_rec entries that fit in one block of
 * sb->s_blocksize bytes after the part of struct ocfs2_refcount_block that
 * precedes rf_list.l_recs. */
1389static inline u16 ocfs2_extent_recs_per_rb(struct super_block *sb)
1390{
1391 int size;
1392
1393 size = sb->s_blocksize -
1394 offsetof(struct ocfs2_refcount_block, rf_list.l_recs);
1395
1396 return size / sizeof(struct ocfs2_extent_rec);
1397}
1398
/* Number of struct ocfs2_refcount_rec entries that fit in one block of
 * sb->s_blocksize bytes after the part of struct ocfs2_refcount_block that
 * precedes rf_records.rl_recs. */
1399static inline u16 ocfs2_refcount_recs_per_rb(struct super_block *sb)
1400{
1401 int size;
1402
1403 size = sb->s_blocksize -
1404 offsetof(struct ocfs2_refcount_block, rf_records.rl_recs);
1405
1406 return size / sizeof(struct ocfs2_refcount_rec);
1407}
1408
/* Return the low 32 bits of rec->r_cpos after converting it from
 * little-endian to CPU byte order. */
1409static inline u32
1410ocfs2_get_ref_rec_low_cpos(const struct ocfs2_refcount_rec *rec)
1411{
1412 return le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
1413}
1315#else 1414#else
1316static inline int ocfs2_fast_symlink_chars(int blocksize) 1415static inline int ocfs2_fast_symlink_chars(int blocksize)
1317{ 1416{
diff --git a/fs/ocfs2/ocfs2_lockid.h b/fs/ocfs2/ocfs2_lockid.h
index c212cf5a2bdf..d277aabf5dfb 100644
--- a/fs/ocfs2/ocfs2_lockid.h
+++ b/fs/ocfs2/ocfs2_lockid.h
@@ -49,6 +49,7 @@ enum ocfs2_lock_type {
49 OCFS2_LOCK_TYPE_QINFO, 49 OCFS2_LOCK_TYPE_QINFO,
50 OCFS2_LOCK_TYPE_NFS_SYNC, 50 OCFS2_LOCK_TYPE_NFS_SYNC,
51 OCFS2_LOCK_TYPE_ORPHAN_SCAN, 51 OCFS2_LOCK_TYPE_ORPHAN_SCAN,
52 OCFS2_LOCK_TYPE_REFCOUNT,
52 OCFS2_NUM_LOCK_TYPES 53 OCFS2_NUM_LOCK_TYPES
53}; 54};
54 55
@@ -89,6 +90,9 @@ static inline char ocfs2_lock_type_char(enum ocfs2_lock_type type)
89 case OCFS2_LOCK_TYPE_ORPHAN_SCAN: 90 case OCFS2_LOCK_TYPE_ORPHAN_SCAN:
90 c = 'P'; 91 c = 'P';
91 break; 92 break;
93 case OCFS2_LOCK_TYPE_REFCOUNT:
94 c = 'T';
95 break;
92 default: 96 default:
93 c = '\0'; 97 c = '\0';
94 } 98 }
@@ -110,6 +114,7 @@ static char *ocfs2_lock_type_strings[] = {
110 [OCFS2_LOCK_TYPE_QINFO] = "Quota", 114 [OCFS2_LOCK_TYPE_QINFO] = "Quota",
111 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync", 115 [OCFS2_LOCK_TYPE_NFS_SYNC] = "NFSSync",
112 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan", 116 [OCFS2_LOCK_TYPE_ORPHAN_SCAN] = "OrphanScan",
117 [OCFS2_LOCK_TYPE_REFCOUNT] = "Refcount",
113}; 118};
114 119
115static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type) 120static inline const char *ocfs2_lock_type_string(enum ocfs2_lock_type type)
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 3cf0ec0acdd5..b437dc0c4cad 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -253,8 +253,9 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
253 flush_dcache_page(bh->b_page); 253 flush_dcache_page(bh->b_page);
254 set_buffer_uptodate(bh); 254 set_buffer_uptodate(bh);
255 unlock_buffer(bh); 255 unlock_buffer(bh);
256 ocfs2_set_buffer_uptodate(gqinode, bh); 256 ocfs2_set_buffer_uptodate(INODE_CACHE(gqinode), bh);
257 err = ocfs2_journal_access_dq(handle, gqinode, bh, ja_type); 257 err = ocfs2_journal_access_dq(handle, INODE_CACHE(gqinode), bh,
258 ja_type);
258 if (err < 0) { 259 if (err < 0) {
259 brelse(bh); 260 brelse(bh);
260 goto out; 261 goto out;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index bdb09cb6e1fe..1a2c50a759fa 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -108,7 +108,7 @@ static int ocfs2_modify_bh(struct inode *inode, struct buffer_head *bh,
108 mlog_errno(status); 108 mlog_errno(status);
109 return status; 109 return status;
110 } 110 }
111 status = ocfs2_journal_access_dq(handle, inode, bh, 111 status = ocfs2_journal_access_dq(handle, INODE_CACHE(inode), bh,
112 OCFS2_JOURNAL_ACCESS_WRITE); 112 OCFS2_JOURNAL_ACCESS_WRITE);
113 if (status < 0) { 113 if (status < 0) {
114 mlog_errno(status); 114 mlog_errno(status);
@@ -510,7 +510,8 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
510 goto out_commit; 510 goto out_commit;
511 } 511 }
512 /* Release local quota file entry */ 512 /* Release local quota file entry */
513 status = ocfs2_journal_access_dq(handle, lqinode, 513 status = ocfs2_journal_access_dq(handle,
514 INODE_CACHE(lqinode),
514 qbh, OCFS2_JOURNAL_ACCESS_WRITE); 515 qbh, OCFS2_JOURNAL_ACCESS_WRITE);
515 if (status < 0) { 516 if (status < 0) {
516 mlog_errno(status); 517 mlog_errno(status);
@@ -619,7 +620,8 @@ int ocfs2_finish_quota_recovery(struct ocfs2_super *osb,
619 mlog_errno(status); 620 mlog_errno(status);
620 goto out_bh; 621 goto out_bh;
621 } 622 }
622 status = ocfs2_journal_access_dq(handle, lqinode, bh, 623 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
624 bh,
623 OCFS2_JOURNAL_ACCESS_WRITE); 625 OCFS2_JOURNAL_ACCESS_WRITE);
624 if (status < 0) { 626 if (status < 0) {
625 mlog_errno(status); 627 mlog_errno(status);
@@ -993,8 +995,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
993 goto out_trans; 995 goto out_trans;
994 } 996 }
995 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data; 997 dchunk = (struct ocfs2_local_disk_chunk *)bh->b_data;
996 ocfs2_set_new_buffer_uptodate(lqinode, bh); 998 ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
997 status = ocfs2_journal_access_dq(handle, lqinode, bh, 999 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
998 OCFS2_JOURNAL_ACCESS_CREATE); 1000 OCFS2_JOURNAL_ACCESS_CREATE);
999 if (status < 0) { 1001 if (status < 0) {
1000 mlog_errno(status); 1002 mlog_errno(status);
@@ -1027,8 +1029,8 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
1027 mlog_errno(status); 1029 mlog_errno(status);
1028 goto out_trans; 1030 goto out_trans;
1029 } 1031 }
1030 ocfs2_set_new_buffer_uptodate(lqinode, dbh); 1032 ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), dbh);
1031 status = ocfs2_journal_access_dq(handle, lqinode, dbh, 1033 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), dbh,
1032 OCFS2_JOURNAL_ACCESS_CREATE); 1034 OCFS2_JOURNAL_ACCESS_CREATE);
1033 if (status < 0) { 1035 if (status < 0) {
1034 mlog_errno(status); 1036 mlog_errno(status);
@@ -1131,7 +1133,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1131 mlog_errno(status); 1133 mlog_errno(status);
1132 goto out; 1134 goto out;
1133 } 1135 }
1134 ocfs2_set_new_buffer_uptodate(lqinode, bh); 1136 ocfs2_set_new_buffer_uptodate(INODE_CACHE(lqinode), bh);
1135 1137
1136 /* Local quota info, chunk header and the new block we initialize */ 1138 /* Local quota info, chunk header and the new block we initialize */
1137 handle = ocfs2_start_trans(OCFS2_SB(sb), 1139 handle = ocfs2_start_trans(OCFS2_SB(sb),
@@ -1143,7 +1145,7 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1143 goto out; 1145 goto out;
1144 } 1146 }
1145 /* Zero created block */ 1147 /* Zero created block */
1146 status = ocfs2_journal_access_dq(handle, lqinode, bh, 1148 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode), bh,
1147 OCFS2_JOURNAL_ACCESS_CREATE); 1149 OCFS2_JOURNAL_ACCESS_CREATE);
1148 if (status < 0) { 1150 if (status < 0) {
1149 mlog_errno(status); 1151 mlog_errno(status);
@@ -1158,7 +1160,8 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
1158 goto out_trans; 1160 goto out_trans;
1159 } 1161 }
1160 /* Update chunk header */ 1162 /* Update chunk header */
1161 status = ocfs2_journal_access_dq(handle, lqinode, chunk->qc_headerbh, 1163 status = ocfs2_journal_access_dq(handle, INODE_CACHE(lqinode),
1164 chunk->qc_headerbh,
1162 OCFS2_JOURNAL_ACCESS_WRITE); 1165 OCFS2_JOURNAL_ACCESS_WRITE);
1163 if (status < 0) { 1166 if (status < 0) {
1164 mlog_errno(status); 1167 mlog_errno(status);
@@ -1292,7 +1295,8 @@ static int ocfs2_local_release_dquot(struct dquot *dquot)
1292 goto out; 1295 goto out;
1293 } 1296 }
1294 1297
1295 status = ocfs2_journal_access_dq(handle, sb_dqopt(sb)->files[type], 1298 status = ocfs2_journal_access_dq(handle,
1299 INODE_CACHE(sb_dqopt(sb)->files[type]),
1296 od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE); 1300 od->dq_chunk->qc_headerbh, OCFS2_JOURNAL_ACCESS_WRITE);
1297 if (status < 0) { 1301 if (status < 0) {
1298 mlog_errno(status); 1302 mlog_errno(status);
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
new file mode 100644
index 000000000000..60287fc56bcb
--- /dev/null
+++ b/fs/ocfs2/refcounttree.c
@@ -0,0 +1,4313 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * refcounttree.c
5 *
6 * Copyright (C) 2009 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17
18#include <linux/sort.h>
19#define MLOG_MASK_PREFIX ML_REFCOUNT
20#include <cluster/masklog.h>
21#include "ocfs2.h"
22#include "inode.h"
23#include "alloc.h"
24#include "suballoc.h"
25#include "journal.h"
26#include "uptodate.h"
27#include "super.h"
28#include "buffer_head_io.h"
29#include "blockcheck.h"
30#include "refcounttree.h"
31#include "sysfile.h"
32#include "dlmglue.h"
33#include "extent_map.h"
34#include "aops.h"
35#include "xattr.h"
36#include "namei.h"
37
38#include <linux/bio.h>
39#include <linux/blkdev.h>
40#include <linux/gfp.h>
41#include <linux/slab.h>
42#include <linux/writeback.h>
43#include <linux/pagevec.h>
44#include <linux/swap.h>
45#include <linux/security.h>
46#include <linux/fsnotify.h>
47#include <linux/quotaops.h>
48#include <linux/namei.h>
49#include <linux/mount.h>
50
51struct ocfs2_cow_context {
52 struct inode *inode;
53 u32 cow_start;
54 u32 cow_len;
55 struct ocfs2_extent_tree data_et;
56 struct ocfs2_refcount_tree *ref_tree;
57 struct buffer_head *ref_root_bh;
58 struct ocfs2_alloc_context *meta_ac;
59 struct ocfs2_alloc_context *data_ac;
60 struct ocfs2_cached_dealloc_ctxt dealloc;
61 void *cow_object;
62 struct ocfs2_post_refcount *post_refcount;
63 int extra_credits;
64 int (*get_clusters)(struct ocfs2_cow_context *context,
65 u32 v_cluster, u32 *p_cluster,
66 u32 *num_clusters,
67 unsigned int *extent_flags);
68 int (*cow_duplicate_clusters)(handle_t *handle,
69 struct ocfs2_cow_context *context,
70 u32 cpos, u32 old_cluster,
71 u32 new_cluster, u32 new_len);
72};
73
74static inline struct ocfs2_refcount_tree *
75cache_info_to_refcount(struct ocfs2_caching_info *ci)
76{
77 return container_of(ci, struct ocfs2_refcount_tree, rf_ci);
78}
79
80static int ocfs2_validate_refcount_block(struct super_block *sb,
81 struct buffer_head *bh)
82{
83 int rc;
84 struct ocfs2_refcount_block *rb =
85 (struct ocfs2_refcount_block *)bh->b_data;
86
87 mlog(0, "Validating refcount block %llu\n",
88 (unsigned long long)bh->b_blocknr);
89
90 BUG_ON(!buffer_uptodate(bh));
91
92 /*
93 * If the ecc fails, we return the error but otherwise
94 * leave the filesystem running. We know any error is
95 * local to this block.
96 */
97 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &rb->rf_check);
98 if (rc) {
99 mlog(ML_ERROR, "Checksum failed for refcount block %llu\n",
100 (unsigned long long)bh->b_blocknr);
101 return rc;
102 }
103
104
105 if (!OCFS2_IS_VALID_REFCOUNT_BLOCK(rb)) {
106 ocfs2_error(sb,
107 "Refcount block #%llu has bad signature %.*s",
108 (unsigned long long)bh->b_blocknr, 7,
109 rb->rf_signature);
110 return -EINVAL;
111 }
112
113 if (le64_to_cpu(rb->rf_blkno) != bh->b_blocknr) {
114 ocfs2_error(sb,
115 "Refcount block #%llu has an invalid rf_blkno "
116 "of %llu",
117 (unsigned long long)bh->b_blocknr,
118 (unsigned long long)le64_to_cpu(rb->rf_blkno));
119 return -EINVAL;
120 }
121
122 if (le32_to_cpu(rb->rf_fs_generation) != OCFS2_SB(sb)->fs_generation) {
123 ocfs2_error(sb,
124 "Refcount block #%llu has an invalid "
125 "rf_fs_generation of #%u",
126 (unsigned long long)bh->b_blocknr,
127 le32_to_cpu(rb->rf_fs_generation));
128 return -EINVAL;
129 }
130
131 return 0;
132}
133
134static int ocfs2_read_refcount_block(struct ocfs2_caching_info *ci,
135 u64 rb_blkno,
136 struct buffer_head **bh)
137{
138 int rc;
139 struct buffer_head *tmp = *bh;
140
141 rc = ocfs2_read_block(ci, rb_blkno, &tmp,
142 ocfs2_validate_refcount_block);
143
144 /* If ocfs2_read_block() got us a new bh, pass it up. */
145 if (!rc && !*bh)
146 *bh = tmp;
147
148 return rc;
149}
150
151static u64 ocfs2_refcount_cache_owner(struct ocfs2_caching_info *ci)
152{
153 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
154
155 return rf->rf_blkno;
156}
157
158static struct super_block *
159ocfs2_refcount_cache_get_super(struct ocfs2_caching_info *ci)
160{
161 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
162
163 return rf->rf_sb;
164}
165
166static void ocfs2_refcount_cache_lock(struct ocfs2_caching_info *ci)
167{
168 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
169
170 spin_lock(&rf->rf_lock);
171}
172
173static void ocfs2_refcount_cache_unlock(struct ocfs2_caching_info *ci)
174{
175 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
176
177 spin_unlock(&rf->rf_lock);
178}
179
180static void ocfs2_refcount_cache_io_lock(struct ocfs2_caching_info *ci)
181{
182 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
183
184 mutex_lock(&rf->rf_io_mutex);
185}
186
187static void ocfs2_refcount_cache_io_unlock(struct ocfs2_caching_info *ci)
188{
189 struct ocfs2_refcount_tree *rf = cache_info_to_refcount(ci);
190
191 mutex_unlock(&rf->rf_io_mutex);
192}
193
194static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
195 .co_owner = ocfs2_refcount_cache_owner,
196 .co_get_super = ocfs2_refcount_cache_get_super,
197 .co_cache_lock = ocfs2_refcount_cache_lock,
198 .co_cache_unlock = ocfs2_refcount_cache_unlock,
199 .co_io_lock = ocfs2_refcount_cache_io_lock,
200 .co_io_unlock = ocfs2_refcount_cache_io_unlock,
201};
202
203static struct ocfs2_refcount_tree *
204ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
205{
206 struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
207 struct ocfs2_refcount_tree *tree = NULL;
208
209 while (n) {
210 tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
211
212 if (blkno < tree->rf_blkno)
213 n = n->rb_left;
214 else if (blkno > tree->rf_blkno)
215 n = n->rb_right;
216 else
217 return tree;
218 }
219
220 return NULL;
221}
222
223/* osb_lock is already locked. */
224static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
225 struct ocfs2_refcount_tree *new)
226{
227 u64 rf_blkno = new->rf_blkno;
228 struct rb_node *parent = NULL;
229 struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
230 struct ocfs2_refcount_tree *tmp;
231
232 while (*p) {
233 parent = *p;
234
235 tmp = rb_entry(parent, struct ocfs2_refcount_tree,
236 rf_node);
237
238 if (rf_blkno < tmp->rf_blkno)
239 p = &(*p)->rb_left;
240 else if (rf_blkno > tmp->rf_blkno)
241 p = &(*p)->rb_right;
242 else {
243 /* This should never happen! */
244 mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
245 (unsigned long long)rf_blkno);
246 BUG();
247 }
248 }
249
250 rb_link_node(&new->rf_node, parent, p);
251 rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
252}
253
254static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
255{
256 ocfs2_metadata_cache_exit(&tree->rf_ci);
257 ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
258 ocfs2_lock_res_free(&tree->rf_lockres);
259 kfree(tree);
260}
261
262static inline void
263ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
264 struct ocfs2_refcount_tree *tree)
265{
266 rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
267 if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
268 osb->osb_ref_tree_lru = NULL;
269}
270
271static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
272 struct ocfs2_refcount_tree *tree)
273{
274 spin_lock(&osb->osb_lock);
275 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
276 spin_unlock(&osb->osb_lock);
277}
278
279void ocfs2_kref_remove_refcount_tree(struct kref *kref)
280{
281 struct ocfs2_refcount_tree *tree =
282 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
283
284 ocfs2_free_refcount_tree(tree);
285}
286
287static inline void
288ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
289{
290 kref_get(&tree->rf_getcnt);
291}
292
293static inline void
294ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
295{
296 kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
297}
298
299static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
300 struct super_block *sb)
301{
302 ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
303 mutex_init(&new->rf_io_mutex);
304 new->rf_sb = sb;
305 spin_lock_init(&new->rf_lock);
306}
307
308static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
309 struct ocfs2_refcount_tree *new,
310 u64 rf_blkno, u32 generation)
311{
312 init_rwsem(&new->rf_sem);
313 ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
314 rf_blkno, generation);
315}
316
317static struct ocfs2_refcount_tree*
318ocfs2_allocate_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno)
319{
320 struct ocfs2_refcount_tree *new;
321
322 new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
323 if (!new)
324 return NULL;
325
326 new->rf_blkno = rf_blkno;
327 kref_init(&new->rf_getcnt);
328 ocfs2_init_refcount_tree_ci(new, osb->sb);
329
330 return new;
331}
332
333static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
334 struct ocfs2_refcount_tree **ret_tree)
335{
336 int ret = 0;
337 struct ocfs2_refcount_tree *tree, *new = NULL;
338 struct buffer_head *ref_root_bh = NULL;
339 struct ocfs2_refcount_block *ref_rb;
340
341 spin_lock(&osb->osb_lock);
342 if (osb->osb_ref_tree_lru &&
343 osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
344 tree = osb->osb_ref_tree_lru;
345 else
346 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
347 if (tree)
348 goto out;
349
350 spin_unlock(&osb->osb_lock);
351
352 new = ocfs2_allocate_refcount_tree(osb, rf_blkno);
353 if (!new) {
354 ret = -ENOMEM;
355 mlog_errno(ret);
356 return ret;
357 }
358 /*
359 * We need the generation to create the refcount tree lock and since
360 * it isn't changed during the tree modification, we are safe here to
361 * read without protection.
362 * We also have to purge the cache after we create the lock since the
363 * refcount block may have the stale data. It can only be trusted when
364 * we hold the refcount lock.
365 */
366 ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
367 if (ret) {
368 mlog_errno(ret);
369 ocfs2_metadata_cache_exit(&new->rf_ci);
370 kfree(new);
371 return ret;
372 }
373
374 ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
375 new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
376 ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
377 new->rf_generation);
378 ocfs2_metadata_cache_purge(&new->rf_ci);
379
380 spin_lock(&osb->osb_lock);
381 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
382 if (tree)
383 goto out;
384
385 ocfs2_insert_refcount_tree(osb, new);
386
387 tree = new;
388 new = NULL;
389
390out:
391 *ret_tree = tree;
392
393 osb->osb_ref_tree_lru = tree;
394
395 spin_unlock(&osb->osb_lock);
396
397 if (new)
398 ocfs2_free_refcount_tree(new);
399
400 brelse(ref_root_bh);
401 return ret;
402}
403
404static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
405{
406 int ret;
407 struct buffer_head *di_bh = NULL;
408 struct ocfs2_dinode *di;
409
410 ret = ocfs2_read_inode_block(inode, &di_bh);
411 if (ret) {
412 mlog_errno(ret);
413 goto out;
414 }
415
416 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
417
418 di = (struct ocfs2_dinode *)di_bh->b_data;
419 *ref_blkno = le64_to_cpu(di->i_refcount_loc);
420 brelse(di_bh);
421out:
422 return ret;
423}
424
425static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
426 struct ocfs2_refcount_tree *tree, int rw)
427{
428 int ret;
429
430 ret = ocfs2_refcount_lock(tree, rw);
431 if (ret) {
432 mlog_errno(ret);
433 goto out;
434 }
435
436 if (rw)
437 down_write(&tree->rf_sem);
438 else
439 down_read(&tree->rf_sem);
440
441out:
442 return ret;
443}
444
445/*
446 * Lock the refcount tree pointed by ref_blkno and return the tree.
447 * In most case, we lock the tree and read the refcount block.
448 * So read it here if the caller really needs it.
449 *
450 * If the tree has been re-created by other node, it will free the
451 * old one and re-create it.
452 */
453int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
454 u64 ref_blkno, int rw,
455 struct ocfs2_refcount_tree **ret_tree,
456 struct buffer_head **ref_bh)
457{
458 int ret, delete_tree = 0;
459 struct ocfs2_refcount_tree *tree = NULL;
460 struct buffer_head *ref_root_bh = NULL;
461 struct ocfs2_refcount_block *rb;
462
463again:
464 ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
465 if (ret) {
466 mlog_errno(ret);
467 return ret;
468 }
469
470 ocfs2_refcount_tree_get(tree);
471
472 ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
473 if (ret) {
474 mlog_errno(ret);
475 ocfs2_refcount_tree_put(tree);
476 goto out;
477 }
478
479 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
480 &ref_root_bh);
481 if (ret) {
482 mlog_errno(ret);
483 ocfs2_unlock_refcount_tree(osb, tree, rw);
484 ocfs2_refcount_tree_put(tree);
485 goto out;
486 }
487
488 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
489 /*
490 * If the refcount block has been freed and re-created, we may need
491 * to recreate the refcount tree also.
492 *
493 * Here we just remove the tree from the rb-tree, and the last
494 * kref holder will unlock and delete this refcount_tree.
495 * Then we goto "again" and ocfs2_get_refcount_tree will create
496 * the new refcount tree for us.
497 */
498 if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
499 if (!tree->rf_removed) {
500 ocfs2_erase_refcount_tree_from_list(osb, tree);
501 tree->rf_removed = 1;
502 delete_tree = 1;
503 }
504
505 ocfs2_unlock_refcount_tree(osb, tree, rw);
506 /*
507 * We get an extra reference when we create the refcount
508 * tree, so another put will destroy it.
509 */
510 if (delete_tree)
511 ocfs2_refcount_tree_put(tree);
512 brelse(ref_root_bh);
513 ref_root_bh = NULL;
514 goto again;
515 }
516
517 *ret_tree = tree;
518 if (ref_bh) {
519 *ref_bh = ref_root_bh;
520 ref_root_bh = NULL;
521 }
522out:
523 brelse(ref_root_bh);
524 return ret;
525}
526
527int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
528 struct ocfs2_refcount_tree **ret_tree,
529 struct buffer_head **ref_bh)
530{
531 int ret;
532 u64 ref_blkno;
533
534 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
535 if (ret) {
536 mlog_errno(ret);
537 return ret;
538 }
539
540 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
541 rw, ret_tree, ref_bh);
542}
543
544void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
545 struct ocfs2_refcount_tree *tree, int rw)
546{
547 if (rw)
548 up_write(&tree->rf_sem);
549 else
550 up_read(&tree->rf_sem);
551
552 ocfs2_refcount_unlock(tree, rw);
553 ocfs2_refcount_tree_put(tree);
554}
555
556void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
557{
558 struct rb_node *node;
559 struct ocfs2_refcount_tree *tree;
560 struct rb_root *root = &osb->osb_rf_lock_tree;
561
562 while ((node = rb_last(root)) != NULL) {
563 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
564
565 mlog(0, "Purge tree %llu\n",
566 (unsigned long long) tree->rf_blkno);
567
568 rb_erase(&tree->rf_node, root);
569 ocfs2_free_refcount_tree(tree);
570 }
571}
572
573/*
574 * Create a refcount tree for an inode.
575 * We take for granted that the inode is already locked.
576 */
577static int ocfs2_create_refcount_tree(struct inode *inode,
578 struct buffer_head *di_bh)
579{
580 int ret;
581 handle_t *handle = NULL;
582 struct ocfs2_alloc_context *meta_ac = NULL;
583 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
584 struct ocfs2_inode_info *oi = OCFS2_I(inode);
585 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
586 struct buffer_head *new_bh = NULL;
587 struct ocfs2_refcount_block *rb;
588 struct ocfs2_refcount_tree *new_tree = NULL, *tree = NULL;
589 u16 suballoc_bit_start;
590 u32 num_got;
591 u64 first_blkno;
592
593 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
594
595 mlog(0, "create tree for inode %lu\n", inode->i_ino);
596
597 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
598 if (ret) {
599 mlog_errno(ret);
600 goto out;
601 }
602
603 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_CREATE_CREDITS);
604 if (IS_ERR(handle)) {
605 ret = PTR_ERR(handle);
606 mlog_errno(ret);
607 goto out;
608 }
609
610 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
611 OCFS2_JOURNAL_ACCESS_WRITE);
612 if (ret) {
613 mlog_errno(ret);
614 goto out_commit;
615 }
616
617 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
618 &suballoc_bit_start, &num_got,
619 &first_blkno);
620 if (ret) {
621 mlog_errno(ret);
622 goto out_commit;
623 }
624
625 new_tree = ocfs2_allocate_refcount_tree(osb, first_blkno);
626 if (!new_tree) {
627 ret = -ENOMEM;
628 mlog_errno(ret);
629 goto out_commit;
630 }
631
632 new_bh = sb_getblk(inode->i_sb, first_blkno);
633 ocfs2_set_new_buffer_uptodate(&new_tree->rf_ci, new_bh);
634
635 ret = ocfs2_journal_access_rb(handle, &new_tree->rf_ci, new_bh,
636 OCFS2_JOURNAL_ACCESS_CREATE);
637 if (ret) {
638 mlog_errno(ret);
639 goto out_commit;
640 }
641
642 /* Initialize ocfs2_refcount_block. */
643 rb = (struct ocfs2_refcount_block *)new_bh->b_data;
644 memset(rb, 0, inode->i_sb->s_blocksize);
645 strcpy((void *)rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
646 rb->rf_suballoc_slot = cpu_to_le16(osb->slot_num);
647 rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
648 rb->rf_fs_generation = cpu_to_le32(osb->fs_generation);
649 rb->rf_blkno = cpu_to_le64(first_blkno);
650 rb->rf_count = cpu_to_le32(1);
651 rb->rf_records.rl_count =
652 cpu_to_le16(ocfs2_refcount_recs_per_rb(osb->sb));
653 spin_lock(&osb->osb_lock);
654 rb->rf_generation = osb->s_next_generation++;
655 spin_unlock(&osb->osb_lock);
656
657 ocfs2_journal_dirty(handle, new_bh);
658
659 spin_lock(&oi->ip_lock);
660 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
661 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
662 di->i_refcount_loc = cpu_to_le64(first_blkno);
663 spin_unlock(&oi->ip_lock);
664
665 mlog(0, "created tree for inode %lu, refblock %llu\n",
666 inode->i_ino, (unsigned long long)first_blkno);
667
668 ocfs2_journal_dirty(handle, di_bh);
669
670 /*
671 * We have to init the tree lock here since it will use
672 * the generation number to create it.
673 */
674 new_tree->rf_generation = le32_to_cpu(rb->rf_generation);
675 ocfs2_init_refcount_tree_lock(osb, new_tree, first_blkno,
676 new_tree->rf_generation);
677
678 spin_lock(&osb->osb_lock);
679 tree = ocfs2_find_refcount_tree(osb, first_blkno);
680
681 /*
682 * We've just created a new refcount tree in this block. If
683 * we found a refcount tree on the ocfs2_super, it must be
684 * one we just deleted. We free the old tree before
685 * inserting the new tree.
686 */
687 BUG_ON(tree && tree->rf_generation == new_tree->rf_generation);
688 if (tree)
689 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
690 ocfs2_insert_refcount_tree(osb, new_tree);
691 spin_unlock(&osb->osb_lock);
692 new_tree = NULL;
693 if (tree)
694 ocfs2_refcount_tree_put(tree);
695
696out_commit:
697 ocfs2_commit_trans(osb, handle);
698
699out:
700 if (new_tree) {
701 ocfs2_metadata_cache_exit(&new_tree->rf_ci);
702 kfree(new_tree);
703 }
704
705 brelse(new_bh);
706 if (meta_ac)
707 ocfs2_free_alloc_context(meta_ac);
708
709 return ret;
710}
711
712static int ocfs2_set_refcount_tree(struct inode *inode,
713 struct buffer_head *di_bh,
714 u64 refcount_loc)
715{
716 int ret;
717 handle_t *handle = NULL;
718 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
719 struct ocfs2_inode_info *oi = OCFS2_I(inode);
720 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
721 struct buffer_head *ref_root_bh = NULL;
722 struct ocfs2_refcount_block *rb;
723 struct ocfs2_refcount_tree *ref_tree;
724
725 BUG_ON(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL);
726
727 ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
728 &ref_tree, &ref_root_bh);
729 if (ret) {
730 mlog_errno(ret);
731 return ret;
732 }
733
734 handle = ocfs2_start_trans(osb, OCFS2_REFCOUNT_TREE_SET_CREDITS);
735 if (IS_ERR(handle)) {
736 ret = PTR_ERR(handle);
737 mlog_errno(ret);
738 goto out;
739 }
740
741 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
742 OCFS2_JOURNAL_ACCESS_WRITE);
743 if (ret) {
744 mlog_errno(ret);
745 goto out_commit;
746 }
747
748 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, ref_root_bh,
749 OCFS2_JOURNAL_ACCESS_WRITE);
750 if (ret) {
751 mlog_errno(ret);
752 goto out_commit;
753 }
754
755 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
756 le32_add_cpu(&rb->rf_count, 1);
757
758 ocfs2_journal_dirty(handle, ref_root_bh);
759
760 spin_lock(&oi->ip_lock);
761 oi->ip_dyn_features |= OCFS2_HAS_REFCOUNT_FL;
762 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
763 di->i_refcount_loc = cpu_to_le64(refcount_loc);
764 spin_unlock(&oi->ip_lock);
765 ocfs2_journal_dirty(handle, di_bh);
766
767out_commit:
768 ocfs2_commit_trans(osb, handle);
769out:
770 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
771 brelse(ref_root_bh);
772
773 return ret;
774}
775
776int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh)
777{
778 int ret, delete_tree = 0;
779 handle_t *handle = NULL;
780 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
781 struct ocfs2_inode_info *oi = OCFS2_I(inode);
782 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
783 struct ocfs2_refcount_block *rb;
784 struct inode *alloc_inode = NULL;
785 struct buffer_head *alloc_bh = NULL;
786 struct buffer_head *blk_bh = NULL;
787 struct ocfs2_refcount_tree *ref_tree;
788 int credits = OCFS2_REFCOUNT_TREE_REMOVE_CREDITS;
789 u64 blk = 0, bg_blkno = 0, ref_blkno = le64_to_cpu(di->i_refcount_loc);
790 u16 bit = 0;
791
792 if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL))
793 return 0;
794
795 BUG_ON(!ref_blkno);
796 ret = ocfs2_lock_refcount_tree(osb, ref_blkno, 1, &ref_tree, &blk_bh);
797 if (ret) {
798 mlog_errno(ret);
799 return ret;
800 }
801
802 rb = (struct ocfs2_refcount_block *)blk_bh->b_data;
803
804 /*
805 * If we are the last user, we need to free the block.
806 * So lock the allocator ahead.
807 */
808 if (le32_to_cpu(rb->rf_count) == 1) {
809 blk = le64_to_cpu(rb->rf_blkno);
810 bit = le16_to_cpu(rb->rf_suballoc_bit);
811 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
812
813 alloc_inode = ocfs2_get_system_file_inode(osb,
814 EXTENT_ALLOC_SYSTEM_INODE,
815 le16_to_cpu(rb->rf_suballoc_slot));
816 if (!alloc_inode) {
817 ret = -ENOMEM;
818 mlog_errno(ret);
819 goto out;
820 }
821 mutex_lock(&alloc_inode->i_mutex);
822
823 ret = ocfs2_inode_lock(alloc_inode, &alloc_bh, 1);
824 if (ret) {
825 mlog_errno(ret);
826 goto out_mutex;
827 }
828
829 credits += OCFS2_SUBALLOC_FREE;
830 }
831
832 handle = ocfs2_start_trans(osb, credits);
833 if (IS_ERR(handle)) {
834 ret = PTR_ERR(handle);
835 mlog_errno(ret);
836 goto out_unlock;
837 }
838
839 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
840 OCFS2_JOURNAL_ACCESS_WRITE);
841 if (ret) {
842 mlog_errno(ret);
843 goto out_commit;
844 }
845
846 ret = ocfs2_journal_access_rb(handle, &ref_tree->rf_ci, blk_bh,
847 OCFS2_JOURNAL_ACCESS_WRITE);
848 if (ret) {
849 mlog_errno(ret);
850 goto out_commit;
851 }
852
853 spin_lock(&oi->ip_lock);
854 oi->ip_dyn_features &= ~OCFS2_HAS_REFCOUNT_FL;
855 di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
856 di->i_refcount_loc = 0;
857 spin_unlock(&oi->ip_lock);
858 ocfs2_journal_dirty(handle, di_bh);
859
860 le32_add_cpu(&rb->rf_count , -1);
861 ocfs2_journal_dirty(handle, blk_bh);
862
863 if (!rb->rf_count) {
864 delete_tree = 1;
865 ocfs2_erase_refcount_tree_from_list(osb, ref_tree);
866 ret = ocfs2_free_suballoc_bits(handle, alloc_inode,
867 alloc_bh, bit, bg_blkno, 1);
868 if (ret)
869 mlog_errno(ret);
870 }
871
872out_commit:
873 ocfs2_commit_trans(osb, handle);
874out_unlock:
875 if (alloc_inode) {
876 ocfs2_inode_unlock(alloc_inode, 1);
877 brelse(alloc_bh);
878 }
879out_mutex:
880 if (alloc_inode) {
881 mutex_unlock(&alloc_inode->i_mutex);
882 iput(alloc_inode);
883 }
884out:
885 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
886 if (delete_tree)
887 ocfs2_refcount_tree_put(ref_tree);
888 brelse(blk_bh);
889
890 return ret;
891}
892
893static void ocfs2_find_refcount_rec_in_rl(struct ocfs2_caching_info *ci,
894 struct buffer_head *ref_leaf_bh,
895 u64 cpos, unsigned int len,
896 struct ocfs2_refcount_rec *ret_rec,
897 int *index)
898{
899 int i = 0;
900 struct ocfs2_refcount_block *rb =
901 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
902 struct ocfs2_refcount_rec *rec = NULL;
903
904 for (; i < le16_to_cpu(rb->rf_records.rl_used); i++) {
905 rec = &rb->rf_records.rl_recs[i];
906
907 if (le64_to_cpu(rec->r_cpos) +
908 le32_to_cpu(rec->r_clusters) <= cpos)
909 continue;
910 else if (le64_to_cpu(rec->r_cpos) > cpos)
911 break;
912
913 /* ok, cpos fail in this rec. Just return. */
914 if (ret_rec)
915 *ret_rec = *rec;
916 goto out;
917 }
918
919 if (ret_rec) {
920 /* We meet with a hole here, so fake the rec. */
921 ret_rec->r_cpos = cpu_to_le64(cpos);
922 ret_rec->r_refcount = 0;
923 if (i < le16_to_cpu(rb->rf_records.rl_used) &&
924 le64_to_cpu(rec->r_cpos) < cpos + len)
925 ret_rec->r_clusters =
926 cpu_to_le32(le64_to_cpu(rec->r_cpos) - cpos);
927 else
928 ret_rec->r_clusters = cpu_to_le32(len);
929 }
930
931out:
932 *index = i;
933}
934
935/*
936 * Try to remove refcount tree. The mechanism is:
937 * 1) Check whether i_clusters == 0, if no, exit.
938 * 2) check whether we have i_xattr_loc in dinode. if yes, exit.
939 * 3) Check whether we have inline xattr stored outside, if yes, exit.
940 * 4) Remove the tree.
941 */
942int ocfs2_try_remove_refcount_tree(struct inode *inode,
943 struct buffer_head *di_bh)
944{
945 int ret;
946 struct ocfs2_inode_info *oi = OCFS2_I(inode);
947 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
948
949 down_write(&oi->ip_xattr_sem);
950 down_write(&oi->ip_alloc_sem);
951
952 if (oi->ip_clusters)
953 goto out;
954
955 if ((oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) && di->i_xattr_loc)
956 goto out;
957
958 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL &&
959 ocfs2_has_inline_xattr_value_outside(inode, di))
960 goto out;
961
962 ret = ocfs2_remove_refcount_tree(inode, di_bh);
963 if (ret)
964 mlog_errno(ret);
965out:
966 up_write(&oi->ip_alloc_sem);
967 up_write(&oi->ip_xattr_sem);
968 return 0;
969}
970
971/*
972 * Given a cpos and len, try to find the refcount record which contains cpos.
973 * 1. If cpos can be found in one refcount record, return the record.
974 * 2. If cpos can't be found, return a fake record which start from cpos
975 * and end at a small value between cpos+len and start of the next record.
976 * This fake record has r_refcount = 0.
977 */
978static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
979 struct buffer_head *ref_root_bh,
980 u64 cpos, unsigned int len,
981 struct ocfs2_refcount_rec *ret_rec,
982 int *index,
983 struct buffer_head **ret_bh)
984{
985 int ret = 0, i, found;
986 u32 low_cpos;
987 struct ocfs2_extent_list *el;
988 struct ocfs2_extent_rec *tmp, *rec = NULL;
989 struct ocfs2_extent_block *eb;
990 struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
991 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
992 struct ocfs2_refcount_block *rb =
993 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
994
995 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)) {
996 ocfs2_find_refcount_rec_in_rl(ci, ref_root_bh, cpos, len,
997 ret_rec, index);
998 *ret_bh = ref_root_bh;
999 get_bh(ref_root_bh);
1000 return 0;
1001 }
1002
1003 el = &rb->rf_list;
1004 low_cpos = cpos & OCFS2_32BIT_POS_MASK;
1005
1006 if (el->l_tree_depth) {
1007 ret = ocfs2_find_leaf(ci, el, low_cpos, &eb_bh);
1008 if (ret) {
1009 mlog_errno(ret);
1010 goto out;
1011 }
1012
1013 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
1014 el = &eb->h_list;
1015
1016 if (el->l_tree_depth) {
1017 ocfs2_error(sb,
1018 "refcount tree %llu has non zero tree "
1019 "depth in leaf btree tree block %llu\n",
1020 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1021 (unsigned long long)eb_bh->b_blocknr);
1022 ret = -EROFS;
1023 goto out;
1024 }
1025 }
1026
1027 found = 0;
1028 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1029 rec = &el->l_recs[i];
1030
1031 if (le32_to_cpu(rec->e_cpos) <= low_cpos) {
1032 found = 1;
1033 break;
1034 }
1035 }
1036
1037 /* adjust len when we have ocfs2_extent_rec after it. */
1038 if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
1039 tmp = &el->l_recs[i+1];
1040
1041 if (le32_to_cpu(tmp->e_cpos) < cpos + len)
1042 len = le32_to_cpu(tmp->e_cpos) - cpos;
1043 }
1044
1045 ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
1046 &ref_leaf_bh);
1047 if (ret) {
1048 mlog_errno(ret);
1049 goto out;
1050 }
1051
1052 ocfs2_find_refcount_rec_in_rl(ci, ref_leaf_bh, cpos, len,
1053 ret_rec, index);
1054 *ret_bh = ref_leaf_bh;
1055out:
1056 brelse(eb_bh);
1057 return ret;
1058}
1059
/*
 * How a refcount record relates to its neighbours in the record list:
 * mergeable with none, the left one, the right one, or both.
 */
enum ocfs2_ref_rec_contig {
	REF_CONTIG_NONE		= 0,
	REF_CONTIG_LEFT		= 1,
	REF_CONTIG_RIGHT	= 2,
	REF_CONTIG_LEFTRIGHT	= 3,
};
1066
1067static enum ocfs2_ref_rec_contig
1068 ocfs2_refcount_rec_adjacent(struct ocfs2_refcount_block *rb,
1069 int index)
1070{
1071 if ((rb->rf_records.rl_recs[index].r_refcount ==
1072 rb->rf_records.rl_recs[index + 1].r_refcount) &&
1073 (le64_to_cpu(rb->rf_records.rl_recs[index].r_cpos) +
1074 le32_to_cpu(rb->rf_records.rl_recs[index].r_clusters) ==
1075 le64_to_cpu(rb->rf_records.rl_recs[index + 1].r_cpos)))
1076 return REF_CONTIG_RIGHT;
1077
1078 return REF_CONTIG_NONE;
1079}
1080
1081static enum ocfs2_ref_rec_contig
1082 ocfs2_refcount_rec_contig(struct ocfs2_refcount_block *rb,
1083 int index)
1084{
1085 enum ocfs2_ref_rec_contig ret = REF_CONTIG_NONE;
1086
1087 if (index < le16_to_cpu(rb->rf_records.rl_used) - 1)
1088 ret = ocfs2_refcount_rec_adjacent(rb, index);
1089
1090 if (index > 0) {
1091 enum ocfs2_ref_rec_contig tmp;
1092
1093 tmp = ocfs2_refcount_rec_adjacent(rb, index - 1);
1094
1095 if (tmp == REF_CONTIG_RIGHT) {
1096 if (ret == REF_CONTIG_RIGHT)
1097 ret = REF_CONTIG_LEFTRIGHT;
1098 else
1099 ret = REF_CONTIG_LEFT;
1100 }
1101 }
1102
1103 return ret;
1104}
1105
1106static void ocfs2_rotate_refcount_rec_left(struct ocfs2_refcount_block *rb,
1107 int index)
1108{
1109 BUG_ON(rb->rf_records.rl_recs[index].r_refcount !=
1110 rb->rf_records.rl_recs[index+1].r_refcount);
1111
1112 le32_add_cpu(&rb->rf_records.rl_recs[index].r_clusters,
1113 le32_to_cpu(rb->rf_records.rl_recs[index+1].r_clusters));
1114
1115 if (index < le16_to_cpu(rb->rf_records.rl_used) - 2)
1116 memmove(&rb->rf_records.rl_recs[index + 1],
1117 &rb->rf_records.rl_recs[index + 2],
1118 sizeof(struct ocfs2_refcount_rec) *
1119 (le16_to_cpu(rb->rf_records.rl_used) - index - 2));
1120
1121 memset(&rb->rf_records.rl_recs[le16_to_cpu(rb->rf_records.rl_used) - 1],
1122 0, sizeof(struct ocfs2_refcount_rec));
1123 le16_add_cpu(&rb->rf_records.rl_used, -1);
1124}
1125
1126/*
1127 * Merge the refcount rec if we are contiguous with the adjacent recs.
1128 */
1129static void ocfs2_refcount_rec_merge(struct ocfs2_refcount_block *rb,
1130 int index)
1131{
1132 enum ocfs2_ref_rec_contig contig =
1133 ocfs2_refcount_rec_contig(rb, index);
1134
1135 if (contig == REF_CONTIG_NONE)
1136 return;
1137
1138 if (contig == REF_CONTIG_LEFT || contig == REF_CONTIG_LEFTRIGHT) {
1139 BUG_ON(index == 0);
1140 index--;
1141 }
1142
1143 ocfs2_rotate_refcount_rec_left(rb, index);
1144
1145 if (contig == REF_CONTIG_LEFTRIGHT)
1146 ocfs2_rotate_refcount_rec_left(rb, index);
1147}
1148
1149/*
1150 * Change the refcount indexed by "index" in ref_bh.
1151 * If refcount reaches 0, remove it.
1152 */
1153static int ocfs2_change_refcount_rec(handle_t *handle,
1154 struct ocfs2_caching_info *ci,
1155 struct buffer_head *ref_leaf_bh,
1156 int index, int merge, int change)
1157{
1158 int ret;
1159 struct ocfs2_refcount_block *rb =
1160 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1161 struct ocfs2_refcount_list *rl = &rb->rf_records;
1162 struct ocfs2_refcount_rec *rec = &rl->rl_recs[index];
1163
1164 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1165 OCFS2_JOURNAL_ACCESS_WRITE);
1166 if (ret) {
1167 mlog_errno(ret);
1168 goto out;
1169 }
1170
1171 mlog(0, "change index %d, old count %u, change %d\n", index,
1172 le32_to_cpu(rec->r_refcount), change);
1173 le32_add_cpu(&rec->r_refcount, change);
1174
1175 if (!rec->r_refcount) {
1176 if (index != le16_to_cpu(rl->rl_used) - 1) {
1177 memmove(rec, rec + 1,
1178 (le16_to_cpu(rl->rl_used) - index - 1) *
1179 sizeof(struct ocfs2_refcount_rec));
1180 memset(&rl->rl_recs[le16_to_cpu(rl->rl_used) - 1],
1181 0, sizeof(struct ocfs2_refcount_rec));
1182 }
1183
1184 le16_add_cpu(&rl->rl_used, -1);
1185 } else if (merge)
1186 ocfs2_refcount_rec_merge(rb, index);
1187
1188 ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
1189 if (ret)
1190 mlog_errno(ret);
1191out:
1192 return ret;
1193}
1194
1195static int ocfs2_expand_inline_ref_root(handle_t *handle,
1196 struct ocfs2_caching_info *ci,
1197 struct buffer_head *ref_root_bh,
1198 struct buffer_head **ref_leaf_bh,
1199 struct ocfs2_alloc_context *meta_ac)
1200{
1201 int ret;
1202 u16 suballoc_bit_start;
1203 u32 num_got;
1204 u64 blkno;
1205 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1206 struct buffer_head *new_bh = NULL;
1207 struct ocfs2_refcount_block *new_rb;
1208 struct ocfs2_refcount_block *root_rb =
1209 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
1210
1211 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
1212 OCFS2_JOURNAL_ACCESS_WRITE);
1213 if (ret) {
1214 mlog_errno(ret);
1215 goto out;
1216 }
1217
1218 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
1219 &suballoc_bit_start, &num_got,
1220 &blkno);
1221 if (ret) {
1222 mlog_errno(ret);
1223 goto out;
1224 }
1225
1226 new_bh = sb_getblk(sb, blkno);
1227 if (new_bh == NULL) {
1228 ret = -EIO;
1229 mlog_errno(ret);
1230 goto out;
1231 }
1232 ocfs2_set_new_buffer_uptodate(ci, new_bh);
1233
1234 ret = ocfs2_journal_access_rb(handle, ci, new_bh,
1235 OCFS2_JOURNAL_ACCESS_CREATE);
1236 if (ret) {
1237 mlog_errno(ret);
1238 goto out;
1239 }
1240
1241 /*
1242 * Initialize ocfs2_refcount_block.
1243 * It should contain the same information as the old root.
1244 * so just memcpy it and change the corresponding field.
1245 */
1246 memcpy(new_bh->b_data, ref_root_bh->b_data, sb->s_blocksize);
1247
1248 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
1249 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
1250 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1251 new_rb->rf_blkno = cpu_to_le64(blkno);
1252 new_rb->rf_cpos = cpu_to_le32(0);
1253 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
1254 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
1255 ocfs2_journal_dirty(handle, new_bh);
1256
1257 /* Now change the root. */
1258 memset(&root_rb->rf_list, 0, sb->s_blocksize -
1259 offsetof(struct ocfs2_refcount_block, rf_list));
1260 root_rb->rf_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_rb(sb));
1261 root_rb->rf_clusters = cpu_to_le32(1);
1262 root_rb->rf_list.l_next_free_rec = cpu_to_le16(1);
1263 root_rb->rf_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
1264 root_rb->rf_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
1265 root_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_TREE_FL);
1266
1267 ocfs2_journal_dirty(handle, ref_root_bh);
1268
1269 mlog(0, "new leaf block %llu, used %u\n", (unsigned long long)blkno,
1270 le16_to_cpu(new_rb->rf_records.rl_used));
1271
1272 *ref_leaf_bh = new_bh;
1273 new_bh = NULL;
1274out:
1275 brelse(new_bh);
1276 return ret;
1277}
1278
1279static int ocfs2_refcount_rec_no_intersect(struct ocfs2_refcount_rec *prev,
1280 struct ocfs2_refcount_rec *next)
1281{
1282 if (ocfs2_get_ref_rec_low_cpos(prev) + le32_to_cpu(prev->r_clusters) <=
1283 ocfs2_get_ref_rec_low_cpos(next))
1284 return 1;
1285
1286 return 0;
1287}
1288
1289static int cmp_refcount_rec_by_low_cpos(const void *a, const void *b)
1290{
1291 const struct ocfs2_refcount_rec *l = a, *r = b;
1292 u32 l_cpos = ocfs2_get_ref_rec_low_cpos(l);
1293 u32 r_cpos = ocfs2_get_ref_rec_low_cpos(r);
1294
1295 if (l_cpos > r_cpos)
1296 return 1;
1297 if (l_cpos < r_cpos)
1298 return -1;
1299 return 0;
1300}
1301
1302static int cmp_refcount_rec_by_cpos(const void *a, const void *b)
1303{
1304 const struct ocfs2_refcount_rec *l = a, *r = b;
1305 u64 l_cpos = le64_to_cpu(l->r_cpos);
1306 u64 r_cpos = le64_to_cpu(r->r_cpos);
1307
1308 if (l_cpos > r_cpos)
1309 return 1;
1310 if (l_cpos < r_cpos)
1311 return -1;
1312 return 0;
1313}
1314
1315static void swap_refcount_rec(void *a, void *b, int size)
1316{
1317 struct ocfs2_refcount_rec *l = a, *r = b, tmp;
1318
1319 tmp = *(struct ocfs2_refcount_rec *)l;
1320 *(struct ocfs2_refcount_rec *)l =
1321 *(struct ocfs2_refcount_rec *)r;
1322 *(struct ocfs2_refcount_rec *)r = tmp;
1323}
1324
1325/*
1326 * The refcount cpos are ordered by their 64bit cpos,
1327 * But we will use the low 32 bit to be the e_cpos in the b-tree.
1328 * So we need to make sure that this pos isn't intersected with others.
1329 *
1330 * Note: The refcount block is already sorted by their low 32 bit cpos,
1331 * So just try the middle pos first, and we will exit when we find
1332 * the good position.
1333 */
1334static int ocfs2_find_refcount_split_pos(struct ocfs2_refcount_list *rl,
1335 u32 *split_pos, int *split_index)
1336{
1337 int num_used = le16_to_cpu(rl->rl_used);
1338 int delta, middle = num_used / 2;
1339
1340 for (delta = 0; delta < middle; delta++) {
1341 /* Let's check delta earlier than middle */
1342 if (ocfs2_refcount_rec_no_intersect(
1343 &rl->rl_recs[middle - delta - 1],
1344 &rl->rl_recs[middle - delta])) {
1345 *split_index = middle - delta;
1346 break;
1347 }
1348
1349 /* For even counts, don't walk off the end */
1350 if ((middle + delta + 1) == num_used)
1351 continue;
1352
1353 /* Now try delta past middle */
1354 if (ocfs2_refcount_rec_no_intersect(
1355 &rl->rl_recs[middle + delta],
1356 &rl->rl_recs[middle + delta + 1])) {
1357 *split_index = middle + delta + 1;
1358 break;
1359 }
1360 }
1361
1362 if (delta >= middle)
1363 return -ENOSPC;
1364
1365 *split_pos = ocfs2_get_ref_rec_low_cpos(&rl->rl_recs[*split_index]);
1366 return 0;
1367}
1368
1369static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
1370 struct buffer_head *new_bh,
1371 u32 *split_cpos)
1372{
1373 int split_index = 0, num_moved, ret;
1374 u32 cpos = 0;
1375 struct ocfs2_refcount_block *rb =
1376 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1377 struct ocfs2_refcount_list *rl = &rb->rf_records;
1378 struct ocfs2_refcount_block *new_rb =
1379 (struct ocfs2_refcount_block *)new_bh->b_data;
1380 struct ocfs2_refcount_list *new_rl = &new_rb->rf_records;
1381
1382 mlog(0, "split old leaf refcount block %llu, count = %u, used = %u\n",
1383 (unsigned long long)ref_leaf_bh->b_blocknr,
1384 le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used));
1385
1386 /*
1387 * XXX: Improvement later.
1388 * If we know all the high 32 bit cpos is the same, no need to sort.
1389 *
1390 * In order to make the whole process safe, we do:
1391 * 1. sort the entries by their low 32 bit cpos first so that we can
1392 * find the split cpos easily.
1393 * 2. call ocfs2_insert_extent to insert the new refcount block.
1394 * 3. move the refcount rec to the new block.
1395 * 4. sort the entries by their 64 bit cpos.
1396 * 5. dirty the new_rb and rb.
1397 */
1398 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
1399 sizeof(struct ocfs2_refcount_rec),
1400 cmp_refcount_rec_by_low_cpos, swap_refcount_rec);
1401
1402 ret = ocfs2_find_refcount_split_pos(rl, &cpos, &split_index);
1403 if (ret) {
1404 mlog_errno(ret);
1405 return ret;
1406 }
1407
1408 new_rb->rf_cpos = cpu_to_le32(cpos);
1409
1410 /* move refcount records starting from split_index to the new block. */
1411 num_moved = le16_to_cpu(rl->rl_used) - split_index;
1412 memcpy(new_rl->rl_recs, &rl->rl_recs[split_index],
1413 num_moved * sizeof(struct ocfs2_refcount_rec));
1414
1415 /*ok, remove the entries we just moved over to the other block. */
1416 memset(&rl->rl_recs[split_index], 0,
1417 num_moved * sizeof(struct ocfs2_refcount_rec));
1418
1419 /* change old and new rl_used accordingly. */
1420 le16_add_cpu(&rl->rl_used, -num_moved);
1421 new_rl->rl_used = cpu_to_le32(num_moved);
1422
1423 sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
1424 sizeof(struct ocfs2_refcount_rec),
1425 cmp_refcount_rec_by_cpos, swap_refcount_rec);
1426
1427 sort(&new_rl->rl_recs, le16_to_cpu(new_rl->rl_used),
1428 sizeof(struct ocfs2_refcount_rec),
1429 cmp_refcount_rec_by_cpos, swap_refcount_rec);
1430
1431 *split_cpos = cpos;
1432 return 0;
1433}
1434
1435static int ocfs2_new_leaf_refcount_block(handle_t *handle,
1436 struct ocfs2_caching_info *ci,
1437 struct buffer_head *ref_root_bh,
1438 struct buffer_head *ref_leaf_bh,
1439 struct ocfs2_alloc_context *meta_ac)
1440{
1441 int ret;
1442 u16 suballoc_bit_start;
1443 u32 num_got, new_cpos;
1444 u64 blkno;
1445 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
1446 struct ocfs2_refcount_block *root_rb =
1447 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
1448 struct buffer_head *new_bh = NULL;
1449 struct ocfs2_refcount_block *new_rb;
1450 struct ocfs2_extent_tree ref_et;
1451
1452 BUG_ON(!(le32_to_cpu(root_rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL));
1453
1454 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
1455 OCFS2_JOURNAL_ACCESS_WRITE);
1456 if (ret) {
1457 mlog_errno(ret);
1458 goto out;
1459 }
1460
1461 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1462 OCFS2_JOURNAL_ACCESS_WRITE);
1463 if (ret) {
1464 mlog_errno(ret);
1465 goto out;
1466 }
1467
1468 ret = ocfs2_claim_metadata(OCFS2_SB(sb), handle, meta_ac, 1,
1469 &suballoc_bit_start, &num_got,
1470 &blkno);
1471 if (ret) {
1472 mlog_errno(ret);
1473 goto out;
1474 }
1475
1476 new_bh = sb_getblk(sb, blkno);
1477 if (new_bh == NULL) {
1478 ret = -EIO;
1479 mlog_errno(ret);
1480 goto out;
1481 }
1482 ocfs2_set_new_buffer_uptodate(ci, new_bh);
1483
1484 ret = ocfs2_journal_access_rb(handle, ci, new_bh,
1485 OCFS2_JOURNAL_ACCESS_CREATE);
1486 if (ret) {
1487 mlog_errno(ret);
1488 goto out;
1489 }
1490
1491 /* Initialize ocfs2_refcount_block. */
1492 new_rb = (struct ocfs2_refcount_block *)new_bh->b_data;
1493 memset(new_rb, 0, sb->s_blocksize);
1494 strcpy((void *)new_rb, OCFS2_REFCOUNT_BLOCK_SIGNATURE);
1495 new_rb->rf_suballoc_slot = cpu_to_le16(OCFS2_SB(sb)->slot_num);
1496 new_rb->rf_suballoc_bit = cpu_to_le16(suballoc_bit_start);
1497 new_rb->rf_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
1498 new_rb->rf_blkno = cpu_to_le64(blkno);
1499 new_rb->rf_parent = cpu_to_le64(ref_root_bh->b_blocknr);
1500 new_rb->rf_flags = cpu_to_le32(OCFS2_REFCOUNT_LEAF_FL);
1501 new_rb->rf_records.rl_count =
1502 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
1503 new_rb->rf_generation = root_rb->rf_generation;
1504
1505 ret = ocfs2_divide_leaf_refcount_block(ref_leaf_bh, new_bh, &new_cpos);
1506 if (ret) {
1507 mlog_errno(ret);
1508 goto out;
1509 }
1510
1511 ocfs2_journal_dirty(handle, ref_leaf_bh);
1512 ocfs2_journal_dirty(handle, new_bh);
1513
1514 ocfs2_init_refcount_extent_tree(&ref_et, ci, ref_root_bh);
1515
1516 mlog(0, "insert new leaf block %llu at %u\n",
1517 (unsigned long long)new_bh->b_blocknr, new_cpos);
1518
1519 /* Insert the new leaf block with the specific offset cpos. */
1520 ret = ocfs2_insert_extent(handle, &ref_et, new_cpos, new_bh->b_blocknr,
1521 1, 0, meta_ac);
1522 if (ret)
1523 mlog_errno(ret);
1524
1525out:
1526 brelse(new_bh);
1527 return ret;
1528}
1529
1530static int ocfs2_expand_refcount_tree(handle_t *handle,
1531 struct ocfs2_caching_info *ci,
1532 struct buffer_head *ref_root_bh,
1533 struct buffer_head *ref_leaf_bh,
1534 struct ocfs2_alloc_context *meta_ac)
1535{
1536 int ret;
1537 struct buffer_head *expand_bh = NULL;
1538
1539 if (ref_root_bh == ref_leaf_bh) {
1540 /*
1541 * the old root bh hasn't been expanded to a b-tree,
1542 * so expand it first.
1543 */
1544 ret = ocfs2_expand_inline_ref_root(handle, ci, ref_root_bh,
1545 &expand_bh, meta_ac);
1546 if (ret) {
1547 mlog_errno(ret);
1548 goto out;
1549 }
1550 } else {
1551 expand_bh = ref_leaf_bh;
1552 get_bh(expand_bh);
1553 }
1554
1555
1556 /* Now add a new refcount block into the tree.*/
1557 ret = ocfs2_new_leaf_refcount_block(handle, ci, ref_root_bh,
1558 expand_bh, meta_ac);
1559 if (ret)
1560 mlog_errno(ret);
1561out:
1562 brelse(expand_bh);
1563 return ret;
1564}
1565
1566/*
1567 * Adjust the extent rec in b-tree representing ref_leaf_bh.
1568 *
1569 * Only called when we have inserted a new refcount rec at index 0
1570 * which means ocfs2_extent_rec.e_cpos may need some change.
1571 */
1572static int ocfs2_adjust_refcount_rec(handle_t *handle,
1573 struct ocfs2_caching_info *ci,
1574 struct buffer_head *ref_root_bh,
1575 struct buffer_head *ref_leaf_bh,
1576 struct ocfs2_refcount_rec *rec)
1577{
1578 int ret = 0, i;
1579 u32 new_cpos, old_cpos;
1580 struct ocfs2_path *path = NULL;
1581 struct ocfs2_extent_tree et;
1582 struct ocfs2_refcount_block *rb =
1583 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
1584 struct ocfs2_extent_list *el;
1585
1586 if (!(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL))
1587 goto out;
1588
1589 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1590 old_cpos = le32_to_cpu(rb->rf_cpos);
1591 new_cpos = le64_to_cpu(rec->r_cpos) & OCFS2_32BIT_POS_MASK;
1592 if (old_cpos <= new_cpos)
1593 goto out;
1594
1595 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
1596
1597 path = ocfs2_new_path_from_et(&et);
1598 if (!path) {
1599 ret = -ENOMEM;
1600 mlog_errno(ret);
1601 goto out;
1602 }
1603
1604 ret = ocfs2_find_path(ci, path, old_cpos);
1605 if (ret) {
1606 mlog_errno(ret);
1607 goto out;
1608 }
1609
1610 /*
1611 * 2 more credits, one for the leaf refcount block, one for
1612 * the extent block contains the extent rec.
1613 */
1614 ret = ocfs2_extend_trans(handle, handle->h_buffer_credits + 2);
1615 if (ret < 0) {
1616 mlog_errno(ret);
1617 goto out;
1618 }
1619
1620 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1621 OCFS2_JOURNAL_ACCESS_WRITE);
1622 if (ret < 0) {
1623 mlog_errno(ret);
1624 goto out;
1625 }
1626
1627 ret = ocfs2_journal_access_eb(handle, ci, path_leaf_bh(path),
1628 OCFS2_JOURNAL_ACCESS_WRITE);
1629 if (ret < 0) {
1630 mlog_errno(ret);
1631 goto out;
1632 }
1633
1634 /* change the leaf extent block first. */
1635 el = path_leaf_el(path);
1636
1637 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++)
1638 if (le32_to_cpu(el->l_recs[i].e_cpos) == old_cpos)
1639 break;
1640
1641 BUG_ON(i == le16_to_cpu(el->l_next_free_rec));
1642
1643 el->l_recs[i].e_cpos = cpu_to_le32(new_cpos);
1644
1645 /* change the r_cpos in the leaf block. */
1646 rb->rf_cpos = cpu_to_le32(new_cpos);
1647
1648 ocfs2_journal_dirty(handle, path_leaf_bh(path));
1649 ocfs2_journal_dirty(handle, ref_leaf_bh);
1650
1651out:
1652 ocfs2_free_path(path);
1653 return ret;
1654}
1655
1656static int ocfs2_insert_refcount_rec(handle_t *handle,
1657 struct ocfs2_caching_info *ci,
1658 struct buffer_head *ref_root_bh,
1659 struct buffer_head *ref_leaf_bh,
1660 struct ocfs2_refcount_rec *rec,
1661 int index, int merge,
1662 struct ocfs2_alloc_context *meta_ac)
1663{
1664 int ret;
1665 struct ocfs2_refcount_block *rb =
1666 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1667 struct ocfs2_refcount_list *rf_list = &rb->rf_records;
1668 struct buffer_head *new_bh = NULL;
1669
1670 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
1671
1672 if (rf_list->rl_used == rf_list->rl_count) {
1673 u64 cpos = le64_to_cpu(rec->r_cpos);
1674 u32 len = le32_to_cpu(rec->r_clusters);
1675
1676 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
1677 ref_leaf_bh, meta_ac);
1678 if (ret) {
1679 mlog_errno(ret);
1680 goto out;
1681 }
1682
1683 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
1684 cpos, len, NULL, &index,
1685 &new_bh);
1686 if (ret) {
1687 mlog_errno(ret);
1688 goto out;
1689 }
1690
1691 ref_leaf_bh = new_bh;
1692 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1693 rf_list = &rb->rf_records;
1694 }
1695
1696 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1697 OCFS2_JOURNAL_ACCESS_WRITE);
1698 if (ret) {
1699 mlog_errno(ret);
1700 goto out;
1701 }
1702
1703 if (index < le16_to_cpu(rf_list->rl_used))
1704 memmove(&rf_list->rl_recs[index + 1],
1705 &rf_list->rl_recs[index],
1706 (le16_to_cpu(rf_list->rl_used) - index) *
1707 sizeof(struct ocfs2_refcount_rec));
1708
1709 mlog(0, "insert refcount record start %llu, len %u, count %u "
1710 "to leaf block %llu at index %d\n",
1711 (unsigned long long)le64_to_cpu(rec->r_cpos),
1712 le32_to_cpu(rec->r_clusters), le32_to_cpu(rec->r_refcount),
1713 (unsigned long long)ref_leaf_bh->b_blocknr, index);
1714
1715 rf_list->rl_recs[index] = *rec;
1716
1717 le16_add_cpu(&rf_list->rl_used, 1);
1718
1719 if (merge)
1720 ocfs2_refcount_rec_merge(rb, index);
1721
1722 ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
1723 if (ret) {
1724 mlog_errno(ret);
1725 goto out;
1726 }
1727
1728 if (index == 0) {
1729 ret = ocfs2_adjust_refcount_rec(handle, ci,
1730 ref_root_bh,
1731 ref_leaf_bh, rec);
1732 if (ret)
1733 mlog_errno(ret);
1734 }
1735out:
1736 brelse(new_bh);
1737 return ret;
1738}
1739
1740/*
1741 * Split the refcount_rec indexed by "index" in ref_leaf_bh.
1742 * This is much simple than our b-tree code.
1743 * split_rec is the new refcount rec we want to insert.
1744 * If split_rec->r_refcount > 0, we are changing the refcount(in case we
1745 * increase refcount or decrease a refcount to non-zero).
1746 * If split_rec->r_refcount == 0, we are punching a hole in current refcount
1747 * rec( in case we decrease a refcount to zero).
1748 */
1749static int ocfs2_split_refcount_rec(handle_t *handle,
1750 struct ocfs2_caching_info *ci,
1751 struct buffer_head *ref_root_bh,
1752 struct buffer_head *ref_leaf_bh,
1753 struct ocfs2_refcount_rec *split_rec,
1754 int index, int merge,
1755 struct ocfs2_alloc_context *meta_ac,
1756 struct ocfs2_cached_dealloc_ctxt *dealloc)
1757{
1758 int ret, recs_need;
1759 u32 len;
1760 struct ocfs2_refcount_block *rb =
1761 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1762 struct ocfs2_refcount_list *rf_list = &rb->rf_records;
1763 struct ocfs2_refcount_rec *orig_rec = &rf_list->rl_recs[index];
1764 struct ocfs2_refcount_rec *tail_rec = NULL;
1765 struct buffer_head *new_bh = NULL;
1766
1767 BUG_ON(le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL);
1768
1769 mlog(0, "original r_pos %llu, cluster %u, split %llu, cluster %u\n",
1770 le64_to_cpu(orig_rec->r_cpos), le32_to_cpu(orig_rec->r_clusters),
1771 le64_to_cpu(split_rec->r_cpos),
1772 le32_to_cpu(split_rec->r_clusters));
1773
1774 /*
1775 * If we just need to split the header or tail clusters,
1776 * no more recs are needed, just split is OK.
1777 * Otherwise we at least need one new recs.
1778 */
1779 if (!split_rec->r_refcount &&
1780 (split_rec->r_cpos == orig_rec->r_cpos ||
1781 le64_to_cpu(split_rec->r_cpos) +
1782 le32_to_cpu(split_rec->r_clusters) ==
1783 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
1784 recs_need = 0;
1785 else
1786 recs_need = 1;
1787
1788 /*
1789 * We need one more rec if we split in the middle and the new rec have
1790 * some refcount in it.
1791 */
1792 if (split_rec->r_refcount &&
1793 (split_rec->r_cpos != orig_rec->r_cpos &&
1794 le64_to_cpu(split_rec->r_cpos) +
1795 le32_to_cpu(split_rec->r_clusters) !=
1796 le64_to_cpu(orig_rec->r_cpos) + le32_to_cpu(orig_rec->r_clusters)))
1797 recs_need++;
1798
1799 /* If the leaf block don't have enough record, expand it. */
1800 if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) {
1801 struct ocfs2_refcount_rec tmp_rec;
1802 u64 cpos = le64_to_cpu(orig_rec->r_cpos);
1803 len = le32_to_cpu(orig_rec->r_clusters);
1804 ret = ocfs2_expand_refcount_tree(handle, ci, ref_root_bh,
1805 ref_leaf_bh, meta_ac);
1806 if (ret) {
1807 mlog_errno(ret);
1808 goto out;
1809 }
1810
1811 /*
1812 * We have to re-get it since now cpos may be moved to
1813 * another leaf block.
1814 */
1815 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
1816 cpos, len, &tmp_rec, &index,
1817 &new_bh);
1818 if (ret) {
1819 mlog_errno(ret);
1820 goto out;
1821 }
1822
1823 ref_leaf_bh = new_bh;
1824 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
1825 rf_list = &rb->rf_records;
1826 orig_rec = &rf_list->rl_recs[index];
1827 }
1828
1829 ret = ocfs2_journal_access_rb(handle, ci, ref_leaf_bh,
1830 OCFS2_JOURNAL_ACCESS_WRITE);
1831 if (ret) {
1832 mlog_errno(ret);
1833 goto out;
1834 }
1835
1836 /*
1837 * We have calculated out how many new records we need and store
1838 * in recs_need, so spare enough space first by moving the records
1839 * after "index" to the end.
1840 */
1841 if (index != le16_to_cpu(rf_list->rl_used) - 1)
1842 memmove(&rf_list->rl_recs[index + 1 + recs_need],
1843 &rf_list->rl_recs[index + 1],
1844 (le16_to_cpu(rf_list->rl_used) - index - 1) *
1845 sizeof(struct ocfs2_refcount_rec));
1846
1847 len = (le64_to_cpu(orig_rec->r_cpos) +
1848 le32_to_cpu(orig_rec->r_clusters)) -
1849 (le64_to_cpu(split_rec->r_cpos) +
1850 le32_to_cpu(split_rec->r_clusters));
1851
1852 /*
1853 * If we have "len", the we will split in the tail and move it
1854 * to the end of the space we have just spared.
1855 */
1856 if (len) {
1857 tail_rec = &rf_list->rl_recs[index + recs_need];
1858
1859 memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
1860 le64_add_cpu(&tail_rec->r_cpos,
1861 le32_to_cpu(tail_rec->r_clusters) - len);
1862 tail_rec->r_clusters = le32_to_cpu(len);
1863 }
1864
1865 /*
1866 * If the split pos isn't the same as the original one, we need to
1867 * split in the head.
1868 *
1869 * Note: We have the chance that split_rec.r_refcount = 0,
1870 * recs_need = 0 and len > 0, which means we just cut the head from
1871 * the orig_rec and in that case we have done some modification in
1872 * orig_rec above, so the check for r_cpos is faked.
1873 */
1874 if (split_rec->r_cpos != orig_rec->r_cpos && tail_rec != orig_rec) {
1875 len = le64_to_cpu(split_rec->r_cpos) -
1876 le64_to_cpu(orig_rec->r_cpos);
1877 orig_rec->r_clusters = cpu_to_le32(len);
1878 index++;
1879 }
1880
1881 le16_add_cpu(&rf_list->rl_used, recs_need);
1882
1883 if (split_rec->r_refcount) {
1884 rf_list->rl_recs[index] = *split_rec;
1885 mlog(0, "insert refcount record start %llu, len %u, count %u "
1886 "to leaf block %llu at index %d\n",
1887 (unsigned long long)le64_to_cpu(split_rec->r_cpos),
1888 le32_to_cpu(split_rec->r_clusters),
1889 le32_to_cpu(split_rec->r_refcount),
1890 (unsigned long long)ref_leaf_bh->b_blocknr, index);
1891
1892 if (merge)
1893 ocfs2_refcount_rec_merge(rb, index);
1894 }
1895
1896 ret = ocfs2_journal_dirty(handle, ref_leaf_bh);
1897 if (ret)
1898 mlog_errno(ret);
1899
1900out:
1901 brelse(new_bh);
1902 return ret;
1903}
1904
1905static int __ocfs2_increase_refcount(handle_t *handle,
1906 struct ocfs2_caching_info *ci,
1907 struct buffer_head *ref_root_bh,
1908 u64 cpos, u32 len, int merge,
1909 struct ocfs2_alloc_context *meta_ac,
1910 struct ocfs2_cached_dealloc_ctxt *dealloc)
1911{
1912 int ret = 0, index;
1913 struct buffer_head *ref_leaf_bh = NULL;
1914 struct ocfs2_refcount_rec rec;
1915 unsigned int set_len = 0;
1916
1917 mlog(0, "Tree owner %llu, add refcount start %llu, len %u\n",
1918 (unsigned long long)ocfs2_metadata_cache_owner(ci),
1919 (unsigned long long)cpos, len);
1920
1921 while (len) {
1922 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
1923 cpos, len, &rec, &index,
1924 &ref_leaf_bh);
1925 if (ret) {
1926 mlog_errno(ret);
1927 goto out;
1928 }
1929
1930 set_len = le32_to_cpu(rec.r_clusters);
1931
1932 /*
1933 * Here we may meet with 3 situations:
1934 *
1935 * 1. If we find an already existing record, and the length
1936 * is the same, cool, we just need to increase the r_refcount
1937 * and it is OK.
1938 * 2. If we find a hole, just insert it with r_refcount = 1.
1939 * 3. If we are in the middle of one extent record, split
1940 * it.
1941 */
1942 if (rec.r_refcount && le64_to_cpu(rec.r_cpos) == cpos &&
1943 set_len <= len) {
1944 mlog(0, "increase refcount rec, start %llu, len %u, "
1945 "count %u\n", (unsigned long long)cpos, set_len,
1946 le32_to_cpu(rec.r_refcount));
1947 ret = ocfs2_change_refcount_rec(handle, ci,
1948 ref_leaf_bh, index,
1949 merge, 1);
1950 if (ret) {
1951 mlog_errno(ret);
1952 goto out;
1953 }
1954 } else if (!rec.r_refcount) {
1955 rec.r_refcount = cpu_to_le32(1);
1956
1957 mlog(0, "insert refcount rec, start %llu, len %u\n",
1958 (unsigned long long)le64_to_cpu(rec.r_cpos),
1959 set_len);
1960 ret = ocfs2_insert_refcount_rec(handle, ci, ref_root_bh,
1961 ref_leaf_bh,
1962 &rec, index,
1963 merge, meta_ac);
1964 if (ret) {
1965 mlog_errno(ret);
1966 goto out;
1967 }
1968 } else {
1969 set_len = min((u64)(cpos + len),
1970 le64_to_cpu(rec.r_cpos) + set_len) - cpos;
1971 rec.r_cpos = cpu_to_le64(cpos);
1972 rec.r_clusters = cpu_to_le32(set_len);
1973 le32_add_cpu(&rec.r_refcount, 1);
1974
1975 mlog(0, "split refcount rec, start %llu, "
1976 "len %u, count %u\n",
1977 (unsigned long long)le64_to_cpu(rec.r_cpos),
1978 set_len, le32_to_cpu(rec.r_refcount));
1979 ret = ocfs2_split_refcount_rec(handle, ci,
1980 ref_root_bh, ref_leaf_bh,
1981 &rec, index, merge,
1982 meta_ac, dealloc);
1983 if (ret) {
1984 mlog_errno(ret);
1985 goto out;
1986 }
1987 }
1988
1989 cpos += set_len;
1990 len -= set_len;
1991 brelse(ref_leaf_bh);
1992 ref_leaf_bh = NULL;
1993 }
1994
1995out:
1996 brelse(ref_leaf_bh);
1997 return ret;
1998}
1999
2000static int ocfs2_remove_refcount_extent(handle_t *handle,
2001 struct ocfs2_caching_info *ci,
2002 struct buffer_head *ref_root_bh,
2003 struct buffer_head *ref_leaf_bh,
2004 struct ocfs2_alloc_context *meta_ac,
2005 struct ocfs2_cached_dealloc_ctxt *dealloc)
2006{
2007 int ret;
2008 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2009 struct ocfs2_refcount_block *rb =
2010 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2011 struct ocfs2_extent_tree et;
2012
2013 BUG_ON(rb->rf_records.rl_used);
2014
2015 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
2016 ret = ocfs2_remove_extent(handle, &et, le32_to_cpu(rb->rf_cpos),
2017 1, meta_ac, dealloc);
2018 if (ret) {
2019 mlog_errno(ret);
2020 goto out;
2021 }
2022
2023 ocfs2_remove_from_cache(ci, ref_leaf_bh);
2024
2025 /*
2026 * add the freed block to the dealloc so that it will be freed
2027 * when we run dealloc.
2028 */
2029 ret = ocfs2_cache_block_dealloc(dealloc, EXTENT_ALLOC_SYSTEM_INODE,
2030 le16_to_cpu(rb->rf_suballoc_slot),
2031 le64_to_cpu(rb->rf_blkno),
2032 le16_to_cpu(rb->rf_suballoc_bit));
2033 if (ret) {
2034 mlog_errno(ret);
2035 goto out;
2036 }
2037
2038 ret = ocfs2_journal_access_rb(handle, ci, ref_root_bh,
2039 OCFS2_JOURNAL_ACCESS_WRITE);
2040 if (ret) {
2041 mlog_errno(ret);
2042 goto out;
2043 }
2044
2045 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
2046
2047 le32_add_cpu(&rb->rf_clusters, -1);
2048
2049 /*
2050 * check whether we need to restore the root refcount block if
2051 * there is no leaf extent block at atll.
2052 */
2053 if (!rb->rf_list.l_next_free_rec) {
2054 BUG_ON(rb->rf_clusters);
2055
2056 mlog(0, "reset refcount tree root %llu to be a record block.\n",
2057 (unsigned long long)ref_root_bh->b_blocknr);
2058
2059 rb->rf_flags = 0;
2060 rb->rf_parent = 0;
2061 rb->rf_cpos = 0;
2062 memset(&rb->rf_records, 0, sb->s_blocksize -
2063 offsetof(struct ocfs2_refcount_block, rf_records));
2064 rb->rf_records.rl_count =
2065 cpu_to_le16(ocfs2_refcount_recs_per_rb(sb));
2066 }
2067
2068 ocfs2_journal_dirty(handle, ref_root_bh);
2069
2070out:
2071 return ret;
2072}
2073
2074int ocfs2_increase_refcount(handle_t *handle,
2075 struct ocfs2_caching_info *ci,
2076 struct buffer_head *ref_root_bh,
2077 u64 cpos, u32 len,
2078 struct ocfs2_alloc_context *meta_ac,
2079 struct ocfs2_cached_dealloc_ctxt *dealloc)
2080{
2081 return __ocfs2_increase_refcount(handle, ci, ref_root_bh,
2082 cpos, len, 1,
2083 meta_ac, dealloc);
2084}
2085
2086static int ocfs2_decrease_refcount_rec(handle_t *handle,
2087 struct ocfs2_caching_info *ci,
2088 struct buffer_head *ref_root_bh,
2089 struct buffer_head *ref_leaf_bh,
2090 int index, u64 cpos, unsigned int len,
2091 struct ocfs2_alloc_context *meta_ac,
2092 struct ocfs2_cached_dealloc_ctxt *dealloc)
2093{
2094 int ret;
2095 struct ocfs2_refcount_block *rb =
2096 (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2097 struct ocfs2_refcount_rec *rec = &rb->rf_records.rl_recs[index];
2098
2099 BUG_ON(cpos < le64_to_cpu(rec->r_cpos));
2100 BUG_ON(cpos + len >
2101 le64_to_cpu(rec->r_cpos) + le32_to_cpu(rec->r_clusters));
2102
2103 if (cpos == le64_to_cpu(rec->r_cpos) &&
2104 len == le32_to_cpu(rec->r_clusters))
2105 ret = ocfs2_change_refcount_rec(handle, ci,
2106 ref_leaf_bh, index, 1, -1);
2107 else {
2108 struct ocfs2_refcount_rec split = *rec;
2109 split.r_cpos = cpu_to_le64(cpos);
2110 split.r_clusters = cpu_to_le32(len);
2111
2112 le32_add_cpu(&split.r_refcount, -1);
2113
2114 mlog(0, "split refcount rec, start %llu, "
2115 "len %u, count %u, original start %llu, len %u\n",
2116 (unsigned long long)le64_to_cpu(split.r_cpos),
2117 len, le32_to_cpu(split.r_refcount),
2118 (unsigned long long)le64_to_cpu(rec->r_cpos),
2119 le32_to_cpu(rec->r_clusters));
2120 ret = ocfs2_split_refcount_rec(handle, ci,
2121 ref_root_bh, ref_leaf_bh,
2122 &split, index, 1,
2123 meta_ac, dealloc);
2124 }
2125
2126 if (ret) {
2127 mlog_errno(ret);
2128 goto out;
2129 }
2130
2131 /* Remove the leaf refcount block if it contains no refcount record. */
2132 if (!rb->rf_records.rl_used && ref_leaf_bh != ref_root_bh) {
2133 ret = ocfs2_remove_refcount_extent(handle, ci, ref_root_bh,
2134 ref_leaf_bh, meta_ac,
2135 dealloc);
2136 if (ret)
2137 mlog_errno(ret);
2138 }
2139
2140out:
2141 return ret;
2142}
2143
2144static int __ocfs2_decrease_refcount(handle_t *handle,
2145 struct ocfs2_caching_info *ci,
2146 struct buffer_head *ref_root_bh,
2147 u64 cpos, u32 len,
2148 struct ocfs2_alloc_context *meta_ac,
2149 struct ocfs2_cached_dealloc_ctxt *dealloc,
2150 int delete)
2151{
2152 int ret = 0, index = 0;
2153 struct ocfs2_refcount_rec rec;
2154 unsigned int r_count = 0, r_len;
2155 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2156 struct buffer_head *ref_leaf_bh = NULL;
2157
2158 mlog(0, "Tree owner %llu, decrease refcount start %llu, "
2159 "len %u, delete %u\n",
2160 (unsigned long long)ocfs2_metadata_cache_owner(ci),
2161 (unsigned long long)cpos, len, delete);
2162
2163 while (len) {
2164 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
2165 cpos, len, &rec, &index,
2166 &ref_leaf_bh);
2167 if (ret) {
2168 mlog_errno(ret);
2169 goto out;
2170 }
2171
2172 r_count = le32_to_cpu(rec.r_refcount);
2173 BUG_ON(r_count == 0);
2174 if (!delete)
2175 BUG_ON(r_count > 1);
2176
2177 r_len = min((u64)(cpos + len), le64_to_cpu(rec.r_cpos) +
2178 le32_to_cpu(rec.r_clusters)) - cpos;
2179
2180 ret = ocfs2_decrease_refcount_rec(handle, ci, ref_root_bh,
2181 ref_leaf_bh, index,
2182 cpos, r_len,
2183 meta_ac, dealloc);
2184 if (ret) {
2185 mlog_errno(ret);
2186 goto out;
2187 }
2188
2189 if (le32_to_cpu(rec.r_refcount) == 1 && delete) {
2190 ret = ocfs2_cache_cluster_dealloc(dealloc,
2191 ocfs2_clusters_to_blocks(sb, cpos),
2192 r_len);
2193 if (ret) {
2194 mlog_errno(ret);
2195 goto out;
2196 }
2197 }
2198
2199 cpos += r_len;
2200 len -= r_len;
2201 brelse(ref_leaf_bh);
2202 ref_leaf_bh = NULL;
2203 }
2204
2205out:
2206 brelse(ref_leaf_bh);
2207 return ret;
2208}
2209
2210/* Caller must hold refcount tree lock. */
2211int ocfs2_decrease_refcount(struct inode *inode,
2212 handle_t *handle, u32 cpos, u32 len,
2213 struct ocfs2_alloc_context *meta_ac,
2214 struct ocfs2_cached_dealloc_ctxt *dealloc,
2215 int delete)
2216{
2217 int ret;
2218 u64 ref_blkno;
2219 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2220 struct buffer_head *ref_root_bh = NULL;
2221 struct ocfs2_refcount_tree *tree;
2222
2223 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2224
2225 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
2226 if (ret) {
2227 mlog_errno(ret);
2228 goto out;
2229 }
2230
2231 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, &tree);
2232 if (ret) {
2233 mlog_errno(ret);
2234 goto out;
2235 }
2236
2237 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
2238 &ref_root_bh);
2239 if (ret) {
2240 mlog_errno(ret);
2241 goto out;
2242 }
2243
2244 ret = __ocfs2_decrease_refcount(handle, &tree->rf_ci, ref_root_bh,
2245 cpos, len, meta_ac, dealloc, delete);
2246 if (ret)
2247 mlog_errno(ret);
2248out:
2249 brelse(ref_root_bh);
2250 return ret;
2251}
2252
2253/*
2254 * Mark the already-existing extent at cpos as refcounted for len clusters.
2255 * This adds the refcount extent flag.
2256 *
2257 * If the existing extent is larger than the request, initiate a
2258 * split. An attempt will be made at merging with adjacent extents.
2259 *
2260 * The caller is responsible for passing down meta_ac if we'll need it.
2261 */
2262static int ocfs2_mark_extent_refcounted(struct inode *inode,
2263 struct ocfs2_extent_tree *et,
2264 handle_t *handle, u32 cpos,
2265 u32 len, u32 phys,
2266 struct ocfs2_alloc_context *meta_ac,
2267 struct ocfs2_cached_dealloc_ctxt *dealloc)
2268{
2269 int ret;
2270
2271 mlog(0, "Inode %lu refcount tree cpos %u, len %u, phys cluster %u\n",
2272 inode->i_ino, cpos, len, phys);
2273
2274 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
2275 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
2276 "tree, but the feature bit is not set in the "
2277 "super block.", inode->i_ino);
2278 ret = -EROFS;
2279 goto out;
2280 }
2281
2282 ret = ocfs2_change_extent_flag(handle, et, cpos,
2283 len, phys, meta_ac, dealloc,
2284 OCFS2_EXT_REFCOUNTED, 0);
2285 if (ret)
2286 mlog_errno(ret);
2287
2288out:
2289 return ret;
2290}
2291
2292/*
2293 * Given some contiguous physical clusters, calculate what we need
2294 * for modifying their refcount.
2295 */
2296static int ocfs2_calc_refcount_meta_credits(struct super_block *sb,
2297 struct ocfs2_caching_info *ci,
2298 struct buffer_head *ref_root_bh,
2299 u64 start_cpos,
2300 u32 clusters,
2301 int *meta_add,
2302 int *credits)
2303{
2304 int ret = 0, index, ref_blocks = 0, recs_add = 0;
2305 u64 cpos = start_cpos;
2306 struct ocfs2_refcount_block *rb;
2307 struct ocfs2_refcount_rec rec;
2308 struct buffer_head *ref_leaf_bh = NULL, *prev_bh = NULL;
2309 u32 len;
2310
2311 mlog(0, "start_cpos %llu, clusters %u\n",
2312 (unsigned long long)start_cpos, clusters);
2313 while (clusters) {
2314 ret = ocfs2_get_refcount_rec(ci, ref_root_bh,
2315 cpos, clusters, &rec,
2316 &index, &ref_leaf_bh);
2317 if (ret) {
2318 mlog_errno(ret);
2319 goto out;
2320 }
2321
2322 if (ref_leaf_bh != prev_bh) {
2323 /*
2324 * Now we encounter a new leaf block, so calculate
2325 * whether we need to extend the old leaf.
2326 */
2327 if (prev_bh) {
2328 rb = (struct ocfs2_refcount_block *)
2329 prev_bh->b_data;
2330
2331 if (le64_to_cpu(rb->rf_records.rl_used) +
2332 recs_add >
2333 le16_to_cpu(rb->rf_records.rl_count))
2334 ref_blocks++;
2335 }
2336
2337 recs_add = 0;
2338 *credits += 1;
2339 brelse(prev_bh);
2340 prev_bh = ref_leaf_bh;
2341 get_bh(prev_bh);
2342 }
2343
2344 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
2345
2346 mlog(0, "recs_add %d,cpos %llu, clusters %u, rec->r_cpos %llu,"
2347 "rec->r_clusters %u, rec->r_refcount %u, index %d\n",
2348 recs_add, (unsigned long long)cpos, clusters,
2349 (unsigned long long)le64_to_cpu(rec.r_cpos),
2350 le32_to_cpu(rec.r_clusters),
2351 le32_to_cpu(rec.r_refcount), index);
2352
2353 len = min((u64)cpos + clusters, le64_to_cpu(rec.r_cpos) +
2354 le32_to_cpu(rec.r_clusters)) - cpos;
2355 /*
2356 * If the refcount rec already exist, cool. We just need
2357 * to check whether there is a split. Otherwise we just need
2358 * to increase the refcount.
2359 * If we will insert one, increases recs_add.
2360 *
2361 * We record all the records which will be inserted to the
2362 * same refcount block, so that we can tell exactly whether
2363 * we need a new refcount block or not.
2364 */
2365 if (rec.r_refcount) {
2366 /* Check whether we need a split at the beginning. */
2367 if (cpos == start_cpos &&
2368 cpos != le64_to_cpu(rec.r_cpos))
2369 recs_add++;
2370
2371 /* Check whether we need a split in the end. */
2372 if (cpos + clusters < le64_to_cpu(rec.r_cpos) +
2373 le32_to_cpu(rec.r_clusters))
2374 recs_add++;
2375 } else
2376 recs_add++;
2377
2378 brelse(ref_leaf_bh);
2379 ref_leaf_bh = NULL;
2380 clusters -= len;
2381 cpos += len;
2382 }
2383
2384 if (prev_bh) {
2385 rb = (struct ocfs2_refcount_block *)prev_bh->b_data;
2386
2387 if (le64_to_cpu(rb->rf_records.rl_used) + recs_add >
2388 le16_to_cpu(rb->rf_records.rl_count))
2389 ref_blocks++;
2390
2391 *credits += 1;
2392 }
2393
2394 if (!ref_blocks)
2395 goto out;
2396
2397 mlog(0, "we need ref_blocks %d\n", ref_blocks);
2398 *meta_add += ref_blocks;
2399 *credits += ref_blocks;
2400
2401 /*
2402 * So we may need ref_blocks to insert into the tree.
2403 * That also means we need to change the b-tree and add that number
2404 * of records since we never merge them.
2405 * We need one more block for expansion since the new created leaf
2406 * block is also full and needs split.
2407 */
2408 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
2409 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL) {
2410 struct ocfs2_extent_tree et;
2411
2412 ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
2413 *meta_add += ocfs2_extend_meta_needed(et.et_root_el);
2414 *credits += ocfs2_calc_extend_credits(sb,
2415 et.et_root_el,
2416 ref_blocks);
2417 } else {
2418 *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
2419 *meta_add += 1;
2420 }
2421
2422out:
2423 brelse(ref_leaf_bh);
2424 brelse(prev_bh);
2425 return ret;
2426}
2427
2428/*
2429 * For refcount tree, we will decrease some contiguous clusters
2430 * refcount count, so just go through it to see how many blocks
2431 * we gonna touch and whether we need to create new blocks.
2432 *
2433 * Normally the refcount blocks store these refcount should be
2434 * continguous also, so that we can get the number easily.
2435 * As for meta_ac, we will at most add split 2 refcount record and
2436 * 2 more refcount block, so just check it in a rough way.
2437 *
2438 * Caller must hold refcount tree lock.
2439 */
2440int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
2441 struct buffer_head *di_bh,
2442 u64 phys_blkno,
2443 u32 clusters,
2444 int *credits,
2445 struct ocfs2_alloc_context **meta_ac)
2446{
2447 int ret, ref_blocks = 0;
2448 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2449 struct ocfs2_inode_info *oi = OCFS2_I(inode);
2450 struct buffer_head *ref_root_bh = NULL;
2451 struct ocfs2_refcount_tree *tree;
2452 u64 start_cpos = ocfs2_blocks_to_clusters(inode->i_sb, phys_blkno);
2453
2454 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
2455 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
2456 "tree, but the feature bit is not set in the "
2457 "super block.", inode->i_ino);
2458 ret = -EROFS;
2459 goto out;
2460 }
2461
2462 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
2463
2464 ret = ocfs2_get_refcount_tree(OCFS2_SB(inode->i_sb),
2465 le64_to_cpu(di->i_refcount_loc), &tree);
2466 if (ret) {
2467 mlog_errno(ret);
2468 goto out;
2469 }
2470
2471 ret = ocfs2_read_refcount_block(&tree->rf_ci,
2472 le64_to_cpu(di->i_refcount_loc),
2473 &ref_root_bh);
2474 if (ret) {
2475 mlog_errno(ret);
2476 goto out;
2477 }
2478
2479 ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
2480 &tree->rf_ci,
2481 ref_root_bh,
2482 start_cpos, clusters,
2483 &ref_blocks, credits);
2484 if (ret) {
2485 mlog_errno(ret);
2486 goto out;
2487 }
2488
2489 mlog(0, "reserve new metadata %d, credits = %d\n",
2490 ref_blocks, *credits);
2491
2492 if (ref_blocks) {
2493 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2494 ref_blocks, meta_ac);
2495 if (ret)
2496 mlog_errno(ret);
2497 }
2498
2499out:
2500 brelse(ref_root_bh);
2501 return ret;
2502}
2503
/* Byte size (1 MiB) of the contiguous chunk CoW tries to work in. */
#define MAX_CONTIG_BYTES	1048576

/* MAX_CONTIG_BYTES converted to clusters via ocfs2_clusters_for_bytes(). */
static inline unsigned int ocfs2_cow_contig_clusters(struct super_block *sb)
{
	unsigned int nr_clusters = ocfs2_clusters_for_bytes(sb,
							    MAX_CONTIG_BYTES);

	return nr_clusters;
}
2510
/*
 * Bit mask that clears the low bits below the CoW chunk size.
 * NOTE(review): only a true alignment mask if the chunk size in clusters
 * is a power of two - confirm against ocfs2_clusters_for_bytes().
 */
static inline unsigned int ocfs2_cow_contig_mask(struct super_block *sb)
{
	unsigned int low_bits = ocfs2_cow_contig_clusters(sb) - 1;

	return ~low_bits;
}
2515
/*
 * For an extent beginning at 'start' and an I/O beginning at 'cpos', return
 * the offset of the form (start + n * contig_clusters) that is closest to
 * cpos without exceeding it.
 *
 * This lets the extent be broken at a multiple of contig_clusters.
 * 'start' must not be greater than 'cpos' (BUG otherwise).
 */
static inline unsigned int ocfs2_cow_align_start(struct super_block *sb,
						 unsigned int start,
						 unsigned int cpos)
{
	unsigned int distance;

	BUG_ON(start > cpos);

	/* Round the distance from the extent start down to a chunk. */
	distance = cpos - start;
	distance &= ocfs2_cow_contig_mask(sb);

	return start + distance;
}
2531
2532/*
2533 * Given a cluster count of len, pad it out so that it is a multiple
2534 * of contig_clusters.
2535 */
2536static inline unsigned int ocfs2_cow_align_length(struct super_block *sb,
2537 unsigned int len)
2538{
2539 unsigned int padded =
2540 (len + (ocfs2_cow_contig_clusters(sb) - 1)) &
2541 ocfs2_cow_contig_mask(sb);
2542
2543 /* Did we wrap? */
2544 if (padded < len)
2545 padded = UINT_MAX;
2546
2547 return padded;
2548}
2549
2550/*
2551 * Calculate out the start and number of virtual clusters we need to to CoW.
2552 *
2553 * cpos is vitual start cluster position we want to do CoW in a
2554 * file and write_len is the cluster length.
2555 * max_cpos is the place where we want to stop CoW intentionally.
2556 *
2557 * Normal we will start CoW from the beginning of extent record cotaining cpos.
2558 * We try to break up extents on boundaries of MAX_CONTIG_BYTES so that we
2559 * get good I/O from the resulting extent tree.
2560 */
2561static int ocfs2_refcount_cal_cow_clusters(struct inode *inode,
2562 struct ocfs2_extent_list *el,
2563 u32 cpos,
2564 u32 write_len,
2565 u32 max_cpos,
2566 u32 *cow_start,
2567 u32 *cow_len)
2568{
2569 int ret = 0;
2570 int tree_height = le16_to_cpu(el->l_tree_depth), i;
2571 struct buffer_head *eb_bh = NULL;
2572 struct ocfs2_extent_block *eb = NULL;
2573 struct ocfs2_extent_rec *rec;
2574 unsigned int want_clusters, rec_end = 0;
2575 int contig_clusters = ocfs2_cow_contig_clusters(inode->i_sb);
2576 int leaf_clusters;
2577
2578 BUG_ON(cpos + write_len > max_cpos);
2579
2580 if (tree_height > 0) {
2581 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, cpos, &eb_bh);
2582 if (ret) {
2583 mlog_errno(ret);
2584 goto out;
2585 }
2586
2587 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2588 el = &eb->h_list;
2589
2590 if (el->l_tree_depth) {
2591 ocfs2_error(inode->i_sb,
2592 "Inode %lu has non zero tree depth in "
2593 "leaf block %llu\n", inode->i_ino,
2594 (unsigned long long)eb_bh->b_blocknr);
2595 ret = -EROFS;
2596 goto out;
2597 }
2598 }
2599
2600 *cow_len = 0;
2601 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
2602 rec = &el->l_recs[i];
2603
2604 if (ocfs2_is_empty_extent(rec)) {
2605 mlog_bug_on_msg(i != 0, "Inode %lu has empty record in "
2606 "index %d\n", inode->i_ino, i);
2607 continue;
2608 }
2609
2610 if (le32_to_cpu(rec->e_cpos) +
2611 le16_to_cpu(rec->e_leaf_clusters) <= cpos)
2612 continue;
2613
2614 if (*cow_len == 0) {
2615 /*
2616 * We should find a refcounted record in the
2617 * first pass.
2618 */
2619 BUG_ON(!(rec->e_flags & OCFS2_EXT_REFCOUNTED));
2620 *cow_start = le32_to_cpu(rec->e_cpos);
2621 }
2622
2623 /*
2624 * If we encounter a hole, a non-refcounted record or
2625 * pass the max_cpos, stop the search.
2626 */
2627 if ((!(rec->e_flags & OCFS2_EXT_REFCOUNTED)) ||
2628 (*cow_len && rec_end != le32_to_cpu(rec->e_cpos)) ||
2629 (max_cpos <= le32_to_cpu(rec->e_cpos)))
2630 break;
2631
2632 leaf_clusters = le16_to_cpu(rec->e_leaf_clusters);
2633 rec_end = le32_to_cpu(rec->e_cpos) + leaf_clusters;
2634 if (rec_end > max_cpos) {
2635 rec_end = max_cpos;
2636 leaf_clusters = rec_end - le32_to_cpu(rec->e_cpos);
2637 }
2638
2639 /*
2640 * How many clusters do we actually need from
2641 * this extent? First we see how many we actually
2642 * need to complete the write. If that's smaller
2643 * than contig_clusters, we try for contig_clusters.
2644 */
2645 if (!*cow_len)
2646 want_clusters = write_len;
2647 else
2648 want_clusters = (cpos + write_len) -
2649 (*cow_start + *cow_len);
2650 if (want_clusters < contig_clusters)
2651 want_clusters = contig_clusters;
2652
2653 /*
2654 * If the write does not cover the whole extent, we
2655 * need to calculate how we're going to split the extent.
2656 * We try to do it on contig_clusters boundaries.
2657 *
2658 * Any extent smaller than contig_clusters will be
2659 * CoWed in its entirety.
2660 */
2661 if (leaf_clusters <= contig_clusters)
2662 *cow_len += leaf_clusters;
2663 else if (*cow_len || (*cow_start == cpos)) {
2664 /*
2665 * This extent needs to be CoW'd from its
2666 * beginning, so all we have to do is compute
2667 * how many clusters to grab. We align
2668 * want_clusters to the edge of contig_clusters
2669 * to get better I/O.
2670 */
2671 want_clusters = ocfs2_cow_align_length(inode->i_sb,
2672 want_clusters);
2673
2674 if (leaf_clusters < want_clusters)
2675 *cow_len += leaf_clusters;
2676 else
2677 *cow_len += want_clusters;
2678 } else if ((*cow_start + contig_clusters) >=
2679 (cpos + write_len)) {
2680 /*
2681 * Breaking off contig_clusters at the front
2682 * of the extent will cover our write. That's
2683 * easy.
2684 */
2685 *cow_len = contig_clusters;
2686 } else if ((rec_end - cpos) <= contig_clusters) {
2687 /*
2688 * Breaking off contig_clusters at the tail of
2689 * this extent will cover cpos.
2690 */
2691 *cow_start = rec_end - contig_clusters;
2692 *cow_len = contig_clusters;
2693 } else if ((rec_end - cpos) <= want_clusters) {
2694 /*
2695 * While we can't fit the entire write in this
2696 * extent, we know that the write goes from cpos
2697 * to the end of the extent. Break that off.
2698 * We try to break it at some multiple of
2699 * contig_clusters from the front of the extent.
2700 * Failing that (ie, cpos is within
2701 * contig_clusters of the front), we'll CoW the
2702 * entire extent.
2703 */
2704 *cow_start = ocfs2_cow_align_start(inode->i_sb,
2705 *cow_start, cpos);
2706 *cow_len = rec_end - *cow_start;
2707 } else {
2708 /*
2709 * Ok, the entire write lives in the middle of
2710 * this extent. Let's try to slice the extent up
2711 * nicely. Optimally, our CoW region starts at
2712 * m*contig_clusters from the beginning of the
2713 * extent and goes for n*contig_clusters,
2714 * covering the entire write.
2715 */
2716 *cow_start = ocfs2_cow_align_start(inode->i_sb,
2717 *cow_start, cpos);
2718
2719 want_clusters = (cpos + write_len) - *cow_start;
2720 want_clusters = ocfs2_cow_align_length(inode->i_sb,
2721 want_clusters);
2722 if (*cow_start + want_clusters <= rec_end)
2723 *cow_len = want_clusters;
2724 else
2725 *cow_len = rec_end - *cow_start;
2726 }
2727
2728 /* Have we covered our entire write yet? */
2729 if ((*cow_start + *cow_len) >= (cpos + write_len))
2730 break;
2731
2732 /*
2733 * If we reach the end of the extent block and don't get enough
2734 * clusters, continue with the next extent block if possible.
2735 */
2736 if (i + 1 == le16_to_cpu(el->l_next_free_rec) &&
2737 eb && eb->h_next_leaf_blk) {
2738 brelse(eb_bh);
2739 eb_bh = NULL;
2740
2741 ret = ocfs2_read_extent_block(INODE_CACHE(inode),
2742 le64_to_cpu(eb->h_next_leaf_blk),
2743 &eb_bh);
2744 if (ret) {
2745 mlog_errno(ret);
2746 goto out;
2747 }
2748
2749 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
2750 el = &eb->h_list;
2751 i = -1;
2752 }
2753 }
2754
2755out:
2756 brelse(eb_bh);
2757 return ret;
2758}
2759
2760/*
2761 * Prepare meta_ac, data_ac and calculate credits when we want to add some
2762 * num_clusters in data_tree "et" and change the refcount for the old
2763 * clusters(starting form p_cluster) in the refcount tree.
2764 *
2765 * Note:
2766 * 1. since we may split the old tree, so we at most will need num_clusters + 2
2767 * more new leaf records.
2768 * 2. In some case, we may not need to reserve new clusters(e.g, reflink), so
2769 * just give data_ac = NULL.
2770 */
2771static int ocfs2_lock_refcount_allocators(struct super_block *sb,
2772 u32 p_cluster, u32 num_clusters,
2773 struct ocfs2_extent_tree *et,
2774 struct ocfs2_caching_info *ref_ci,
2775 struct buffer_head *ref_root_bh,
2776 struct ocfs2_alloc_context **meta_ac,
2777 struct ocfs2_alloc_context **data_ac,
2778 int *credits)
2779{
2780 int ret = 0, meta_add = 0;
2781 int num_free_extents = ocfs2_num_free_extents(OCFS2_SB(sb), et);
2782
2783 if (num_free_extents < 0) {
2784 ret = num_free_extents;
2785 mlog_errno(ret);
2786 goto out;
2787 }
2788
2789 if (num_free_extents < num_clusters + 2)
2790 meta_add =
2791 ocfs2_extend_meta_needed(et->et_root_el);
2792
2793 *credits += ocfs2_calc_extend_credits(sb, et->et_root_el,
2794 num_clusters + 2);
2795
2796 ret = ocfs2_calc_refcount_meta_credits(sb, ref_ci, ref_root_bh,
2797 p_cluster, num_clusters,
2798 &meta_add, credits);
2799 if (ret) {
2800 mlog_errno(ret);
2801 goto out;
2802 }
2803
2804 mlog(0, "reserve new metadata %d, clusters %u, credits = %d\n",
2805 meta_add, num_clusters, *credits);
2806 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(sb), meta_add,
2807 meta_ac);
2808 if (ret) {
2809 mlog_errno(ret);
2810 goto out;
2811 }
2812
2813 if (data_ac) {
2814 ret = ocfs2_reserve_clusters(OCFS2_SB(sb), num_clusters,
2815 data_ac);
2816 if (ret)
2817 mlog_errno(ret);
2818 }
2819
2820out:
2821 if (ret) {
2822 if (*meta_ac) {
2823 ocfs2_free_alloc_context(*meta_ac);
2824 *meta_ac = NULL;
2825 }
2826 }
2827
2828 return ret;
2829}
2830
2831static int ocfs2_clear_cow_buffer(handle_t *handle, struct buffer_head *bh)
2832{
2833 BUG_ON(buffer_dirty(bh));
2834
2835 clear_buffer_mapped(bh);
2836
2837 return 0;
2838}
2839
2840static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
2841 struct ocfs2_cow_context *context,
2842 u32 cpos, u32 old_cluster,
2843 u32 new_cluster, u32 new_len)
2844{
2845 int ret = 0, partial;
2846 struct ocfs2_caching_info *ci = context->data_et.et_ci;
2847 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
2848 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2849 struct page *page;
2850 pgoff_t page_index;
2851 unsigned int from, to;
2852 loff_t offset, end, map_end;
2853 struct address_space *mapping = context->inode->i_mapping;
2854
2855 mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster,
2856 new_cluster, new_len, cpos);
2857
2858 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
2859 end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits);
2860
2861 while (offset < end) {
2862 page_index = offset >> PAGE_CACHE_SHIFT;
2863 map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
2864 if (map_end > end)
2865 map_end = end;
2866
2867 /* from, to is the offset within the page. */
2868 from = offset & (PAGE_CACHE_SIZE - 1);
2869 to = PAGE_CACHE_SIZE;
2870 if (map_end & (PAGE_CACHE_SIZE - 1))
2871 to = map_end & (PAGE_CACHE_SIZE - 1);
2872
2873 page = grab_cache_page(mapping, page_index);
2874
2875 /* This page can't be dirtied before we CoW it out. */
2876 BUG_ON(PageDirty(page));
2877
2878 if (!PageUptodate(page)) {
2879 ret = block_read_full_page(page, ocfs2_get_block);
2880 if (ret) {
2881 mlog_errno(ret);
2882 goto unlock;
2883 }
2884 lock_page(page);
2885 }
2886
2887 if (page_has_buffers(page)) {
2888 ret = walk_page_buffers(handle, page_buffers(page),
2889 from, to, &partial,
2890 ocfs2_clear_cow_buffer);
2891 if (ret) {
2892 mlog_errno(ret);
2893 goto unlock;
2894 }
2895 }
2896
2897 ocfs2_map_and_dirty_page(context->inode,
2898 handle, from, to,
2899 page, 0, &new_block);
2900 mark_page_accessed(page);
2901unlock:
2902 unlock_page(page);
2903 page_cache_release(page);
2904 page = NULL;
2905 offset = map_end;
2906 if (ret)
2907 break;
2908 }
2909
2910 return ret;
2911}
2912
2913static int ocfs2_duplicate_clusters_by_jbd(handle_t *handle,
2914 struct ocfs2_cow_context *context,
2915 u32 cpos, u32 old_cluster,
2916 u32 new_cluster, u32 new_len)
2917{
2918 int ret = 0;
2919 struct super_block *sb = context->inode->i_sb;
2920 struct ocfs2_caching_info *ci = context->data_et.et_ci;
2921 int i, blocks = ocfs2_clusters_to_blocks(sb, new_len);
2922 u64 old_block = ocfs2_clusters_to_blocks(sb, old_cluster);
2923 u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster);
2924 struct ocfs2_super *osb = OCFS2_SB(sb);
2925 struct buffer_head *old_bh = NULL;
2926 struct buffer_head *new_bh = NULL;
2927
2928 mlog(0, "old_cluster %u, new %u, len %u\n", old_cluster,
2929 new_cluster, new_len);
2930
2931 for (i = 0; i < blocks; i++, old_block++, new_block++) {
2932 new_bh = sb_getblk(osb->sb, new_block);
2933 if (new_bh == NULL) {
2934 ret = -EIO;
2935 mlog_errno(ret);
2936 break;
2937 }
2938
2939 ocfs2_set_new_buffer_uptodate(ci, new_bh);
2940
2941 ret = ocfs2_read_block(ci, old_block, &old_bh, NULL);
2942 if (ret) {
2943 mlog_errno(ret);
2944 break;
2945 }
2946
2947 ret = ocfs2_journal_access(handle, ci, new_bh,
2948 OCFS2_JOURNAL_ACCESS_CREATE);
2949 if (ret) {
2950 mlog_errno(ret);
2951 break;
2952 }
2953
2954 memcpy(new_bh->b_data, old_bh->b_data, sb->s_blocksize);
2955 ret = ocfs2_journal_dirty(handle, new_bh);
2956 if (ret) {
2957 mlog_errno(ret);
2958 break;
2959 }
2960
2961 brelse(new_bh);
2962 brelse(old_bh);
2963 new_bh = NULL;
2964 old_bh = NULL;
2965 }
2966
2967 brelse(new_bh);
2968 brelse(old_bh);
2969 return ret;
2970}
2971
2972static int ocfs2_clear_ext_refcount(handle_t *handle,
2973 struct ocfs2_extent_tree *et,
2974 u32 cpos, u32 p_cluster, u32 len,
2975 unsigned int ext_flags,
2976 struct ocfs2_alloc_context *meta_ac,
2977 struct ocfs2_cached_dealloc_ctxt *dealloc)
2978{
2979 int ret, index;
2980 struct ocfs2_extent_rec replace_rec;
2981 struct ocfs2_path *path = NULL;
2982 struct ocfs2_extent_list *el;
2983 struct super_block *sb = ocfs2_metadata_cache_get_super(et->et_ci);
2984 u64 ino = ocfs2_metadata_cache_owner(et->et_ci);
2985
2986 mlog(0, "inode %llu cpos %u, len %u, p_cluster %u, ext_flags %u\n",
2987 (unsigned long long)ino, cpos, len, p_cluster, ext_flags);
2988
2989 memset(&replace_rec, 0, sizeof(replace_rec));
2990 replace_rec.e_cpos = cpu_to_le32(cpos);
2991 replace_rec.e_leaf_clusters = cpu_to_le16(len);
2992 replace_rec.e_blkno = cpu_to_le64(ocfs2_clusters_to_blocks(sb,
2993 p_cluster));
2994 replace_rec.e_flags = ext_flags;
2995 replace_rec.e_flags &= ~OCFS2_EXT_REFCOUNTED;
2996
2997 path = ocfs2_new_path_from_et(et);
2998 if (!path) {
2999 ret = -ENOMEM;
3000 mlog_errno(ret);
3001 goto out;
3002 }
3003
3004 ret = ocfs2_find_path(et->et_ci, path, cpos);
3005 if (ret) {
3006 mlog_errno(ret);
3007 goto out;
3008 }
3009
3010 el = path_leaf_el(path);
3011
3012 index = ocfs2_search_extent_list(el, cpos);
3013 if (index == -1 || index >= le16_to_cpu(el->l_next_free_rec)) {
3014 ocfs2_error(sb,
3015 "Inode %llu has an extent at cpos %u which can no "
3016 "longer be found.\n",
3017 (unsigned long long)ino, cpos);
3018 ret = -EROFS;
3019 goto out;
3020 }
3021
3022 ret = ocfs2_split_extent(handle, et, path, index,
3023 &replace_rec, meta_ac, dealloc);
3024 if (ret)
3025 mlog_errno(ret);
3026
3027out:
3028 ocfs2_free_path(path);
3029 return ret;
3030}
3031
3032static int ocfs2_replace_clusters(handle_t *handle,
3033 struct ocfs2_cow_context *context,
3034 u32 cpos, u32 old,
3035 u32 new, u32 len,
3036 unsigned int ext_flags)
3037{
3038 int ret;
3039 struct ocfs2_caching_info *ci = context->data_et.et_ci;
3040 u64 ino = ocfs2_metadata_cache_owner(ci);
3041
3042 mlog(0, "inode %llu, cpos %u, old %u, new %u, len %u, ext_flags %u\n",
3043 (unsigned long long)ino, cpos, old, new, len, ext_flags);
3044
3045 /*If the old clusters is unwritten, no need to duplicate. */
3046 if (!(ext_flags & OCFS2_EXT_UNWRITTEN)) {
3047 ret = context->cow_duplicate_clusters(handle, context, cpos,
3048 old, new, len);
3049 if (ret) {
3050 mlog_errno(ret);
3051 goto out;
3052 }
3053 }
3054
3055 ret = ocfs2_clear_ext_refcount(handle, &context->data_et,
3056 cpos, new, len, ext_flags,
3057 context->meta_ac, &context->dealloc);
3058 if (ret)
3059 mlog_errno(ret);
3060out:
3061 return ret;
3062}
3063
3064static int ocfs2_cow_sync_writeback(struct super_block *sb,
3065 struct ocfs2_cow_context *context,
3066 u32 cpos, u32 num_clusters)
3067{
3068 int ret = 0;
3069 loff_t offset, end, map_end;
3070 pgoff_t page_index;
3071 struct page *page;
3072
3073 if (ocfs2_should_order_data(context->inode))
3074 return 0;
3075
3076 offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits;
3077 end = offset + (num_clusters << OCFS2_SB(sb)->s_clustersize_bits);
3078
3079 ret = filemap_fdatawrite_range(context->inode->i_mapping,
3080 offset, end - 1);
3081 if (ret < 0) {
3082 mlog_errno(ret);
3083 return ret;
3084 }
3085
3086 while (offset < end) {
3087 page_index = offset >> PAGE_CACHE_SHIFT;
3088 map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
3089 if (map_end > end)
3090 map_end = end;
3091
3092 page = grab_cache_page(context->inode->i_mapping, page_index);
3093 BUG_ON(!page);
3094
3095 wait_on_page_writeback(page);
3096 if (PageError(page)) {
3097 ret = -EIO;
3098 mlog_errno(ret);
3099 } else
3100 mark_page_accessed(page);
3101
3102 unlock_page(page);
3103 page_cache_release(page);
3104 page = NULL;
3105 offset = map_end;
3106 if (ret)
3107 break;
3108 }
3109
3110 return ret;
3111}
3112
3113static int ocfs2_di_get_clusters(struct ocfs2_cow_context *context,
3114 u32 v_cluster, u32 *p_cluster,
3115 u32 *num_clusters,
3116 unsigned int *extent_flags)
3117{
3118 return ocfs2_get_clusters(context->inode, v_cluster, p_cluster,
3119 num_clusters, extent_flags);
3120}
3121
3122static int ocfs2_make_clusters_writable(struct super_block *sb,
3123 struct ocfs2_cow_context *context,
3124 u32 cpos, u32 p_cluster,
3125 u32 num_clusters, unsigned int e_flags)
3126{
3127 int ret, delete, index, credits = 0;
3128 u32 new_bit, new_len;
3129 unsigned int set_len;
3130 struct ocfs2_super *osb = OCFS2_SB(sb);
3131 handle_t *handle;
3132 struct buffer_head *ref_leaf_bh = NULL;
3133 struct ocfs2_caching_info *ref_ci = &context->ref_tree->rf_ci;
3134 struct ocfs2_refcount_rec rec;
3135
3136 mlog(0, "cpos %u, p_cluster %u, num_clusters %u, e_flags %u\n",
3137 cpos, p_cluster, num_clusters, e_flags);
3138
3139 ret = ocfs2_lock_refcount_allocators(sb, p_cluster, num_clusters,
3140 &context->data_et,
3141 ref_ci,
3142 context->ref_root_bh,
3143 &context->meta_ac,
3144 &context->data_ac, &credits);
3145 if (ret) {
3146 mlog_errno(ret);
3147 return ret;
3148 }
3149
3150 if (context->post_refcount)
3151 credits += context->post_refcount->credits;
3152
3153 credits += context->extra_credits;
3154 handle = ocfs2_start_trans(osb, credits);
3155 if (IS_ERR(handle)) {
3156 ret = PTR_ERR(handle);
3157 mlog_errno(ret);
3158 goto out;
3159 }
3160
3161 while (num_clusters) {
3162 ret = ocfs2_get_refcount_rec(ref_ci, context->ref_root_bh,
3163 p_cluster, num_clusters,
3164 &rec, &index, &ref_leaf_bh);
3165 if (ret) {
3166 mlog_errno(ret);
3167 goto out_commit;
3168 }
3169
3170 BUG_ON(!rec.r_refcount);
3171 set_len = min((u64)p_cluster + num_clusters,
3172 le64_to_cpu(rec.r_cpos) +
3173 le32_to_cpu(rec.r_clusters)) - p_cluster;
3174
3175 /*
3176 * There are many different situation here.
3177 * 1. If refcount == 1, remove the flag and don't COW.
3178 * 2. If refcount > 1, allocate clusters.
3179 * Here we may not allocate r_len once at a time, so continue
3180 * until we reach num_clusters.
3181 */
3182 if (le32_to_cpu(rec.r_refcount) == 1) {
3183 delete = 0;
3184 ret = ocfs2_clear_ext_refcount(handle,
3185 &context->data_et,
3186 cpos, p_cluster,
3187 set_len, e_flags,
3188 context->meta_ac,
3189 &context->dealloc);
3190 if (ret) {
3191 mlog_errno(ret);
3192 goto out_commit;
3193 }
3194 } else {
3195 delete = 1;
3196
3197 ret = __ocfs2_claim_clusters(osb, handle,
3198 context->data_ac,
3199 1, set_len,
3200 &new_bit, &new_len);
3201 if (ret) {
3202 mlog_errno(ret);
3203 goto out_commit;
3204 }
3205
3206 ret = ocfs2_replace_clusters(handle, context,
3207 cpos, p_cluster, new_bit,
3208 new_len, e_flags);
3209 if (ret) {
3210 mlog_errno(ret);
3211 goto out_commit;
3212 }
3213 set_len = new_len;
3214 }
3215
3216 ret = __ocfs2_decrease_refcount(handle, ref_ci,
3217 context->ref_root_bh,
3218 p_cluster, set_len,
3219 context->meta_ac,
3220 &context->dealloc, delete);
3221 if (ret) {
3222 mlog_errno(ret);
3223 goto out_commit;
3224 }
3225
3226 cpos += set_len;
3227 p_cluster += set_len;
3228 num_clusters -= set_len;
3229 brelse(ref_leaf_bh);
3230 ref_leaf_bh = NULL;
3231 }
3232
3233 /* handle any post_cow action. */
3234 if (context->post_refcount && context->post_refcount->func) {
3235 ret = context->post_refcount->func(context->inode, handle,
3236 context->post_refcount->para);
3237 if (ret) {
3238 mlog_errno(ret);
3239 goto out_commit;
3240 }
3241 }
3242
3243 /*
3244 * Here we should write the new page out first if we are
3245 * in write-back mode.
3246 */
3247 if (context->get_clusters == ocfs2_di_get_clusters) {
3248 ret = ocfs2_cow_sync_writeback(sb, context, cpos, num_clusters);
3249 if (ret)
3250 mlog_errno(ret);
3251 }
3252
3253out_commit:
3254 ocfs2_commit_trans(osb, handle);
3255
3256out:
3257 if (context->data_ac) {
3258 ocfs2_free_alloc_context(context->data_ac);
3259 context->data_ac = NULL;
3260 }
3261 if (context->meta_ac) {
3262 ocfs2_free_alloc_context(context->meta_ac);
3263 context->meta_ac = NULL;
3264 }
3265 brelse(ref_leaf_bh);
3266
3267 return ret;
3268}
3269
3270static int ocfs2_replace_cow(struct ocfs2_cow_context *context)
3271{
3272 int ret = 0;
3273 struct inode *inode = context->inode;
3274 u32 cow_start = context->cow_start, cow_len = context->cow_len;
3275 u32 p_cluster, num_clusters;
3276 unsigned int ext_flags;
3277 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3278
3279 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) {
3280 ocfs2_error(inode->i_sb, "Inode %lu want to use refcount "
3281 "tree, but the feature bit is not set in the "
3282 "super block.", inode->i_ino);
3283 return -EROFS;
3284 }
3285
3286 ocfs2_init_dealloc_ctxt(&context->dealloc);
3287
3288 while (cow_len) {
3289 ret = context->get_clusters(context, cow_start, &p_cluster,
3290 &num_clusters, &ext_flags);
3291 if (ret) {
3292 mlog_errno(ret);
3293 break;
3294 }
3295
3296 BUG_ON(!(ext_flags & OCFS2_EXT_REFCOUNTED));
3297
3298 if (cow_len < num_clusters)
3299 num_clusters = cow_len;
3300
3301 ret = ocfs2_make_clusters_writable(inode->i_sb, context,
3302 cow_start, p_cluster,
3303 num_clusters, ext_flags);
3304 if (ret) {
3305 mlog_errno(ret);
3306 break;
3307 }
3308
3309 cow_len -= num_clusters;
3310 cow_start += num_clusters;
3311 }
3312
3313 if (ocfs2_dealloc_has_cluster(&context->dealloc)) {
3314 ocfs2_schedule_truncate_log_flush(osb, 1);
3315 ocfs2_run_deallocs(osb, &context->dealloc);
3316 }
3317
3318 return ret;
3319}
3320
3321/*
3322 * Starting at cpos, try to CoW write_len clusters. Don't CoW
3323 * past max_cpos. This will stop when it runs into a hole or an
3324 * unrefcounted extent.
3325 */
3326static int ocfs2_refcount_cow_hunk(struct inode *inode,
3327 struct buffer_head *di_bh,
3328 u32 cpos, u32 write_len, u32 max_cpos)
3329{
3330 int ret;
3331 u32 cow_start = 0, cow_len = 0;
3332 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3333 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3334 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3335 struct buffer_head *ref_root_bh = NULL;
3336 struct ocfs2_refcount_tree *ref_tree;
3337 struct ocfs2_cow_context *context = NULL;
3338
3339 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
3340
3341 ret = ocfs2_refcount_cal_cow_clusters(inode, &di->id2.i_list,
3342 cpos, write_len, max_cpos,
3343 &cow_start, &cow_len);
3344 if (ret) {
3345 mlog_errno(ret);
3346 goto out;
3347 }
3348
3349 mlog(0, "CoW inode %lu, cpos %u, write_len %u, cow_start %u, "
3350 "cow_len %u\n", inode->i_ino,
3351 cpos, write_len, cow_start, cow_len);
3352
3353 BUG_ON(cow_len == 0);
3354
3355 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3356 if (!context) {
3357 ret = -ENOMEM;
3358 mlog_errno(ret);
3359 goto out;
3360 }
3361
3362 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
3363 1, &ref_tree, &ref_root_bh);
3364 if (ret) {
3365 mlog_errno(ret);
3366 goto out;
3367 }
3368
3369 context->inode = inode;
3370 context->cow_start = cow_start;
3371 context->cow_len = cow_len;
3372 context->ref_tree = ref_tree;
3373 context->ref_root_bh = ref_root_bh;
3374 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page;
3375 context->get_clusters = ocfs2_di_get_clusters;
3376
3377 ocfs2_init_dinode_extent_tree(&context->data_et,
3378 INODE_CACHE(inode), di_bh);
3379
3380 ret = ocfs2_replace_cow(context);
3381 if (ret)
3382 mlog_errno(ret);
3383
3384 /*
3385 * truncate the extent map here since no matter whether we meet with
3386 * any error during the action, we shouldn't trust cached extent map
3387 * any more.
3388 */
3389 ocfs2_extent_map_trunc(inode, cow_start);
3390
3391 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3392 brelse(ref_root_bh);
3393out:
3394 kfree(context);
3395 return ret;
3396}
3397
3398/*
3399 * CoW any and all clusters between cpos and cpos+write_len.
3400 * Don't CoW past max_cpos. If this returns successfully, all
3401 * clusters between cpos and cpos+write_len are safe to modify.
3402 */
3403int ocfs2_refcount_cow(struct inode *inode,
3404 struct buffer_head *di_bh,
3405 u32 cpos, u32 write_len, u32 max_cpos)
3406{
3407 int ret = 0;
3408 u32 p_cluster, num_clusters;
3409 unsigned int ext_flags;
3410
3411 while (write_len) {
3412 ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
3413 &num_clusters, &ext_flags);
3414 if (ret) {
3415 mlog_errno(ret);
3416 break;
3417 }
3418
3419 if (write_len < num_clusters)
3420 num_clusters = write_len;
3421
3422 if (ext_flags & OCFS2_EXT_REFCOUNTED) {
3423 ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos,
3424 num_clusters, max_cpos);
3425 if (ret) {
3426 mlog_errno(ret);
3427 break;
3428 }
3429 }
3430
3431 write_len -= num_clusters;
3432 cpos += num_clusters;
3433 }
3434
3435 return ret;
3436}
3437
3438static int ocfs2_xattr_value_get_clusters(struct ocfs2_cow_context *context,
3439 u32 v_cluster, u32 *p_cluster,
3440 u32 *num_clusters,
3441 unsigned int *extent_flags)
3442{
3443 struct inode *inode = context->inode;
3444 struct ocfs2_xattr_value_root *xv = context->cow_object;
3445
3446 return ocfs2_xattr_get_clusters(inode, v_cluster, p_cluster,
3447 num_clusters, &xv->xr_list,
3448 extent_flags);
3449}
3450
3451/*
3452 * Given a xattr value root, calculate the most meta/credits we need for
3453 * refcount tree change if we truncate it to 0.
3454 */
3455int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
3456 struct ocfs2_caching_info *ref_ci,
3457 struct buffer_head *ref_root_bh,
3458 struct ocfs2_xattr_value_root *xv,
3459 int *meta_add, int *credits)
3460{
3461 int ret = 0, index, ref_blocks = 0;
3462 u32 p_cluster, num_clusters;
3463 u32 cpos = 0, clusters = le32_to_cpu(xv->xr_clusters);
3464 struct ocfs2_refcount_block *rb;
3465 struct ocfs2_refcount_rec rec;
3466 struct buffer_head *ref_leaf_bh = NULL;
3467
3468 while (cpos < clusters) {
3469 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
3470 &num_clusters, &xv->xr_list,
3471 NULL);
3472 if (ret) {
3473 mlog_errno(ret);
3474 goto out;
3475 }
3476
3477 cpos += num_clusters;
3478
3479 while (num_clusters) {
3480 ret = ocfs2_get_refcount_rec(ref_ci, ref_root_bh,
3481 p_cluster, num_clusters,
3482 &rec, &index,
3483 &ref_leaf_bh);
3484 if (ret) {
3485 mlog_errno(ret);
3486 goto out;
3487 }
3488
3489 BUG_ON(!rec.r_refcount);
3490
3491 rb = (struct ocfs2_refcount_block *)ref_leaf_bh->b_data;
3492
3493 /*
3494 * We really don't know whether the other clusters is in
3495 * this refcount block or not, so just take the worst
3496 * case that all the clusters are in this block and each
3497 * one will split a refcount rec, so totally we need
3498 * clusters * 2 new refcount rec.
3499 */
3500 if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 >
3501 le16_to_cpu(rb->rf_records.rl_count))
3502 ref_blocks++;
3503
3504 *credits += 1;
3505 brelse(ref_leaf_bh);
3506 ref_leaf_bh = NULL;
3507
3508 if (num_clusters <= le32_to_cpu(rec.r_clusters))
3509 break;
3510 else
3511 num_clusters -= le32_to_cpu(rec.r_clusters);
3512 p_cluster += num_clusters;
3513 }
3514 }
3515
3516 *meta_add += ref_blocks;
3517 if (!ref_blocks)
3518 goto out;
3519
3520 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
3521 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
3522 *credits += OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
3523 else {
3524 struct ocfs2_extent_tree et;
3525
3526 ocfs2_init_refcount_extent_tree(&et, ref_ci, ref_root_bh);
3527 *credits += ocfs2_calc_extend_credits(inode->i_sb,
3528 et.et_root_el,
3529 ref_blocks);
3530 }
3531
3532out:
3533 brelse(ref_leaf_bh);
3534 return ret;
3535}
3536
3537/*
3538 * Do CoW for xattr.
3539 */
3540int ocfs2_refcount_cow_xattr(struct inode *inode,
3541 struct ocfs2_dinode *di,
3542 struct ocfs2_xattr_value_buf *vb,
3543 struct ocfs2_refcount_tree *ref_tree,
3544 struct buffer_head *ref_root_bh,
3545 u32 cpos, u32 write_len,
3546 struct ocfs2_post_refcount *post)
3547{
3548 int ret;
3549 struct ocfs2_xattr_value_root *xv = vb->vb_xv;
3550 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3551 struct ocfs2_cow_context *context = NULL;
3552 u32 cow_start, cow_len;
3553
3554 BUG_ON(!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
3555
3556 ret = ocfs2_refcount_cal_cow_clusters(inode, &xv->xr_list,
3557 cpos, write_len, UINT_MAX,
3558 &cow_start, &cow_len);
3559 if (ret) {
3560 mlog_errno(ret);
3561 goto out;
3562 }
3563
3564 BUG_ON(cow_len == 0);
3565
3566 context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS);
3567 if (!context) {
3568 ret = -ENOMEM;
3569 mlog_errno(ret);
3570 goto out;
3571 }
3572
3573 context->inode = inode;
3574 context->cow_start = cow_start;
3575 context->cow_len = cow_len;
3576 context->ref_tree = ref_tree;
3577 context->ref_root_bh = ref_root_bh;;
3578 context->cow_object = xv;
3579
3580 context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_jbd;
3581 /* We need the extra credits for duplicate_clusters by jbd. */
3582 context->extra_credits =
3583 ocfs2_clusters_to_blocks(inode->i_sb, 1) * cow_len;
3584 context->get_clusters = ocfs2_xattr_value_get_clusters;
3585 context->post_refcount = post;
3586
3587 ocfs2_init_xattr_value_extent_tree(&context->data_et,
3588 INODE_CACHE(inode), vb);
3589
3590 ret = ocfs2_replace_cow(context);
3591 if (ret)
3592 mlog_errno(ret);
3593
3594out:
3595 kfree(context);
3596 return ret;
3597}
3598
3599/*
3600 * Insert a new extent into refcount tree and mark a extent rec
3601 * as refcounted in the dinode tree.
3602 */
3603int ocfs2_add_refcount_flag(struct inode *inode,
3604 struct ocfs2_extent_tree *data_et,
3605 struct ocfs2_caching_info *ref_ci,
3606 struct buffer_head *ref_root_bh,
3607 u32 cpos, u32 p_cluster, u32 num_clusters,
3608 struct ocfs2_cached_dealloc_ctxt *dealloc,
3609 struct ocfs2_post_refcount *post)
3610{
3611 int ret;
3612 handle_t *handle;
3613 int credits = 1, ref_blocks = 0;
3614 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3615 struct ocfs2_alloc_context *meta_ac = NULL;
3616
3617 ret = ocfs2_calc_refcount_meta_credits(inode->i_sb,
3618 ref_ci, ref_root_bh,
3619 p_cluster, num_clusters,
3620 &ref_blocks, &credits);
3621 if (ret) {
3622 mlog_errno(ret);
3623 goto out;
3624 }
3625
3626 mlog(0, "reserve new metadata %d, credits = %d\n",
3627 ref_blocks, credits);
3628
3629 if (ref_blocks) {
3630 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
3631 ref_blocks, &meta_ac);
3632 if (ret) {
3633 mlog_errno(ret);
3634 goto out;
3635 }
3636 }
3637
3638 if (post)
3639 credits += post->credits;
3640
3641 handle = ocfs2_start_trans(osb, credits);
3642 if (IS_ERR(handle)) {
3643 ret = PTR_ERR(handle);
3644 mlog_errno(ret);
3645 goto out;
3646 }
3647
3648 ret = ocfs2_mark_extent_refcounted(inode, data_et, handle,
3649 cpos, num_clusters, p_cluster,
3650 meta_ac, dealloc);
3651 if (ret) {
3652 mlog_errno(ret);
3653 goto out_commit;
3654 }
3655
3656 ret = __ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
3657 p_cluster, num_clusters, 0,
3658 meta_ac, dealloc);
3659 if (ret) {
3660 mlog_errno(ret);
3661 goto out_commit;
3662 }
3663
3664 if (post && post->func) {
3665 ret = post->func(inode, handle, post->para);
3666 if (ret)
3667 mlog_errno(ret);
3668 }
3669
3670out_commit:
3671 ocfs2_commit_trans(osb, handle);
3672out:
3673 if (meta_ac)
3674 ocfs2_free_alloc_context(meta_ac);
3675 return ret;
3676}
3677
3678static int ocfs2_change_ctime(struct inode *inode,
3679 struct buffer_head *di_bh)
3680{
3681 int ret;
3682 handle_t *handle;
3683 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3684
3685 handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb),
3686 OCFS2_INODE_UPDATE_CREDITS);
3687 if (IS_ERR(handle)) {
3688 ret = PTR_ERR(handle);
3689 mlog_errno(ret);
3690 goto out;
3691 }
3692
3693 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
3694 OCFS2_JOURNAL_ACCESS_WRITE);
3695 if (ret) {
3696 mlog_errno(ret);
3697 goto out_commit;
3698 }
3699
3700 inode->i_ctime = CURRENT_TIME;
3701 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3702 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3703
3704 ocfs2_journal_dirty(handle, di_bh);
3705
3706out_commit:
3707 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
3708out:
3709 return ret;
3710}
3711
3712static int ocfs2_attach_refcount_tree(struct inode *inode,
3713 struct buffer_head *di_bh)
3714{
3715 int ret, data_changed = 0;
3716 struct buffer_head *ref_root_bh = NULL;
3717 struct ocfs2_inode_info *oi = OCFS2_I(inode);
3718 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
3719 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3720 struct ocfs2_refcount_tree *ref_tree;
3721 unsigned int ext_flags;
3722 loff_t size;
3723 u32 cpos, num_clusters, clusters, p_cluster;
3724 struct ocfs2_cached_dealloc_ctxt dealloc;
3725 struct ocfs2_extent_tree di_et;
3726
3727 ocfs2_init_dealloc_ctxt(&dealloc);
3728
3729 if (!(oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)) {
3730 ret = ocfs2_create_refcount_tree(inode, di_bh);
3731 if (ret) {
3732 mlog_errno(ret);
3733 goto out;
3734 }
3735 }
3736
3737 BUG_ON(!di->i_refcount_loc);
3738 ret = ocfs2_lock_refcount_tree(osb,
3739 le64_to_cpu(di->i_refcount_loc), 1,
3740 &ref_tree, &ref_root_bh);
3741 if (ret) {
3742 mlog_errno(ret);
3743 goto out;
3744 }
3745
3746 ocfs2_init_dinode_extent_tree(&di_et, INODE_CACHE(inode), di_bh);
3747
3748 size = i_size_read(inode);
3749 clusters = ocfs2_clusters_for_bytes(inode->i_sb, size);
3750
3751 cpos = 0;
3752 while (cpos < clusters) {
3753 ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
3754 &num_clusters, &ext_flags);
3755
3756 if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) {
3757 ret = ocfs2_add_refcount_flag(inode, &di_et,
3758 &ref_tree->rf_ci,
3759 ref_root_bh, cpos,
3760 p_cluster, num_clusters,
3761 &dealloc, NULL);
3762 if (ret) {
3763 mlog_errno(ret);
3764 goto unlock;
3765 }
3766
3767 data_changed = 1;
3768 }
3769 cpos += num_clusters;
3770 }
3771
3772 if (oi->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
3773 ret = ocfs2_xattr_attach_refcount_tree(inode, di_bh,
3774 &ref_tree->rf_ci,
3775 ref_root_bh,
3776 &dealloc);
3777 if (ret) {
3778 mlog_errno(ret);
3779 goto unlock;
3780 }
3781 }
3782
3783 if (data_changed) {
3784 ret = ocfs2_change_ctime(inode, di_bh);
3785 if (ret)
3786 mlog_errno(ret);
3787 }
3788
3789unlock:
3790 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3791 brelse(ref_root_bh);
3792
3793 if (!ret && ocfs2_dealloc_has_cluster(&dealloc)) {
3794 ocfs2_schedule_truncate_log_flush(osb, 1);
3795 ocfs2_run_deallocs(osb, &dealloc);
3796 }
3797out:
3798 /*
3799 * Empty the extent map so that we may get the right extent
3800 * record from the disk.
3801 */
3802 ocfs2_extent_map_trunc(inode, 0);
3803
3804 return ret;
3805}
3806
3807static int ocfs2_add_refcounted_extent(struct inode *inode,
3808 struct ocfs2_extent_tree *et,
3809 struct ocfs2_caching_info *ref_ci,
3810 struct buffer_head *ref_root_bh,
3811 u32 cpos, u32 p_cluster, u32 num_clusters,
3812 unsigned int ext_flags,
3813 struct ocfs2_cached_dealloc_ctxt *dealloc)
3814{
3815 int ret;
3816 handle_t *handle;
3817 int credits = 0;
3818 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3819 struct ocfs2_alloc_context *meta_ac = NULL;
3820
3821 ret = ocfs2_lock_refcount_allocators(inode->i_sb,
3822 p_cluster, num_clusters,
3823 et, ref_ci,
3824 ref_root_bh, &meta_ac,
3825 NULL, &credits);
3826 if (ret) {
3827 mlog_errno(ret);
3828 goto out;
3829 }
3830
3831 handle = ocfs2_start_trans(osb, credits);
3832 if (IS_ERR(handle)) {
3833 ret = PTR_ERR(handle);
3834 mlog_errno(ret);
3835 goto out;
3836 }
3837
3838 ret = ocfs2_insert_extent(handle, et, cpos,
3839 cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
3840 p_cluster)),
3841 num_clusters, ext_flags, meta_ac);
3842 if (ret) {
3843 mlog_errno(ret);
3844 goto out_commit;
3845 }
3846
3847 ret = ocfs2_increase_refcount(handle, ref_ci, ref_root_bh,
3848 p_cluster, num_clusters,
3849 meta_ac, dealloc);
3850 if (ret)
3851 mlog_errno(ret);
3852
3853out_commit:
3854 ocfs2_commit_trans(osb, handle);
3855out:
3856 if (meta_ac)
3857 ocfs2_free_alloc_context(meta_ac);
3858 return ret;
3859}
3860
3861static int ocfs2_duplicate_extent_list(struct inode *s_inode,
3862 struct inode *t_inode,
3863 struct buffer_head *t_bh,
3864 struct ocfs2_caching_info *ref_ci,
3865 struct buffer_head *ref_root_bh,
3866 struct ocfs2_cached_dealloc_ctxt *dealloc)
3867{
3868 int ret = 0;
3869 u32 p_cluster, num_clusters, clusters, cpos;
3870 loff_t size;
3871 unsigned int ext_flags;
3872 struct ocfs2_extent_tree et;
3873
3874 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(t_inode), t_bh);
3875
3876 size = i_size_read(s_inode);
3877 clusters = ocfs2_clusters_for_bytes(s_inode->i_sb, size);
3878
3879 cpos = 0;
3880 while (cpos < clusters) {
3881 ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
3882 &num_clusters, &ext_flags);
3883
3884 if (p_cluster) {
3885 ret = ocfs2_add_refcounted_extent(t_inode, &et,
3886 ref_ci, ref_root_bh,
3887 cpos, p_cluster,
3888 num_clusters,
3889 ext_flags,
3890 dealloc);
3891 if (ret) {
3892 mlog_errno(ret);
3893 goto out;
3894 }
3895 }
3896
3897 cpos += num_clusters;
3898 }
3899
3900out:
3901 return ret;
3902}
3903
3904/*
3905 * change the new file's attributes to the src.
3906 *
3907 * reflink creates a snapshot of a file, that means the attributes
3908 * must be identical except for three exceptions - nlink, ino, and ctime.
3909 */
3910static int ocfs2_complete_reflink(struct inode *s_inode,
3911 struct buffer_head *s_bh,
3912 struct inode *t_inode,
3913 struct buffer_head *t_bh,
3914 bool preserve)
3915{
3916 int ret;
3917 handle_t *handle;
3918 struct ocfs2_dinode *s_di = (struct ocfs2_dinode *)s_bh->b_data;
3919 struct ocfs2_dinode *di = (struct ocfs2_dinode *)t_bh->b_data;
3920 loff_t size = i_size_read(s_inode);
3921
3922 handle = ocfs2_start_trans(OCFS2_SB(t_inode->i_sb),
3923 OCFS2_INODE_UPDATE_CREDITS);
3924 if (IS_ERR(handle)) {
3925 ret = PTR_ERR(handle);
3926 mlog_errno(ret);
3927 return ret;
3928 }
3929
3930 ret = ocfs2_journal_access_di(handle, INODE_CACHE(t_inode), t_bh,
3931 OCFS2_JOURNAL_ACCESS_WRITE);
3932 if (ret) {
3933 mlog_errno(ret);
3934 goto out_commit;
3935 }
3936
3937 spin_lock(&OCFS2_I(t_inode)->ip_lock);
3938 OCFS2_I(t_inode)->ip_clusters = OCFS2_I(s_inode)->ip_clusters;
3939 OCFS2_I(t_inode)->ip_attr = OCFS2_I(s_inode)->ip_attr;
3940 OCFS2_I(t_inode)->ip_dyn_features = OCFS2_I(s_inode)->ip_dyn_features;
3941 spin_unlock(&OCFS2_I(t_inode)->ip_lock);
3942 i_size_write(t_inode, size);
3943
3944 di->i_xattr_inline_size = s_di->i_xattr_inline_size;
3945 di->i_clusters = s_di->i_clusters;
3946 di->i_size = s_di->i_size;
3947 di->i_dyn_features = s_di->i_dyn_features;
3948 di->i_attr = s_di->i_attr;
3949
3950 if (preserve) {
3951 di->i_uid = s_di->i_uid;
3952 di->i_gid = s_di->i_gid;
3953 di->i_mode = s_di->i_mode;
3954
3955 /*
3956 * update time.
3957 * we want mtime to appear identical to the source and
3958 * update ctime.
3959 */
3960 t_inode->i_ctime = CURRENT_TIME;
3961
3962 di->i_ctime = cpu_to_le64(t_inode->i_ctime.tv_sec);
3963 di->i_ctime_nsec = cpu_to_le32(t_inode->i_ctime.tv_nsec);
3964
3965 t_inode->i_mtime = s_inode->i_mtime;
3966 di->i_mtime = s_di->i_mtime;
3967 di->i_mtime_nsec = s_di->i_mtime_nsec;
3968 }
3969
3970 ocfs2_journal_dirty(handle, t_bh);
3971
3972out_commit:
3973 ocfs2_commit_trans(OCFS2_SB(t_inode->i_sb), handle);
3974 return ret;
3975}
3976
3977static int ocfs2_create_reflink_node(struct inode *s_inode,
3978 struct buffer_head *s_bh,
3979 struct inode *t_inode,
3980 struct buffer_head *t_bh,
3981 bool preserve)
3982{
3983 int ret;
3984 struct buffer_head *ref_root_bh = NULL;
3985 struct ocfs2_cached_dealloc_ctxt dealloc;
3986 struct ocfs2_super *osb = OCFS2_SB(s_inode->i_sb);
3987 struct ocfs2_refcount_block *rb;
3988 struct ocfs2_dinode *di = (struct ocfs2_dinode *)s_bh->b_data;
3989 struct ocfs2_refcount_tree *ref_tree;
3990
3991 ocfs2_init_dealloc_ctxt(&dealloc);
3992
3993 ret = ocfs2_set_refcount_tree(t_inode, t_bh,
3994 le64_to_cpu(di->i_refcount_loc));
3995 if (ret) {
3996 mlog_errno(ret);
3997 goto out;
3998 }
3999
4000 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
4001 1, &ref_tree, &ref_root_bh);
4002 if (ret) {
4003 mlog_errno(ret);
4004 goto out;
4005 }
4006 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
4007
4008 ret = ocfs2_duplicate_extent_list(s_inode, t_inode, t_bh,
4009 &ref_tree->rf_ci, ref_root_bh,
4010 &dealloc);
4011 if (ret) {
4012 mlog_errno(ret);
4013 goto out_unlock_refcount;
4014 }
4015
4016 ret = ocfs2_complete_reflink(s_inode, s_bh, t_inode, t_bh, preserve);
4017 if (ret)
4018 mlog_errno(ret);
4019
4020out_unlock_refcount:
4021 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
4022 brelse(ref_root_bh);
4023out:
4024 if (ocfs2_dealloc_has_cluster(&dealloc)) {
4025 ocfs2_schedule_truncate_log_flush(osb, 1);
4026 ocfs2_run_deallocs(osb, &dealloc);
4027 }
4028
4029 return ret;
4030}
4031
4032static int __ocfs2_reflink(struct dentry *old_dentry,
4033 struct buffer_head *old_bh,
4034 struct inode *new_inode,
4035 bool preserve)
4036{
4037 int ret;
4038 struct inode *inode = old_dentry->d_inode;
4039 struct buffer_head *new_bh = NULL;
4040
4041 ret = filemap_fdatawrite(inode->i_mapping);
4042 if (ret) {
4043 mlog_errno(ret);
4044 goto out;
4045 }
4046
4047 ret = ocfs2_attach_refcount_tree(inode, old_bh);
4048 if (ret) {
4049 mlog_errno(ret);
4050 goto out;
4051 }
4052
4053 mutex_lock(&new_inode->i_mutex);
4054 ret = ocfs2_inode_lock(new_inode, &new_bh, 1);
4055 if (ret) {
4056 mlog_errno(ret);
4057 goto out_unlock;
4058 }
4059
4060 ret = ocfs2_create_reflink_node(inode, old_bh,
4061 new_inode, new_bh, preserve);
4062 if (ret) {
4063 mlog_errno(ret);
4064 goto inode_unlock;
4065 }
4066
4067 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_XATTR_FL) {
4068 ret = ocfs2_reflink_xattrs(inode, old_bh,
4069 new_inode, new_bh,
4070 preserve);
4071 if (ret)
4072 mlog_errno(ret);
4073 }
4074inode_unlock:
4075 ocfs2_inode_unlock(new_inode, 1);
4076 brelse(new_bh);
4077out_unlock:
4078 mutex_unlock(&new_inode->i_mutex);
4079out:
4080 if (!ret) {
4081 ret = filemap_fdatawait(inode->i_mapping);
4082 if (ret)
4083 mlog_errno(ret);
4084 }
4085 return ret;
4086}
4087
4088static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
4089 struct dentry *new_dentry, bool preserve)
4090{
4091 int error;
4092 struct inode *inode = old_dentry->d_inode;
4093 struct buffer_head *old_bh = NULL;
4094 struct inode *new_orphan_inode = NULL;
4095
4096 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
4097 return -EOPNOTSUPP;
4098
4099 error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
4100 &new_orphan_inode);
4101 if (error) {
4102 mlog_errno(error);
4103 goto out;
4104 }
4105
4106 error = ocfs2_inode_lock(inode, &old_bh, 1);
4107 if (error) {
4108 mlog_errno(error);
4109 goto out;
4110 }
4111
4112 down_write(&OCFS2_I(inode)->ip_xattr_sem);
4113 down_write(&OCFS2_I(inode)->ip_alloc_sem);
4114 error = __ocfs2_reflink(old_dentry, old_bh,
4115 new_orphan_inode, preserve);
4116 up_write(&OCFS2_I(inode)->ip_alloc_sem);
4117 up_write(&OCFS2_I(inode)->ip_xattr_sem);
4118
4119 ocfs2_inode_unlock(inode, 1);
4120 brelse(old_bh);
4121
4122 if (error) {
4123 mlog_errno(error);
4124 goto out;
4125 }
4126
4127 /* If the security isn't preserved, we need to re-initialize them. */
4128 if (!preserve) {
4129 error = ocfs2_init_security_and_acl(dir, new_orphan_inode);
4130 if (error)
4131 mlog_errno(error);
4132 }
4133out:
4134 if (!error) {
4135 error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
4136 new_dentry);
4137 if (error)
4138 mlog_errno(error);
4139 }
4140
4141 if (new_orphan_inode) {
4142 /*
4143 * We need to open_unlock the inode no matter whether we
4144 * succeed or not, so that other nodes can delete it later.
4145 */
4146 ocfs2_open_unlock(new_orphan_inode);
4147 if (error)
4148 iput(new_orphan_inode);
4149 }
4150
4151 return error;
4152}
4153
4154/*
4155 * Below here are the bits used by OCFS2_IOC_REFLINK() to fake
4156 * sys_reflink(). This will go away when vfs_reflink() exists in
4157 * fs/namei.c.
4158 */
4159
4160/* copied from may_create in VFS. */
4161static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
4162{
4163 if (child->d_inode)
4164 return -EEXIST;
4165 if (IS_DEADDIR(dir))
4166 return -ENOENT;
4167 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
4168}
4169
4170/* copied from user_path_parent. */
4171static int ocfs2_user_path_parent(const char __user *path,
4172 struct nameidata *nd, char **name)
4173{
4174 char *s = getname(path);
4175 int error;
4176
4177 if (IS_ERR(s))
4178 return PTR_ERR(s);
4179
4180 error = path_lookup(s, LOOKUP_PARENT, nd);
4181 if (error)
4182 putname(s);
4183 else
4184 *name = s;
4185
4186 return error;
4187}
4188
4189/**
4190 * ocfs2_vfs_reflink - Create a reference-counted link
4191 *
4192 * @old_dentry: source dentry + inode
4193 * @dir: directory to create the target
4194 * @new_dentry: target dentry
4195 * @preserve: if true, preserve all file attributes
4196 */
4197int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
4198 struct dentry *new_dentry, bool preserve)
4199{
4200 struct inode *inode = old_dentry->d_inode;
4201 int error;
4202
4203 if (!inode)
4204 return -ENOENT;
4205
4206 error = ocfs2_may_create(dir, new_dentry);
4207 if (error)
4208 return error;
4209
4210 if (dir->i_sb != inode->i_sb)
4211 return -EXDEV;
4212
4213 /*
4214 * A reflink to an append-only or immutable file cannot be created.
4215 */
4216 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
4217 return -EPERM;
4218
4219 /* Only regular files can be reflinked. */
4220 if (!S_ISREG(inode->i_mode))
4221 return -EPERM;
4222
4223 /*
4224 * If the caller wants to preserve ownership, they require the
4225 * rights to do so.
4226 */
4227 if (preserve) {
4228 if ((current_fsuid() != inode->i_uid) && !capable(CAP_CHOWN))
4229 return -EPERM;
4230 if (!in_group_p(inode->i_gid) && !capable(CAP_CHOWN))
4231 return -EPERM;
4232 }
4233
4234 /*
4235 * If the caller is modifying any aspect of the attributes, they
4236 * are not creating a snapshot. They need read permission on the
4237 * file.
4238 */
4239 if (!preserve) {
4240 error = inode_permission(inode, MAY_READ);
4241 if (error)
4242 return error;
4243 }
4244
4245 mutex_lock(&inode->i_mutex);
4246 vfs_dq_init(dir);
4247 error = ocfs2_reflink(old_dentry, dir, new_dentry, preserve);
4248 mutex_unlock(&inode->i_mutex);
4249 if (!error)
4250 fsnotify_create(dir, new_dentry);
4251 return error;
4252}
4253/*
4254 * Most codes are copied from sys_linkat.
4255 */
4256int ocfs2_reflink_ioctl(struct inode *inode,
4257 const char __user *oldname,
4258 const char __user *newname,
4259 bool preserve)
4260{
4261 struct dentry *new_dentry;
4262 struct nameidata nd;
4263 struct path old_path;
4264 int error;
4265 char *to = NULL;
4266
4267 if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
4268 return -EOPNOTSUPP;
4269
4270 error = user_path_at(AT_FDCWD, oldname, 0, &old_path);
4271 if (error) {
4272 mlog_errno(error);
4273 return error;
4274 }
4275
4276 error = ocfs2_user_path_parent(newname, &nd, &to);
4277 if (error) {
4278 mlog_errno(error);
4279 goto out;
4280 }
4281
4282 error = -EXDEV;
4283 if (old_path.mnt != nd.path.mnt)
4284 goto out_release;
4285 new_dentry = lookup_create(&nd, 0);
4286 error = PTR_ERR(new_dentry);
4287 if (IS_ERR(new_dentry)) {
4288 mlog_errno(error);
4289 goto out_unlock;
4290 }
4291
4292 error = mnt_want_write(nd.path.mnt);
4293 if (error) {
4294 mlog_errno(error);
4295 goto out_dput;
4296 }
4297
4298 error = ocfs2_vfs_reflink(old_path.dentry,
4299 nd.path.dentry->d_inode,
4300 new_dentry, preserve);
4301 mnt_drop_write(nd.path.mnt);
4302out_dput:
4303 dput(new_dentry);
4304out_unlock:
4305 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
4306out_release:
4307 path_put(&nd.path);
4308 putname(to);
4309out:
4310 path_put(&old_path);
4311
4312 return error;
4313}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
new file mode 100644
index 000000000000..c1d19b1d3ecc
--- /dev/null
+++ b/fs/ocfs2/refcounttree.h
@@ -0,0 +1,106 @@
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * refcounttree.h
5 *
6 * Copyright (C) 2009 Oracle. All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public
10 * License version 2 as published by the Free Software Foundation.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 */
17#ifndef OCFS2_REFCOUNTTREE_H
18#define OCFS2_REFCOUNTTREE_H
19
/*
 * State kept for one refcount tree.  ocfs2_lock_refcount_tree() returns a
 * pointer to this structure through its @tree argument, and the embedded
 * rf_ci is what gets passed as the tree's struct ocfs2_caching_info.
 */
20struct ocfs2_refcount_tree {
21 struct rb_node rf_node;
22 u64 rf_blkno;
23 u32 rf_generation;
24 struct rw_semaphore rf_sem;
25 struct ocfs2_lock_res rf_lockres;
26 struct kref rf_getcnt;
27 int rf_removed;
28
29 /* the following 4 fields are used by caching_info. */
30 struct ocfs2_caching_info rf_ci;
31 spinlock_t rf_lock;
32 struct mutex rf_io_mutex;
33 struct super_block *rf_sb;
34};
35
36void ocfs2_purge_refcount_trees(struct ocfs2_super *osb);
37int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw,
38 struct ocfs2_refcount_tree **tree,
39 struct buffer_head **ref_bh);
40void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
41 struct ocfs2_refcount_tree *tree,
42 int rw);
43
44int ocfs2_decrease_refcount(struct inode *inode,
45 handle_t *handle, u32 cpos, u32 len,
46 struct ocfs2_alloc_context *meta_ac,
47 struct ocfs2_cached_dealloc_ctxt *dealloc,
48 int delete);
49int ocfs2_prepare_refcount_change_for_del(struct inode *inode,
50 struct buffer_head *di_bh,
51 u64 phys_blkno,
52 u32 clusters,
53 int *credits,
54 struct ocfs2_alloc_context **meta_ac);
55int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh,
56 u32 cpos, u32 write_len, u32 max_cpos);
57
58typedef int (ocfs2_post_refcount_func)(struct inode *inode,
59 handle_t *handle,
60 void *para);
61/*
62 * Some refcount callers need to do more work after we modify the data b-tree
63 * during a refcount operation (including CoW and adding the refcount flag),
64 * and that work has to complete in the same transaction. Such a caller gives
65 * us this structure so that we can do it within our transaction.
66 *
67 */
68struct ocfs2_post_refcount {
69 int credits; /* credits it need for journal. */
70 ocfs2_post_refcount_func *func; /* real function. */
71 void *para;
72};
73
74int ocfs2_refcounted_xattr_delete_need(struct inode *inode,
75 struct ocfs2_caching_info *ref_ci,
76 struct buffer_head *ref_root_bh,
77 struct ocfs2_xattr_value_root *xv,
78 int *meta_add, int *credits);
79int ocfs2_refcount_cow_xattr(struct inode *inode,
80 struct ocfs2_dinode *di,
81 struct ocfs2_xattr_value_buf *vb,
82 struct ocfs2_refcount_tree *ref_tree,
83 struct buffer_head *ref_root_bh,
84 u32 cpos, u32 write_len,
85 struct ocfs2_post_refcount *post);
86int ocfs2_add_refcount_flag(struct inode *inode,
87 struct ocfs2_extent_tree *data_et,
88 struct ocfs2_caching_info *ref_ci,
89 struct buffer_head *ref_root_bh,
90 u32 cpos, u32 p_cluster, u32 num_clusters,
91 struct ocfs2_cached_dealloc_ctxt *dealloc,
92 struct ocfs2_post_refcount *post);
93int ocfs2_remove_refcount_tree(struct inode *inode, struct buffer_head *di_bh);
94int ocfs2_try_remove_refcount_tree(struct inode *inode,
95 struct buffer_head *di_bh);
96int ocfs2_increase_refcount(handle_t *handle,
97 struct ocfs2_caching_info *ci,
98 struct buffer_head *ref_root_bh,
99 u64 cpos, u32 len,
100 struct ocfs2_alloc_context *meta_ac,
101 struct ocfs2_cached_dealloc_ctxt *dealloc);
102int ocfs2_reflink_ioctl(struct inode *inode,
103 const char __user *oldname,
104 const char __user *newname,
105 bool preserve);
106#endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 424adaa5f900..3c3d673a4d20 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -106,8 +106,8 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
106 mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n", 106 mlog_entry("(new_clusters=%d, first_new_cluster = %u)\n",
107 new_clusters, first_new_cluster); 107 new_clusters, first_new_cluster);
108 108
109 ret = ocfs2_journal_access_gd(handle, bm_inode, group_bh, 109 ret = ocfs2_journal_access_gd(handle, INODE_CACHE(bm_inode),
110 OCFS2_JOURNAL_ACCESS_WRITE); 110 group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
111 if (ret < 0) { 111 if (ret < 0) {
112 mlog_errno(ret); 112 mlog_errno(ret);
113 goto out; 113 goto out;
@@ -141,7 +141,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle,
141 } 141 }
142 142
143 /* update the inode accordingly. */ 143 /* update the inode accordingly. */
144 ret = ocfs2_journal_access_di(handle, bm_inode, bm_bh, 144 ret = ocfs2_journal_access_di(handle, INODE_CACHE(bm_inode), bm_bh,
145 OCFS2_JOURNAL_ACCESS_WRITE); 145 OCFS2_JOURNAL_ACCESS_WRITE);
146 if (ret < 0) { 146 if (ret < 0) {
147 mlog_errno(ret); 147 mlog_errno(ret);
@@ -514,7 +514,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
514 goto out_unlock; 514 goto out_unlock;
515 } 515 }
516 516
517 ocfs2_set_new_buffer_uptodate(inode, group_bh); 517 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), group_bh);
518 518
519 ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh); 519 ret = ocfs2_verify_group_and_input(main_bm_inode, fe, input, group_bh);
520 if (ret) { 520 if (ret) {
@@ -536,8 +536,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
536 cl = &fe->id2.i_chain; 536 cl = &fe->id2.i_chain;
537 cr = &cl->cl_recs[input->chain]; 537 cr = &cl->cl_recs[input->chain];
538 538
539 ret = ocfs2_journal_access_gd(handle, main_bm_inode, group_bh, 539 ret = ocfs2_journal_access_gd(handle, INODE_CACHE(main_bm_inode),
540 OCFS2_JOURNAL_ACCESS_WRITE); 540 group_bh, OCFS2_JOURNAL_ACCESS_WRITE);
541 if (ret < 0) { 541 if (ret < 0) {
542 mlog_errno(ret); 542 mlog_errno(ret);
543 goto out_commit; 543 goto out_commit;
@@ -552,8 +552,8 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input)
552 goto out_commit; 552 goto out_commit;
553 } 553 }
554 554
555 ret = ocfs2_journal_access_di(handle, main_bm_inode, main_bm_bh, 555 ret = ocfs2_journal_access_di(handle, INODE_CACHE(main_bm_inode),
556 OCFS2_JOURNAL_ACCESS_WRITE); 556 main_bm_bh, OCFS2_JOURNAL_ACCESS_WRITE);
557 if (ret < 0) { 557 if (ret < 0) {
558 mlog_errno(ret); 558 mlog_errno(ret);
559 goto out_commit; 559 goto out_commit;
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c
index 40661e7824e9..bfbd7e9e949f 100644
--- a/fs/ocfs2/slot_map.c
+++ b/fs/ocfs2/slot_map.c
@@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb)
150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If 150 * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If
151 * this is not true, the read of -1 (UINT64_MAX) will fail. 151 * this is not true, the read of -1 (UINT64_MAX) will fail.
152 */ 152 */
153 ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, 153 ret = ocfs2_read_blocks(INODE_CACHE(si->si_inode), -1, si->si_blocks,
154 OCFS2_BH_IGNORE_CACHE, NULL); 154 si->si_bh, OCFS2_BH_IGNORE_CACHE, NULL);
155 if (ret == 0) { 155 if (ret == 0) {
156 spin_lock(&osb->osb_lock); 156 spin_lock(&osb->osb_lock);
157 ocfs2_update_slot_info(si); 157 ocfs2_update_slot_info(si);
@@ -213,7 +213,7 @@ static int ocfs2_update_disk_slot(struct ocfs2_super *osb,
213 ocfs2_update_disk_slot_old(si, slot_num, &bh); 213 ocfs2_update_disk_slot_old(si, slot_num, &bh);
214 spin_unlock(&osb->osb_lock); 214 spin_unlock(&osb->osb_lock);
215 215
216 status = ocfs2_write_block(osb, bh, si->si_inode); 216 status = ocfs2_write_block(osb, bh, INODE_CACHE(si->si_inode));
217 if (status < 0) 217 if (status < 0)
218 mlog_errno(status); 218 mlog_errno(status);
219 219
@@ -404,8 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb,
404 (unsigned long long)blkno); 404 (unsigned long long)blkno);
405 405
406 bh = NULL; /* Acquire a fresh bh */ 406 bh = NULL; /* Acquire a fresh bh */
407 status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, 407 status = ocfs2_read_blocks(INODE_CACHE(si->si_inode), blkno,
408 OCFS2_BH_IGNORE_CACHE, NULL); 408 1, &bh, OCFS2_BH_IGNORE_CACHE, NULL);
409 if (status < 0) { 409 if (status < 0) {
410 mlog_errno(status); 410 mlog_errno(status);
411 goto bail; 411 goto bail;
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c
index 73a16d4666dc..c30b644d9572 100644
--- a/fs/ocfs2/suballoc.c
+++ b/fs/ocfs2/suballoc.c
@@ -310,7 +310,7 @@ int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
310 int rc; 310 int rc;
311 struct buffer_head *tmp = *bh; 311 struct buffer_head *tmp = *bh;
312 312
313 rc = ocfs2_read_block(inode, gd_blkno, &tmp, 313 rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
314 ocfs2_validate_group_descriptor); 314 ocfs2_validate_group_descriptor);
315 if (rc) 315 if (rc)
316 goto out; 316 goto out;
@@ -352,7 +352,7 @@ static int ocfs2_block_group_fill(handle_t *handle,
352 } 352 }
353 353
354 status = ocfs2_journal_access_gd(handle, 354 status = ocfs2_journal_access_gd(handle,
355 alloc_inode, 355 INODE_CACHE(alloc_inode),
356 bg_bh, 356 bg_bh,
357 OCFS2_JOURNAL_ACCESS_CREATE); 357 OCFS2_JOURNAL_ACCESS_CREATE);
358 if (status < 0) { 358 if (status < 0) {
@@ -476,7 +476,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
476 mlog_errno(status); 476 mlog_errno(status);
477 goto bail; 477 goto bail;
478 } 478 }
479 ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh); 479 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
480 480
481 status = ocfs2_block_group_fill(handle, 481 status = ocfs2_block_group_fill(handle,
482 alloc_inode, 482 alloc_inode,
@@ -491,7 +491,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
491 491
492 bg = (struct ocfs2_group_desc *) bg_bh->b_data; 492 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
493 493
494 status = ocfs2_journal_access_di(handle, alloc_inode, 494 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
495 bh, OCFS2_JOURNAL_ACCESS_WRITE); 495 bh, OCFS2_JOURNAL_ACCESS_WRITE);
496 if (status < 0) { 496 if (status < 0) {
497 mlog_errno(status); 497 mlog_errno(status);
@@ -1033,7 +1033,7 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle,
1033 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1033 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1034 1034
1035 status = ocfs2_journal_access_gd(handle, 1035 status = ocfs2_journal_access_gd(handle,
1036 alloc_inode, 1036 INODE_CACHE(alloc_inode),
1037 group_bh, 1037 group_bh,
1038 journal_type); 1038 journal_type);
1039 if (status < 0) { 1039 if (status < 0) {
@@ -1106,7 +1106,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1106 bg_ptr = le64_to_cpu(bg->bg_next_group); 1106 bg_ptr = le64_to_cpu(bg->bg_next_group);
1107 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group); 1107 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
1108 1108
1109 status = ocfs2_journal_access_gd(handle, alloc_inode, prev_bg_bh, 1109 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1110 prev_bg_bh,
1110 OCFS2_JOURNAL_ACCESS_WRITE); 1111 OCFS2_JOURNAL_ACCESS_WRITE);
1111 if (status < 0) { 1112 if (status < 0) {
1112 mlog_errno(status); 1113 mlog_errno(status);
@@ -1121,8 +1122,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1121 goto out_rollback; 1122 goto out_rollback;
1122 } 1123 }
1123 1124
1124 status = ocfs2_journal_access_gd(handle, alloc_inode, bg_bh, 1125 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1125 OCFS2_JOURNAL_ACCESS_WRITE); 1126 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
1126 if (status < 0) { 1127 if (status < 0) {
1127 mlog_errno(status); 1128 mlog_errno(status);
1128 goto out_rollback; 1129 goto out_rollback;
@@ -1136,8 +1137,8 @@ static int ocfs2_relink_block_group(handle_t *handle,
1136 goto out_rollback; 1137 goto out_rollback;
1137 } 1138 }
1138 1139
1139 status = ocfs2_journal_access_di(handle, alloc_inode, fe_bh, 1140 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1140 OCFS2_JOURNAL_ACCESS_WRITE); 1141 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
1141 if (status < 0) { 1142 if (status < 0) {
1142 mlog_errno(status); 1143 mlog_errno(status);
1143 goto out_rollback; 1144 goto out_rollback;
@@ -1288,7 +1289,7 @@ static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1288 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; 1289 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1289 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain; 1290 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1290 1291
1291 ret = ocfs2_journal_access_di(handle, inode, di_bh, 1292 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1292 OCFS2_JOURNAL_ACCESS_WRITE); 1293 OCFS2_JOURNAL_ACCESS_WRITE);
1293 if (ret < 0) { 1294 if (ret < 0) {
1294 mlog_errno(ret); 1295 mlog_errno(ret);
@@ -1461,7 +1462,7 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1461 /* Ok, claim our bits now: set the info on dinode, chainlist 1462 /* Ok, claim our bits now: set the info on dinode, chainlist
1462 * and then the group */ 1463 * and then the group */
1463 status = ocfs2_journal_access_di(handle, 1464 status = ocfs2_journal_access_di(handle,
1464 alloc_inode, 1465 INODE_CACHE(alloc_inode),
1465 ac->ac_bh, 1466 ac->ac_bh,
1466 OCFS2_JOURNAL_ACCESS_WRITE); 1467 OCFS2_JOURNAL_ACCESS_WRITE);
1467 if (status < 0) { 1468 if (status < 0) {
@@ -1907,8 +1908,8 @@ static inline int ocfs2_block_group_clear_bits(handle_t *handle,
1907 if (ocfs2_is_cluster_bitmap(alloc_inode)) 1908 if (ocfs2_is_cluster_bitmap(alloc_inode))
1908 journal_type = OCFS2_JOURNAL_ACCESS_UNDO; 1909 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1909 1910
1910 status = ocfs2_journal_access_gd(handle, alloc_inode, group_bh, 1911 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1911 journal_type); 1912 group_bh, journal_type);
1912 if (status < 0) { 1913 if (status < 0) {
1913 mlog_errno(status); 1914 mlog_errno(status);
1914 goto bail; 1915 goto bail;
@@ -1993,8 +1994,8 @@ int ocfs2_free_suballoc_bits(handle_t *handle,
1993 goto bail; 1994 goto bail;
1994 } 1995 }
1995 1996
1996 status = ocfs2_journal_access_di(handle, alloc_inode, alloc_bh, 1997 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1997 OCFS2_JOURNAL_ACCESS_WRITE); 1998 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
1998 if (status < 0) { 1999 if (status < 0) {
1999 mlog_errno(status); 2000 mlog_errno(status);
2000 goto bail; 2001 goto bail;
@@ -2151,7 +2152,7 @@ int ocfs2_lock_allocators(struct inode *inode,
2151 2152
2152 BUG_ON(clusters_to_add != 0 && data_ac == NULL); 2153 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
2153 2154
2154 num_free_extents = ocfs2_num_free_extents(osb, inode, et); 2155 num_free_extents = ocfs2_num_free_extents(osb, et);
2155 if (num_free_extents < 0) { 2156 if (num_free_extents < 0) {
2156 ret = num_free_extents; 2157 ret = num_free_extents;
2157 mlog_errno(ret); 2158 mlog_errno(ret);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index faca4720aa47..24feb449a1dc 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -69,6 +69,7 @@
69#include "ver.h" 69#include "ver.h"
70#include "xattr.h" 70#include "xattr.h"
71#include "quota.h" 71#include "quota.h"
72#include "refcounttree.h"
72 73
73#include "buffer_head_io.h" 74#include "buffer_head_io.h"
74 75
@@ -1668,8 +1669,6 @@ static void ocfs2_inode_init_once(void *data)
1668 spin_lock_init(&oi->ip_lock); 1669 spin_lock_init(&oi->ip_lock);
1669 ocfs2_extent_map_init(&oi->vfs_inode); 1670 ocfs2_extent_map_init(&oi->vfs_inode);
1670 INIT_LIST_HEAD(&oi->ip_io_markers); 1671 INIT_LIST_HEAD(&oi->ip_io_markers);
1671 oi->ip_created_trans = 0;
1672 oi->ip_last_trans = 0;
1673 oi->ip_dir_start_lookup = 0; 1672 oi->ip_dir_start_lookup = 0;
1674 1673
1675 init_rwsem(&oi->ip_alloc_sem); 1674 init_rwsem(&oi->ip_alloc_sem);
@@ -1683,7 +1682,8 @@ static void ocfs2_inode_init_once(void *data)
1683 ocfs2_lock_res_init_once(&oi->ip_inode_lockres); 1682 ocfs2_lock_res_init_once(&oi->ip_inode_lockres);
1684 ocfs2_lock_res_init_once(&oi->ip_open_lockres); 1683 ocfs2_lock_res_init_once(&oi->ip_open_lockres);
1685 1684
1686 ocfs2_metadata_cache_init(&oi->vfs_inode); 1685 ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode),
1686 &ocfs2_inode_caching_ops);
1687 1687
1688 inode_init_once(&oi->vfs_inode); 1688 inode_init_once(&oi->vfs_inode);
1689} 1689}
@@ -1859,6 +1859,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1859 1859
1860 ocfs2_sync_blockdev(sb); 1860 ocfs2_sync_blockdev(sb);
1861 1861
1862 ocfs2_purge_refcount_trees(osb);
1863
1862 /* No cluster connection means we've failed during mount, so skip 1864 /* No cluster connection means we've failed during mount, so skip
1863 * all the steps which depended on that to complete. */ 1865 * all the steps which depended on that to complete. */
1864 if (osb->cconn) { 1866 if (osb->cconn) {
@@ -2065,6 +2067,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
2065 goto bail; 2067 goto bail;
2066 } 2068 }
2067 2069
2070 osb->osb_rf_lock_tree = RB_ROOT;
2071
2068 osb->s_feature_compat = 2072 osb->s_feature_compat =
2069 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); 2073 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
2070 osb->s_feature_ro_compat = 2074 osb->s_feature_ro_compat =
@@ -2490,7 +2494,8 @@ void __ocfs2_abort(struct super_block* sb,
2490 /* Force a panic(). This stinks, but it's better than letting 2494 /* Force a panic(). This stinks, but it's better than letting
2491 * things continue without having a proper hard readonly 2495 * things continue without having a proper hard readonly
2492 * here. */ 2496 * here. */
2493 OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC; 2497 if (!ocfs2_mount_local(OCFS2_SB(sb)))
2498 OCFS2_SB(sb)->s_mount_opt |= OCFS2_MOUNT_ERRORS_PANIC;
2494 ocfs2_handle_error(sb); 2499 ocfs2_handle_error(sb);
2495} 2500}
2496 2501
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index 187b99ff0368..b6284f235d2f 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -75,15 +75,77 @@ struct ocfs2_meta_cache_item {
75 75
76static struct kmem_cache *ocfs2_uptodate_cachep = NULL; 76static struct kmem_cache *ocfs2_uptodate_cachep = NULL;
77 77
78void ocfs2_metadata_cache_init(struct inode *inode) 78u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci)
79{ 79{
80 struct ocfs2_inode_info *oi = OCFS2_I(inode); 80 BUG_ON(!ci || !ci->ci_ops);
81 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
82 81
83 oi->ip_flags |= OCFS2_INODE_CACHE_INLINE; 82 return ci->ci_ops->co_owner(ci);
83}
84
85struct super_block *ocfs2_metadata_cache_get_super(struct ocfs2_caching_info *ci)
86{
87 BUG_ON(!ci || !ci->ci_ops);
88
89 return ci->ci_ops->co_get_super(ci);
90}
91
92static void ocfs2_metadata_cache_lock(struct ocfs2_caching_info *ci)
93{
94 BUG_ON(!ci || !ci->ci_ops);
95
96 ci->ci_ops->co_cache_lock(ci);
97}
98
99static void ocfs2_metadata_cache_unlock(struct ocfs2_caching_info *ci)
100{
101 BUG_ON(!ci || !ci->ci_ops);
102
103 ci->ci_ops->co_cache_unlock(ci);
104}
105
106void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci)
107{
108 BUG_ON(!ci || !ci->ci_ops);
109
110 ci->ci_ops->co_io_lock(ci);
111}
112
113void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci)
114{
115 BUG_ON(!ci || !ci->ci_ops);
116
117 ci->ci_ops->co_io_unlock(ci);
118}
119
120
121static void ocfs2_metadata_cache_reset(struct ocfs2_caching_info *ci,
122 int clear)
123{
124 ci->ci_flags |= OCFS2_CACHE_FL_INLINE;
84 ci->ci_num_cached = 0; 125 ci->ci_num_cached = 0;
126
127 if (clear) {
128 ci->ci_created_trans = 0;
129 ci->ci_last_trans = 0;
130 }
131}
132
133void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
134 const struct ocfs2_caching_operations *ops)
135{
136 BUG_ON(!ops);
137
138 ci->ci_ops = ops;
139 ocfs2_metadata_cache_reset(ci, 1);
85} 140}
86 141
142void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci)
143{
144 ocfs2_metadata_cache_purge(ci);
145 ocfs2_metadata_cache_reset(ci, 1);
146}
147
148
87/* No lock taken here as 'root' is not expected to be visible to other 149/* No lock taken here as 'root' is not expected to be visible to other
88 * processes. */ 150 * processes. */
89static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root) 151static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
@@ -112,19 +174,20 @@ static unsigned int ocfs2_purge_copied_metadata_tree(struct rb_root *root)
112 * This function is a few more lines longer than necessary due to some 174 * This function is a few more lines longer than necessary due to some
113 * accounting done here, but I think it's worth tracking down those 175 * accounting done here, but I think it's worth tracking down those
114 * bugs sooner -- Mark */ 176 * bugs sooner -- Mark */
115void ocfs2_metadata_cache_purge(struct inode *inode) 177void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci)
116{ 178{
117 struct ocfs2_inode_info *oi = OCFS2_I(inode);
118 unsigned int tree, to_purge, purged; 179 unsigned int tree, to_purge, purged;
119 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
120 struct rb_root root = RB_ROOT; 180 struct rb_root root = RB_ROOT;
121 181
122 spin_lock(&oi->ip_lock); 182 BUG_ON(!ci || !ci->ci_ops);
123 tree = !(oi->ip_flags & OCFS2_INODE_CACHE_INLINE); 183
184 ocfs2_metadata_cache_lock(ci);
185 tree = !(ci->ci_flags & OCFS2_CACHE_FL_INLINE);
124 to_purge = ci->ci_num_cached; 186 to_purge = ci->ci_num_cached;
125 187
126 mlog(0, "Purge %u %s items from Inode %llu\n", to_purge, 188 mlog(0, "Purge %u %s items from Owner %llu\n", to_purge,
127 tree ? "array" : "tree", (unsigned long long)oi->ip_blkno); 189 tree ? "array" : "tree",
190 (unsigned long long)ocfs2_metadata_cache_owner(ci));
128 191
129 /* If we're a tree, save off the root so that we can safely 192 /* If we're a tree, save off the root so that we can safely
130 * initialize the cache. We do the work to free tree members 193 * initialize the cache. We do the work to free tree members
@@ -132,16 +195,17 @@ void ocfs2_metadata_cache_purge(struct inode *inode)
132 if (tree) 195 if (tree)
133 root = ci->ci_cache.ci_tree; 196 root = ci->ci_cache.ci_tree;
134 197
135 ocfs2_metadata_cache_init(inode); 198 ocfs2_metadata_cache_reset(ci, 0);
136 spin_unlock(&oi->ip_lock); 199 ocfs2_metadata_cache_unlock(ci);
137 200
138 purged = ocfs2_purge_copied_metadata_tree(&root); 201 purged = ocfs2_purge_copied_metadata_tree(&root);
139 /* If possible, track the number wiped so that we can more 202 /* If possible, track the number wiped so that we can more
140 * easily detect counting errors. Unfortunately, this is only 203 * easily detect counting errors. Unfortunately, this is only
141 * meaningful for trees. */ 204 * meaningful for trees. */
142 if (tree && purged != to_purge) 205 if (tree && purged != to_purge)
143 mlog(ML_ERROR, "Inode %llu, count = %u, purged = %u\n", 206 mlog(ML_ERROR, "Owner %llu, count = %u, purged = %u\n",
144 (unsigned long long)oi->ip_blkno, to_purge, purged); 207 (unsigned long long)ocfs2_metadata_cache_owner(ci),
208 to_purge, purged);
145} 209}
146 210
147/* Returns the index in the cache array, -1 if not found. 211/* Returns the index in the cache array, -1 if not found.
@@ -182,27 +246,25 @@ ocfs2_search_cache_tree(struct ocfs2_caching_info *ci,
182 return NULL; 246 return NULL;
183} 247}
184 248
185static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi, 249static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
186 struct buffer_head *bh) 250 struct buffer_head *bh)
187{ 251{
188 int index = -1; 252 int index = -1;
189 struct ocfs2_meta_cache_item *item = NULL; 253 struct ocfs2_meta_cache_item *item = NULL;
190 254
191 spin_lock(&oi->ip_lock); 255 ocfs2_metadata_cache_lock(ci);
192 256
193 mlog(0, "Inode %llu, query block %llu (inline = %u)\n", 257 mlog(0, "Owner %llu, query block %llu (inline = %u)\n",
194 (unsigned long long)oi->ip_blkno, 258 (unsigned long long)ocfs2_metadata_cache_owner(ci),
195 (unsigned long long) bh->b_blocknr, 259 (unsigned long long) bh->b_blocknr,
196 !!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE)); 260 !!(ci->ci_flags & OCFS2_CACHE_FL_INLINE));
197 261
198 if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) 262 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE)
199 index = ocfs2_search_cache_array(&oi->ip_metadata_cache, 263 index = ocfs2_search_cache_array(ci, bh->b_blocknr);
200 bh->b_blocknr);
201 else 264 else
202 item = ocfs2_search_cache_tree(&oi->ip_metadata_cache, 265 item = ocfs2_search_cache_tree(ci, bh->b_blocknr);
203 bh->b_blocknr);
204 266
205 spin_unlock(&oi->ip_lock); 267 ocfs2_metadata_cache_unlock(ci);
206 268
207 mlog(0, "index = %d, item = %p\n", index, item); 269 mlog(0, "index = %d, item = %p\n", index, item);
208 270
@@ -214,7 +276,7 @@ static int ocfs2_buffer_cached(struct ocfs2_inode_info *oi,
214 * 276 *
215 * This can be called under lock_buffer() 277 * This can be called under lock_buffer()
216 */ 278 */
217int ocfs2_buffer_uptodate(struct inode *inode, 279int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
218 struct buffer_head *bh) 280 struct buffer_head *bh)
219{ 281{
220 /* Doesn't matter if the bh is in our cache or not -- if it's 282 /* Doesn't matter if the bh is in our cache or not -- if it's
@@ -230,24 +292,24 @@ int ocfs2_buffer_uptodate(struct inode *inode,
230 292
231 /* Ok, locally the buffer is marked as up to date, now search 293 /* Ok, locally the buffer is marked as up to date, now search
232 * our cache to see if we can trust that. */ 294 * our cache to see if we can trust that. */
233 return ocfs2_buffer_cached(OCFS2_I(inode), bh); 295 return ocfs2_buffer_cached(ci, bh);
234} 296}
235 297
236/* 298/*
237 * Determine whether a buffer is currently out on a read-ahead request. 299 * Determine whether a buffer is currently out on a read-ahead request.
238 * ip_io_sem should be held to serialize submitters with the logic here. 300 * ci_io_sem should be held to serialize submitters with the logic here.
239 */ 301 */
240int ocfs2_buffer_read_ahead(struct inode *inode, 302int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
241 struct buffer_head *bh) 303 struct buffer_head *bh)
242{ 304{
243 return buffer_locked(bh) && ocfs2_buffer_cached(OCFS2_I(inode), bh); 305 return buffer_locked(bh) && ocfs2_buffer_cached(ci, bh);
244} 306}
245 307
246/* Requires ip_lock */ 308/* Requires ip_lock */
247static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci, 309static void ocfs2_append_cache_array(struct ocfs2_caching_info *ci,
248 sector_t block) 310 sector_t block)
249{ 311{
250 BUG_ON(ci->ci_num_cached >= OCFS2_INODE_MAX_CACHE_ARRAY); 312 BUG_ON(ci->ci_num_cached >= OCFS2_CACHE_INFO_MAX_ARRAY);
251 313
252 mlog(0, "block %llu takes position %u\n", (unsigned long long) block, 314 mlog(0, "block %llu takes position %u\n", (unsigned long long) block,
253 ci->ci_num_cached); 315 ci->ci_num_cached);
@@ -292,66 +354,64 @@ static void __ocfs2_insert_cache_tree(struct ocfs2_caching_info *ci,
292 ci->ci_num_cached++; 354 ci->ci_num_cached++;
293} 355}
294 356
295static inline int ocfs2_insert_can_use_array(struct ocfs2_inode_info *oi, 357/* co_cache_lock() must be held */
296 struct ocfs2_caching_info *ci) 358static inline int ocfs2_insert_can_use_array(struct ocfs2_caching_info *ci)
297{ 359{
298 assert_spin_locked(&oi->ip_lock); 360 return (ci->ci_flags & OCFS2_CACHE_FL_INLINE) &&
299 361 (ci->ci_num_cached < OCFS2_CACHE_INFO_MAX_ARRAY);
300 return (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) &&
301 (ci->ci_num_cached < OCFS2_INODE_MAX_CACHE_ARRAY);
302} 362}
303 363
304/* tree should be exactly OCFS2_INODE_MAX_CACHE_ARRAY wide. NULL the 364/* tree should be exactly OCFS2_CACHE_INFO_MAX_ARRAY wide. NULL the
305 * pointers in tree after we use them - this allows caller to detect 365 * pointers in tree after we use them - this allows caller to detect
306 * when to free in case of error. */ 366 * when to free in case of error.
307static void ocfs2_expand_cache(struct ocfs2_inode_info *oi, 367 *
368 * The co_cache_lock() must be held. */
369static void ocfs2_expand_cache(struct ocfs2_caching_info *ci,
308 struct ocfs2_meta_cache_item **tree) 370 struct ocfs2_meta_cache_item **tree)
309{ 371{
310 int i; 372 int i;
311 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
312 373
313 mlog_bug_on_msg(ci->ci_num_cached != OCFS2_INODE_MAX_CACHE_ARRAY, 374 mlog_bug_on_msg(ci->ci_num_cached != OCFS2_CACHE_INFO_MAX_ARRAY,
314 "Inode %llu, num cached = %u, should be %u\n", 375 "Owner %llu, num cached = %u, should be %u\n",
315 (unsigned long long)oi->ip_blkno, ci->ci_num_cached, 376 (unsigned long long)ocfs2_metadata_cache_owner(ci),
316 OCFS2_INODE_MAX_CACHE_ARRAY); 377 ci->ci_num_cached, OCFS2_CACHE_INFO_MAX_ARRAY);
317 mlog_bug_on_msg(!(oi->ip_flags & OCFS2_INODE_CACHE_INLINE), 378 mlog_bug_on_msg(!(ci->ci_flags & OCFS2_CACHE_FL_INLINE),
318 "Inode %llu not marked as inline anymore!\n", 379 "Owner %llu not marked as inline anymore!\n",
319 (unsigned long long)oi->ip_blkno); 380 (unsigned long long)ocfs2_metadata_cache_owner(ci));
320 assert_spin_locked(&oi->ip_lock);
321 381
322 /* Be careful to initialize the tree members *first* because 382 /* Be careful to initialize the tree members *first* because
323 * once the ci_tree is used, the array is junk... */ 383 * once the ci_tree is used, the array is junk... */
324 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) 384 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
325 tree[i]->c_block = ci->ci_cache.ci_array[i]; 385 tree[i]->c_block = ci->ci_cache.ci_array[i];
326 386
327 oi->ip_flags &= ~OCFS2_INODE_CACHE_INLINE; 387 ci->ci_flags &= ~OCFS2_CACHE_FL_INLINE;
328 ci->ci_cache.ci_tree = RB_ROOT; 388 ci->ci_cache.ci_tree = RB_ROOT;
329 /* this will be set again by __ocfs2_insert_cache_tree */ 389 /* this will be set again by __ocfs2_insert_cache_tree */
330 ci->ci_num_cached = 0; 390 ci->ci_num_cached = 0;
331 391
332 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { 392 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
333 __ocfs2_insert_cache_tree(ci, tree[i]); 393 __ocfs2_insert_cache_tree(ci, tree[i]);
334 tree[i] = NULL; 394 tree[i] = NULL;
335 } 395 }
336 396
337 mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n", 397 mlog(0, "Expanded %llu to a tree cache: flags 0x%x, num = %u\n",
338 (unsigned long long)oi->ip_blkno, oi->ip_flags, ci->ci_num_cached); 398 (unsigned long long)ocfs2_metadata_cache_owner(ci),
399 ci->ci_flags, ci->ci_num_cached);
339} 400}
340 401
341/* Slow path function - memory allocation is necessary. See the 402/* Slow path function - memory allocation is necessary. See the
342 * comment above ocfs2_set_buffer_uptodate for more information. */ 403 * comment above ocfs2_set_buffer_uptodate for more information. */
343static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi, 404static void __ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
344 sector_t block, 405 sector_t block,
345 int expand_tree) 406 int expand_tree)
346{ 407{
347 int i; 408 int i;
348 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
349 struct ocfs2_meta_cache_item *new = NULL; 409 struct ocfs2_meta_cache_item *new = NULL;
350 struct ocfs2_meta_cache_item *tree[OCFS2_INODE_MAX_CACHE_ARRAY] = 410 struct ocfs2_meta_cache_item *tree[OCFS2_CACHE_INFO_MAX_ARRAY] =
351 { NULL, }; 411 { NULL, };
352 412
353 mlog(0, "Inode %llu, block %llu, expand = %d\n", 413 mlog(0, "Owner %llu, block %llu, expand = %d\n",
354 (unsigned long long)oi->ip_blkno, 414 (unsigned long long)ocfs2_metadata_cache_owner(ci),
355 (unsigned long long)block, expand_tree); 415 (unsigned long long)block, expand_tree);
356 416
357 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS); 417 new = kmem_cache_alloc(ocfs2_uptodate_cachep, GFP_NOFS);
@@ -364,7 +424,7 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
364 if (expand_tree) { 424 if (expand_tree) {
365 /* Do *not* allocate an array here - the removal code 425 /* Do *not* allocate an array here - the removal code
366 * has no way of tracking that. */ 426 * has no way of tracking that. */
367 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) { 427 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++) {
368 tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep, 428 tree[i] = kmem_cache_alloc(ocfs2_uptodate_cachep,
369 GFP_NOFS); 429 GFP_NOFS);
370 if (!tree[i]) { 430 if (!tree[i]) {
@@ -376,21 +436,21 @@ static void __ocfs2_set_buffer_uptodate(struct ocfs2_inode_info *oi,
376 } 436 }
377 } 437 }
378 438
379 spin_lock(&oi->ip_lock); 439 ocfs2_metadata_cache_lock(ci);
380 if (ocfs2_insert_can_use_array(oi, ci)) { 440 if (ocfs2_insert_can_use_array(ci)) {
381 mlog(0, "Someone cleared the tree underneath us\n"); 441 mlog(0, "Someone cleared the tree underneath us\n");
382 /* Ok, items were removed from the cache in between 442 /* Ok, items were removed from the cache in between
383 * locks. Detect this and revert back to the fast path */ 443 * locks. Detect this and revert back to the fast path */
384 ocfs2_append_cache_array(ci, block); 444 ocfs2_append_cache_array(ci, block);
385 spin_unlock(&oi->ip_lock); 445 ocfs2_metadata_cache_unlock(ci);
386 goto out_free; 446 goto out_free;
387 } 447 }
388 448
389 if (expand_tree) 449 if (expand_tree)
390 ocfs2_expand_cache(oi, tree); 450 ocfs2_expand_cache(ci, tree);
391 451
392 __ocfs2_insert_cache_tree(ci, new); 452 __ocfs2_insert_cache_tree(ci, new);
393 spin_unlock(&oi->ip_lock); 453 ocfs2_metadata_cache_unlock(ci);
394 454
395 new = NULL; 455 new = NULL;
396out_free: 456out_free:
@@ -400,14 +460,14 @@ out_free:
400 /* If these were used, then ocfs2_expand_cache re-set them to 460 /* If these were used, then ocfs2_expand_cache re-set them to
401 * NULL for us. */ 461 * NULL for us. */
402 if (tree[0]) { 462 if (tree[0]) {
403 for(i = 0; i < OCFS2_INODE_MAX_CACHE_ARRAY; i++) 463 for (i = 0; i < OCFS2_CACHE_INFO_MAX_ARRAY; i++)
404 if (tree[i]) 464 if (tree[i])
405 kmem_cache_free(ocfs2_uptodate_cachep, 465 kmem_cache_free(ocfs2_uptodate_cachep,
406 tree[i]); 466 tree[i]);
407 } 467 }
408} 468}
409 469
410/* Item insertion is guarded by ip_io_mutex, so the insertion path takes 470/* Item insertion is guarded by co_io_lock(), so the insertion path takes
411 * advantage of this by not rechecking for a duplicate insert during 471 * advantage of this by not rechecking for a duplicate insert during
412 * the slow case. Additionally, if the cache needs to be bumped up to 472 * the slow case. Additionally, if the cache needs to be bumped up to
413 * a tree, the code will not recheck after acquiring the lock -- 473 * a tree, the code will not recheck after acquiring the lock --
@@ -425,59 +485,55 @@ out_free:
425 * Readahead buffers can be passed in here before the I/O request is 485 * Readahead buffers can be passed in here before the I/O request is
426 * completed. 486 * completed.
427 */ 487 */
428void ocfs2_set_buffer_uptodate(struct inode *inode, 488void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
429 struct buffer_head *bh) 489 struct buffer_head *bh)
430{ 490{
431 int expand; 491 int expand;
432 struct ocfs2_inode_info *oi = OCFS2_I(inode);
433 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
434 492
435 /* The block may very well exist in our cache already, so avoid 493 /* The block may very well exist in our cache already, so avoid
436 * doing any more work in that case. */ 494 * doing any more work in that case. */
437 if (ocfs2_buffer_cached(oi, bh)) 495 if (ocfs2_buffer_cached(ci, bh))
438 return; 496 return;
439 497
440 mlog(0, "Inode %llu, inserting block %llu\n", 498 mlog(0, "Owner %llu, inserting block %llu\n",
441 (unsigned long long)oi->ip_blkno, 499 (unsigned long long)ocfs2_metadata_cache_owner(ci),
442 (unsigned long long)bh->b_blocknr); 500 (unsigned long long)bh->b_blocknr);
443 501
444 /* No need to recheck under spinlock - insertion is guarded by 502 /* No need to recheck under spinlock - insertion is guarded by
445 * ip_io_mutex */ 503 * co_io_lock() */
446 spin_lock(&oi->ip_lock); 504 ocfs2_metadata_cache_lock(ci);
447 if (ocfs2_insert_can_use_array(oi, ci)) { 505 if (ocfs2_insert_can_use_array(ci)) {
448 /* Fast case - it's an array and there's a free 506 /* Fast case - it's an array and there's a free
449 * spot. */ 507 * spot. */
450 ocfs2_append_cache_array(ci, bh->b_blocknr); 508 ocfs2_append_cache_array(ci, bh->b_blocknr);
451 spin_unlock(&oi->ip_lock); 509 ocfs2_metadata_cache_unlock(ci);
452 return; 510 return;
453 } 511 }
454 512
455 expand = 0; 513 expand = 0;
456 if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) { 514 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
457 /* We need to bump things up to a tree. */ 515 /* We need to bump things up to a tree. */
458 expand = 1; 516 expand = 1;
459 } 517 }
460 spin_unlock(&oi->ip_lock); 518 ocfs2_metadata_cache_unlock(ci);
461 519
462 __ocfs2_set_buffer_uptodate(oi, bh->b_blocknr, expand); 520 __ocfs2_set_buffer_uptodate(ci, bh->b_blocknr, expand);
463} 521}
464 522
465/* Called against a newly allocated buffer. Most likely nobody should 523/* Called against a newly allocated buffer. Most likely nobody should
466 * be able to read this sort of metadata while it's still being 524 * be able to read this sort of metadata while it's still being
467 * allocated, but this is careful to take ip_io_mutex anyway. */ 525 * allocated, but this is careful to take co_io_lock() anyway. */
468void ocfs2_set_new_buffer_uptodate(struct inode *inode, 526void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
469 struct buffer_head *bh) 527 struct buffer_head *bh)
470{ 528{
471 struct ocfs2_inode_info *oi = OCFS2_I(inode);
472
473 /* This should definitely *not* exist in our cache */ 529 /* This should definitely *not* exist in our cache */
474 BUG_ON(ocfs2_buffer_cached(oi, bh)); 530 BUG_ON(ocfs2_buffer_cached(ci, bh));
475 531
476 set_buffer_uptodate(bh); 532 set_buffer_uptodate(bh);
477 533
478 mutex_lock(&oi->ip_io_mutex); 534 ocfs2_metadata_cache_io_lock(ci);
479 ocfs2_set_buffer_uptodate(inode, bh); 535 ocfs2_set_buffer_uptodate(ci, bh);
480 mutex_unlock(&oi->ip_io_mutex); 536 ocfs2_metadata_cache_io_unlock(ci);
481} 537}
482 538
483/* Requires ip_lock. */ 539/* Requires ip_lock. */
@@ -487,7 +543,7 @@ static void ocfs2_remove_metadata_array(struct ocfs2_caching_info *ci,
487 sector_t *array = ci->ci_cache.ci_array; 543 sector_t *array = ci->ci_cache.ci_array;
488 int bytes; 544 int bytes;
489 545
490 BUG_ON(index < 0 || index >= OCFS2_INODE_MAX_CACHE_ARRAY); 546 BUG_ON(index < 0 || index >= OCFS2_CACHE_INFO_MAX_ARRAY);
491 BUG_ON(index >= ci->ci_num_cached); 547 BUG_ON(index >= ci->ci_num_cached);
492 BUG_ON(!ci->ci_num_cached); 548 BUG_ON(!ci->ci_num_cached);
493 549
@@ -515,21 +571,19 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci,
515 ci->ci_num_cached--; 571 ci->ci_num_cached--;
516} 572}
517 573
518static void ocfs2_remove_block_from_cache(struct inode *inode, 574static void ocfs2_remove_block_from_cache(struct ocfs2_caching_info *ci,
519 sector_t block) 575 sector_t block)
520{ 576{
521 int index; 577 int index;
522 struct ocfs2_meta_cache_item *item = NULL; 578 struct ocfs2_meta_cache_item *item = NULL;
523 struct ocfs2_inode_info *oi = OCFS2_I(inode);
524 struct ocfs2_caching_info *ci = &oi->ip_metadata_cache;
525 579
526 spin_lock(&oi->ip_lock); 580 ocfs2_metadata_cache_lock(ci);
527 mlog(0, "Inode %llu, remove %llu, items = %u, array = %u\n", 581 mlog(0, "Owner %llu, remove %llu, items = %u, array = %u\n",
528 (unsigned long long)oi->ip_blkno, 582 (unsigned long long)ocfs2_metadata_cache_owner(ci),
529 (unsigned long long) block, ci->ci_num_cached, 583 (unsigned long long) block, ci->ci_num_cached,
530 oi->ip_flags & OCFS2_INODE_CACHE_INLINE); 584 ci->ci_flags & OCFS2_CACHE_FL_INLINE);
531 585
532 if (oi->ip_flags & OCFS2_INODE_CACHE_INLINE) { 586 if (ci->ci_flags & OCFS2_CACHE_FL_INLINE) {
533 index = ocfs2_search_cache_array(ci, block); 587 index = ocfs2_search_cache_array(ci, block);
534 if (index != -1) 588 if (index != -1)
535 ocfs2_remove_metadata_array(ci, index); 589 ocfs2_remove_metadata_array(ci, index);
@@ -538,7 +592,7 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
538 if (item) 592 if (item)
539 ocfs2_remove_metadata_tree(ci, item); 593 ocfs2_remove_metadata_tree(ci, item);
540 } 594 }
541 spin_unlock(&oi->ip_lock); 595 ocfs2_metadata_cache_unlock(ci);
542 596
543 if (item) 597 if (item)
544 kmem_cache_free(ocfs2_uptodate_cachep, item); 598 kmem_cache_free(ocfs2_uptodate_cachep, item);
@@ -549,23 +603,24 @@ static void ocfs2_remove_block_from_cache(struct inode *inode,
549 * bother reverting things to an inlined array in the case of a remove 603 * bother reverting things to an inlined array in the case of a remove
550 * which moves us back under the limit. 604 * which moves us back under the limit.
551 */ 605 */
552void ocfs2_remove_from_cache(struct inode *inode, 606void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
553 struct buffer_head *bh) 607 struct buffer_head *bh)
554{ 608{
555 sector_t block = bh->b_blocknr; 609 sector_t block = bh->b_blocknr;
556 610
557 ocfs2_remove_block_from_cache(inode, block); 611 ocfs2_remove_block_from_cache(ci, block);
558} 612}
559 613
560/* Called when we remove xattr clusters from an inode. */ 614/* Called when we remove xattr clusters from an inode. */
561void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, 615void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
562 sector_t block, 616 sector_t block,
563 u32 c_len) 617 u32 c_len)
564{ 618{
565 unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len; 619 struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
620 unsigned int i, b_len = ocfs2_clusters_to_blocks(sb, 1) * c_len;
566 621
567 for (i = 0; i < b_len; i++, block++) 622 for (i = 0; i < b_len; i++, block++)
568 ocfs2_remove_block_from_cache(inode, block); 623 ocfs2_remove_block_from_cache(ci, block);
569} 624}
570 625
571int __init init_ocfs2_uptodate_cache(void) 626int __init init_ocfs2_uptodate_cache(void)
@@ -577,7 +632,7 @@ int __init init_ocfs2_uptodate_cache(void)
577 return -ENOMEM; 632 return -ENOMEM;
578 633
579 mlog(0, "%u inlined cache items per inode.\n", 634 mlog(0, "%u inlined cache items per inode.\n",
580 OCFS2_INODE_MAX_CACHE_ARRAY); 635 OCFS2_CACHE_INFO_MAX_ARRAY);
581 636
582 return 0; 637 return 0;
583} 638}
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h
index 531b4b3a0c47..0d826fe2da0d 100644
--- a/fs/ocfs2/uptodate.h
+++ b/fs/ocfs2/uptodate.h
@@ -26,24 +26,59 @@
26#ifndef OCFS2_UPTODATE_H 26#ifndef OCFS2_UPTODATE_H
27#define OCFS2_UPTODATE_H 27#define OCFS2_UPTODATE_H
28 28
29/*
30 * The caching code relies on locking provided by the user of
31 * struct ocfs2_caching_info. These operations connect that up.
32 */
33struct ocfs2_caching_operations {
34 /*
35 * A u64 representing the owning structure. Usually this
36 * is the block number (i_blkno or whatnot). This is used so
37 * that caching log messages can identify the owning structure.
38 */
39 u64 (*co_owner)(struct ocfs2_caching_info *ci);
40
41 /* The superblock is needed during I/O. */
42 struct super_block *(*co_get_super)(struct ocfs2_caching_info *ci);
43 /*
44 * Lock and unlock the caching data. These will not sleep, and
45 * should probably be spinlocks.
46 */
47 void (*co_cache_lock)(struct ocfs2_caching_info *ci);
48 void (*co_cache_unlock)(struct ocfs2_caching_info *ci);
49
50 /*
51 * Lock and unlock for disk I/O. These will sleep, and should
52 * be mutexes.
53 */
54 void (*co_io_lock)(struct ocfs2_caching_info *ci);
55 void (*co_io_unlock)(struct ocfs2_caching_info *ci);
56};
57
29int __init init_ocfs2_uptodate_cache(void); 58int __init init_ocfs2_uptodate_cache(void);
30void exit_ocfs2_uptodate_cache(void); 59void exit_ocfs2_uptodate_cache(void);
31 60
32void ocfs2_metadata_cache_init(struct inode *inode); 61void ocfs2_metadata_cache_init(struct ocfs2_caching_info *ci,
33void ocfs2_metadata_cache_purge(struct inode *inode); 62 const struct ocfs2_caching_operations *ops);
63void ocfs2_metadata_cache_purge(struct ocfs2_caching_info *ci);
64void ocfs2_metadata_cache_exit(struct ocfs2_caching_info *ci);
65
66u64 ocfs2_metadata_cache_owner(struct ocfs2_caching_info *ci);
67void ocfs2_metadata_cache_io_lock(struct ocfs2_caching_info *ci);
68void ocfs2_metadata_cache_io_unlock(struct ocfs2_caching_info *ci);
34 69
35int ocfs2_buffer_uptodate(struct inode *inode, 70int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
36 struct buffer_head *bh); 71 struct buffer_head *bh);
37void ocfs2_set_buffer_uptodate(struct inode *inode, 72void ocfs2_set_buffer_uptodate(struct ocfs2_caching_info *ci,
38 struct buffer_head *bh); 73 struct buffer_head *bh);
39void ocfs2_set_new_buffer_uptodate(struct inode *inode, 74void ocfs2_set_new_buffer_uptodate(struct ocfs2_caching_info *ci,
40 struct buffer_head *bh); 75 struct buffer_head *bh);
41void ocfs2_remove_from_cache(struct inode *inode, 76void ocfs2_remove_from_cache(struct ocfs2_caching_info *ci,
42 struct buffer_head *bh); 77 struct buffer_head *bh);
43void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, 78void ocfs2_remove_xattr_clusters_from_cache(struct ocfs2_caching_info *ci,
44 sector_t block, 79 sector_t block,
45 u32 c_len); 80 u32 c_len);
46int ocfs2_buffer_read_ahead(struct inode *inode, 81int ocfs2_buffer_read_ahead(struct ocfs2_caching_info *ci,
47 struct buffer_head *bh); 82 struct buffer_head *bh);
48 83
49#endif /* OCFS2_UPTODATE_H */ 84#endif /* OCFS2_UPTODATE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index d1a27cda984f..fe3419068df2 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -55,7 +55,8 @@
55#include "buffer_head_io.h" 55#include "buffer_head_io.h"
56#include "super.h" 56#include "super.h"
57#include "xattr.h" 57#include "xattr.h"
58 58#include "refcounttree.h"
59#include "acl.h"
59 60
60struct ocfs2_xattr_def_value_root { 61struct ocfs2_xattr_def_value_root {
61 struct ocfs2_xattr_value_root xv; 62 struct ocfs2_xattr_value_root xv;
@@ -140,7 +141,7 @@ struct ocfs2_xattr_search {
140 int not_found; 141 int not_found;
141}; 142};
142 143
143static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, 144static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
144 struct ocfs2_xattr_header *xh, 145 struct ocfs2_xattr_header *xh,
145 int index, 146 int index,
146 int *block_off, 147 int *block_off,
@@ -157,7 +158,7 @@ static int ocfs2_xattr_index_block_find(struct inode *inode,
157 struct ocfs2_xattr_search *xs); 158 struct ocfs2_xattr_search *xs);
158 159
159static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 160static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
160 struct ocfs2_xattr_tree_root *xt, 161 struct buffer_head *blk_bh,
161 char *buffer, 162 char *buffer,
162 size_t buffer_size); 163 size_t buffer_size);
163 164
@@ -170,12 +171,42 @@ static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
170 struct ocfs2_xattr_search *xs, 171 struct ocfs2_xattr_search *xs,
171 struct ocfs2_xattr_set_ctxt *ctxt); 172 struct ocfs2_xattr_set_ctxt *ctxt);
172 173
173static int ocfs2_delete_xattr_index_block(struct inode *inode, 174typedef int (xattr_tree_rec_func)(struct inode *inode,
174 struct buffer_head *xb_bh); 175 struct buffer_head *root_bh,
176 u64 blkno, u32 cpos, u32 len, void *para);
177static int ocfs2_iterate_xattr_index_block(struct inode *inode,
178 struct buffer_head *root_bh,
179 xattr_tree_rec_func *rec_func,
180 void *para);
181static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
182 struct ocfs2_xattr_bucket *bucket,
183 void *para);
184static int ocfs2_rm_xattr_cluster(struct inode *inode,
185 struct buffer_head *root_bh,
186 u64 blkno,
187 u32 cpos,
188 u32 len,
189 void *para);
190
175static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle, 191static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
176 u64 src_blk, u64 last_blk, u64 to_blk, 192 u64 src_blk, u64 last_blk, u64 to_blk,
177 unsigned int start_bucket, 193 unsigned int start_bucket,
178 u32 *first_hash); 194 u32 *first_hash);
195static int ocfs2_prepare_refcount_xattr(struct inode *inode,
196 struct ocfs2_dinode *di,
197 struct ocfs2_xattr_info *xi,
198 struct ocfs2_xattr_search *xis,
199 struct ocfs2_xattr_search *xbs,
200 struct ocfs2_refcount_tree **ref_tree,
201 int *meta_need,
202 int *credits);
203static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
204 struct ocfs2_xattr_bucket *bucket,
205 int offset,
206 struct ocfs2_xattr_value_root **xv,
207 struct buffer_head **bh);
208static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
209 const void *value, size_t size, int flags);
179 210
180static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) 211static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
181{ 212{
@@ -254,9 +285,9 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
254 break; 285 break;
255 } 286 }
256 287
257 if (!ocfs2_buffer_uptodate(bucket->bu_inode, 288 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
258 bucket->bu_bhs[i])) 289 bucket->bu_bhs[i]))
259 ocfs2_set_new_buffer_uptodate(bucket->bu_inode, 290 ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
260 bucket->bu_bhs[i]); 291 bucket->bu_bhs[i]);
261 } 292 }
262 293
@@ -271,7 +302,7 @@ static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
271{ 302{
272 int rc; 303 int rc;
273 304
274 rc = ocfs2_read_blocks(bucket->bu_inode, xb_blkno, 305 rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
275 bucket->bu_blocks, bucket->bu_bhs, 0, 306 bucket->bu_blocks, bucket->bu_bhs, 0,
276 NULL); 307 NULL);
277 if (!rc) { 308 if (!rc) {
@@ -297,7 +328,8 @@ static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
297 int i, rc = 0; 328 int i, rc = 0;
298 329
299 for (i = 0; i < bucket->bu_blocks; i++) { 330 for (i = 0; i < bucket->bu_blocks; i++) {
300 rc = ocfs2_journal_access(handle, bucket->bu_inode, 331 rc = ocfs2_journal_access(handle,
332 INODE_CACHE(bucket->bu_inode),
301 bucket->bu_bhs[i], type); 333 bucket->bu_bhs[i], type);
302 if (rc) { 334 if (rc) {
303 mlog_errno(rc); 335 mlog_errno(rc);
@@ -399,7 +431,7 @@ static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
399 int rc; 431 int rc;
400 struct buffer_head *tmp = *bh; 432 struct buffer_head *tmp = *bh;
401 433
402 rc = ocfs2_read_block(inode, xb_blkno, &tmp, 434 rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
403 ocfs2_validate_xattr_block); 435 ocfs2_validate_xattr_block);
404 436
405 /* If ocfs2_read_block() got us a new bh, pass it up. */ 437 /* If ocfs2_read_block() got us a new bh, pass it up. */
@@ -596,15 +628,14 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
596 int status = 0; 628 int status = 0;
597 handle_t *handle = ctxt->handle; 629 handle_t *handle = ctxt->handle;
598 enum ocfs2_alloc_restarted why; 630 enum ocfs2_alloc_restarted why;
599 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
600 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters); 631 u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
601 struct ocfs2_extent_tree et; 632 struct ocfs2_extent_tree et;
602 633
603 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); 634 mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
604 635
605 ocfs2_init_xattr_value_extent_tree(&et, inode, vb); 636 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
606 637
607 status = vb->vb_access(handle, inode, vb->vb_bh, 638 status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
608 OCFS2_JOURNAL_ACCESS_WRITE); 639 OCFS2_JOURNAL_ACCESS_WRITE);
609 if (status < 0) { 640 if (status < 0) {
610 mlog_errno(status); 641 mlog_errno(status);
@@ -612,13 +643,11 @@ static int ocfs2_xattr_extend_allocation(struct inode *inode,
612 } 643 }
613 644
614 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters); 645 prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
615 status = ocfs2_add_clusters_in_btree(osb, 646 status = ocfs2_add_clusters_in_btree(handle,
616 inode, 647 &et,
617 &logical_start, 648 &logical_start,
618 clusters_to_add, 649 clusters_to_add,
619 0, 650 0,
620 &et,
621 handle,
622 ctxt->data_ac, 651 ctxt->data_ac,
623 ctxt->meta_ac, 652 ctxt->meta_ac,
624 &why); 653 &why);
@@ -649,6 +678,7 @@ leave:
649static int __ocfs2_remove_xattr_range(struct inode *inode, 678static int __ocfs2_remove_xattr_range(struct inode *inode,
650 struct ocfs2_xattr_value_buf *vb, 679 struct ocfs2_xattr_value_buf *vb,
651 u32 cpos, u32 phys_cpos, u32 len, 680 u32 cpos, u32 phys_cpos, u32 len,
681 unsigned int ext_flags,
652 struct ocfs2_xattr_set_ctxt *ctxt) 682 struct ocfs2_xattr_set_ctxt *ctxt)
653{ 683{
654 int ret; 684 int ret;
@@ -656,16 +686,16 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
656 handle_t *handle = ctxt->handle; 686 handle_t *handle = ctxt->handle;
657 struct ocfs2_extent_tree et; 687 struct ocfs2_extent_tree et;
658 688
659 ocfs2_init_xattr_value_extent_tree(&et, inode, vb); 689 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
660 690
661 ret = vb->vb_access(handle, inode, vb->vb_bh, 691 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
662 OCFS2_JOURNAL_ACCESS_WRITE); 692 OCFS2_JOURNAL_ACCESS_WRITE);
663 if (ret) { 693 if (ret) {
664 mlog_errno(ret); 694 mlog_errno(ret);
665 goto out; 695 goto out;
666 } 696 }
667 697
668 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, ctxt->meta_ac, 698 ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
669 &ctxt->dealloc); 699 &ctxt->dealloc);
670 if (ret) { 700 if (ret) {
671 mlog_errno(ret); 701 mlog_errno(ret);
@@ -680,7 +710,14 @@ static int __ocfs2_remove_xattr_range(struct inode *inode,
680 goto out; 710 goto out;
681 } 711 }
682 712
683 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc, phys_blkno, len); 713 if (ext_flags & OCFS2_EXT_REFCOUNTED)
714 ret = ocfs2_decrease_refcount(inode, handle,
715 ocfs2_blocks_to_clusters(inode->i_sb,
716 phys_blkno),
717 len, ctxt->meta_ac, &ctxt->dealloc, 1);
718 else
719 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
720 phys_blkno, len);
684 if (ret) 721 if (ret)
685 mlog_errno(ret); 722 mlog_errno(ret);
686 723
@@ -695,6 +732,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
695 struct ocfs2_xattr_set_ctxt *ctxt) 732 struct ocfs2_xattr_set_ctxt *ctxt)
696{ 733{
697 int ret = 0; 734 int ret = 0;
735 unsigned int ext_flags;
698 u32 trunc_len, cpos, phys_cpos, alloc_size; 736 u32 trunc_len, cpos, phys_cpos, alloc_size;
699 u64 block; 737 u64 block;
700 738
@@ -706,7 +744,7 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
706 while (trunc_len) { 744 while (trunc_len) {
707 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, 745 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
708 &alloc_size, 746 &alloc_size,
709 &vb->vb_xv->xr_list); 747 &vb->vb_xv->xr_list, &ext_flags);
710 if (ret) { 748 if (ret) {
711 mlog_errno(ret); 749 mlog_errno(ret);
712 goto out; 750 goto out;
@@ -717,15 +755,15 @@ static int ocfs2_xattr_shrink_size(struct inode *inode,
717 755
718 ret = __ocfs2_remove_xattr_range(inode, vb, cpos, 756 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
719 phys_cpos, alloc_size, 757 phys_cpos, alloc_size,
720 ctxt); 758 ext_flags, ctxt);
721 if (ret) { 759 if (ret) {
722 mlog_errno(ret); 760 mlog_errno(ret);
723 goto out; 761 goto out;
724 } 762 }
725 763
726 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); 764 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
727 ocfs2_remove_xattr_clusters_from_cache(inode, block, 765 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
728 alloc_size); 766 block, alloc_size);
729 cpos += alloc_size; 767 cpos += alloc_size;
730 trunc_len -= alloc_size; 768 trunc_len -= alloc_size;
731 } 769 }
@@ -810,6 +848,23 @@ static int ocfs2_xattr_list_entries(struct inode *inode,
810 return result; 848 return result;
811} 849}
812 850
851int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
852 struct ocfs2_dinode *di)
853{
854 struct ocfs2_xattr_header *xh;
855 int i;
856
857 xh = (struct ocfs2_xattr_header *)
858 ((void *)di + inode->i_sb->s_blocksize -
859 le16_to_cpu(di->i_xattr_inline_size));
860
861 for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
862 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
863 return 1;
864
865 return 0;
866}
867
813static int ocfs2_xattr_ibody_list(struct inode *inode, 868static int ocfs2_xattr_ibody_list(struct inode *inode,
814 struct ocfs2_dinode *di, 869 struct ocfs2_dinode *di,
815 char *buffer, 870 char *buffer,
@@ -855,11 +910,9 @@ static int ocfs2_xattr_block_list(struct inode *inode,
855 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; 910 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
856 ret = ocfs2_xattr_list_entries(inode, header, 911 ret = ocfs2_xattr_list_entries(inode, header,
857 buffer, buffer_size); 912 buffer, buffer_size);
858 } else { 913 } else
859 struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; 914 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
860 ret = ocfs2_xattr_tree_list_index_block(inode, xt,
861 buffer, buffer_size); 915 buffer, buffer_size);
862 }
863 916
864 brelse(blk_bh); 917 brelse(blk_bh);
865 918
@@ -961,7 +1014,7 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
961 cpos = 0; 1014 cpos = 0;
962 while (cpos < clusters) { 1015 while (cpos < clusters) {
963 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1016 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
964 &num_clusters, el); 1017 &num_clusters, el, NULL);
965 if (ret) { 1018 if (ret) {
966 mlog_errno(ret); 1019 mlog_errno(ret);
967 goto out; 1020 goto out;
@@ -970,7 +1023,8 @@ static int ocfs2_xattr_get_value_outside(struct inode *inode,
970 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1023 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
971 /* Copy ocfs2_xattr_value */ 1024 /* Copy ocfs2_xattr_value */
972 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1025 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
973 ret = ocfs2_read_block(inode, blkno, &bh, NULL); 1026 ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1027 &bh, NULL);
974 if (ret) { 1028 if (ret) {
975 mlog_errno(ret); 1029 mlog_errno(ret);
976 goto out; 1030 goto out;
@@ -1085,7 +1139,7 @@ static int ocfs2_xattr_block_get(struct inode *inode,
1085 i = xs->here - xs->header->xh_entries; 1139 i = xs->here - xs->header->xh_entries;
1086 1140
1087 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 1141 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1088 ret = ocfs2_xattr_bucket_get_name_value(inode, 1142 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1089 bucket_xh(xs->bucket), 1143 bucket_xh(xs->bucket),
1090 i, 1144 i,
1091 &block_off, 1145 &block_off,
@@ -1183,7 +1237,7 @@ static int ocfs2_xattr_get(struct inode *inode,
1183 1237
1184static int __ocfs2_xattr_set_value_outside(struct inode *inode, 1238static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1185 handle_t *handle, 1239 handle_t *handle,
1186 struct ocfs2_xattr_value_root *xv, 1240 struct ocfs2_xattr_value_buf *vb,
1187 const void *value, 1241 const void *value,
1188 int value_len) 1242 int value_len)
1189{ 1243{
@@ -1194,28 +1248,34 @@ static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1194 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); 1248 u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1195 u64 blkno; 1249 u64 blkno;
1196 struct buffer_head *bh = NULL; 1250 struct buffer_head *bh = NULL;
1251 unsigned int ext_flags;
1252 struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1197 1253
1198 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); 1254 BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1199 1255
1200 while (cpos < clusters) { 1256 while (cpos < clusters) {
1201 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, 1257 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1202 &num_clusters, &xv->xr_list); 1258 &num_clusters, &xv->xr_list,
1259 &ext_flags);
1203 if (ret) { 1260 if (ret) {
1204 mlog_errno(ret); 1261 mlog_errno(ret);
1205 goto out; 1262 goto out;
1206 } 1263 }
1207 1264
1265 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1266
1208 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); 1267 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1209 1268
1210 for (i = 0; i < num_clusters * bpc; i++, blkno++) { 1269 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1211 ret = ocfs2_read_block(inode, blkno, &bh, NULL); 1270 ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1271 &bh, NULL);
1212 if (ret) { 1272 if (ret) {
1213 mlog_errno(ret); 1273 mlog_errno(ret);
1214 goto out; 1274 goto out;
1215 } 1275 }
1216 1276
1217 ret = ocfs2_journal_access(handle, 1277 ret = ocfs2_journal_access(handle,
1218 inode, 1278 INODE_CACHE(inode),
1219 bh, 1279 bh,
1220 OCFS2_JOURNAL_ACCESS_WRITE); 1280 OCFS2_JOURNAL_ACCESS_WRITE);
1221 if (ret < 0) { 1281 if (ret < 0) {
@@ -1266,7 +1326,7 @@ static int ocfs2_xattr_cleanup(struct inode *inode,
1266 void *val = xs->base + offs; 1326 void *val = xs->base + offs;
1267 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; 1327 size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
1268 1328
1269 ret = vb->vb_access(handle, inode, vb->vb_bh, 1329 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1270 OCFS2_JOURNAL_ACCESS_WRITE); 1330 OCFS2_JOURNAL_ACCESS_WRITE);
1271 if (ret) { 1331 if (ret) {
1272 mlog_errno(ret); 1332 mlog_errno(ret);
@@ -1294,7 +1354,7 @@ static int ocfs2_xattr_update_entry(struct inode *inode,
1294{ 1354{
1295 int ret; 1355 int ret;
1296 1356
1297 ret = vb->vb_access(handle, inode, vb->vb_bh, 1357 ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1298 OCFS2_JOURNAL_ACCESS_WRITE); 1358 OCFS2_JOURNAL_ACCESS_WRITE);
1299 if (ret) { 1359 if (ret) {
1300 mlog_errno(ret); 1360 mlog_errno(ret);
@@ -1355,7 +1415,7 @@ static int ocfs2_xattr_set_value_outside(struct inode *inode,
1355 mlog_errno(ret); 1415 mlog_errno(ret);
1356 return ret; 1416 return ret;
1357 } 1417 }
1358 ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb->vb_xv, 1418 ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1359 xi->value, xi->value_len); 1419 xi->value, xi->value_len);
1360 if (ret < 0) 1420 if (ret < 0)
1361 mlog_errno(ret); 1421 mlog_errno(ret);
@@ -1594,7 +1654,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1594 1654
1595 ret = __ocfs2_xattr_set_value_outside(inode, 1655 ret = __ocfs2_xattr_set_value_outside(inode,
1596 handle, 1656 handle,
1597 vb.vb_xv, 1657 &vb,
1598 xi->value, 1658 xi->value,
1599 xi->value_len); 1659 xi->value_len);
1600 if (ret < 0) 1660 if (ret < 0)
@@ -1615,7 +1675,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1615 } 1675 }
1616 } 1676 }
1617 1677
1618 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, 1678 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1619 OCFS2_JOURNAL_ACCESS_WRITE); 1679 OCFS2_JOURNAL_ACCESS_WRITE);
1620 if (ret) { 1680 if (ret) {
1621 mlog_errno(ret); 1681 mlog_errno(ret);
@@ -1623,7 +1683,7 @@ static int ocfs2_xattr_set_entry(struct inode *inode,
1623 } 1683 }
1624 1684
1625 if (!(flag & OCFS2_INLINE_XATTR_FL)) { 1685 if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1626 ret = vb.vb_access(handle, inode, vb.vb_bh, 1686 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1627 OCFS2_JOURNAL_ACCESS_WRITE); 1687 OCFS2_JOURNAL_ACCESS_WRITE);
1628 if (ret) { 1688 if (ret) {
1629 mlog_errno(ret); 1689 mlog_errno(ret);
@@ -1700,51 +1760,112 @@ out:
1700 return ret; 1760 return ret;
1701} 1761}
1702 1762
1763/*
1764 * In xattr remove, if it is stored outside and refcounted, we may have
1765 * the chance to split the refcount tree. So need the allocators.
1766 */
1767static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
1768 struct ocfs2_xattr_value_root *xv,
1769 struct ocfs2_caching_info *ref_ci,
1770 struct buffer_head *ref_root_bh,
1771 struct ocfs2_alloc_context **meta_ac,
1772 int *ref_credits)
1773{
1774 int ret, meta_add = 0;
1775 u32 p_cluster, num_clusters;
1776 unsigned int ext_flags;
1777
1778 *ref_credits = 0;
1779 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
1780 &num_clusters,
1781 &xv->xr_list,
1782 &ext_flags);
1783 if (ret) {
1784 mlog_errno(ret);
1785 goto out;
1786 }
1787
1788 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
1789 goto out;
1790
1791 ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
1792 ref_root_bh, xv,
1793 &meta_add, ref_credits);
1794 if (ret) {
1795 mlog_errno(ret);
1796 goto out;
1797 }
1798
1799 ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
1800 meta_add, meta_ac);
1801 if (ret)
1802 mlog_errno(ret);
1803
1804out:
1805 return ret;
1806}
1807
1703static int ocfs2_remove_value_outside(struct inode*inode, 1808static int ocfs2_remove_value_outside(struct inode*inode,
1704 struct ocfs2_xattr_value_buf *vb, 1809 struct ocfs2_xattr_value_buf *vb,
1705 struct ocfs2_xattr_header *header) 1810 struct ocfs2_xattr_header *header,
1811 struct ocfs2_caching_info *ref_ci,
1812 struct buffer_head *ref_root_bh)
1706{ 1813{
1707 int ret = 0, i; 1814 int ret = 0, i, ref_credits;
1708 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1815 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1709 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 1816 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
1817 void *val;
1710 1818
1711 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 1819 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
1712 1820
1713 ctxt.handle = ocfs2_start_trans(osb,
1714 ocfs2_remove_extent_credits(osb->sb));
1715 if (IS_ERR(ctxt.handle)) {
1716 ret = PTR_ERR(ctxt.handle);
1717 mlog_errno(ret);
1718 goto out;
1719 }
1720
1721 for (i = 0; i < le16_to_cpu(header->xh_count); i++) { 1821 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
1722 struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; 1822 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
1723 1823
1724 if (!ocfs2_xattr_is_local(entry)) { 1824 if (ocfs2_xattr_is_local(entry))
1725 void *val; 1825 continue;
1726 1826
1727 val = (void *)header + 1827 val = (void *)header +
1728 le16_to_cpu(entry->xe_name_offset); 1828 le16_to_cpu(entry->xe_name_offset);
1729 vb->vb_xv = (struct ocfs2_xattr_value_root *) 1829 vb->vb_xv = (struct ocfs2_xattr_value_root *)
1730 (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); 1830 (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
1731 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt); 1831
1732 if (ret < 0) { 1832 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
1733 mlog_errno(ret); 1833 ref_ci, ref_root_bh,
1734 break; 1834 &ctxt.meta_ac,
1735 } 1835 &ref_credits);
1836
1837 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
1838 ocfs2_remove_extent_credits(osb->sb));
1839 if (IS_ERR(ctxt.handle)) {
1840 ret = PTR_ERR(ctxt.handle);
1841 mlog_errno(ret);
1842 break;
1843 }
1844
1845 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
1846 if (ret < 0) {
1847 mlog_errno(ret);
1848 break;
1849 }
1850
1851 ocfs2_commit_trans(osb, ctxt.handle);
1852 if (ctxt.meta_ac) {
1853 ocfs2_free_alloc_context(ctxt.meta_ac);
1854 ctxt.meta_ac = NULL;
1736 } 1855 }
1737 } 1856 }
1738 1857
1739 ocfs2_commit_trans(osb, ctxt.handle); 1858 if (ctxt.meta_ac)
1859 ocfs2_free_alloc_context(ctxt.meta_ac);
1740 ocfs2_schedule_truncate_log_flush(osb, 1); 1860 ocfs2_schedule_truncate_log_flush(osb, 1);
1741 ocfs2_run_deallocs(osb, &ctxt.dealloc); 1861 ocfs2_run_deallocs(osb, &ctxt.dealloc);
1742out:
1743 return ret; 1862 return ret;
1744} 1863}
1745 1864
1746static int ocfs2_xattr_ibody_remove(struct inode *inode, 1865static int ocfs2_xattr_ibody_remove(struct inode *inode,
1747 struct buffer_head *di_bh) 1866 struct buffer_head *di_bh,
1867 struct ocfs2_caching_info *ref_ci,
1868 struct buffer_head *ref_root_bh)
1748{ 1869{
1749 1870
1750 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 1871 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
@@ -1759,13 +1880,21 @@ static int ocfs2_xattr_ibody_remove(struct inode *inode,
1759 ((void *)di + inode->i_sb->s_blocksize - 1880 ((void *)di + inode->i_sb->s_blocksize -
1760 le16_to_cpu(di->i_xattr_inline_size)); 1881 le16_to_cpu(di->i_xattr_inline_size));
1761 1882
1762 ret = ocfs2_remove_value_outside(inode, &vb, header); 1883 ret = ocfs2_remove_value_outside(inode, &vb, header,
1884 ref_ci, ref_root_bh);
1763 1885
1764 return ret; 1886 return ret;
1765} 1887}
1766 1888
1889struct ocfs2_rm_xattr_bucket_para {
1890 struct ocfs2_caching_info *ref_ci;
1891 struct buffer_head *ref_root_bh;
1892};
1893
1767static int ocfs2_xattr_block_remove(struct inode *inode, 1894static int ocfs2_xattr_block_remove(struct inode *inode,
1768 struct buffer_head *blk_bh) 1895 struct buffer_head *blk_bh,
1896 struct ocfs2_caching_info *ref_ci,
1897 struct buffer_head *ref_root_bh)
1769{ 1898{
1770 struct ocfs2_xattr_block *xb; 1899 struct ocfs2_xattr_block *xb;
1771 int ret = 0; 1900 int ret = 0;
@@ -1773,19 +1902,29 @@ static int ocfs2_xattr_block_remove(struct inode *inode,
1773 .vb_bh = blk_bh, 1902 .vb_bh = blk_bh,
1774 .vb_access = ocfs2_journal_access_xb, 1903 .vb_access = ocfs2_journal_access_xb,
1775 }; 1904 };
1905 struct ocfs2_rm_xattr_bucket_para args = {
1906 .ref_ci = ref_ci,
1907 .ref_root_bh = ref_root_bh,
1908 };
1776 1909
1777 xb = (struct ocfs2_xattr_block *)blk_bh->b_data; 1910 xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
1778 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { 1911 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
1779 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); 1912 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
1780 ret = ocfs2_remove_value_outside(inode, &vb, header); 1913 ret = ocfs2_remove_value_outside(inode, &vb, header,
1914 ref_ci, ref_root_bh);
1781 } else 1915 } else
1782 ret = ocfs2_delete_xattr_index_block(inode, blk_bh); 1916 ret = ocfs2_iterate_xattr_index_block(inode,
1917 blk_bh,
1918 ocfs2_rm_xattr_cluster,
1919 &args);
1783 1920
1784 return ret; 1921 return ret;
1785} 1922}
1786 1923
1787static int ocfs2_xattr_free_block(struct inode *inode, 1924static int ocfs2_xattr_free_block(struct inode *inode,
1788 u64 block) 1925 u64 block,
1926 struct ocfs2_caching_info *ref_ci,
1927 struct buffer_head *ref_root_bh)
1789{ 1928{
1790 struct inode *xb_alloc_inode; 1929 struct inode *xb_alloc_inode;
1791 struct buffer_head *xb_alloc_bh = NULL; 1930 struct buffer_head *xb_alloc_bh = NULL;
@@ -1803,7 +1942,7 @@ static int ocfs2_xattr_free_block(struct inode *inode,
1803 goto out; 1942 goto out;
1804 } 1943 }
1805 1944
1806 ret = ocfs2_xattr_block_remove(inode, blk_bh); 1945 ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
1807 if (ret < 0) { 1946 if (ret < 0) {
1808 mlog_errno(ret); 1947 mlog_errno(ret);
1809 goto out; 1948 goto out;
@@ -1863,6 +2002,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1863{ 2002{
1864 struct ocfs2_inode_info *oi = OCFS2_I(inode); 2003 struct ocfs2_inode_info *oi = OCFS2_I(inode);
1865 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; 2004 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2005 struct ocfs2_refcount_tree *ref_tree = NULL;
2006 struct buffer_head *ref_root_bh = NULL;
2007 struct ocfs2_caching_info *ref_ci = NULL;
1866 handle_t *handle; 2008 handle_t *handle;
1867 int ret; 2009 int ret;
1868 2010
@@ -1872,8 +2014,21 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1872 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) 2014 if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1873 return 0; 2015 return 0;
1874 2016
2017 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2018 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2019 le64_to_cpu(di->i_refcount_loc),
2020 1, &ref_tree, &ref_root_bh);
2021 if (ret) {
2022 mlog_errno(ret);
2023 goto out;
2024 }
2025 ref_ci = &ref_tree->rf_ci;
2026
2027 }
2028
1875 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { 2029 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
1876 ret = ocfs2_xattr_ibody_remove(inode, di_bh); 2030 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2031 ref_ci, ref_root_bh);
1877 if (ret < 0) { 2032 if (ret < 0) {
1878 mlog_errno(ret); 2033 mlog_errno(ret);
1879 goto out; 2034 goto out;
@@ -1882,7 +2037,8 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1882 2037
1883 if (di->i_xattr_loc) { 2038 if (di->i_xattr_loc) {
1884 ret = ocfs2_xattr_free_block(inode, 2039 ret = ocfs2_xattr_free_block(inode,
1885 le64_to_cpu(di->i_xattr_loc)); 2040 le64_to_cpu(di->i_xattr_loc),
2041 ref_ci, ref_root_bh);
1886 if (ret < 0) { 2042 if (ret < 0) {
1887 mlog_errno(ret); 2043 mlog_errno(ret);
1888 goto out; 2044 goto out;
@@ -1896,7 +2052,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1896 mlog_errno(ret); 2052 mlog_errno(ret);
1897 goto out; 2053 goto out;
1898 } 2054 }
1899 ret = ocfs2_journal_access_di(handle, inode, di_bh, 2055 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
1900 OCFS2_JOURNAL_ACCESS_WRITE); 2056 OCFS2_JOURNAL_ACCESS_WRITE);
1901 if (ret) { 2057 if (ret) {
1902 mlog_errno(ret); 2058 mlog_errno(ret);
@@ -1916,6 +2072,9 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
1916out_commit: 2072out_commit:
1917 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 2073 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
1918out: 2074out:
2075 if (ref_tree)
2076 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2077 brelse(ref_root_bh);
1919 return ret; 2078 return ret;
1920} 2079}
1921 2080
@@ -2083,6 +2242,84 @@ cleanup:
2083 return ret; 2242 return ret;
2084} 2243}
2085 2244
2245static int ocfs2_create_xattr_block(handle_t *handle,
2246 struct inode *inode,
2247 struct buffer_head *inode_bh,
2248 struct ocfs2_alloc_context *meta_ac,
2249 struct buffer_head **ret_bh,
2250 int indexed)
2251{
2252 int ret;
2253 u16 suballoc_bit_start;
2254 u32 num_got;
2255 u64 first_blkno;
2256 struct ocfs2_dinode *di = (struct ocfs2_dinode *)inode_bh->b_data;
2257 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2258 struct buffer_head *new_bh = NULL;
2259 struct ocfs2_xattr_block *xblk;
2260
2261 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2262 OCFS2_JOURNAL_ACCESS_CREATE);
2263 if (ret < 0) {
2264 mlog_errno(ret);
2265 goto end;
2266 }
2267
2268 ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2269 &suballoc_bit_start, &num_got,
2270 &first_blkno);
2271 if (ret < 0) {
2272 mlog_errno(ret);
2273 goto end;
2274 }
2275
2276 new_bh = sb_getblk(inode->i_sb, first_blkno);
2277 ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2278
2279 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2280 new_bh,
2281 OCFS2_JOURNAL_ACCESS_CREATE);
2282 if (ret < 0) {
2283 mlog_errno(ret);
2284 goto end;
2285 }
2286
2287 /* Initialize ocfs2_xattr_block */
2288 xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2289 memset(xblk, 0, inode->i_sb->s_blocksize);
2290 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2291 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2292 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2293 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2294 xblk->xb_blkno = cpu_to_le64(first_blkno);
2295
2296 if (indexed) {
2297 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2298 xr->xt_clusters = cpu_to_le32(1);
2299 xr->xt_last_eb_blk = 0;
2300 xr->xt_list.l_tree_depth = 0;
2301 xr->xt_list.l_count = cpu_to_le16(
2302 ocfs2_xattr_recs_per_xb(inode->i_sb));
2303 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2304 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2305 }
2306
2307 ret = ocfs2_journal_dirty(handle, new_bh);
2308 if (ret < 0) {
2309 mlog_errno(ret);
2310 goto end;
2311 }
2312 di->i_xattr_loc = cpu_to_le64(first_blkno);
2313 ocfs2_journal_dirty(handle, inode_bh);
2314
2315 *ret_bh = new_bh;
2316 new_bh = NULL;
2317
2318end:
2319 brelse(new_bh);
2320 return ret;
2321}
2322
2086/* 2323/*
2087 * ocfs2_xattr_block_set() 2324 * ocfs2_xattr_block_set()
2088 * 2325 *
@@ -2095,63 +2332,24 @@ static int ocfs2_xattr_block_set(struct inode *inode,
2095 struct ocfs2_xattr_set_ctxt *ctxt) 2332 struct ocfs2_xattr_set_ctxt *ctxt)
2096{ 2333{
2097 struct buffer_head *new_bh = NULL; 2334 struct buffer_head *new_bh = NULL;
2098 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2099 struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2100 handle_t *handle = ctxt->handle; 2335 handle_t *handle = ctxt->handle;
2101 struct ocfs2_xattr_block *xblk = NULL; 2336 struct ocfs2_xattr_block *xblk = NULL;
2102 u16 suballoc_bit_start;
2103 u32 num_got;
2104 u64 first_blkno;
2105 int ret; 2337 int ret;
2106 2338
2107 if (!xs->xattr_bh) { 2339 if (!xs->xattr_bh) {
2108 ret = ocfs2_journal_access_di(handle, inode, xs->inode_bh, 2340 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2109 OCFS2_JOURNAL_ACCESS_CREATE); 2341 ctxt->meta_ac, &new_bh, 0);
2110 if (ret < 0) { 2342 if (ret) {
2111 mlog_errno(ret);
2112 goto end;
2113 }
2114
2115 ret = ocfs2_claim_metadata(osb, handle, ctxt->meta_ac, 1,
2116 &suballoc_bit_start, &num_got,
2117 &first_blkno);
2118 if (ret < 0) {
2119 mlog_errno(ret);
2120 goto end;
2121 }
2122
2123 new_bh = sb_getblk(inode->i_sb, first_blkno);
2124 ocfs2_set_new_buffer_uptodate(inode, new_bh);
2125
2126 ret = ocfs2_journal_access_xb(handle, inode, new_bh,
2127 OCFS2_JOURNAL_ACCESS_CREATE);
2128 if (ret < 0) {
2129 mlog_errno(ret); 2343 mlog_errno(ret);
2130 goto end; 2344 goto end;
2131 } 2345 }
2132 2346
2133 /* Initialize ocfs2_xattr_block */
2134 xs->xattr_bh = new_bh; 2347 xs->xattr_bh = new_bh;
2135 xblk = (struct ocfs2_xattr_block *)new_bh->b_data; 2348 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2136 memset(xblk, 0, inode->i_sb->s_blocksize);
2137 strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2138 xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num);
2139 xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2140 xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2141 xblk->xb_blkno = cpu_to_le64(first_blkno);
2142
2143 xs->header = &xblk->xb_attrs.xb_header; 2349 xs->header = &xblk->xb_attrs.xb_header;
2144 xs->base = (void *)xs->header; 2350 xs->base = (void *)xs->header;
2145 xs->end = (void *)xblk + inode->i_sb->s_blocksize; 2351 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2146 xs->here = xs->header->xh_entries; 2352 xs->here = xs->header->xh_entries;
2147
2148 ret = ocfs2_journal_dirty(handle, new_bh);
2149 if (ret < 0) {
2150 mlog_errno(ret);
2151 goto end;
2152 }
2153 di->i_xattr_loc = cpu_to_le64(first_blkno);
2154 ocfs2_journal_dirty(handle, xs->inode_bh);
2155 } else 2353 } else
2156 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; 2354 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2157 2355
@@ -2273,7 +2471,7 @@ static int ocfs2_calc_xattr_set_need(struct inode *inode,
2273 old_in_xb = 1; 2471 old_in_xb = 1;
2274 2472
2275 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { 2473 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2276 ret = ocfs2_xattr_bucket_get_name_value(inode, 2474 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2277 bucket_xh(xbs->bucket), 2475 bucket_xh(xbs->bucket),
2278 i, &block_off, 2476 i, &block_off,
2279 &name_offset); 2477 &name_offset);
@@ -2428,6 +2626,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2428 struct ocfs2_xattr_search *xis, 2626 struct ocfs2_xattr_search *xis,
2429 struct ocfs2_xattr_search *xbs, 2627 struct ocfs2_xattr_search *xbs,
2430 struct ocfs2_xattr_set_ctxt *ctxt, 2628 struct ocfs2_xattr_set_ctxt *ctxt,
2629 int extra_meta,
2431 int *credits) 2630 int *credits)
2432{ 2631{
2433 int clusters_add, meta_add, ret; 2632 int clusters_add, meta_add, ret;
@@ -2444,6 +2643,7 @@ static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2444 return ret; 2643 return ret;
2445 } 2644 }
2446 2645
2646 meta_add += extra_meta;
2447 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, " 2647 mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2448 "credits = %d\n", xi->name, meta_add, clusters_add, *credits); 2648 "credits = %d\n", xi->name, meta_add, clusters_add, *credits);
2449 2649
@@ -2598,7 +2798,7 @@ static int __ocfs2_xattr_set_handle(struct inode *inode,
2598 2798
2599 if (!ret) { 2799 if (!ret) {
2600 /* Update inode ctime. */ 2800 /* Update inode ctime. */
2601 ret = ocfs2_journal_access_di(ctxt->handle, inode, 2801 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2602 xis->inode_bh, 2802 xis->inode_bh,
2603 OCFS2_JOURNAL_ACCESS_WRITE); 2803 OCFS2_JOURNAL_ACCESS_WRITE);
2604 if (ret) { 2804 if (ret) {
@@ -2711,10 +2911,11 @@ int ocfs2_xattr_set(struct inode *inode,
2711{ 2911{
2712 struct buffer_head *di_bh = NULL; 2912 struct buffer_head *di_bh = NULL;
2713 struct ocfs2_dinode *di; 2913 struct ocfs2_dinode *di;
2714 int ret, credits; 2914 int ret, credits, ref_meta = 0, ref_credits = 0;
2715 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 2915 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2716 struct inode *tl_inode = osb->osb_tl_inode; 2916 struct inode *tl_inode = osb->osb_tl_inode;
2717 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, }; 2917 struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2918 struct ocfs2_refcount_tree *ref_tree = NULL;
2718 2919
2719 struct ocfs2_xattr_info xi = { 2920 struct ocfs2_xattr_info xi = {
2720 .name_index = name_index, 2921 .name_index = name_index,
@@ -2779,6 +2980,17 @@ int ocfs2_xattr_set(struct inode *inode,
2779 goto cleanup; 2980 goto cleanup;
2780 } 2981 }
2781 2982
2983 /* Check whether the value is refcounted and do some prepartion. */
2984 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
2985 (!xis.not_found || !xbs.not_found)) {
2986 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
2987 &xis, &xbs, &ref_tree,
2988 &ref_meta, &ref_credits);
2989 if (ret) {
2990 mlog_errno(ret);
2991 goto cleanup;
2992 }
2993 }
2782 2994
2783 mutex_lock(&tl_inode->i_mutex); 2995 mutex_lock(&tl_inode->i_mutex);
2784 2996
@@ -2793,7 +3005,7 @@ int ocfs2_xattr_set(struct inode *inode,
2793 mutex_unlock(&tl_inode->i_mutex); 3005 mutex_unlock(&tl_inode->i_mutex);
2794 3006
2795 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis, 3007 ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
2796 &xbs, &ctxt, &credits); 3008 &xbs, &ctxt, ref_meta, &credits);
2797 if (ret) { 3009 if (ret) {
2798 mlog_errno(ret); 3010 mlog_errno(ret);
2799 goto cleanup; 3011 goto cleanup;
@@ -2801,7 +3013,7 @@ int ocfs2_xattr_set(struct inode *inode,
2801 3013
2802 /* we need to update inode's ctime field, so add credit for it. */ 3014 /* we need to update inode's ctime field, so add credit for it. */
2803 credits += OCFS2_INODE_UPDATE_CREDITS; 3015 credits += OCFS2_INODE_UPDATE_CREDITS;
2804 ctxt.handle = ocfs2_start_trans(osb, credits); 3016 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
2805 if (IS_ERR(ctxt.handle)) { 3017 if (IS_ERR(ctxt.handle)) {
2806 ret = PTR_ERR(ctxt.handle); 3018 ret = PTR_ERR(ctxt.handle);
2807 mlog_errno(ret); 3019 mlog_errno(ret);
@@ -2819,8 +3031,16 @@ int ocfs2_xattr_set(struct inode *inode,
2819 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc)) 3031 if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
2820 ocfs2_schedule_truncate_log_flush(osb, 1); 3032 ocfs2_schedule_truncate_log_flush(osb, 1);
2821 ocfs2_run_deallocs(osb, &ctxt.dealloc); 3033 ocfs2_run_deallocs(osb, &ctxt.dealloc);
3034
2822cleanup: 3035cleanup:
3036 if (ref_tree)
3037 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
2823 up_write(&OCFS2_I(inode)->ip_xattr_sem); 3038 up_write(&OCFS2_I(inode)->ip_xattr_sem);
3039 if (!value && !ret) {
3040 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3041 if (ret)
3042 mlog_errno(ret);
3043 }
2824 ocfs2_inode_unlock(inode, 1); 3044 ocfs2_inode_unlock(inode, 1);
2825cleanup_nolock: 3045cleanup_nolock:
2826 brelse(di_bh); 3046 brelse(di_bh);
@@ -2849,7 +3069,8 @@ static int ocfs2_xattr_get_rec(struct inode *inode,
2849 u64 e_blkno = 0; 3069 u64 e_blkno = 0;
2850 3070
2851 if (el->l_tree_depth) { 3071 if (el->l_tree_depth) {
2852 ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); 3072 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3073 &eb_bh);
2853 if (ret) { 3074 if (ret) {
2854 mlog_errno(ret); 3075 mlog_errno(ret);
2855 goto out; 3076 goto out;
@@ -2931,7 +3152,7 @@ static int ocfs2_find_xe_in_bucket(struct inode *inode,
2931 if (cmp) 3152 if (cmp)
2932 continue; 3153 continue;
2933 3154
2934 ret = ocfs2_xattr_bucket_get_name_value(inode, 3155 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2935 xh, 3156 xh,
2936 i, 3157 i,
2937 &block_off, 3158 &block_off,
@@ -3175,7 +3396,7 @@ struct ocfs2_xattr_tree_list {
3175 size_t result; 3396 size_t result;
3176}; 3397};
3177 3398
3178static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, 3399static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3179 struct ocfs2_xattr_header *xh, 3400 struct ocfs2_xattr_header *xh,
3180 int index, 3401 int index,
3181 int *block_off, 3402 int *block_off,
@@ -3188,8 +3409,8 @@ static int ocfs2_xattr_bucket_get_name_value(struct inode *inode,
3188 3409
3189 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); 3410 name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3190 3411
3191 *block_off = name_offset >> inode->i_sb->s_blocksize_bits; 3412 *block_off = name_offset >> sb->s_blocksize_bits;
3192 *new_offset = name_offset % inode->i_sb->s_blocksize; 3413 *new_offset = name_offset % sb->s_blocksize;
3193 3414
3194 return 0; 3415 return 0;
3195} 3416}
@@ -3209,7 +3430,7 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
3209 prefix = ocfs2_xattr_prefix(type); 3430 prefix = ocfs2_xattr_prefix(type);
3210 3431
3211 if (prefix) { 3432 if (prefix) {
3212 ret = ocfs2_xattr_bucket_get_name_value(inode, 3433 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3213 bucket_xh(bucket), 3434 bucket_xh(bucket),
3214 i, 3435 i,
3215 &block_off, 3436 &block_off,
@@ -3232,22 +3453,19 @@ static int ocfs2_list_xattr_bucket(struct inode *inode,
3232 return ret; 3453 return ret;
3233} 3454}
3234 3455
3235static int ocfs2_xattr_tree_list_index_block(struct inode *inode, 3456static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3236 struct ocfs2_xattr_tree_root *xt, 3457 struct buffer_head *blk_bh,
3237 char *buffer, 3458 xattr_tree_rec_func *rec_func,
3238 size_t buffer_size) 3459 void *para)
3239{ 3460{
3240 struct ocfs2_extent_list *el = &xt->xt_list; 3461 struct ocfs2_xattr_block *xb =
3462 (struct ocfs2_xattr_block *)blk_bh->b_data;
3463 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3241 int ret = 0; 3464 int ret = 0;
3242 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; 3465 u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3243 u64 p_blkno = 0; 3466 u64 p_blkno = 0;
3244 struct ocfs2_xattr_tree_list xl = {
3245 .buffer = buffer,
3246 .buffer_size = buffer_size,
3247 .result = 0,
3248 };
3249 3467
3250 if (le16_to_cpu(el->l_next_free_rec) == 0) 3468 if (!el->l_next_free_rec || !rec_func)
3251 return 0; 3469 return 0;
3252 3470
3253 while (name_hash > 0) { 3471 while (name_hash > 0) {
@@ -3255,16 +3473,15 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3255 &e_cpos, &num_clusters, el); 3473 &e_cpos, &num_clusters, el);
3256 if (ret) { 3474 if (ret) {
3257 mlog_errno(ret); 3475 mlog_errno(ret);
3258 goto out; 3476 break;
3259 } 3477 }
3260 3478
3261 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, 3479 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3262 ocfs2_list_xattr_bucket, 3480 num_clusters, para);
3263 &xl);
3264 if (ret) { 3481 if (ret) {
3265 if (ret != -ERANGE) 3482 if (ret != -ERANGE)
3266 mlog_errno(ret); 3483 mlog_errno(ret);
3267 goto out; 3484 break;
3268 } 3485 }
3269 3486
3270 if (e_cpos == 0) 3487 if (e_cpos == 0)
@@ -3273,6 +3490,37 @@ static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3273 name_hash = e_cpos - 1; 3490 name_hash = e_cpos - 1;
3274 } 3491 }
3275 3492
3493 return ret;
3494
3495}
3496
3497static int ocfs2_list_xattr_tree_rec(struct inode *inode,
3498 struct buffer_head *root_bh,
3499 u64 blkno, u32 cpos, u32 len, void *para)
3500{
3501 return ocfs2_iterate_xattr_buckets(inode, blkno, len,
3502 ocfs2_list_xattr_bucket, para);
3503}
3504
3505static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3506 struct buffer_head *blk_bh,
3507 char *buffer,
3508 size_t buffer_size)
3509{
3510 int ret;
3511 struct ocfs2_xattr_tree_list xl = {
3512 .buffer = buffer,
3513 .buffer_size = buffer_size,
3514 .result = 0,
3515 };
3516
3517 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
3518 ocfs2_list_xattr_tree_rec, &xl);
3519 if (ret) {
3520 mlog_errno(ret);
3521 goto out;
3522 }
3523
3276 ret = xl.result; 3524 ret = xl.result;
3277out: 3525out:
3278 return ret; 3526 return ret;
@@ -3426,7 +3674,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode,
3426 */ 3674 */
3427 down_write(&oi->ip_alloc_sem); 3675 down_write(&oi->ip_alloc_sem);
3428 3676
3429 ret = ocfs2_journal_access_xb(handle, inode, xb_bh, 3677 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3430 OCFS2_JOURNAL_ACCESS_WRITE); 3678 OCFS2_JOURNAL_ACCESS_WRITE);
3431 if (ret) { 3679 if (ret) {
3432 mlog_errno(ret); 3680 mlog_errno(ret);
@@ -4263,9 +4511,9 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4263 (unsigned long long)OCFS2_I(inode)->ip_blkno, 4511 (unsigned long long)OCFS2_I(inode)->ip_blkno,
4264 prev_cpos, (unsigned long long)bucket_blkno(first)); 4512 prev_cpos, (unsigned long long)bucket_blkno(first));
4265 4513
4266 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 4514 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4267 4515
4268 ret = ocfs2_journal_access_xb(handle, inode, root_bh, 4516 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4269 OCFS2_JOURNAL_ACCESS_WRITE); 4517 OCFS2_JOURNAL_ACCESS_WRITE);
4270 if (ret < 0) { 4518 if (ret < 0) {
4271 mlog_errno(ret); 4519 mlog_errno(ret);
@@ -4319,7 +4567,7 @@ static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4319 4567
4320 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", 4568 mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4321 num_bits, (unsigned long long)block, v_start); 4569 num_bits, (unsigned long long)block, v_start);
4322 ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, 4570 ret = ocfs2_insert_extent(handle, &et, v_start, block,
4323 num_bits, 0, ctxt->meta_ac); 4571 num_bits, 0, ctxt->meta_ac);
4324 if (ret < 0) { 4572 if (ret < 0) {
4325 mlog_errno(ret); 4573 mlog_errno(ret);
@@ -4798,10 +5046,13 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4798 struct ocfs2_xattr_entry *xe = xs->here; 5046 struct ocfs2_xattr_entry *xe = xs->here;
4799 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket); 5047 struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
4800 void *base; 5048 void *base;
5049 struct ocfs2_xattr_value_buf vb = {
5050 .vb_access = ocfs2_journal_access,
5051 };
4801 5052
4802 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); 5053 BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
4803 5054
4804 ret = ocfs2_xattr_bucket_get_name_value(inode, xh, 5055 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
4805 xe - xh->xh_entries, 5056 xe - xh->xh_entries,
4806 &block_off, 5057 &block_off,
4807 &offset); 5058 &offset);
@@ -4814,8 +5065,10 @@ static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
4814 xv = (struct ocfs2_xattr_value_root *)(base + offset + 5065 xv = (struct ocfs2_xattr_value_root *)(base + offset +
4815 OCFS2_XATTR_SIZE(xe->xe_name_len)); 5066 OCFS2_XATTR_SIZE(xe->xe_name_len));
4816 5067
5068 vb.vb_xv = xv;
5069 vb.vb_bh = xs->bucket->bu_bhs[block_off];
4817 ret = __ocfs2_xattr_set_value_outside(inode, handle, 5070 ret = __ocfs2_xattr_set_value_outside(inode, handle,
4818 xv, val, value_len); 5071 &vb, val, value_len);
4819 if (ret) 5072 if (ret)
4820 mlog_errno(ret); 5073 mlog_errno(ret);
4821out: 5074out:
@@ -4826,7 +5079,8 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4826 struct buffer_head *root_bh, 5079 struct buffer_head *root_bh,
4827 u64 blkno, 5080 u64 blkno,
4828 u32 cpos, 5081 u32 cpos,
4829 u32 len) 5082 u32 len,
5083 void *para)
4830{ 5084{
4831 int ret; 5085 int ret;
4832 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 5086 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
@@ -4838,14 +5092,22 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4838 struct ocfs2_cached_dealloc_ctxt dealloc; 5092 struct ocfs2_cached_dealloc_ctxt dealloc;
4839 struct ocfs2_extent_tree et; 5093 struct ocfs2_extent_tree et;
4840 5094
4841 ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); 5095 ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5096 ocfs2_delete_xattr_in_bucket, para);
5097 if (ret) {
5098 mlog_errno(ret);
5099 return ret;
5100 }
5101
5102 ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4842 5103
4843 ocfs2_init_dealloc_ctxt(&dealloc); 5104 ocfs2_init_dealloc_ctxt(&dealloc);
4844 5105
4845 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", 5106 mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
4846 cpos, len, (unsigned long long)blkno); 5107 cpos, len, (unsigned long long)blkno);
4847 5108
4848 ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len); 5109 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5110 len);
4849 5111
4850 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); 5112 ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
4851 if (ret) { 5113 if (ret) {
@@ -4870,14 +5132,14 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode,
4870 goto out; 5132 goto out;
4871 } 5133 }
4872 5134
4873 ret = ocfs2_journal_access_xb(handle, inode, root_bh, 5135 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4874 OCFS2_JOURNAL_ACCESS_WRITE); 5136 OCFS2_JOURNAL_ACCESS_WRITE);
4875 if (ret) { 5137 if (ret) {
4876 mlog_errno(ret); 5138 mlog_errno(ret);
4877 goto out_commit; 5139 goto out_commit;
4878 } 5140 }
4879 5141
4880 ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, 5142 ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
4881 &dealloc); 5143 &dealloc);
4882 if (ret) { 5144 if (ret) {
4883 mlog_errno(ret); 5145 mlog_errno(ret);
@@ -5220,7 +5482,7 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5220 struct ocfs2_xattr_bucket *bucket, 5482 struct ocfs2_xattr_bucket *bucket,
5221 void *para) 5483 void *para)
5222{ 5484{
5223 int ret = 0; 5485 int ret = 0, ref_credits;
5224 struct ocfs2_xattr_header *xh = bucket_xh(bucket); 5486 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5225 u16 i; 5487 u16 i;
5226 struct ocfs2_xattr_entry *xe; 5488 struct ocfs2_xattr_entry *xe;
@@ -5228,7 +5490,9 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5228 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,}; 5490 struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5229 int credits = ocfs2_remove_extent_credits(osb->sb) + 5491 int credits = ocfs2_remove_extent_credits(osb->sb) +
5230 ocfs2_blocks_per_xattr_bucket(inode->i_sb); 5492 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5231 5493 struct ocfs2_xattr_value_root *xv;
5494 struct ocfs2_rm_xattr_bucket_para *args =
5495 (struct ocfs2_rm_xattr_bucket_para *)para;
5232 5496
5233 ocfs2_init_dealloc_ctxt(&ctxt.dealloc); 5497 ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5234 5498
@@ -5237,7 +5501,16 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5237 if (ocfs2_xattr_is_local(xe)) 5501 if (ocfs2_xattr_is_local(xe))
5238 continue; 5502 continue;
5239 5503
5240 ctxt.handle = ocfs2_start_trans(osb, credits); 5504 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5505 i, &xv, NULL);
5506
5507 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5508 args->ref_ci,
5509 args->ref_root_bh,
5510 &ctxt.meta_ac,
5511 &ref_credits);
5512
5513 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5241 if (IS_ERR(ctxt.handle)) { 5514 if (IS_ERR(ctxt.handle)) {
5242 ret = PTR_ERR(ctxt.handle); 5515 ret = PTR_ERR(ctxt.handle);
5243 mlog_errno(ret); 5516 mlog_errno(ret);
@@ -5248,57 +5521,1439 @@ static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5248 i, 0, &ctxt); 5521 i, 0, &ctxt);
5249 5522
5250 ocfs2_commit_trans(osb, ctxt.handle); 5523 ocfs2_commit_trans(osb, ctxt.handle);
5524 if (ctxt.meta_ac) {
5525 ocfs2_free_alloc_context(ctxt.meta_ac);
5526 ctxt.meta_ac = NULL;
5527 }
5251 if (ret) { 5528 if (ret) {
5252 mlog_errno(ret); 5529 mlog_errno(ret);
5253 break; 5530 break;
5254 } 5531 }
5255 } 5532 }
5256 5533
5534 if (ctxt.meta_ac)
5535 ocfs2_free_alloc_context(ctxt.meta_ac);
5257 ocfs2_schedule_truncate_log_flush(osb, 1); 5536 ocfs2_schedule_truncate_log_flush(osb, 1);
5258 ocfs2_run_deallocs(osb, &ctxt.dealloc); 5537 ocfs2_run_deallocs(osb, &ctxt.dealloc);
5259 return ret; 5538 return ret;
5260} 5539}
5261 5540
5262static int ocfs2_delete_xattr_index_block(struct inode *inode, 5541/*
5263 struct buffer_head *xb_bh) 5542 * Whenever we modify a xattr value root in the bucket(e.g, CoW
5543 * or change the extent record flag), we need to recalculate
5544 * the metaecc for the whole bucket. So it is done here.
5545 *
5546 * Note:
5547 * We have to give the extra credits for the caller.
5548 */
5549static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5550 handle_t *handle,
5551 void *para)
5552{
5553 int ret;
5554 struct ocfs2_xattr_bucket *bucket =
5555 (struct ocfs2_xattr_bucket *)para;
5556
5557 ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5558 OCFS2_JOURNAL_ACCESS_WRITE);
5559 if (ret) {
5560 mlog_errno(ret);
5561 return ret;
5562 }
5563
5564 ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5565
5566 return 0;
5567}
5568
5569/*
5570 * Special action we need if the xattr value is refcounted.
5571 *
5572 * 1. If the xattr is refcounted, lock the tree.
5573 * 2. CoW the xattr if we are setting the new value and the value
5574 * will be stored outside.
5575 * 3. In other case, decrease_refcount will work for us, so just
5576 * lock the refcount tree, calculate the meta and credits is OK.
5577 *
5578 * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5579 * currently CoW is a completed transaction, while this function
5580 * will also lock the allocators and let us deadlock. So we will
5581 * CoW the whole xattr value.
5582 */
5583static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5584 struct ocfs2_dinode *di,
5585 struct ocfs2_xattr_info *xi,
5586 struct ocfs2_xattr_search *xis,
5587 struct ocfs2_xattr_search *xbs,
5588 struct ocfs2_refcount_tree **ref_tree,
5589 int *meta_add,
5590 int *credits)
5264{ 5591{
5265 struct ocfs2_xattr_block *xb =
5266 (struct ocfs2_xattr_block *)xb_bh->b_data;
5267 struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
5268 int ret = 0; 5592 int ret = 0;
5269 u32 name_hash = UINT_MAX, e_cpos, num_clusters; 5593 struct ocfs2_xattr_block *xb;
5270 u64 p_blkno; 5594 struct ocfs2_xattr_entry *xe;
5595 char *base;
5596 u32 p_cluster, num_clusters;
5597 unsigned int ext_flags;
5598 int name_offset, name_len;
5599 struct ocfs2_xattr_value_buf vb;
5600 struct ocfs2_xattr_bucket *bucket = NULL;
5601 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5602 struct ocfs2_post_refcount refcount;
5603 struct ocfs2_post_refcount *p = NULL;
5604 struct buffer_head *ref_root_bh = NULL;
5271 5605
5272 if (le16_to_cpu(el->l_next_free_rec) == 0) 5606 if (!xis->not_found) {
5273 return 0; 5607 xe = xis->here;
5608 name_offset = le16_to_cpu(xe->xe_name_offset);
5609 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5610 base = xis->base;
5611 vb.vb_bh = xis->inode_bh;
5612 vb.vb_access = ocfs2_journal_access_di;
5613 } else {
5614 int i, block_off = 0;
5615 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5616 xe = xbs->here;
5617 name_offset = le16_to_cpu(xe->xe_name_offset);
5618 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5619 i = xbs->here - xbs->header->xh_entries;
5274 5620
5275 while (name_hash > 0) { 5621 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5276 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, 5622 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5277 &e_cpos, &num_clusters, el); 5623 bucket_xh(xbs->bucket),
5624 i, &block_off,
5625 &name_offset);
5626 if (ret) {
5627 mlog_errno(ret);
5628 goto out;
5629 }
5630 base = bucket_block(xbs->bucket, block_off);
5631 vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5632 vb.vb_access = ocfs2_journal_access;
5633
5634 if (ocfs2_meta_ecc(osb)) {
5635 /*create parameters for ocfs2_post_refcount. */
5636 bucket = xbs->bucket;
5637 refcount.credits = bucket->bu_blocks;
5638 refcount.para = bucket;
5639 refcount.func =
5640 ocfs2_xattr_bucket_post_refcount;
5641 p = &refcount;
5642 }
5643 } else {
5644 base = xbs->base;
5645 vb.vb_bh = xbs->xattr_bh;
5646 vb.vb_access = ocfs2_journal_access_xb;
5647 }
5648 }
5649
5650 if (ocfs2_xattr_is_local(xe))
5651 goto out;
5652
5653 vb.vb_xv = (struct ocfs2_xattr_value_root *)
5654 (base + name_offset + name_len);
5655
5656 ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5657 &num_clusters, &vb.vb_xv->xr_list,
5658 &ext_flags);
5659 if (ret) {
5660 mlog_errno(ret);
5661 goto out;
5662 }
5663
5664 /*
5665 * We just need to check the 1st extent record, since we always
5666 * CoW the whole xattr. So there shouldn't be a xattr with
5667 * some REFCOUNT extent recs after the 1st one.
5668 */
5669 if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5670 goto out;
5671
5672 ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5673 1, ref_tree, &ref_root_bh);
5674 if (ret) {
5675 mlog_errno(ret);
5676 goto out;
5677 }
5678
5679 /*
5680 * If we are deleting the xattr or the new size will be stored inside,
5681 * cool, leave it there, the xattr truncate process will remove them
5682 * for us(it still needs the refcount tree lock and the meta, credits).
5683 * And the worse case is that every cluster truncate will split the
5684 * refcount tree, and make the original extent become 3. So we will need
5685 * 2 * cluster more extent recs at most.
5686 */
5687 if (!xi->value || xi->value_len <= OCFS2_XATTR_INLINE_SIZE) {
5688
5689 ret = ocfs2_refcounted_xattr_delete_need(inode,
5690 &(*ref_tree)->rf_ci,
5691 ref_root_bh, vb.vb_xv,
5692 meta_add, credits);
5693 if (ret)
5694 mlog_errno(ret);
5695 goto out;
5696 }
5697
5698 ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5699 *ref_tree, ref_root_bh, 0,
5700 le32_to_cpu(vb.vb_xv->xr_clusters), p);
5701 if (ret)
5702 mlog_errno(ret);
5703
5704out:
5705 brelse(ref_root_bh);
5706 return ret;
5707}
5708
5709/*
5710 * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5711 * The physical clusters will be added to refcount tree.
5712 */
5713static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5714 struct ocfs2_xattr_value_root *xv,
5715 struct ocfs2_extent_tree *value_et,
5716 struct ocfs2_caching_info *ref_ci,
5717 struct buffer_head *ref_root_bh,
5718 struct ocfs2_cached_dealloc_ctxt *dealloc,
5719 struct ocfs2_post_refcount *refcount)
5720{
5721 int ret = 0;
5722 u32 clusters = le32_to_cpu(xv->xr_clusters);
5723 u32 cpos, p_cluster, num_clusters;
5724 struct ocfs2_extent_list *el = &xv->xr_list;
5725 unsigned int ext_flags;
5726
5727 cpos = 0;
5728 while (cpos < clusters) {
5729 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5730 &num_clusters, el, &ext_flags);
5731
5732 cpos += num_clusters;
5733 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5734 continue;
5735
5736 BUG_ON(!p_cluster);
5737
5738 ret = ocfs2_add_refcount_flag(inode, value_et,
5739 ref_ci, ref_root_bh,
5740 cpos - num_clusters,
5741 p_cluster, num_clusters,
5742 dealloc, refcount);
5743 if (ret) {
5744 mlog_errno(ret);
5745 break;
5746 }
5747 }
5748
5749 return ret;
5750}
5751
5752/*
5753 * Given a normal ocfs2_xattr_header, refcount all the entries which
5754 * have value stored outside.
5755 * Used for xattrs stored in inode and ocfs2_xattr_block.
5756 */
5757static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5758 struct ocfs2_xattr_value_buf *vb,
5759 struct ocfs2_xattr_header *header,
5760 struct ocfs2_caching_info *ref_ci,
5761 struct buffer_head *ref_root_bh,
5762 struct ocfs2_cached_dealloc_ctxt *dealloc)
5763{
5764
5765 struct ocfs2_xattr_entry *xe;
5766 struct ocfs2_xattr_value_root *xv;
5767 struct ocfs2_extent_tree et;
5768 int i, ret = 0;
5769
5770 for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5771 xe = &header->xh_entries[i];
5772
5773 if (ocfs2_xattr_is_local(xe))
5774 continue;
5775
5776 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5777 le16_to_cpu(xe->xe_name_offset) +
5778 OCFS2_XATTR_SIZE(xe->xe_name_len));
5779
5780 vb->vb_xv = xv;
5781 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5782
5783 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5784 ref_ci, ref_root_bh,
5785 dealloc, NULL);
5786 if (ret) {
5787 mlog_errno(ret);
5788 break;
5789 }
5790 }
5791
5792 return ret;
5793}
5794
5795static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5796 struct buffer_head *fe_bh,
5797 struct ocfs2_caching_info *ref_ci,
5798 struct buffer_head *ref_root_bh,
5799 struct ocfs2_cached_dealloc_ctxt *dealloc)
5800{
5801 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5802 struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5803 (fe_bh->b_data + inode->i_sb->s_blocksize -
5804 le16_to_cpu(di->i_xattr_inline_size));
5805 struct ocfs2_xattr_value_buf vb = {
5806 .vb_bh = fe_bh,
5807 .vb_access = ocfs2_journal_access_di,
5808 };
5809
5810 return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5811 ref_ci, ref_root_bh, dealloc);
5812}
5813
/*
 * Arguments carried through the xattr bucket iteration when refcounting an
 * indexed xattr tree. ocfs2_xattr_bucket_value_refcount() forwards all three
 * members to ocfs2_xattr_value_attach_refcount().
 */
5814struct ocfs2_xattr_tree_value_refcount_para {
5815 struct ocfs2_caching_info *ref_ci;
5816 struct buffer_head *ref_root_bh;
5817 struct ocfs2_cached_dealloc_ctxt *dealloc;
5818};
5819
5820static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5821 struct ocfs2_xattr_bucket *bucket,
5822 int offset,
5823 struct ocfs2_xattr_value_root **xv,
5824 struct buffer_head **bh)
5825{
5826 int ret, block_off, name_offset;
5827 struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5828 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5829 void *base;
5830
5831 ret = ocfs2_xattr_bucket_get_name_value(sb,
5832 bucket_xh(bucket),
5833 offset,
5834 &block_off,
5835 &name_offset);
5836 if (ret) {
5837 mlog_errno(ret);
5838 goto out;
5839 }
5840
5841 base = bucket_block(bucket, block_off);
5842
5843 *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
5844 OCFS2_XATTR_SIZE(xe->xe_name_len));
5845
5846 if (bh)
5847 *bh = bucket->bu_bhs[block_off];
5848out:
5849 return ret;
5850}
5851
5852/*
5853 * For a given xattr bucket, refcount all the entries which
5854 * have value stored outside.
5855 */
5856static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
5857 struct ocfs2_xattr_bucket *bucket,
5858 void *para)
5859{
5860 int i, ret = 0;
5861 struct ocfs2_extent_tree et;
5862 struct ocfs2_xattr_tree_value_refcount_para *ref =
5863 (struct ocfs2_xattr_tree_value_refcount_para *)para;
5864 struct ocfs2_xattr_header *xh =
5865 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
5866 struct ocfs2_xattr_entry *xe;
5867 struct ocfs2_xattr_value_buf vb = {
5868 .vb_access = ocfs2_journal_access,
5869 };
5870 struct ocfs2_post_refcount refcount = {
5871 .credits = bucket->bu_blocks,
5872 .para = bucket,
5873 .func = ocfs2_xattr_bucket_post_refcount,
5874 };
5875 struct ocfs2_post_refcount *p = NULL;
5876
5877 /* We only need post_refcount if we support metaecc. */
5878 if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
5879 p = &refcount;
5880
5881 mlog(0, "refcount bucket %llu, count = %u\n",
5882 (unsigned long long)bucket_blkno(bucket),
5883 le16_to_cpu(xh->xh_count));
5884 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5885 xe = &xh->xh_entries[i];
5886
5887 if (ocfs2_xattr_is_local(xe))
5888 continue;
5889
5890 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
5891 &vb.vb_xv, &vb.vb_bh);
5892 if (ret) {
5893 mlog_errno(ret);
5894 break;
5895 }
5896
5897 ocfs2_init_xattr_value_extent_tree(&et,
5898 INODE_CACHE(inode), &vb);
5899
5900 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
5901 &et, ref->ref_ci,
5902 ref->ref_root_bh,
5903 ref->dealloc, p);
5904 if (ret) {
5905 mlog_errno(ret);
5906 break;
5907 }
5908 }
5909
5910 return ret;
5911
5912}
5913
5914static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
5915 struct buffer_head *root_bh,
5916 u64 blkno, u32 cpos, u32 len, void *para)
5917{
5918 return ocfs2_iterate_xattr_buckets(inode, blkno, len,
5919 ocfs2_xattr_bucket_value_refcount,
5920 para);
5921}
5922
5923static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
5924 struct buffer_head *blk_bh,
5925 struct ocfs2_caching_info *ref_ci,
5926 struct buffer_head *ref_root_bh,
5927 struct ocfs2_cached_dealloc_ctxt *dealloc)
5928{
5929 int ret = 0;
5930 struct ocfs2_xattr_block *xb =
5931 (struct ocfs2_xattr_block *)blk_bh->b_data;
5932
5933 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
5934 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
5935 struct ocfs2_xattr_value_buf vb = {
5936 .vb_bh = blk_bh,
5937 .vb_access = ocfs2_journal_access_xb,
5938 };
5939
5940 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5941 ref_ci, ref_root_bh,
5942 dealloc);
5943 } else {
5944 struct ocfs2_xattr_tree_value_refcount_para para = {
5945 .ref_ci = ref_ci,
5946 .ref_root_bh = ref_root_bh,
5947 .dealloc = dealloc,
5948 };
5949
5950 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
5951 ocfs2_refcount_xattr_tree_rec,
5952 &para);
5953 }
5954
5955 return ret;
5956}
5957
5958int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
5959 struct buffer_head *fe_bh,
5960 struct ocfs2_caching_info *ref_ci,
5961 struct buffer_head *ref_root_bh,
5962 struct ocfs2_cached_dealloc_ctxt *dealloc)
5963{
5964 int ret = 0;
5965 struct ocfs2_inode_info *oi = OCFS2_I(inode);
5966 struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5967 struct buffer_head *blk_bh = NULL;
5968
5969 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
5970 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
5971 ref_ci, ref_root_bh,
5972 dealloc);
5278 if (ret) { 5973 if (ret) {
5279 mlog_errno(ret); 5974 mlog_errno(ret);
5280 goto out; 5975 goto out;
5281 } 5976 }
5977 }
5978
5979 if (!di->i_xattr_loc)
5980 goto out;
5981
5982 ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
5983 &blk_bh);
5984 if (ret < 0) {
5985 mlog_errno(ret);
5986 goto out;
5987 }
5988
5989 ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
5990 ref_root_bh, dealloc);
5991 if (ret)
5992 mlog_errno(ret);
5993
5994 brelse(blk_bh);
5995out:
5996
5997 return ret;
5998}
5999
/*
 * Predicate over an xattr entry. ocfs2_reflink_xattr_header() calls it, when
 * set, for each source entry; an entry for which it returns 0 is removed
 * from the new header instead of being reflinked.
 */
6000typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
6001/*
6002 * Store the information we need in xattr reflink.
6003 * old_bh and new_bh are inode bh for the old and new inode.
6004 */
6005struct ocfs2_xattr_reflink {
6006 struct inode *old_inode; /* inode whose xattrs are being reflinked */
6007 struct inode *new_inode; /* inode receiving the reflinked xattrs */
6008 struct buffer_head *old_bh; /* inode block of old_inode */
6009 struct buffer_head *new_bh; /* inode block of new_inode */
6010 struct ocfs2_caching_info *ref_ci; /* passed to ocfs2_increase_refcount() */
6011 struct buffer_head *ref_root_bh; /* refcount tree root block */
6012 struct ocfs2_cached_dealloc_ctxt *dealloc; /* passed to ocfs2_increase_refcount() */
6013 should_xattr_reflinked *xattr_reflinked; /* optional per-entry filter, may be NULL */
6014};
6015
6016/*
6017 * Given a xattr header and xe offset,
6018 * return the proper xv and the corresponding bh.
6019 * xattr in inode, block and xattr tree have different implementations.
6020 */
6021typedef int (get_xattr_value_root)(struct super_block *sb,
6022 struct buffer_head *bh,
6023 struct ocfs2_xattr_header *xh,
6024 int offset,
6025 struct ocfs2_xattr_value_root **xv,
6026 struct buffer_head **ret_bh,
6027 void *para);
6028
6029/*
6030 * Calculate all the xattr value root metadata stored in this xattr header and
6031 * credits we need if we create them from the scratch.
6032 * We use get_xattr_value_root so that all types of xattr container can use it.
6033 */
6034static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6035 struct buffer_head *bh,
6036 struct ocfs2_xattr_header *xh,
6037 int *metas, int *credits,
6038 int *num_recs,
6039 get_xattr_value_root *func,
6040 void *para)
6041{
6042 int i, ret = 0;
6043 struct ocfs2_xattr_value_root *xv;
6044 struct ocfs2_xattr_entry *xe;
6045
6046 for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6047 xe = &xh->xh_entries[i];
6048 if (ocfs2_xattr_is_local(xe))
6049 continue;
6050
6051 ret = func(sb, bh, xh, i, &xv, NULL, para);
6052 if (ret) {
6053 mlog_errno(ret);
6054 break;
6055 }
6056
6057 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6058 le16_to_cpu(xv->xr_list.l_next_free_rec);
6059
6060 *credits += ocfs2_calc_extend_credits(sb,
6061 &def_xv.xv.xr_list,
6062 le32_to_cpu(xv->xr_clusters));
6063
6064 /*
6065 * If the value is a tree with depth > 1, We don't go deep
6066 * to the extent block, so just calculate a maximum record num.
6067 */
6068 if (!xv->xr_list.l_tree_depth)
6069 *num_recs += xv->xr_list.l_next_free_rec;
6070 else
6071 *num_recs += ocfs2_clusters_for_bytes(sb,
6072 XATTR_SIZE_MAX);
6073 }
6074
6075 return ret;
6076}
6077
6078/* Used by xattr inode and block to return the right xv and buffer_head. */
6079static int ocfs2_get_xattr_value_root(struct super_block *sb,
6080 struct buffer_head *bh,
6081 struct ocfs2_xattr_header *xh,
6082 int offset,
6083 struct ocfs2_xattr_value_root **xv,
6084 struct buffer_head **ret_bh,
6085 void *para)
6086{
6087 struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6088
6089 *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6090 le16_to_cpu(xe->xe_name_offset) +
6091 OCFS2_XATTR_SIZE(xe->xe_name_len));
6092
6093 if (ret_bh)
6094 *ret_bh = bh;
6095
6096 return 0;
6097}
6098
6099/*
6100 * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6101 * It is only used for inline xattr and xattr block.
6102 */
6103static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6104 struct ocfs2_xattr_header *xh,
6105 struct buffer_head *ref_root_bh,
6106 int *credits,
6107 struct ocfs2_alloc_context **meta_ac)
6108{
6109 int ret, meta_add = 0, num_recs = 0;
6110 struct ocfs2_refcount_block *rb =
6111 (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6112
6113 *credits = 0;
6114
6115 ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6116 &meta_add, credits, &num_recs,
6117 ocfs2_get_xattr_value_root,
6118 NULL);
6119 if (ret) {
6120 mlog_errno(ret);
6121 goto out;
6122 }
6123
6124 /*
6125 * We need to add/modify num_recs in refcount tree, so just calculate
6126 * an approximate number we need for refcount tree change.
6127 * Sometimes we need to split the tree, and after split, half recs
6128 * will be moved to the new block, and a new block can only provide
6129 * half number of recs. So we multiple new blocks by 2.
6130 */
6131 num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6132 meta_add += num_recs;
6133 *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6134 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6135 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6136 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6137 else
6138 *credits += 1;
6139
6140 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6141 if (ret)
6142 mlog_errno(ret);
6143
6144out:
6145 return ret;
6146}
5282 6147
5283 ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, 6148/*
5284 ocfs2_delete_xattr_in_bucket, 6149 * Given a xattr header, reflink all the xattrs in this container.
5285 NULL); 6150 * It can be used for inode, block and bucket.
6151 *
6152 * NOTE:
6153 * Before we call this function, the caller has memcpy the xattr in
6154 * old_xh to the new_xh.
6155 *
6156 * If args.xattr_reflinked is set, call it to decide whether the xe should
6157 * be reflinked or not. If not, remove it from the new xattr header.
6158 */
6159static int ocfs2_reflink_xattr_header(handle_t *handle,
6160 struct ocfs2_xattr_reflink *args,
6161 struct buffer_head *old_bh,
6162 struct ocfs2_xattr_header *xh,
6163 struct buffer_head *new_bh,
6164 struct ocfs2_xattr_header *new_xh,
6165 struct ocfs2_xattr_value_buf *vb,
6166 struct ocfs2_alloc_context *meta_ac,
6167 get_xattr_value_root *func,
6168 void *para)
6169{
6170 int ret = 0, i, j;
6171 struct super_block *sb = args->old_inode->i_sb;
6172 struct buffer_head *value_bh;
6173 struct ocfs2_xattr_entry *xe, *last;
6174 struct ocfs2_xattr_value_root *xv, *new_xv;
6175 struct ocfs2_extent_tree data_et;
6176 u32 clusters, cpos, p_cluster, num_clusters;
6177 unsigned int ext_flags = 0;
6178
6179 mlog(0, "reflink xattr in container %llu, count = %u\n",
6180 (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6181
6182 last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6183 for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6184 xe = &xh->xh_entries[i];
6185
6186 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6187 xe = &new_xh->xh_entries[j];
6188
6189 le16_add_cpu(&new_xh->xh_count, -1);
6190 if (new_xh->xh_count) {
6191 memmove(xe, xe + 1,
6192 (void *)last - (void *)xe);
6193 memset(last, 0,
6194 sizeof(struct ocfs2_xattr_entry));
6195 }
6196
6197 /*
6198 * We don't want j to increase in the next round since
6199 * it is already moved ahead.
6200 */
6201 j--;
6202 continue;
6203 }
6204
6205 if (ocfs2_xattr_is_local(xe))
6206 continue;
6207
6208 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6209 if (ret) {
6210 mlog_errno(ret);
6211 break;
6212 }
6213
6214 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6215 if (ret) {
6216 mlog_errno(ret);
6217 break;
6218 }
6219
6220 /*
6221 * For the xattr which has l_tree_depth = 0, all the extent
6222 * recs have already be copied to the new xh with the
6223 * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6224 * increase the refount count int the refcount tree.
6225 *
6226 * For the xattr which has l_tree_depth > 0, we need
6227 * to initialize it to the empty default value root,
6228 * and then insert the extents one by one.
6229 */
6230 if (xv->xr_list.l_tree_depth) {
6231 memcpy(new_xv, &def_xv, sizeof(def_xv));
6232 vb->vb_xv = new_xv;
6233 vb->vb_bh = value_bh;
6234 ocfs2_init_xattr_value_extent_tree(&data_et,
6235 INODE_CACHE(args->new_inode), vb);
6236 }
6237
6238 clusters = le32_to_cpu(xv->xr_clusters);
6239 cpos = 0;
6240 while (cpos < clusters) {
6241 ret = ocfs2_xattr_get_clusters(args->old_inode,
6242 cpos,
6243 &p_cluster,
6244 &num_clusters,
6245 &xv->xr_list,
6246 &ext_flags);
6247 if (ret) {
6248 mlog_errno(ret);
6249 goto out;
6250 }
6251
6252 BUG_ON(!p_cluster);
6253
6254 if (xv->xr_list.l_tree_depth) {
6255 ret = ocfs2_insert_extent(handle,
6256 &data_et, cpos,
6257 ocfs2_clusters_to_blocks(
6258 args->old_inode->i_sb,
6259 p_cluster),
6260 num_clusters, ext_flags,
6261 meta_ac);
6262 if (ret) {
6263 mlog_errno(ret);
6264 goto out;
6265 }
6266 }
6267
6268 ret = ocfs2_increase_refcount(handle, args->ref_ci,
6269 args->ref_root_bh,
6270 p_cluster, num_clusters,
6271 meta_ac, args->dealloc);
6272 if (ret) {
6273 mlog_errno(ret);
6274 goto out;
6275 }
6276
6277 cpos += num_clusters;
6278 }
6279 }
6280
6281out:
6282 return ret;
6283}
6284
6285static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6286{
6287 int ret = 0, credits = 0;
6288 handle_t *handle;
6289 struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6290 struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6291 int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6292 int header_off = osb->sb->s_blocksize - inline_size;
6293 struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6294 (args->old_bh->b_data + header_off);
6295 struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6296 (args->new_bh->b_data + header_off);
6297 struct ocfs2_alloc_context *meta_ac = NULL;
6298 struct ocfs2_inode_info *new_oi;
6299 struct ocfs2_dinode *new_di;
6300 struct ocfs2_xattr_value_buf vb = {
6301 .vb_bh = args->new_bh,
6302 .vb_access = ocfs2_journal_access_di,
6303 };
6304
6305 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6306 &credits, &meta_ac);
6307 if (ret) {
6308 mlog_errno(ret);
6309 goto out;
6310 }
6311
6312 handle = ocfs2_start_trans(osb, credits);
6313 if (IS_ERR(handle)) {
6314 ret = PTR_ERR(handle);
6315 mlog_errno(ret);
6316 goto out;
6317 }
6318
6319 ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6320 args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6321 if (ret) {
6322 mlog_errno(ret);
6323 goto out_commit;
6324 }
6325
6326 memcpy(args->new_bh->b_data + header_off,
6327 args->old_bh->b_data + header_off, inline_size);
6328
6329 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6330 new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6331
6332 ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6333 args->new_bh, new_xh, &vb, meta_ac,
6334 ocfs2_get_xattr_value_root, NULL);
6335 if (ret) {
6336 mlog_errno(ret);
6337 goto out_commit;
6338 }
6339
6340 new_oi = OCFS2_I(args->new_inode);
6341 spin_lock(&new_oi->ip_lock);
6342 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6343 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6344 spin_unlock(&new_oi->ip_lock);
6345
6346 ocfs2_journal_dirty(handle, args->new_bh);
6347
6348out_commit:
6349 ocfs2_commit_trans(osb, handle);
6350
6351out:
6352 if (meta_ac)
6353 ocfs2_free_alloc_context(meta_ac);
6354 return ret;
6355}
6356
6357static int ocfs2_create_empty_xattr_block(struct inode *inode,
6358 struct buffer_head *fe_bh,
6359 struct buffer_head **ret_bh,
6360 int indexed)
6361{
6362 int ret;
6363 handle_t *handle;
6364 struct ocfs2_alloc_context *meta_ac;
6365 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6366
6367 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6368 if (ret < 0) {
6369 mlog_errno(ret);
6370 return ret;
6371 }
6372
6373 handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6374 if (IS_ERR(handle)) {
6375 ret = PTR_ERR(handle);
6376 mlog_errno(ret);
6377 goto out;
6378 }
6379
6380 mlog(0, "create new xattr block for inode %llu, index = %d\n",
6381 (unsigned long long)fe_bh->b_blocknr, indexed);
6382 ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6383 meta_ac, ret_bh, indexed);
6384 if (ret)
6385 mlog_errno(ret);
6386
6387 ocfs2_commit_trans(osb, handle);
6388out:
6389 ocfs2_free_alloc_context(meta_ac);
6390 return ret;
6391}
6392
6393static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6394 struct buffer_head *blk_bh,
6395 struct buffer_head *new_blk_bh)
6396{
6397 int ret = 0, credits = 0;
6398 handle_t *handle;
6399 struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6400 struct ocfs2_dinode *new_di;
6401 struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6402 int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6403 struct ocfs2_xattr_block *xb =
6404 (struct ocfs2_xattr_block *)blk_bh->b_data;
6405 struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6406 struct ocfs2_xattr_block *new_xb =
6407 (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6408 struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6409 struct ocfs2_alloc_context *meta_ac;
6410 struct ocfs2_xattr_value_buf vb = {
6411 .vb_bh = new_blk_bh,
6412 .vb_access = ocfs2_journal_access_xb,
6413 };
6414
6415 ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6416 &credits, &meta_ac);
6417 if (ret) {
6418 mlog_errno(ret);
6419 return ret;
6420 }
6421
6422 /* One more credits in case we need to add xattr flags in new inode. */
6423 handle = ocfs2_start_trans(osb, credits + 1);
6424 if (IS_ERR(handle)) {
6425 ret = PTR_ERR(handle);
6426 mlog_errno(ret);
6427 goto out;
6428 }
6429
6430 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6431 ret = ocfs2_journal_access_di(handle,
6432 INODE_CACHE(args->new_inode),
6433 args->new_bh,
6434 OCFS2_JOURNAL_ACCESS_WRITE);
6435 if (ret) {
6436 mlog_errno(ret);
6437 goto out_commit;
6438 }
6439 }
6440
6441 ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6442 new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6443 if (ret) {
6444 mlog_errno(ret);
6445 goto out_commit;
6446 }
6447
6448 memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6449 osb->sb->s_blocksize - header_off);
6450
6451 ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6452 new_blk_bh, new_xh, &vb, meta_ac,
6453 ocfs2_get_xattr_value_root, NULL);
6454 if (ret) {
6455 mlog_errno(ret);
6456 goto out_commit;
6457 }
6458
6459 ocfs2_journal_dirty(handle, new_blk_bh);
6460
6461 if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6462 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6463 spin_lock(&new_oi->ip_lock);
6464 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6465 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6466 spin_unlock(&new_oi->ip_lock);
6467
6468 ocfs2_journal_dirty(handle, args->new_bh);
6469 }
6470
6471out_commit:
6472 ocfs2_commit_trans(osb, handle);
6473
6474out:
6475 ocfs2_free_alloc_context(meta_ac);
6476 return ret;
6477}
6478
/*
 * State shared while reflinking an indexed xattr tree.
 * ocfs2_get_reflink_xattr_value_root() uses old_bucket/new_bucket to decide
 * which bucket a buffer head belongs to; reflink carries the generic reflink
 * arguments. old_blk_bh/new_blk_bh are presumably the old and new xattr
 * blocks (TODO confirm against the users of this struct).
 */
6479struct ocfs2_reflink_xattr_tree_args {
6480 struct ocfs2_xattr_reflink *reflink;
6481 struct buffer_head *old_blk_bh;
6482 struct buffer_head *new_blk_bh;
6483 struct ocfs2_xattr_bucket *old_bucket;
6484 struct ocfs2_xattr_bucket *new_bucket;
6485};
6486
6487/*
6488 * NOTE:
6489 * We have to handle the case that both old bucket and new bucket
6490 * will call this function to get the right ret_bh.
6491 * So The caller must give us the right bh.
6492 */
6493static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6494 struct buffer_head *bh,
6495 struct ocfs2_xattr_header *xh,
6496 int offset,
6497 struct ocfs2_xattr_value_root **xv,
6498 struct buffer_head **ret_bh,
6499 void *para)
6500{
6501 struct ocfs2_reflink_xattr_tree_args *args =
6502 (struct ocfs2_reflink_xattr_tree_args *)para;
6503 struct ocfs2_xattr_bucket *bucket;
6504
6505 if (bh == args->old_bucket->bu_bhs[0])
6506 bucket = args->old_bucket;
6507 else
6508 bucket = args->new_bucket;
6509
6510 return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6511 xv, ret_bh);
6512}
6513
/*
 * Running totals filled in by ocfs2_calc_value_tree_metas() for each bucket:
 * metadata blocks, journal credits and refcount records needed.
 */
6514struct ocfs2_value_tree_metas {
6515 int num_metas;
6516 int credits;
6517 int num_recs;
6518};
6519
/*
 * get_xattr_value_root implementation for a single bucket: @para is the
 * bucket itself; @bh and @xh are not used.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *bkt = (struct ocfs2_xattr_bucket *)para;
	int status;

	status = ocfs2_get_xattr_tree_value_root(sb, bkt, offset, xv, ret_bh);
	return status;
}
6534
6535static int ocfs2_calc_value_tree_metas(struct inode *inode,
6536 struct ocfs2_xattr_bucket *bucket,
6537 void *para)
6538{
6539 struct ocfs2_value_tree_metas *metas =
6540 (struct ocfs2_value_tree_metas *)para;
6541 struct ocfs2_xattr_header *xh =
6542 (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6543
6544 /* Add the credits for this bucket first. */
6545 metas->credits += bucket->bu_blocks;
6546 return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6547 xh, &metas->num_metas,
6548 &metas->credits, &metas->num_recs,
6549 ocfs2_value_tree_metas_in_bucket,
6550 bucket);
6551}
6552
6553/*
6554 * Given a xattr extent rec starting from blkno and having len clusters,
6555 * iterate over all the buckets, calculate how much metadata we need for reflinking
6556 * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6557 */
6558static int ocfs2_lock_reflink_xattr_rec_allocators(
6559 struct ocfs2_reflink_xattr_tree_args *args,
6560 struct ocfs2_extent_tree *xt_et,
6561 u64 blkno, u32 len, int *credits,
6562 struct ocfs2_alloc_context **meta_ac,
6563 struct ocfs2_alloc_context **data_ac)
6564{
6565 int ret, num_free_extents;
6566 struct ocfs2_value_tree_metas metas;
6567 struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6568 struct ocfs2_refcount_block *rb;
6569
6570 memset(&metas, 0, sizeof(metas));
6571
6572 ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6573 ocfs2_calc_value_tree_metas, &metas);
6574 if (ret) {
6575 mlog_errno(ret);
6576 goto out;
6577 }
6578
6579 *credits = metas.credits;
6580
6581 /*
6582 * Calculate we need for refcount tree change.
6583 *
6584 * We need to add/modify num_recs in refcount tree, so just calculate
6585 * an approximate number we need for refcount tree change.
6586 * Sometimes we need to split the tree, and after split, half recs
6587 * will be moved to the new block, and a new block can only provide
6588 * half number of recs. So we multiple new blocks by 2.
6589 * In the end, we have to add credits for modifying the already
6590 * existed refcount block.
6591 */
6592 rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6593 metas.num_recs =
6594 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6595 ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6596 metas.num_metas += metas.num_recs;
6597 *credits += metas.num_recs +
6598 metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6599 if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6600 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6601 le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6602 else
6603 *credits += 1;
6604
6605 /* count in the xattr tree change. */
6606 num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6607 if (num_free_extents < 0) {
6608 ret = num_free_extents;
6609 mlog_errno(ret);
6610 goto out;
6611 }
6612
6613 if (num_free_extents < len)
6614 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6615
6616 *credits += ocfs2_calc_extend_credits(osb->sb,
6617 xt_et->et_root_el, len);
6618
6619 if (metas.num_metas) {
6620 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6621 meta_ac);
5286 if (ret) { 6622 if (ret) {
5287 mlog_errno(ret); 6623 mlog_errno(ret);
5288 goto out; 6624 goto out;
5289 } 6625 }
6626 }
5290 6627
5291 ret = ocfs2_rm_xattr_cluster(inode, xb_bh, 6628 if (len) {
5292 p_blkno, e_cpos, num_clusters); 6629 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6630 if (ret)
6631 mlog_errno(ret);
6632 }
6633out:
6634 if (ret) {
6635 if (*meta_ac) {
6636 ocfs2_free_alloc_context(*meta_ac);
6637 meta_ac = NULL;
6638 }
6639 }
6640
6641 return ret;
6642}
6643
6644static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6645 u64 blkno, u64 new_blkno, u32 clusters,
6646 struct ocfs2_alloc_context *meta_ac,
6647 struct ocfs2_alloc_context *data_ac,
6648 struct ocfs2_reflink_xattr_tree_args *args)
6649{
6650 int i, j, ret = 0;
6651 struct super_block *sb = args->reflink->old_inode->i_sb;
6652 u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6653 u32 num_buckets = clusters * bpc;
6654 int bpb = args->old_bucket->bu_blocks;
6655 struct ocfs2_xattr_value_buf vb = {
6656 .vb_access = ocfs2_journal_access,
6657 };
6658
6659 for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6660 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
5293 if (ret) { 6661 if (ret) {
5294 mlog_errno(ret); 6662 mlog_errno(ret);
5295 break; 6663 break;
5296 } 6664 }
5297 6665
5298 if (e_cpos == 0) 6666 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6667 if (ret) {
6668 mlog_errno(ret);
5299 break; 6669 break;
6670 }
5300 6671
5301 name_hash = e_cpos - 1; 6672 /*
6673 * The real bucket num in this series of blocks is stored
6674 * in the 1st bucket.
6675 */
6676 if (i == 0)
6677 num_buckets = le16_to_cpu(
6678 bucket_xh(args->old_bucket)->xh_num_buckets);
6679
6680 ret = ocfs2_xattr_bucket_journal_access(handle,
6681 args->new_bucket,
6682 OCFS2_JOURNAL_ACCESS_CREATE);
6683 if (ret) {
6684 mlog_errno(ret);
6685 break;
6686 }
6687
6688 for (j = 0; j < bpb; j++)
6689 memcpy(bucket_block(args->new_bucket, j),
6690 bucket_block(args->old_bucket, j),
6691 sb->s_blocksize);
6692
6693 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6694
6695 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6696 args->old_bucket->bu_bhs[0],
6697 bucket_xh(args->old_bucket),
6698 args->new_bucket->bu_bhs[0],
6699 bucket_xh(args->new_bucket),
6700 &vb, meta_ac,
6701 ocfs2_get_reflink_xattr_value_root,
6702 args);
6703 if (ret) {
6704 mlog_errno(ret);
6705 break;
6706 }
6707
6708 /*
6709 * Re-access and dirty the bucket to calculate metaecc.
6710 * Because we may extend the transaction in reflink_xattr_header
6711 * which will let the already accessed block gone.
6712 */
6713 ret = ocfs2_xattr_bucket_journal_access(handle,
6714 args->new_bucket,
6715 OCFS2_JOURNAL_ACCESS_WRITE);
6716 if (ret) {
6717 mlog_errno(ret);
6718 break;
6719 }
6720
6721 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6722 ocfs2_xattr_bucket_relse(args->old_bucket);
6723 ocfs2_xattr_bucket_relse(args->new_bucket);
6724 }
6725
6726 ocfs2_xattr_bucket_relse(args->old_bucket);
6727 ocfs2_xattr_bucket_relse(args->new_bucket);
6728 return ret;
6729}
6730/*
6731 * Create the same xattr extent record in the new inode's xattr tree.
6732 */
6733static int ocfs2_reflink_xattr_rec(struct inode *inode,
6734 struct buffer_head *root_bh,
6735 u64 blkno,
6736 u32 cpos,
6737 u32 len,
6738 void *para)
6739{
6740 int ret, credits = 0;
6741 u32 p_cluster, num_clusters;
6742 u64 new_blkno;
6743 handle_t *handle;
6744 struct ocfs2_reflink_xattr_tree_args *args =
6745 (struct ocfs2_reflink_xattr_tree_args *)para;
6746 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6747 struct ocfs2_alloc_context *meta_ac = NULL;
6748 struct ocfs2_alloc_context *data_ac = NULL;
6749 struct ocfs2_extent_tree et;
6750
6751 ocfs2_init_xattr_tree_extent_tree(&et,
6752 INODE_CACHE(args->reflink->new_inode),
6753 args->new_blk_bh);
6754
6755 ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6756 len, &credits,
6757 &meta_ac, &data_ac);
6758 if (ret) {
6759 mlog_errno(ret);
6760 goto out;
6761 }
6762
6763 handle = ocfs2_start_trans(osb, credits);
6764 if (IS_ERR(handle)) {
6765 ret = PTR_ERR(handle);
6766 mlog_errno(ret);
6767 goto out;
6768 }
6769
6770 ret = ocfs2_claim_clusters(osb, handle, data_ac,
6771 len, &p_cluster, &num_clusters);
6772 if (ret) {
6773 mlog_errno(ret);
6774 goto out_commit;
6775 }
6776
6777 new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6778
6779 mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6780 (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6781 ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6782 meta_ac, data_ac, args);
6783 if (ret) {
6784 mlog_errno(ret);
6785 goto out_commit;
6786 }
6787
6788 mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6789 (unsigned long long)new_blkno, len, cpos);
6790 ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6791 len, 0, meta_ac);
6792 if (ret)
6793 mlog_errno(ret);
6794
6795out_commit:
6796 ocfs2_commit_trans(osb, handle);
6797
6798out:
6799 if (meta_ac)
6800 ocfs2_free_alloc_context(meta_ac);
6801 if (data_ac)
6802 ocfs2_free_alloc_context(data_ac);
6803 return ret;
6804}
6805
6806/*
6807 * Create reflinked xattr buckets.
6808 * We will add bucket one by one, and refcount all the xattrs in the bucket
6809 * if they are stored outside.
6810 */
6811static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6812 struct buffer_head *blk_bh,
6813 struct buffer_head *new_blk_bh)
6814{
6815 int ret;
6816 struct ocfs2_reflink_xattr_tree_args para;
6817
6818 memset(&para, 0, sizeof(para));
6819 para.reflink = args;
6820 para.old_blk_bh = blk_bh;
6821 para.new_blk_bh = new_blk_bh;
6822
6823 para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6824 if (!para.old_bucket) {
6825 mlog_errno(-ENOMEM);
6826 return -ENOMEM;
6827 }
6828
6829 para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6830 if (!para.new_bucket) {
6831 ret = -ENOMEM;
6832 mlog_errno(ret);
6833 goto out;
6834 }
6835
6836 ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
6837 ocfs2_reflink_xattr_rec,
6838 &para);
6839 if (ret)
6840 mlog_errno(ret);
6841
6842out:
6843 ocfs2_xattr_bucket_free(para.old_bucket);
6844 ocfs2_xattr_bucket_free(para.new_bucket);
6845 return ret;
6846}
6847
6848static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
6849 struct buffer_head *blk_bh)
6850{
6851 int ret, indexed = 0;
6852 struct buffer_head *new_blk_bh = NULL;
6853 struct ocfs2_xattr_block *xb =
6854 (struct ocfs2_xattr_block *)blk_bh->b_data;
6855
6856
6857 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
6858 indexed = 1;
6859
6860 ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
6861 &new_blk_bh, indexed);
6862 if (ret) {
6863 mlog_errno(ret);
6864 goto out;
6865 }
6866
6867 if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
6868 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
6869 else
6870 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
6871 if (ret)
6872 mlog_errno(ret);
6873
6874out:
6875 brelse(new_blk_bh);
6876 return ret;
6877}
6878
6879static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
6880{
6881 int type = ocfs2_xattr_get_type(xe);
6882
6883 return type != OCFS2_XATTR_INDEX_SECURITY &&
6884 type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
6885 type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
6886}
6887
6888int ocfs2_reflink_xattrs(struct inode *old_inode,
6889 struct buffer_head *old_bh,
6890 struct inode *new_inode,
6891 struct buffer_head *new_bh,
6892 bool preserve_security)
6893{
6894 int ret;
6895 struct ocfs2_xattr_reflink args;
6896 struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
6897 struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
6898 struct buffer_head *blk_bh = NULL;
6899 struct ocfs2_cached_dealloc_ctxt dealloc;
6900 struct ocfs2_refcount_tree *ref_tree;
6901 struct buffer_head *ref_root_bh = NULL;
6902
6903 ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6904 le64_to_cpu(di->i_refcount_loc),
6905 1, &ref_tree, &ref_root_bh);
6906 if (ret) {
6907 mlog_errno(ret);
6908 goto out;
6909 }
6910
6911 ocfs2_init_dealloc_ctxt(&dealloc);
6912
6913 args.old_inode = old_inode;
6914 args.new_inode = new_inode;
6915 args.old_bh = old_bh;
6916 args.new_bh = new_bh;
6917 args.ref_ci = &ref_tree->rf_ci;
6918 args.ref_root_bh = ref_root_bh;
6919 args.dealloc = &dealloc;
6920 if (preserve_security)
6921 args.xattr_reflinked = NULL;
6922 else
6923 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
6924
6925 if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6926 ret = ocfs2_reflink_xattr_inline(&args);
6927 if (ret) {
6928 mlog_errno(ret);
6929 goto out_unlock;
6930 }
6931 }
6932
6933 if (!di->i_xattr_loc)
6934 goto out_unlock;
6935
6936 ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
6937 &blk_bh);
6938 if (ret < 0) {
6939 mlog_errno(ret);
6940 goto out_unlock;
6941 }
6942
6943 ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
6944 if (ret)
6945 mlog_errno(ret);
6946
6947 brelse(blk_bh);
6948
6949out_unlock:
6950 ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
6951 ref_tree, 1);
6952 brelse(ref_root_bh);
6953
6954 if (ocfs2_dealloc_has_cluster(&dealloc)) {
6955 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
6956 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
5302 } 6957 }
5303 6958
5304out: 6959out:
@@ -5306,6 +6961,51 @@ out:
5306} 6961}
5307 6962
5308/* 6963/*
6964 * Initialize security and acl for a already created inode.
6965 * Used for reflink a non-preserve-security file.
6966 *
6967 * It uses common api like ocfs2_xattr_set, so the caller
6968 * must not hold any lock expect i_mutex.
6969 */
6970int ocfs2_init_security_and_acl(struct inode *dir,
6971 struct inode *inode)
6972{
6973 int ret = 0;
6974 struct buffer_head *dir_bh = NULL;
6975 struct ocfs2_security_xattr_info si = {
6976 .enable = 1,
6977 };
6978
6979 ret = ocfs2_init_security_get(inode, dir, &si);
6980 if (!ret) {
6981 ret = ocfs2_xattr_security_set(inode, si.name,
6982 si.value, si.value_len,
6983 XATTR_CREATE);
6984 if (ret) {
6985 mlog_errno(ret);
6986 goto leave;
6987 }
6988 } else if (ret != -EOPNOTSUPP) {
6989 mlog_errno(ret);
6990 goto leave;
6991 }
6992
6993 ret = ocfs2_inode_lock(dir, &dir_bh, 0);
6994 if (ret) {
6995 mlog_errno(ret);
6996 goto leave;
6997 }
6998
6999 ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7000 if (ret)
7001 mlog_errno(ret);
7002
7003 ocfs2_inode_unlock(dir, 0);
7004 brelse(dir_bh);
7005leave:
7006 return ret;
7007}
7008/*
5309 * 'security' attributes support 7009 * 'security' attributes support
5310 */ 7010 */
5311static size_t ocfs2_xattr_security_list(struct inode *inode, char *list, 7011static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 1ca7e9a1b7bc..08e36389f56d 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -55,6 +55,8 @@ int ocfs2_xattr_set_handle(handle_t *, struct inode *, struct buffer_head *,
55 int, const char *, const void *, size_t, int, 55 int, const char *, const void *, size_t, int,
56 struct ocfs2_alloc_context *, 56 struct ocfs2_alloc_context *,
57 struct ocfs2_alloc_context *); 57 struct ocfs2_alloc_context *);
58int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
59 struct ocfs2_dinode *di);
58int ocfs2_xattr_remove(struct inode *, struct buffer_head *); 60int ocfs2_xattr_remove(struct inode *, struct buffer_head *);
59int ocfs2_init_security_get(struct inode *, struct inode *, 61int ocfs2_init_security_get(struct inode *, struct inode *,
60 struct ocfs2_security_xattr_info *); 62 struct ocfs2_security_xattr_info *);
@@ -83,5 +85,16 @@ struct ocfs2_xattr_value_buf {
83 struct ocfs2_xattr_value_root *vb_xv; 85 struct ocfs2_xattr_value_root *vb_xv;
84}; 86};
85 87
86 88int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
89 struct buffer_head *fe_bh,
90 struct ocfs2_caching_info *ref_ci,
91 struct buffer_head *ref_root_bh,
92 struct ocfs2_cached_dealloc_ctxt *dealloc);
93int ocfs2_reflink_xattrs(struct inode *old_inode,
94 struct buffer_head *old_bh,
95 struct inode *new_inode,
96 struct buffer_head *new_bh,
97 bool preserve_security);
98int ocfs2_init_security_and_acl(struct inode *dir,
99 struct inode *inode);
87#endif /* OCFS2_XATTR_H */ 100#endif /* OCFS2_XATTR_H */