diff options
author | Tao Ma <tao.ma@oracle.com> | 2009-08-23 23:13:37 -0400 |
---|---|---|
committer | Joel Becker <joel.becker@oracle.com> | 2009-09-22 23:09:29 -0400 |
commit | 374a263e790c4de85844283c098810a92985f623 (patch) | |
tree | a72b1f072b973723ea1cf7def27608717cf1c033 /fs/ocfs2 | |
parent | c732eb16bf07f9bfb7fa72b6868462471273bdbd (diff) |
ocfs2: Add refcount tree lock mechanism.
Implement locking around struct ocfs2_refcount_tree. This protects
all read/write operations on refcount trees. ocfs2_refcount_tree
has its own lock and its own caching_info, protecting buffers among
multiple nodes.
User must call ocfs2_lock_refcount_tree before his operation on
the tree and unlock it after that.
ocfs2_refcount_trees are referenced by the block number of the
refcount tree root block, So we create an rb-tree on the ocfs2_super
to look them up.
Signed-off-by: Tao Ma <tao.ma@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/ocfs2.h | 4 | ||||
-rw-r--r-- | fs/ocfs2/refcounttree.c | 359 | ||||
-rw-r--r-- | fs/ocfs2/refcounttree.h | 7 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 5 |
4 files changed, 375 insertions, 0 deletions
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 6688d19e4451..bb5357376ef5 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -408,6 +408,10 @@ struct ocfs2_super | |||
408 | 408 | ||
409 | /* the group we used to allocate inodes. */ | 409 | /* the group we used to allocate inodes. */ |
410 | u64 osb_inode_alloc_group; | 410 | u64 osb_inode_alloc_group; |
411 | |||
412 | /* rb tree root for refcount lock. */ | ||
413 | struct rb_root osb_rf_lock_tree; | ||
414 | struct ocfs2_refcount_tree *osb_ref_tree_lru; | ||
411 | }; | 415 | }; |
412 | 416 | ||
413 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 417 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index eb0f4a047938..8d79de8637b8 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "buffer_head_io.h" | 27 | #include "buffer_head_io.h" |
28 | #include "blockcheck.h" | 28 | #include "blockcheck.h" |
29 | #include "refcounttree.h" | 29 | #include "refcounttree.h" |
30 | #include "dlmglue.h" | ||
30 | 31 | ||
31 | static inline struct ocfs2_refcount_tree * | 32 | static inline struct ocfs2_refcount_tree * |
32 | cache_info_to_refcount(struct ocfs2_caching_info *ci) | 33 | cache_info_to_refcount(struct ocfs2_caching_info *ci) |
@@ -156,3 +157,361 @@ static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = { | |||
156 | .co_io_lock = ocfs2_refcount_cache_io_lock, | 157 | .co_io_lock = ocfs2_refcount_cache_io_lock, |
157 | .co_io_unlock = ocfs2_refcount_cache_io_unlock, | 158 | .co_io_unlock = ocfs2_refcount_cache_io_unlock, |
158 | }; | 159 | }; |
160 | |||
161 | static struct ocfs2_refcount_tree * | ||
162 | ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno) | ||
163 | { | ||
164 | struct rb_node *n = osb->osb_rf_lock_tree.rb_node; | ||
165 | struct ocfs2_refcount_tree *tree = NULL; | ||
166 | |||
167 | while (n) { | ||
168 | tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node); | ||
169 | |||
170 | if (blkno < tree->rf_blkno) | ||
171 | n = n->rb_left; | ||
172 | else if (blkno > tree->rf_blkno) | ||
173 | n = n->rb_right; | ||
174 | else | ||
175 | return tree; | ||
176 | } | ||
177 | |||
178 | return NULL; | ||
179 | } | ||
180 | |||
181 | /* osb_lock is already locked. */ | ||
182 | static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb, | ||
183 | struct ocfs2_refcount_tree *new) | ||
184 | { | ||
185 | u64 rf_blkno = new->rf_blkno; | ||
186 | struct rb_node *parent = NULL; | ||
187 | struct rb_node **p = &osb->osb_rf_lock_tree.rb_node; | ||
188 | struct ocfs2_refcount_tree *tmp; | ||
189 | |||
190 | while (*p) { | ||
191 | parent = *p; | ||
192 | |||
193 | tmp = rb_entry(parent, struct ocfs2_refcount_tree, | ||
194 | rf_node); | ||
195 | |||
196 | if (rf_blkno < tmp->rf_blkno) | ||
197 | p = &(*p)->rb_left; | ||
198 | else if (rf_blkno > tmp->rf_blkno) | ||
199 | p = &(*p)->rb_right; | ||
200 | else { | ||
201 | /* This should never happen! */ | ||
202 | mlog(ML_ERROR, "Duplicate refcount block %llu found!\n", | ||
203 | (unsigned long long)rf_blkno); | ||
204 | BUG(); | ||
205 | } | ||
206 | } | ||
207 | |||
208 | rb_link_node(&new->rf_node, parent, p); | ||
209 | rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree); | ||
210 | } | ||
211 | |||
212 | static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree) | ||
213 | { | ||
214 | ocfs2_metadata_cache_exit(&tree->rf_ci); | ||
215 | ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres); | ||
216 | ocfs2_lock_res_free(&tree->rf_lockres); | ||
217 | kfree(tree); | ||
218 | } | ||
219 | |||
220 | static inline void | ||
221 | ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb, | ||
222 | struct ocfs2_refcount_tree *tree) | ||
223 | { | ||
224 | rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree); | ||
225 | if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree) | ||
226 | osb->osb_ref_tree_lru = NULL; | ||
227 | } | ||
228 | |||
229 | static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb, | ||
230 | struct ocfs2_refcount_tree *tree) | ||
231 | { | ||
232 | spin_lock(&osb->osb_lock); | ||
233 | ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree); | ||
234 | spin_unlock(&osb->osb_lock); | ||
235 | } | ||
236 | |||
237 | void ocfs2_kref_remove_refcount_tree(struct kref *kref) | ||
238 | { | ||
239 | struct ocfs2_refcount_tree *tree = | ||
240 | container_of(kref, struct ocfs2_refcount_tree, rf_getcnt); | ||
241 | |||
242 | ocfs2_free_refcount_tree(tree); | ||
243 | } | ||
244 | |||
245 | static inline void | ||
246 | ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree) | ||
247 | { | ||
248 | kref_get(&tree->rf_getcnt); | ||
249 | } | ||
250 | |||
251 | static inline void | ||
252 | ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree) | ||
253 | { | ||
254 | kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree); | ||
255 | } | ||
256 | |||
257 | static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new, | ||
258 | struct super_block *sb) | ||
259 | { | ||
260 | ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops); | ||
261 | mutex_init(&new->rf_io_mutex); | ||
262 | new->rf_sb = sb; | ||
263 | spin_lock_init(&new->rf_lock); | ||
264 | } | ||
265 | |||
266 | static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb, | ||
267 | struct ocfs2_refcount_tree *new, | ||
268 | u64 rf_blkno, u32 generation) | ||
269 | { | ||
270 | init_rwsem(&new->rf_sem); | ||
271 | ocfs2_refcount_lock_res_init(&new->rf_lockres, osb, | ||
272 | rf_blkno, generation); | ||
273 | } | ||
274 | |||
275 | static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno, | ||
276 | struct ocfs2_refcount_tree **ret_tree) | ||
277 | { | ||
278 | int ret = 0; | ||
279 | struct ocfs2_refcount_tree *tree, *new = NULL; | ||
280 | struct buffer_head *ref_root_bh = NULL; | ||
281 | struct ocfs2_refcount_block *ref_rb; | ||
282 | |||
283 | spin_lock(&osb->osb_lock); | ||
284 | if (osb->osb_ref_tree_lru && | ||
285 | osb->osb_ref_tree_lru->rf_blkno == rf_blkno) | ||
286 | tree = osb->osb_ref_tree_lru; | ||
287 | else | ||
288 | tree = ocfs2_find_refcount_tree(osb, rf_blkno); | ||
289 | if (tree) | ||
290 | goto out; | ||
291 | |||
292 | spin_unlock(&osb->osb_lock); | ||
293 | |||
294 | new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS); | ||
295 | if (!new) { | ||
296 | ret = -ENOMEM; | ||
297 | return ret; | ||
298 | } | ||
299 | |||
300 | new->rf_blkno = rf_blkno; | ||
301 | kref_init(&new->rf_getcnt); | ||
302 | ocfs2_init_refcount_tree_ci(new, osb->sb); | ||
303 | |||
304 | /* | ||
305 | * We need the generation to create the refcount tree lock and since | ||
306 | * it isn't changed during the tree modification, we are safe here to | ||
307 | * read without protection. | ||
308 | * We also have to purge the cache after we create the lock since the | ||
309 | * refcount block may have the stale data. It can only be trusted when | ||
310 | * we hold the refcount lock. | ||
311 | */ | ||
312 | ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh); | ||
313 | if (ret) { | ||
314 | mlog_errno(ret); | ||
315 | ocfs2_metadata_cache_exit(&new->rf_ci); | ||
316 | kfree(new); | ||
317 | return ret; | ||
318 | } | ||
319 | |||
320 | ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; | ||
321 | new->rf_generation = le32_to_cpu(ref_rb->rf_generation); | ||
322 | ocfs2_init_refcount_tree_lock(osb, new, rf_blkno, | ||
323 | new->rf_generation); | ||
324 | ocfs2_metadata_cache_purge(&new->rf_ci); | ||
325 | |||
326 | spin_lock(&osb->osb_lock); | ||
327 | tree = ocfs2_find_refcount_tree(osb, rf_blkno); | ||
328 | if (tree) | ||
329 | goto out; | ||
330 | |||
331 | ocfs2_insert_refcount_tree(osb, new); | ||
332 | |||
333 | tree = new; | ||
334 | new = NULL; | ||
335 | |||
336 | out: | ||
337 | *ret_tree = tree; | ||
338 | |||
339 | osb->osb_ref_tree_lru = tree; | ||
340 | |||
341 | spin_unlock(&osb->osb_lock); | ||
342 | |||
343 | if (new) | ||
344 | ocfs2_free_refcount_tree(new); | ||
345 | |||
346 | brelse(ref_root_bh); | ||
347 | return ret; | ||
348 | } | ||
349 | |||
350 | static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno) | ||
351 | { | ||
352 | int ret; | ||
353 | struct buffer_head *di_bh = NULL; | ||
354 | struct ocfs2_dinode *di; | ||
355 | |||
356 | ret = ocfs2_read_inode_block(inode, &di_bh); | ||
357 | if (ret) { | ||
358 | mlog_errno(ret); | ||
359 | goto out; | ||
360 | } | ||
361 | |||
362 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | ||
363 | |||
364 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
365 | *ref_blkno = le64_to_cpu(di->i_refcount_loc); | ||
366 | brelse(di_bh); | ||
367 | out: | ||
368 | return ret; | ||
369 | } | ||
370 | |||
371 | static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb, | ||
372 | struct ocfs2_refcount_tree *tree, int rw) | ||
373 | { | ||
374 | int ret; | ||
375 | |||
376 | ret = ocfs2_refcount_lock(tree, rw); | ||
377 | if (ret) { | ||
378 | mlog_errno(ret); | ||
379 | goto out; | ||
380 | } | ||
381 | |||
382 | if (rw) | ||
383 | down_write(&tree->rf_sem); | ||
384 | else | ||
385 | down_read(&tree->rf_sem); | ||
386 | |||
387 | out: | ||
388 | return ret; | ||
389 | } | ||
390 | |||
391 | /* | ||
392 | * Lock the refcount tree pointed by ref_blkno and return the tree. | ||
393 | * In most case, we lock the tree and read the refcount block. | ||
394 | * So read it here if the caller really needs it. | ||
395 | * | ||
396 | * If the tree has been re-created by other node, it will free the | ||
397 | * old one and re-create it. | ||
398 | */ | ||
399 | int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, | ||
400 | u64 ref_blkno, int rw, | ||
401 | struct ocfs2_refcount_tree **ret_tree, | ||
402 | struct buffer_head **ref_bh) | ||
403 | { | ||
404 | int ret, delete_tree = 0; | ||
405 | struct ocfs2_refcount_tree *tree = NULL; | ||
406 | struct buffer_head *ref_root_bh = NULL; | ||
407 | struct ocfs2_refcount_block *rb; | ||
408 | |||
409 | again: | ||
410 | ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree); | ||
411 | if (ret) { | ||
412 | mlog_errno(ret); | ||
413 | return ret; | ||
414 | } | ||
415 | |||
416 | ocfs2_refcount_tree_get(tree); | ||
417 | |||
418 | ret = __ocfs2_lock_refcount_tree(osb, tree, rw); | ||
419 | if (ret) { | ||
420 | mlog_errno(ret); | ||
421 | ocfs2_refcount_tree_put(tree); | ||
422 | goto out; | ||
423 | } | ||
424 | |||
425 | ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno, | ||
426 | &ref_root_bh); | ||
427 | if (ret) { | ||
428 | mlog_errno(ret); | ||
429 | ocfs2_unlock_refcount_tree(osb, tree, rw); | ||
430 | ocfs2_refcount_tree_put(tree); | ||
431 | goto out; | ||
432 | } | ||
433 | |||
434 | rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data; | ||
435 | /* | ||
436 | * If the refcount block has been freed and re-created, we may need | ||
437 | * to recreate the refcount tree also. | ||
438 | * | ||
439 | * Here we just remove the tree from the rb-tree, and the last | ||
440 | * kref holder will unlock and delete this refcount_tree. | ||
441 | * Then we goto "again" and ocfs2_get_refcount_tree will create | ||
442 | * the new refcount tree for us. | ||
443 | */ | ||
444 | if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) { | ||
445 | if (!tree->rf_removed) { | ||
446 | ocfs2_erase_refcount_tree_from_list(osb, tree); | ||
447 | tree->rf_removed = 1; | ||
448 | delete_tree = 1; | ||
449 | } | ||
450 | |||
451 | ocfs2_unlock_refcount_tree(osb, tree, rw); | ||
452 | /* | ||
453 | * We get an extra reference when we create the refcount | ||
454 | * tree, so another put will destroy it. | ||
455 | */ | ||
456 | if (delete_tree) | ||
457 | ocfs2_refcount_tree_put(tree); | ||
458 | brelse(ref_root_bh); | ||
459 | ref_root_bh = NULL; | ||
460 | goto again; | ||
461 | } | ||
462 | |||
463 | *ret_tree = tree; | ||
464 | if (ref_bh) { | ||
465 | *ref_bh = ref_root_bh; | ||
466 | ref_root_bh = NULL; | ||
467 | } | ||
468 | out: | ||
469 | brelse(ref_root_bh); | ||
470 | return ret; | ||
471 | } | ||
472 | |||
473 | int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw, | ||
474 | struct ocfs2_refcount_tree **ret_tree, | ||
475 | struct buffer_head **ref_bh) | ||
476 | { | ||
477 | int ret; | ||
478 | u64 ref_blkno; | ||
479 | |||
480 | ret = ocfs2_get_refcount_block(inode, &ref_blkno); | ||
481 | if (ret) { | ||
482 | mlog_errno(ret); | ||
483 | return ret; | ||
484 | } | ||
485 | |||
486 | return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno, | ||
487 | rw, ret_tree, ref_bh); | ||
488 | } | ||
489 | |||
490 | void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, | ||
491 | struct ocfs2_refcount_tree *tree, int rw) | ||
492 | { | ||
493 | if (rw) | ||
494 | up_write(&tree->rf_sem); | ||
495 | else | ||
496 | up_read(&tree->rf_sem); | ||
497 | |||
498 | ocfs2_refcount_unlock(tree, rw); | ||
499 | ocfs2_refcount_tree_put(tree); | ||
500 | } | ||
501 | |||
502 | void ocfs2_purge_refcount_trees(struct ocfs2_super *osb) | ||
503 | { | ||
504 | struct rb_node *node; | ||
505 | struct ocfs2_refcount_tree *tree; | ||
506 | struct rb_root *root = &osb->osb_rf_lock_tree; | ||
507 | |||
508 | while ((node = rb_last(root)) != NULL) { | ||
509 | tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node); | ||
510 | |||
511 | mlog(0, "Purge tree %llu\n", | ||
512 | (unsigned long long) tree->rf_blkno); | ||
513 | |||
514 | rb_erase(&tree->rf_node, root); | ||
515 | ocfs2_free_refcount_tree(tree); | ||
516 | } | ||
517 | } | ||
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 9a3695cdbb53..2ea7fc52c23c 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -33,4 +33,11 @@ struct ocfs2_refcount_tree { | |||
33 | struct super_block *rf_sb; | 33 | struct super_block *rf_sb; |
34 | }; | 34 | }; |
35 | 35 | ||
36 | void ocfs2_purge_refcount_trees(struct ocfs2_super *osb); | ||
37 | int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw, | ||
38 | struct ocfs2_refcount_tree **tree, | ||
39 | struct buffer_head **ref_bh); | ||
40 | void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb, | ||
41 | struct ocfs2_refcount_tree *tree, | ||
42 | int rw); | ||
36 | #endif /* OCFS2_REFCOUNTTREE_H */ | 43 | #endif /* OCFS2_REFCOUNTTREE_H */ |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index e35a5052ce3a..8b6062176970 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -69,6 +69,7 @@ | |||
69 | #include "ver.h" | 69 | #include "ver.h" |
70 | #include "xattr.h" | 70 | #include "xattr.h" |
71 | #include "quota.h" | 71 | #include "quota.h" |
72 | #include "refcounttree.h" | ||
72 | 73 | ||
73 | #include "buffer_head_io.h" | 74 | #include "buffer_head_io.h" |
74 | 75 | ||
@@ -1858,6 +1859,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1858 | 1859 | ||
1859 | ocfs2_sync_blockdev(sb); | 1860 | ocfs2_sync_blockdev(sb); |
1860 | 1861 | ||
1862 | ocfs2_purge_refcount_trees(osb); | ||
1863 | |||
1861 | /* No cluster connection means we've failed during mount, so skip | 1864 | /* No cluster connection means we've failed during mount, so skip |
1862 | * all the steps which depended on that to complete. */ | 1865 | * all the steps which depended on that to complete. */ |
1863 | if (osb->cconn) { | 1866 | if (osb->cconn) { |
@@ -2064,6 +2067,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2064 | goto bail; | 2067 | goto bail; |
2065 | } | 2068 | } |
2066 | 2069 | ||
2070 | osb->osb_rf_lock_tree = RB_ROOT; | ||
2071 | |||
2067 | osb->s_feature_compat = | 2072 | osb->s_feature_compat = |
2068 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); | 2073 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); |
2069 | osb->s_feature_ro_compat = | 2074 | osb->s_feature_ro_compat = |