aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2
diff options
context:
space:
mode:
authorTao Ma <tao.ma@oracle.com>2009-08-23 23:13:37 -0400
committerJoel Becker <joel.becker@oracle.com>2009-09-22 23:09:29 -0400
commit374a263e790c4de85844283c098810a92985f623 (patch)
treea72b1f072b973723ea1cf7def27608717cf1c033 /fs/ocfs2
parentc732eb16bf07f9bfb7fa72b6868462471273bdbd (diff)
ocfs2: Add refcount tree lock mechanism.
Implement locking around struct ocfs2_refcount_tree. This protects all read/write operations on refcount trees. ocfs2_refcount_tree has its own lock and its own caching_info, protecting buffers among multiple nodes. User must call ocfs2_lock_refcount_tree before his operation on the tree and unlock it after that. ocfs2_refcount_trees are referenced by the block number of the refcount tree root block, So we create an rb-tree on the ocfs2_super to look them up. Signed-off-by: Tao Ma <tao.ma@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r--fs/ocfs2/ocfs2.h4
-rw-r--r--fs/ocfs2/refcounttree.c359
-rw-r--r--fs/ocfs2/refcounttree.h7
-rw-r--r--fs/ocfs2/super.c5
4 files changed, 375 insertions, 0 deletions
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 6688d19e4451..bb5357376ef5 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -408,6 +408,10 @@ struct ocfs2_super
408 408
409 /* the group we used to allocate inodes. */ 409 /* the group we used to allocate inodes. */
410 u64 osb_inode_alloc_group; 410 u64 osb_inode_alloc_group;
411
412 /* rb tree root for refcount lock. */
413 struct rb_root osb_rf_lock_tree;
414 struct ocfs2_refcount_tree *osb_ref_tree_lru;
411}; 415};
412 416
413#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) 417#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index eb0f4a047938..8d79de8637b8 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -27,6 +27,7 @@
27#include "buffer_head_io.h" 27#include "buffer_head_io.h"
28#include "blockcheck.h" 28#include "blockcheck.h"
29#include "refcounttree.h" 29#include "refcounttree.h"
30#include "dlmglue.h"
30 31
31static inline struct ocfs2_refcount_tree * 32static inline struct ocfs2_refcount_tree *
32cache_info_to_refcount(struct ocfs2_caching_info *ci) 33cache_info_to_refcount(struct ocfs2_caching_info *ci)
@@ -156,3 +157,361 @@ static const struct ocfs2_caching_operations ocfs2_refcount_caching_ops = {
156 .co_io_lock = ocfs2_refcount_cache_io_lock, 157 .co_io_lock = ocfs2_refcount_cache_io_lock,
157 .co_io_unlock = ocfs2_refcount_cache_io_unlock, 158 .co_io_unlock = ocfs2_refcount_cache_io_unlock,
158}; 159};
160
161static struct ocfs2_refcount_tree *
162ocfs2_find_refcount_tree(struct ocfs2_super *osb, u64 blkno)
163{
164 struct rb_node *n = osb->osb_rf_lock_tree.rb_node;
165 struct ocfs2_refcount_tree *tree = NULL;
166
167 while (n) {
168 tree = rb_entry(n, struct ocfs2_refcount_tree, rf_node);
169
170 if (blkno < tree->rf_blkno)
171 n = n->rb_left;
172 else if (blkno > tree->rf_blkno)
173 n = n->rb_right;
174 else
175 return tree;
176 }
177
178 return NULL;
179}
180
181/* osb_lock is already locked. */
182static void ocfs2_insert_refcount_tree(struct ocfs2_super *osb,
183 struct ocfs2_refcount_tree *new)
184{
185 u64 rf_blkno = new->rf_blkno;
186 struct rb_node *parent = NULL;
187 struct rb_node **p = &osb->osb_rf_lock_tree.rb_node;
188 struct ocfs2_refcount_tree *tmp;
189
190 while (*p) {
191 parent = *p;
192
193 tmp = rb_entry(parent, struct ocfs2_refcount_tree,
194 rf_node);
195
196 if (rf_blkno < tmp->rf_blkno)
197 p = &(*p)->rb_left;
198 else if (rf_blkno > tmp->rf_blkno)
199 p = &(*p)->rb_right;
200 else {
201 /* This should never happen! */
202 mlog(ML_ERROR, "Duplicate refcount block %llu found!\n",
203 (unsigned long long)rf_blkno);
204 BUG();
205 }
206 }
207
208 rb_link_node(&new->rf_node, parent, p);
209 rb_insert_color(&new->rf_node, &osb->osb_rf_lock_tree);
210}
211
212static void ocfs2_free_refcount_tree(struct ocfs2_refcount_tree *tree)
213{
214 ocfs2_metadata_cache_exit(&tree->rf_ci);
215 ocfs2_simple_drop_lockres(OCFS2_SB(tree->rf_sb), &tree->rf_lockres);
216 ocfs2_lock_res_free(&tree->rf_lockres);
217 kfree(tree);
218}
219
220static inline void
221ocfs2_erase_refcount_tree_from_list_no_lock(struct ocfs2_super *osb,
222 struct ocfs2_refcount_tree *tree)
223{
224 rb_erase(&tree->rf_node, &osb->osb_rf_lock_tree);
225 if (osb->osb_ref_tree_lru && osb->osb_ref_tree_lru == tree)
226 osb->osb_ref_tree_lru = NULL;
227}
228
229static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
230 struct ocfs2_refcount_tree *tree)
231{
232 spin_lock(&osb->osb_lock);
233 ocfs2_erase_refcount_tree_from_list_no_lock(osb, tree);
234 spin_unlock(&osb->osb_lock);
235}
236
237void ocfs2_kref_remove_refcount_tree(struct kref *kref)
238{
239 struct ocfs2_refcount_tree *tree =
240 container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
241
242 ocfs2_free_refcount_tree(tree);
243}
244
245static inline void
246ocfs2_refcount_tree_get(struct ocfs2_refcount_tree *tree)
247{
248 kref_get(&tree->rf_getcnt);
249}
250
251static inline void
252ocfs2_refcount_tree_put(struct ocfs2_refcount_tree *tree)
253{
254 kref_put(&tree->rf_getcnt, ocfs2_kref_remove_refcount_tree);
255}
256
257static inline void ocfs2_init_refcount_tree_ci(struct ocfs2_refcount_tree *new,
258 struct super_block *sb)
259{
260 ocfs2_metadata_cache_init(&new->rf_ci, &ocfs2_refcount_caching_ops);
261 mutex_init(&new->rf_io_mutex);
262 new->rf_sb = sb;
263 spin_lock_init(&new->rf_lock);
264}
265
266static inline void ocfs2_init_refcount_tree_lock(struct ocfs2_super *osb,
267 struct ocfs2_refcount_tree *new,
268 u64 rf_blkno, u32 generation)
269{
270 init_rwsem(&new->rf_sem);
271 ocfs2_refcount_lock_res_init(&new->rf_lockres, osb,
272 rf_blkno, generation);
273}
274
275static int ocfs2_get_refcount_tree(struct ocfs2_super *osb, u64 rf_blkno,
276 struct ocfs2_refcount_tree **ret_tree)
277{
278 int ret = 0;
279 struct ocfs2_refcount_tree *tree, *new = NULL;
280 struct buffer_head *ref_root_bh = NULL;
281 struct ocfs2_refcount_block *ref_rb;
282
283 spin_lock(&osb->osb_lock);
284 if (osb->osb_ref_tree_lru &&
285 osb->osb_ref_tree_lru->rf_blkno == rf_blkno)
286 tree = osb->osb_ref_tree_lru;
287 else
288 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
289 if (tree)
290 goto out;
291
292 spin_unlock(&osb->osb_lock);
293
294 new = kzalloc(sizeof(struct ocfs2_refcount_tree), GFP_NOFS);
295 if (!new) {
296 ret = -ENOMEM;
297 return ret;
298 }
299
300 new->rf_blkno = rf_blkno;
301 kref_init(&new->rf_getcnt);
302 ocfs2_init_refcount_tree_ci(new, osb->sb);
303
304 /*
305 * We need the generation to create the refcount tree lock and since
306 * it isn't changed during the tree modification, we are safe here to
307 * read without protection.
308 * We also have to purge the cache after we create the lock since the
309 * refcount block may have the stale data. It can only be trusted when
310 * we hold the refcount lock.
311 */
312 ret = ocfs2_read_refcount_block(&new->rf_ci, rf_blkno, &ref_root_bh);
313 if (ret) {
314 mlog_errno(ret);
315 ocfs2_metadata_cache_exit(&new->rf_ci);
316 kfree(new);
317 return ret;
318 }
319
320 ref_rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
321 new->rf_generation = le32_to_cpu(ref_rb->rf_generation);
322 ocfs2_init_refcount_tree_lock(osb, new, rf_blkno,
323 new->rf_generation);
324 ocfs2_metadata_cache_purge(&new->rf_ci);
325
326 spin_lock(&osb->osb_lock);
327 tree = ocfs2_find_refcount_tree(osb, rf_blkno);
328 if (tree)
329 goto out;
330
331 ocfs2_insert_refcount_tree(osb, new);
332
333 tree = new;
334 new = NULL;
335
336out:
337 *ret_tree = tree;
338
339 osb->osb_ref_tree_lru = tree;
340
341 spin_unlock(&osb->osb_lock);
342
343 if (new)
344 ocfs2_free_refcount_tree(new);
345
346 brelse(ref_root_bh);
347 return ret;
348}
349
350static int ocfs2_get_refcount_block(struct inode *inode, u64 *ref_blkno)
351{
352 int ret;
353 struct buffer_head *di_bh = NULL;
354 struct ocfs2_dinode *di;
355
356 ret = ocfs2_read_inode_block(inode, &di_bh);
357 if (ret) {
358 mlog_errno(ret);
359 goto out;
360 }
361
362 BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
363
364 di = (struct ocfs2_dinode *)di_bh->b_data;
365 *ref_blkno = le64_to_cpu(di->i_refcount_loc);
366 brelse(di_bh);
367out:
368 return ret;
369}
370
371static int __ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
372 struct ocfs2_refcount_tree *tree, int rw)
373{
374 int ret;
375
376 ret = ocfs2_refcount_lock(tree, rw);
377 if (ret) {
378 mlog_errno(ret);
379 goto out;
380 }
381
382 if (rw)
383 down_write(&tree->rf_sem);
384 else
385 down_read(&tree->rf_sem);
386
387out:
388 return ret;
389}
390
391/*
392 * Lock the refcount tree pointed by ref_blkno and return the tree.
393 * In most case, we lock the tree and read the refcount block.
394 * So read it here if the caller really needs it.
395 *
396 * If the tree has been re-created by other node, it will free the
397 * old one and re-create it.
398 */
399int ocfs2_lock_refcount_tree(struct ocfs2_super *osb,
400 u64 ref_blkno, int rw,
401 struct ocfs2_refcount_tree **ret_tree,
402 struct buffer_head **ref_bh)
403{
404 int ret, delete_tree = 0;
405 struct ocfs2_refcount_tree *tree = NULL;
406 struct buffer_head *ref_root_bh = NULL;
407 struct ocfs2_refcount_block *rb;
408
409again:
410 ret = ocfs2_get_refcount_tree(osb, ref_blkno, &tree);
411 if (ret) {
412 mlog_errno(ret);
413 return ret;
414 }
415
416 ocfs2_refcount_tree_get(tree);
417
418 ret = __ocfs2_lock_refcount_tree(osb, tree, rw);
419 if (ret) {
420 mlog_errno(ret);
421 ocfs2_refcount_tree_put(tree);
422 goto out;
423 }
424
425 ret = ocfs2_read_refcount_block(&tree->rf_ci, tree->rf_blkno,
426 &ref_root_bh);
427 if (ret) {
428 mlog_errno(ret);
429 ocfs2_unlock_refcount_tree(osb, tree, rw);
430 ocfs2_refcount_tree_put(tree);
431 goto out;
432 }
433
434 rb = (struct ocfs2_refcount_block *)ref_root_bh->b_data;
435 /*
436 * If the refcount block has been freed and re-created, we may need
437 * to recreate the refcount tree also.
438 *
439 * Here we just remove the tree from the rb-tree, and the last
440 * kref holder will unlock and delete this refcount_tree.
441 * Then we goto "again" and ocfs2_get_refcount_tree will create
442 * the new refcount tree for us.
443 */
444 if (tree->rf_generation != le32_to_cpu(rb->rf_generation)) {
445 if (!tree->rf_removed) {
446 ocfs2_erase_refcount_tree_from_list(osb, tree);
447 tree->rf_removed = 1;
448 delete_tree = 1;
449 }
450
451 ocfs2_unlock_refcount_tree(osb, tree, rw);
452 /*
453 * We get an extra reference when we create the refcount
454 * tree, so another put will destroy it.
455 */
456 if (delete_tree)
457 ocfs2_refcount_tree_put(tree);
458 brelse(ref_root_bh);
459 ref_root_bh = NULL;
460 goto again;
461 }
462
463 *ret_tree = tree;
464 if (ref_bh) {
465 *ref_bh = ref_root_bh;
466 ref_root_bh = NULL;
467 }
468out:
469 brelse(ref_root_bh);
470 return ret;
471}
472
473int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
474 struct ocfs2_refcount_tree **ret_tree,
475 struct buffer_head **ref_bh)
476{
477 int ret;
478 u64 ref_blkno;
479
480 ret = ocfs2_get_refcount_block(inode, &ref_blkno);
481 if (ret) {
482 mlog_errno(ret);
483 return ret;
484 }
485
486 return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
487 rw, ret_tree, ref_bh);
488}
489
490void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
491 struct ocfs2_refcount_tree *tree, int rw)
492{
493 if (rw)
494 up_write(&tree->rf_sem);
495 else
496 up_read(&tree->rf_sem);
497
498 ocfs2_refcount_unlock(tree, rw);
499 ocfs2_refcount_tree_put(tree);
500}
501
502void ocfs2_purge_refcount_trees(struct ocfs2_super *osb)
503{
504 struct rb_node *node;
505 struct ocfs2_refcount_tree *tree;
506 struct rb_root *root = &osb->osb_rf_lock_tree;
507
508 while ((node = rb_last(root)) != NULL) {
509 tree = rb_entry(node, struct ocfs2_refcount_tree, rf_node);
510
511 mlog(0, "Purge tree %llu\n",
512 (unsigned long long) tree->rf_blkno);
513
514 rb_erase(&tree->rf_node, root);
515 ocfs2_free_refcount_tree(tree);
516 }
517}
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h
index 9a3695cdbb53..2ea7fc52c23c 100644
--- a/fs/ocfs2/refcounttree.h
+++ b/fs/ocfs2/refcounttree.h
@@ -33,4 +33,11 @@ struct ocfs2_refcount_tree {
33 struct super_block *rf_sb; 33 struct super_block *rf_sb;
34}; 34};
35 35
36void ocfs2_purge_refcount_trees(struct ocfs2_super *osb);
37int ocfs2_lock_refcount_tree(struct ocfs2_super *osb, u64 ref_blkno, int rw,
38 struct ocfs2_refcount_tree **tree,
39 struct buffer_head **ref_bh);
40void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
41 struct ocfs2_refcount_tree *tree,
42 int rw);
36#endif /* OCFS2_REFCOUNTTREE_H */ 43#endif /* OCFS2_REFCOUNTTREE_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index e35a5052ce3a..8b6062176970 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -69,6 +69,7 @@
69#include "ver.h" 69#include "ver.h"
70#include "xattr.h" 70#include "xattr.h"
71#include "quota.h" 71#include "quota.h"
72#include "refcounttree.h"
72 73
73#include "buffer_head_io.h" 74#include "buffer_head_io.h"
74 75
@@ -1858,6 +1859,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err)
1858 1859
1859 ocfs2_sync_blockdev(sb); 1860 ocfs2_sync_blockdev(sb);
1860 1861
1862 ocfs2_purge_refcount_trees(osb);
1863
1861 /* No cluster connection means we've failed during mount, so skip 1864 /* No cluster connection means we've failed during mount, so skip
1862 * all the steps which depended on that to complete. */ 1865 * all the steps which depended on that to complete. */
1863 if (osb->cconn) { 1866 if (osb->cconn) {
@@ -2064,6 +2067,8 @@ static int ocfs2_initialize_super(struct super_block *sb,
2064 goto bail; 2067 goto bail;
2065 } 2068 }
2066 2069
2070 osb->osb_rf_lock_tree = RB_ROOT;
2071
2067 osb->s_feature_compat = 2072 osb->s_feature_compat =
2068 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); 2073 le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat);
2069 osb->s_feature_ro_compat = 2074 osb->s_feature_ro_compat =