aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author: Sunil Mushran <sunil.mushran@oracle.com> 2011-05-19 17:34:12 -0400
committer: Joel Becker <jlbec@evilplan.org> 2011-05-26 00:05:22 -0400
commit: 66effd3c681256874a81436493a933edb1701798 (patch)
tree: ce0b9047ee7be10b6e7e8848f731e1d6a7579731 /fs
parent: bddefdeec5bc56ba5aa2c2ca8c904cdff58e7e5b (diff)
ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
During dlm domain shutdown, o2dlm has to free all the lock resources. Ones that have no locks and references are freed. Ones that have locks and/or references are migrated to another node.

The first task in migration is finding a target. Currently we scan the lock resource and find one node that either has a lock or a reference. This is not very efficient in a parallel umount case as we might end up migrating the lock resource to a node which itself may have to migrate it to a third node.

The patch scans the dlm->exit_domain_map to ensure the target node is not leaving the domain. If no valid target node is found, o2dlm does not migrate the resource but instead waits for the unlock and deref messages that will allow it to free the resource.

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <jlbec@evilplan.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/ocfs2/dlm/dlmdomain.c | 10
-rw-r--r-- fs/ocfs2/dlm/dlmmaster.c | 142
2 files changed, 57 insertions, 95 deletions
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 3aff23feefdc..6ed6b95dcf93 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -451,14 +451,18 @@ redo_bucket:
451 dropped = dlm_empty_lockres(dlm, res); 451 dropped = dlm_empty_lockres(dlm, res);
452 452
453 spin_lock(&res->spinlock); 453 spin_lock(&res->spinlock);
454 __dlm_lockres_calc_usage(dlm, res); 454 if (dropped)
455 iter = res->hash_node.next; 455 __dlm_lockres_calc_usage(dlm, res);
456 else
457 iter = res->hash_node.next;
456 spin_unlock(&res->spinlock); 458 spin_unlock(&res->spinlock);
457 459
458 dlm_lockres_put(res); 460 dlm_lockres_put(res);
459 461
460 if (dropped) 462 if (dropped) {
463 cond_resched_lock(&dlm->spinlock);
461 goto redo_bucket; 464 goto redo_bucket;
465 }
462 } 466 }
463 cond_resched_lock(&dlm->spinlock); 467 cond_resched_lock(&dlm->spinlock);
464 num += n; 468 num += n;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index ec4994628948..11eefb8c12e9 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
2396 2396
2397 2397
2398static int dlm_migrate_lockres(struct dlm_ctxt *dlm, 2398static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2399 struct dlm_lock_resource *res, 2399 struct dlm_lock_resource *res, u8 target)
2400 u8 target)
2401{ 2400{
2402 struct dlm_master_list_entry *mle = NULL; 2401 struct dlm_master_list_entry *mle = NULL;
2403 struct dlm_master_list_entry *oldmle = NULL; 2402 struct dlm_master_list_entry *oldmle = NULL;
@@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2411 if (!dlm_grab(dlm)) 2410 if (!dlm_grab(dlm))
2412 return -EINVAL; 2411 return -EINVAL;
2413 2412
2413 BUG_ON(target == O2NM_MAX_NODES);
2414
2414 name = res->lockname.name; 2415 name = res->lockname.name;
2415 namelen = res->lockname.len; 2416 namelen = res->lockname.len;
2416 2417
2417 mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); 2418 mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name,
2418 2419 target);
2419 /* Ensure this lockres is a proper candidate for migration */
2420 spin_lock(&res->spinlock);
2421 ret = dlm_is_lockres_migrateable(dlm, res);
2422 spin_unlock(&res->spinlock);
2423
2424 /* No work to do */
2425 if (!ret)
2426 goto leave;
2427
2428 /*
2429 * preallocate up front
2430 * if this fails, abort
2431 */
2432 2420
2421 /* preallocate up front. if this fails, abort */
2433 ret = -ENOMEM; 2422 ret = -ENOMEM;
2434 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); 2423 mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS);
2435 if (!mres) { 2424 if (!mres) {
@@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
2445 ret = 0; 2434 ret = 0;
2446 2435
2447 /* 2436 /*
2448 * find a node to migrate the lockres to
2449 */
2450
2451 spin_lock(&dlm->spinlock);
2452 /* pick a new node */
2453 if (!test_bit(target, dlm->domain_map) ||
2454 target >= O2NM_MAX_NODES) {
2455 target = dlm_pick_migration_target(dlm, res);
2456 }
2457 mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
2458 namelen, name, target);
2459
2460 if (target >= O2NM_MAX_NODES ||
2461 !test_bit(target, dlm->domain_map)) {
2462 /* target chosen is not alive */
2463 ret = -EINVAL;
2464 }
2465
2466 if (ret) {
2467 spin_unlock(&dlm->spinlock);
2468 goto fail;
2469 }
2470
2471 mlog(0, "continuing with target = %u\n", target);
2472
2473 /*
2474 * clear any existing master requests and 2437 * clear any existing master requests and
2475 * add the migration mle to the list 2438 * add the migration mle to the list
2476 */ 2439 */
2440 spin_lock(&dlm->spinlock);
2477 spin_lock(&dlm->master_lock); 2441 spin_lock(&dlm->master_lock);
2478 ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, 2442 ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name,
2479 namelen, target, dlm->node_num); 2443 namelen, target, dlm->node_num);
@@ -2514,6 +2478,7 @@ fail:
2514 dlm_put_mle(mle); 2478 dlm_put_mle(mle);
2515 } else if (mle) { 2479 } else if (mle) {
2516 kmem_cache_free(dlm_mle_cache, mle); 2480 kmem_cache_free(dlm_mle_cache, mle);
2481 mle = NULL;
2517 } 2482 }
2518 goto leave; 2483 goto leave;
2519 } 2484 }
@@ -2635,7 +2600,6 @@ leave:
2635 if (wake) 2600 if (wake)
2636 wake_up(&res->wq); 2601 wake_up(&res->wq);
2637 2602
2638 /* TODO: cleanup */
2639 if (mres) 2603 if (mres)
2640 free_page((unsigned long)mres); 2604 free_page((unsigned long)mres);
2641 2605
@@ -2660,28 +2624,28 @@ leave:
2660 */ 2624 */
2661int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) 2625int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
2662{ 2626{
2663 int mig, ret; 2627 int ret;
2664 int lock_dropped = 0; 2628 int lock_dropped = 0;
2629 u8 target = O2NM_MAX_NODES;
2665 2630
2666 assert_spin_locked(&dlm->spinlock); 2631 assert_spin_locked(&dlm->spinlock);
2667 2632
2668 spin_lock(&res->spinlock); 2633 spin_lock(&res->spinlock);
2669 mig = dlm_is_lockres_migrateable(dlm, res); 2634 if (dlm_is_lockres_migrateable(dlm, res))
2635 target = dlm_pick_migration_target(dlm, res);
2670 spin_unlock(&res->spinlock); 2636 spin_unlock(&res->spinlock);
2671 if (!mig) 2637
2638 if (target == O2NM_MAX_NODES)
2672 goto leave; 2639 goto leave;
2673 2640
2674 /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ 2641 /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */
2675 spin_unlock(&dlm->spinlock); 2642 spin_unlock(&dlm->spinlock);
2676 lock_dropped = 1; 2643 lock_dropped = 1;
2677 while (1) { 2644 ret = dlm_migrate_lockres(dlm, res, target);
2678 ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); 2645 if (ret)
2679 if (ret >= 0) 2646 mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n",
2680 break; 2647 dlm->name, res->lockname.len, res->lockname.name,
2681 mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name, 2648 target, ret);
2682 res->lockname.len, res->lockname.name);
2683 msleep(DLM_MIGRATION_RETRY_MS);
2684 }
2685 spin_lock(&dlm->spinlock); 2649 spin_lock(&dlm->spinlock);
2686leave: 2650leave:
2687 return lock_dropped; 2651 return lock_dropped;
@@ -2865,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm,
2865 } 2829 }
2866} 2830}
2867 2831
2868/* for now this is not too intelligent. we will 2832/*
2869 * need stats to make this do the right thing. 2833 * Pick a node to migrate the lock resource to. This function selects a
2870 * this just finds the first lock on one of the 2834 * potential target based first on the locks and then on refmap. It skips
2871 * queues and uses that node as the target. */ 2835 * nodes that are in the process of exiting the domain.
2836 */
2872static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, 2837static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
2873 struct dlm_lock_resource *res) 2838 struct dlm_lock_resource *res)
2874{ 2839{
2875 int i; 2840 enum dlm_lockres_list idx;
2876 struct list_head *queue = &res->granted; 2841 struct list_head *queue = &res->granted;
2877 struct dlm_lock *lock; 2842 struct dlm_lock *lock;
2878 int nodenum; 2843 int noderef;
2844 u8 nodenum = O2NM_MAX_NODES;
2879 2845
2880 assert_spin_locked(&dlm->spinlock); 2846 assert_spin_locked(&dlm->spinlock);
2847 assert_spin_locked(&res->spinlock);
2881 2848
2882 spin_lock(&res->spinlock); 2849 /* Go through all the locks */
2883 for (i=0; i<3; i++) { 2850 for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) {
2851 queue = dlm_list_idx_to_ptr(res, idx);
2884 list_for_each_entry(lock, queue, list) { 2852 list_for_each_entry(lock, queue, list) {
2885 /* up to the caller to make sure this node 2853 if (lock->ml.node == dlm->node_num)
2886 * is alive */ 2854 continue;
2887 if (lock->ml.node != dlm->node_num) { 2855 if (test_bit(lock->ml.node, dlm->exit_domain_map))
2888 spin_unlock(&res->spinlock); 2856 continue;
2889 return lock->ml.node; 2857 nodenum = lock->ml.node;
2890 } 2858 goto bail;
2891 } 2859 }
2892 queue++;
2893 } 2860 }
2894 2861
2895 nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); 2862 /* Go thru the refmap */
2896 if (nodenum < O2NM_MAX_NODES) { 2863 noderef = -1;
2897 spin_unlock(&res->spinlock);
2898 return nodenum;
2899 }
2900 spin_unlock(&res->spinlock);
2901 mlog(0, "have not found a suitable target yet! checking domain map\n");
2902
2903 /* ok now we're getting desperate. pick anyone alive. */
2904 nodenum = -1;
2905 while (1) { 2864 while (1) {
2906 nodenum = find_next_bit(dlm->domain_map, 2865 noderef = find_next_bit(res->refmap, O2NM_MAX_NODES,
2907 O2NM_MAX_NODES, nodenum+1); 2866 noderef + 1);
2908 mlog(0, "found %d in domain map\n", nodenum); 2867 if (noderef >= O2NM_MAX_NODES)
2909 if (nodenum >= O2NM_MAX_NODES)
2910 break; 2868 break;
2911 if (nodenum != dlm->node_num) { 2869 if (noderef == dlm->node_num)
2912 mlog(0, "picking %d\n", nodenum); 2870 continue;
2913 return nodenum; 2871 if (test_bit(noderef, dlm->exit_domain_map))
2914 } 2872 continue;
2873 nodenum = noderef;
2874 goto bail;
2915 } 2875 }
2916 2876
2917 mlog(0, "giving up. no master to migrate to\n"); 2877bail:
2918 return DLM_LOCK_RES_OWNER_UNKNOWN; 2878 return nodenum;
2919} 2879}
2920 2880
2921
2922
2923/* this is called by the new master once all lockres 2881/* this is called by the new master once all lockres
2924 * data has been received */ 2882 * data has been received */
2925static int dlm_do_migrate_request(struct dlm_ctxt *dlm, 2883static int dlm_do_migrate_request(struct dlm_ctxt *dlm,