diff options
author | Sunil Mushran <sunil.mushran@oracle.com> | 2011-05-19 17:34:12 -0400 |
---|---|---|
committer | Joel Becker <jlbec@evilplan.org> | 2011-05-26 00:05:22 -0400 |
commit | 66effd3c681256874a81436493a933edb1701798 (patch) | |
tree | ce0b9047ee7be10b6e7e8848f731e1d6a7579731 /fs | |
parent | bddefdeec5bc56ba5aa2c2ca8c904cdff58e7e5b (diff) |
ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
During dlm domain shutdown, o2dlm has to free all the lock resources. Resources
that have neither locks nor references are freed. Resources that have locks
and/or references are migrated to another node.
The first task in migration is finding a target. Currently we scan the lock
resource and pick the first node that has either a lock or a reference. This is
inefficient in a parallel umount case, because the chosen node may itself be
leaving the domain and would then have to migrate the resource to a third node.
The patch scans the dlm->exit_domain_map to ensure the target node is not
leaving the domain. If no valid target node is found, o2dlm does not migrate
the resource but instead waits for the unlock and deref messages that will
allow it to free the resource.
Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
Signed-off-by: Joel Becker <jlbec@evilplan.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ocfs2/dlm/dlmdomain.c | 10 | ||||
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 142 |
2 files changed, 57 insertions, 95 deletions
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 3aff23feefdc..6ed6b95dcf93 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -451,14 +451,18 @@ redo_bucket: | |||
451 | dropped = dlm_empty_lockres(dlm, res); | 451 | dropped = dlm_empty_lockres(dlm, res); |
452 | 452 | ||
453 | spin_lock(&res->spinlock); | 453 | spin_lock(&res->spinlock); |
454 | __dlm_lockres_calc_usage(dlm, res); | 454 | if (dropped) |
455 | iter = res->hash_node.next; | 455 | __dlm_lockres_calc_usage(dlm, res); |
456 | else | ||
457 | iter = res->hash_node.next; | ||
456 | spin_unlock(&res->spinlock); | 458 | spin_unlock(&res->spinlock); |
457 | 459 | ||
458 | dlm_lockres_put(res); | 460 | dlm_lockres_put(res); |
459 | 461 | ||
460 | if (dropped) | 462 | if (dropped) { |
463 | cond_resched_lock(&dlm->spinlock); | ||
461 | goto redo_bucket; | 464 | goto redo_bucket; |
465 | } | ||
462 | } | 466 | } |
463 | cond_resched_lock(&dlm->spinlock); | 467 | cond_resched_lock(&dlm->spinlock); |
464 | num += n; | 468 | num += n; |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ec4994628948..11eefb8c12e9 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -2396,8 +2396,7 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | |||
2396 | 2396 | ||
2397 | 2397 | ||
2398 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | 2398 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, |
2399 | struct dlm_lock_resource *res, | 2399 | struct dlm_lock_resource *res, u8 target) |
2400 | u8 target) | ||
2401 | { | 2400 | { |
2402 | struct dlm_master_list_entry *mle = NULL; | 2401 | struct dlm_master_list_entry *mle = NULL; |
2403 | struct dlm_master_list_entry *oldmle = NULL; | 2402 | struct dlm_master_list_entry *oldmle = NULL; |
@@ -2411,25 +2410,15 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2411 | if (!dlm_grab(dlm)) | 2410 | if (!dlm_grab(dlm)) |
2412 | return -EINVAL; | 2411 | return -EINVAL; |
2413 | 2412 | ||
2413 | BUG_ON(target == O2NM_MAX_NODES); | ||
2414 | |||
2414 | name = res->lockname.name; | 2415 | name = res->lockname.name; |
2415 | namelen = res->lockname.len; | 2416 | namelen = res->lockname.len; |
2416 | 2417 | ||
2417 | mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); | 2418 | mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, |
2418 | 2419 | target); | |
2419 | /* Ensure this lockres is a proper candidate for migration */ | ||
2420 | spin_lock(&res->spinlock); | ||
2421 | ret = dlm_is_lockres_migrateable(dlm, res); | ||
2422 | spin_unlock(&res->spinlock); | ||
2423 | |||
2424 | /* No work to do */ | ||
2425 | if (!ret) | ||
2426 | goto leave; | ||
2427 | |||
2428 | /* | ||
2429 | * preallocate up front | ||
2430 | * if this fails, abort | ||
2431 | */ | ||
2432 | 2420 | ||
2421 | /* preallocate up front. if this fails, abort */ | ||
2433 | ret = -ENOMEM; | 2422 | ret = -ENOMEM; |
2434 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); | 2423 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); |
2435 | if (!mres) { | 2424 | if (!mres) { |
@@ -2445,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2445 | ret = 0; | 2434 | ret = 0; |
2446 | 2435 | ||
2447 | /* | 2436 | /* |
2448 | * find a node to migrate the lockres to | ||
2449 | */ | ||
2450 | |||
2451 | spin_lock(&dlm->spinlock); | ||
2452 | /* pick a new node */ | ||
2453 | if (!test_bit(target, dlm->domain_map) || | ||
2454 | target >= O2NM_MAX_NODES) { | ||
2455 | target = dlm_pick_migration_target(dlm, res); | ||
2456 | } | ||
2457 | mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, | ||
2458 | namelen, name, target); | ||
2459 | |||
2460 | if (target >= O2NM_MAX_NODES || | ||
2461 | !test_bit(target, dlm->domain_map)) { | ||
2462 | /* target chosen is not alive */ | ||
2463 | ret = -EINVAL; | ||
2464 | } | ||
2465 | |||
2466 | if (ret) { | ||
2467 | spin_unlock(&dlm->spinlock); | ||
2468 | goto fail; | ||
2469 | } | ||
2470 | |||
2471 | mlog(0, "continuing with target = %u\n", target); | ||
2472 | |||
2473 | /* | ||
2474 | * clear any existing master requests and | 2437 | * clear any existing master requests and |
2475 | * add the migration mle to the list | 2438 | * add the migration mle to the list |
2476 | */ | 2439 | */ |
2440 | spin_lock(&dlm->spinlock); | ||
2477 | spin_lock(&dlm->master_lock); | 2441 | spin_lock(&dlm->master_lock); |
2478 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, | 2442 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, |
2479 | namelen, target, dlm->node_num); | 2443 | namelen, target, dlm->node_num); |
@@ -2514,6 +2478,7 @@ fail: | |||
2514 | dlm_put_mle(mle); | 2478 | dlm_put_mle(mle); |
2515 | } else if (mle) { | 2479 | } else if (mle) { |
2516 | kmem_cache_free(dlm_mle_cache, mle); | 2480 | kmem_cache_free(dlm_mle_cache, mle); |
2481 | mle = NULL; | ||
2517 | } | 2482 | } |
2518 | goto leave; | 2483 | goto leave; |
2519 | } | 2484 | } |
@@ -2635,7 +2600,6 @@ leave: | |||
2635 | if (wake) | 2600 | if (wake) |
2636 | wake_up(&res->wq); | 2601 | wake_up(&res->wq); |
2637 | 2602 | ||
2638 | /* TODO: cleanup */ | ||
2639 | if (mres) | 2603 | if (mres) |
2640 | free_page((unsigned long)mres); | 2604 | free_page((unsigned long)mres); |
2641 | 2605 | ||
@@ -2660,28 +2624,28 @@ leave: | |||
2660 | */ | 2624 | */ |
2661 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | 2625 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
2662 | { | 2626 | { |
2663 | int mig, ret; | 2627 | int ret; |
2664 | int lock_dropped = 0; | 2628 | int lock_dropped = 0; |
2629 | u8 target = O2NM_MAX_NODES; | ||
2665 | 2630 | ||
2666 | assert_spin_locked(&dlm->spinlock); | 2631 | assert_spin_locked(&dlm->spinlock); |
2667 | 2632 | ||
2668 | spin_lock(&res->spinlock); | 2633 | spin_lock(&res->spinlock); |
2669 | mig = dlm_is_lockres_migrateable(dlm, res); | 2634 | if (dlm_is_lockres_migrateable(dlm, res)) |
2635 | target = dlm_pick_migration_target(dlm, res); | ||
2670 | spin_unlock(&res->spinlock); | 2636 | spin_unlock(&res->spinlock); |
2671 | if (!mig) | 2637 | |
2638 | if (target == O2NM_MAX_NODES) | ||
2672 | goto leave; | 2639 | goto leave; |
2673 | 2640 | ||
2674 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ | 2641 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ |
2675 | spin_unlock(&dlm->spinlock); | 2642 | spin_unlock(&dlm->spinlock); |
2676 | lock_dropped = 1; | 2643 | lock_dropped = 1; |
2677 | while (1) { | 2644 | ret = dlm_migrate_lockres(dlm, res, target); |
2678 | ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); | 2645 | if (ret) |
2679 | if (ret >= 0) | 2646 | mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", |
2680 | break; | 2647 | dlm->name, res->lockname.len, res->lockname.name, |
2681 | mlog(0, "%s: res %.*s, Migrate failed, retrying\n", dlm->name, | 2648 | target, ret); |
2682 | res->lockname.len, res->lockname.name); | ||
2683 | msleep(DLM_MIGRATION_RETRY_MS); | ||
2684 | } | ||
2685 | spin_lock(&dlm->spinlock); | 2649 | spin_lock(&dlm->spinlock); |
2686 | leave: | 2650 | leave: |
2687 | return lock_dropped; | 2651 | return lock_dropped; |
@@ -2865,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
2865 | } | 2829 | } |
2866 | } | 2830 | } |
2867 | 2831 | ||
2868 | /* for now this is not too intelligent. we will | 2832 | /* |
2869 | * need stats to make this do the right thing. | 2833 | * Pick a node to migrate the lock resource to. This function selects a |
2870 | * this just finds the first lock on one of the | 2834 | * potential target based first on the locks and then on refmap. It skips |
2871 | * queues and uses that node as the target. */ | 2835 | * nodes that are in the process of exiting the domain. |
2836 | */ | ||
2872 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | 2837 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, |
2873 | struct dlm_lock_resource *res) | 2838 | struct dlm_lock_resource *res) |
2874 | { | 2839 | { |
2875 | int i; | 2840 | enum dlm_lockres_list idx; |
2876 | struct list_head *queue = &res->granted; | 2841 | struct list_head *queue = &res->granted; |
2877 | struct dlm_lock *lock; | 2842 | struct dlm_lock *lock; |
2878 | int nodenum; | 2843 | int noderef; |
2844 | u8 nodenum = O2NM_MAX_NODES; | ||
2879 | 2845 | ||
2880 | assert_spin_locked(&dlm->spinlock); | 2846 | assert_spin_locked(&dlm->spinlock); |
2847 | assert_spin_locked(&res->spinlock); | ||
2881 | 2848 | ||
2882 | spin_lock(&res->spinlock); | 2849 | /* Go through all the locks */ |
2883 | for (i=0; i<3; i++) { | 2850 | for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { |
2851 | queue = dlm_list_idx_to_ptr(res, idx); | ||
2884 | list_for_each_entry(lock, queue, list) { | 2852 | list_for_each_entry(lock, queue, list) { |
2885 | /* up to the caller to make sure this node | 2853 | if (lock->ml.node == dlm->node_num) |
2886 | * is alive */ | 2854 | continue; |
2887 | if (lock->ml.node != dlm->node_num) { | 2855 | if (test_bit(lock->ml.node, dlm->exit_domain_map)) |
2888 | spin_unlock(&res->spinlock); | 2856 | continue; |
2889 | return lock->ml.node; | 2857 | nodenum = lock->ml.node; |
2890 | } | 2858 | goto bail; |
2891 | } | 2859 | } |
2892 | queue++; | ||
2893 | } | 2860 | } |
2894 | 2861 | ||
2895 | nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 2862 | /* Go thru the refmap */ |
2896 | if (nodenum < O2NM_MAX_NODES) { | 2863 | noderef = -1; |
2897 | spin_unlock(&res->spinlock); | ||
2898 | return nodenum; | ||
2899 | } | ||
2900 | spin_unlock(&res->spinlock); | ||
2901 | mlog(0, "have not found a suitable target yet! checking domain map\n"); | ||
2902 | |||
2903 | /* ok now we're getting desperate. pick anyone alive. */ | ||
2904 | nodenum = -1; | ||
2905 | while (1) { | 2864 | while (1) { |
2906 | nodenum = find_next_bit(dlm->domain_map, | 2865 | noderef = find_next_bit(res->refmap, O2NM_MAX_NODES, |
2907 | O2NM_MAX_NODES, nodenum+1); | 2866 | noderef + 1); |
2908 | mlog(0, "found %d in domain map\n", nodenum); | 2867 | if (noderef >= O2NM_MAX_NODES) |
2909 | if (nodenum >= O2NM_MAX_NODES) | ||
2910 | break; | 2868 | break; |
2911 | if (nodenum != dlm->node_num) { | 2869 | if (noderef == dlm->node_num) |
2912 | mlog(0, "picking %d\n", nodenum); | 2870 | continue; |
2913 | return nodenum; | 2871 | if (test_bit(noderef, dlm->exit_domain_map)) |
2914 | } | 2872 | continue; |
2873 | nodenum = noderef; | ||
2874 | goto bail; | ||
2915 | } | 2875 | } |
2916 | 2876 | ||
2917 | mlog(0, "giving up. no master to migrate to\n"); | 2877 | bail: |
2918 | return DLM_LOCK_RES_OWNER_UNKNOWN; | 2878 | return nodenum; |
2919 | } | 2879 | } |
2920 | 2880 | ||
2921 | |||
2922 | |||
2923 | /* this is called by the new master once all lockres | 2881 | /* this is called by the new master once all lockres |
2924 | * data has been received */ | 2882 | * data has been received */ |
2925 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | 2883 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, |