diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 13:55:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-05-26 13:55:15 -0400 |
commit | a74b81b0aff4a01e0816df5915c854fb52c5e87f (patch) | |
tree | 98364cec6a9e0c0fd510e5fe9af46f1ddb28956b /fs/ocfs2/dlm/dlmmaster.c | |
parent | f8d613e2a665bf1be9628a3c3f9bafe7599b32c0 (diff) | |
parent | ece928df16494becd43f999aff9bd530182e7e81 (diff) |
Merge branch 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2
* 'linux-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (28 commits)
Ocfs2: Teach local-mounted ocfs2 to handle unwritten_extents correctly.
ocfs2/dlm: Do not migrate resource to a node that is leaving the domain
ocfs2/dlm: Add new dlm message DLM_BEGIN_EXIT_DOMAIN_MSG
Ocfs2/move_extents: Set several trivial constraints for threshold.
Ocfs2/move_extents: Let defrag handle partial extent moving.
Ocfs2/move_extents: move/defrag extents within a certain range.
Ocfs2/move_extents: helper to calculate the defraging length in one run.
Ocfs2/move_extents: move entire/partial extent.
Ocfs2/move_extents: helpers to update the group descriptor and global bitmap inode.
Ocfs2/move_extents: helper to probe a proper region to move in an alloc group.
Ocfs2/move_extents: helper to validate and adjust moving goal.
Ocfs2/move_extents: find the victim alloc group, where the given #blk fits.
Ocfs2/move_extents: defrag a range of extent.
Ocfs2/move_extents: move a range of extent.
Ocfs2/move_extents: lock allocators and reserve metadata blocks and data clusters for extents moving.
Ocfs2/move_extents: Add basic framework and source files for extent moving.
Ocfs2/move_extents: Adding new ioctl code 'OCFS2_IOC_MOVE_EXT' to ocfs2.
Ocfs2/refcounttree: Publicize couple of funcs from refcounttree.c
Ocfs2: Add a new code 'OCFS2_INFO_FREEFRAG' for o2info ioctl.
Ocfs2: Add a new code 'OCFS2_INFO_FREEINODE' for o2info ioctl.
...
Diffstat (limited to 'fs/ocfs2/dlm/dlmmaster.c')
-rw-r--r-- | fs/ocfs2/dlm/dlmmaster.c | 255 |
1 files changed, 90 insertions, 165 deletions
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 84d166328cf..11eefb8c12e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2339,65 +2339,55 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data) | |||
2339 | dlm_lockres_put(res); | 2339 | dlm_lockres_put(res); |
2340 | } | 2340 | } |
2341 | 2341 | ||
2342 | /* Checks whether the lockres can be migrated. Returns 0 if yes, < 0 | 2342 | /* |
2343 | * if not. If 0, numlocks is set to the number of locks in the lockres. | 2343 | * A migrateable resource is one that is : |
2344 | * 1. locally mastered, and, | ||
2345 | * 2. zero local locks, and, | ||
2346 | * 3. one or more non-local locks, or, one or more references | ||
2347 | * Returns 1 if yes, 0 if not. | ||
2344 | */ | 2348 | */ |
2345 | static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, | 2349 | static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm, |
2346 | struct dlm_lock_resource *res, | 2350 | struct dlm_lock_resource *res) |
2347 | int *numlocks, | ||
2348 | int *hasrefs) | ||
2349 | { | 2351 | { |
2350 | int ret; | 2352 | enum dlm_lockres_list idx; |
2351 | int i; | 2353 | int nonlocal = 0, node_ref; |
2352 | int count = 0; | ||
2353 | struct list_head *queue; | 2354 | struct list_head *queue; |
2354 | struct dlm_lock *lock; | 2355 | struct dlm_lock *lock; |
2356 | u64 cookie; | ||
2355 | 2357 | ||
2356 | assert_spin_locked(&res->spinlock); | 2358 | assert_spin_locked(&res->spinlock); |
2357 | 2359 | ||
2358 | *numlocks = 0; | 2360 | if (res->owner != dlm->node_num) |
2359 | *hasrefs = 0; | 2361 | return 0; |
2360 | |||
2361 | ret = -EINVAL; | ||
2362 | if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { | ||
2363 | mlog(0, "cannot migrate lockres with unknown owner!\n"); | ||
2364 | goto leave; | ||
2365 | } | ||
2366 | |||
2367 | if (res->owner != dlm->node_num) { | ||
2368 | mlog(0, "cannot migrate lockres this node doesn't own!\n"); | ||
2369 | goto leave; | ||
2370 | } | ||
2371 | 2362 | ||
2372 | ret = 0; | 2363 | for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { |
2373 | queue = &res->granted; | 2364 | queue = dlm_list_idx_to_ptr(res, idx); |
2374 | for (i = 0; i < 3; i++) { | ||
2375 | list_for_each_entry(lock, queue, list) { | 2365 | list_for_each_entry(lock, queue, list) { |
2376 | ++count; | 2366 | if (lock->ml.node != dlm->node_num) { |
2377 | if (lock->ml.node == dlm->node_num) { | 2367 | nonlocal++; |
2378 | mlog(0, "found a lock owned by this node still " | 2368 | continue; |
2379 | "on the %s queue! will not migrate this " | ||
2380 | "lockres\n", (i == 0 ? "granted" : | ||
2381 | (i == 1 ? "converting" : | ||
2382 | "blocked"))); | ||
2383 | ret = -ENOTEMPTY; | ||
2384 | goto leave; | ||
2385 | } | 2369 | } |
2370 | cookie = be64_to_cpu(lock->ml.cookie); | ||
2371 | mlog(0, "%s: Not migrateable res %.*s, lock %u:%llu on " | ||
2372 | "%s list\n", dlm->name, res->lockname.len, | ||
2373 | res->lockname.name, | ||
2374 | dlm_get_lock_cookie_node(cookie), | ||
2375 | dlm_get_lock_cookie_seq(cookie), | ||
2376 | dlm_list_in_text(idx)); | ||
2377 | return 0; | ||
2386 | } | 2378 | } |
2387 | queue++; | ||
2388 | } | 2379 | } |
2389 | 2380 | ||
2390 | *numlocks = count; | 2381 | if (!nonlocal) { |
2391 | 2382 | node_ref = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | |
2392 | count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | 2383 | if (node_ref >= O2NM_MAX_NODES) |
2393 | if (count < O2NM_MAX_NODES) | 2384 | return 0; |
2394 | *hasrefs = 1; | 2385 | } |
2395 | 2386 | ||
2396 | mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name, | 2387 | mlog(0, "%s: res %.*s, Migrateable\n", dlm->name, res->lockname.len, |
2397 | res->lockname.len, res->lockname.name, *numlocks, *hasrefs); | 2388 | res->lockname.name); |
2398 | 2389 | ||
2399 | leave: | 2390 | return 1; |
2400 | return ret; | ||
2401 | } | 2391 | } |
2402 | 2392 | ||
2403 | /* | 2393 | /* |
@@ -2406,8 +2396,7 @@ leave: | |||
2406 | 2396 | ||
2407 | 2397 | ||
2408 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | 2398 | static int dlm_migrate_lockres(struct dlm_ctxt *dlm, |
2409 | struct dlm_lock_resource *res, | 2399 | struct dlm_lock_resource *res, u8 target) |
2410 | u8 target) | ||
2411 | { | 2400 | { |
2412 | struct dlm_master_list_entry *mle = NULL; | 2401 | struct dlm_master_list_entry *mle = NULL; |
2413 | struct dlm_master_list_entry *oldmle = NULL; | 2402 | struct dlm_master_list_entry *oldmle = NULL; |
@@ -2416,37 +2405,20 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2416 | const char *name; | 2405 | const char *name; |
2417 | unsigned int namelen; | 2406 | unsigned int namelen; |
2418 | int mle_added = 0; | 2407 | int mle_added = 0; |
2419 | int numlocks, hasrefs; | ||
2420 | int wake = 0; | 2408 | int wake = 0; |
2421 | 2409 | ||
2422 | if (!dlm_grab(dlm)) | 2410 | if (!dlm_grab(dlm)) |
2423 | return -EINVAL; | 2411 | return -EINVAL; |
2424 | 2412 | ||
2413 | BUG_ON(target == O2NM_MAX_NODES); | ||
2414 | |||
2425 | name = res->lockname.name; | 2415 | name = res->lockname.name; |
2426 | namelen = res->lockname.len; | 2416 | namelen = res->lockname.len; |
2427 | 2417 | ||
2428 | mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target); | 2418 | mlog(0, "%s: Migrating %.*s to node %u\n", dlm->name, namelen, name, |
2429 | 2419 | target); | |
2430 | /* | ||
2431 | * ensure this lockres is a proper candidate for migration | ||
2432 | */ | ||
2433 | spin_lock(&res->spinlock); | ||
2434 | ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); | ||
2435 | if (ret < 0) { | ||
2436 | spin_unlock(&res->spinlock); | ||
2437 | goto leave; | ||
2438 | } | ||
2439 | spin_unlock(&res->spinlock); | ||
2440 | |||
2441 | /* no work to do */ | ||
2442 | if (numlocks == 0 && !hasrefs) | ||
2443 | goto leave; | ||
2444 | |||
2445 | /* | ||
2446 | * preallocate up front | ||
2447 | * if this fails, abort | ||
2448 | */ | ||
2449 | 2420 | ||
2421 | /* preallocate up front. if this fails, abort */ | ||
2450 | ret = -ENOMEM; | 2422 | ret = -ENOMEM; |
2451 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); | 2423 | mres = (struct dlm_migratable_lockres *) __get_free_page(GFP_NOFS); |
2452 | if (!mres) { | 2424 | if (!mres) { |
@@ -2462,35 +2434,10 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm, | |||
2462 | ret = 0; | 2434 | ret = 0; |
2463 | 2435 | ||
2464 | /* | 2436 | /* |
2465 | * find a node to migrate the lockres to | ||
2466 | */ | ||
2467 | |||
2468 | spin_lock(&dlm->spinlock); | ||
2469 | /* pick a new node */ | ||
2470 | if (!test_bit(target, dlm->domain_map) || | ||
2471 | target >= O2NM_MAX_NODES) { | ||
2472 | target = dlm_pick_migration_target(dlm, res); | ||
2473 | } | ||
2474 | mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name, | ||
2475 | namelen, name, target); | ||
2476 | |||
2477 | if (target >= O2NM_MAX_NODES || | ||
2478 | !test_bit(target, dlm->domain_map)) { | ||
2479 | /* target chosen is not alive */ | ||
2480 | ret = -EINVAL; | ||
2481 | } | ||
2482 | |||
2483 | if (ret) { | ||
2484 | spin_unlock(&dlm->spinlock); | ||
2485 | goto fail; | ||
2486 | } | ||
2487 | |||
2488 | mlog(0, "continuing with target = %u\n", target); | ||
2489 | |||
2490 | /* | ||
2491 | * clear any existing master requests and | 2437 | * clear any existing master requests and |
2492 | * add the migration mle to the list | 2438 | * add the migration mle to the list |
2493 | */ | 2439 | */ |
2440 | spin_lock(&dlm->spinlock); | ||
2494 | spin_lock(&dlm->master_lock); | 2441 | spin_lock(&dlm->master_lock); |
2495 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, | 2442 | ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, |
2496 | namelen, target, dlm->node_num); | 2443 | namelen, target, dlm->node_num); |
@@ -2531,6 +2478,7 @@ fail: | |||
2531 | dlm_put_mle(mle); | 2478 | dlm_put_mle(mle); |
2532 | } else if (mle) { | 2479 | } else if (mle) { |
2533 | kmem_cache_free(dlm_mle_cache, mle); | 2480 | kmem_cache_free(dlm_mle_cache, mle); |
2481 | mle = NULL; | ||
2534 | } | 2482 | } |
2535 | goto leave; | 2483 | goto leave; |
2536 | } | 2484 | } |
@@ -2652,69 +2600,52 @@ leave: | |||
2652 | if (wake) | 2600 | if (wake) |
2653 | wake_up(&res->wq); | 2601 | wake_up(&res->wq); |
2654 | 2602 | ||
2655 | /* TODO: cleanup */ | ||
2656 | if (mres) | 2603 | if (mres) |
2657 | free_page((unsigned long)mres); | 2604 | free_page((unsigned long)mres); |
2658 | 2605 | ||
2659 | dlm_put(dlm); | 2606 | dlm_put(dlm); |
2660 | 2607 | ||
2661 | mlog(0, "returning %d\n", ret); | 2608 | mlog(0, "%s: Migrating %.*s to %u, returns %d\n", dlm->name, namelen, |
2609 | name, target, ret); | ||
2662 | return ret; | 2610 | return ret; |
2663 | } | 2611 | } |
2664 | 2612 | ||
2665 | #define DLM_MIGRATION_RETRY_MS 100 | 2613 | #define DLM_MIGRATION_RETRY_MS 100 |
2666 | 2614 | ||
2667 | /* Should be called only after beginning the domain leave process. | 2615 | /* |
2616 | * Should be called only after beginning the domain leave process. | ||
2668 | * There should not be any remaining locks on nonlocal lock resources, | 2617 | * There should not be any remaining locks on nonlocal lock resources, |
2669 | * and there should be no local locks left on locally mastered resources. | 2618 | * and there should be no local locks left on locally mastered resources. |
2670 | * | 2619 | * |
2671 | * Called with the dlm spinlock held, may drop it to do migration, but | 2620 | * Called with the dlm spinlock held, may drop it to do migration, but |
2672 | * will re-acquire before exit. | 2621 | * will re-acquire before exit. |
2673 | * | 2622 | * |
2674 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ | 2623 | * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped |
2624 | */ | ||
2675 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) | 2625 | int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) |
2676 | { | 2626 | { |
2677 | int ret; | 2627 | int ret; |
2678 | int lock_dropped = 0; | 2628 | int lock_dropped = 0; |
2679 | int numlocks, hasrefs; | 2629 | u8 target = O2NM_MAX_NODES; |
2630 | |||
2631 | assert_spin_locked(&dlm->spinlock); | ||
2680 | 2632 | ||
2681 | spin_lock(&res->spinlock); | 2633 | spin_lock(&res->spinlock); |
2682 | if (res->owner != dlm->node_num) { | 2634 | if (dlm_is_lockres_migrateable(dlm, res)) |
2683 | if (!__dlm_lockres_unused(res)) { | 2635 | target = dlm_pick_migration_target(dlm, res); |
2684 | mlog(ML_ERROR, "%s:%.*s: this node is not master, " | 2636 | spin_unlock(&res->spinlock); |
2685 | "trying to free this but locks remain\n", | ||
2686 | dlm->name, res->lockname.len, res->lockname.name); | ||
2687 | } | ||
2688 | spin_unlock(&res->spinlock); | ||
2689 | goto leave; | ||
2690 | } | ||
2691 | 2637 | ||
2692 | /* No need to migrate a lockres having no locks */ | 2638 | if (target == O2NM_MAX_NODES) |
2693 | ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs); | ||
2694 | if (ret >= 0 && numlocks == 0 && !hasrefs) { | ||
2695 | spin_unlock(&res->spinlock); | ||
2696 | goto leave; | 2639 | goto leave; |
2697 | } | ||
2698 | spin_unlock(&res->spinlock); | ||
2699 | 2640 | ||
2700 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ | 2641 | /* Wheee! Migrate lockres here! Will sleep so drop spinlock. */ |
2701 | spin_unlock(&dlm->spinlock); | 2642 | spin_unlock(&dlm->spinlock); |
2702 | lock_dropped = 1; | 2643 | lock_dropped = 1; |
2703 | while (1) { | 2644 | ret = dlm_migrate_lockres(dlm, res, target); |
2704 | ret = dlm_migrate_lockres(dlm, res, O2NM_MAX_NODES); | 2645 | if (ret) |
2705 | if (ret >= 0) | 2646 | mlog(0, "%s: res %.*s, Migrate to node %u failed with %d\n", |
2706 | break; | 2647 | dlm->name, res->lockname.len, res->lockname.name, |
2707 | if (ret == -ENOTEMPTY) { | 2648 | target, ret); |
2708 | mlog(ML_ERROR, "lockres %.*s still has local locks!\n", | ||
2709 | res->lockname.len, res->lockname.name); | ||
2710 | BUG(); | ||
2711 | } | ||
2712 | |||
2713 | mlog(0, "lockres %.*s: migrate failed, " | ||
2714 | "retrying\n", res->lockname.len, | ||
2715 | res->lockname.name); | ||
2716 | msleep(DLM_MIGRATION_RETRY_MS); | ||
2717 | } | ||
2718 | spin_lock(&dlm->spinlock); | 2649 | spin_lock(&dlm->spinlock); |
2719 | leave: | 2650 | leave: |
2720 | return lock_dropped; | 2651 | return lock_dropped; |
@@ -2898,61 +2829,55 @@ static void dlm_remove_nonlocal_locks(struct dlm_ctxt *dlm, | |||
2898 | } | 2829 | } |
2899 | } | 2830 | } |
2900 | 2831 | ||
2901 | /* for now this is not too intelligent. we will | 2832 | /* |
2902 | * need stats to make this do the right thing. | 2833 | * Pick a node to migrate the lock resource to. This function selects a |
2903 | * this just finds the first lock on one of the | 2834 | * potential target based first on the locks and then on refmap. It skips |
2904 | * queues and uses that node as the target. */ | 2835 | * nodes that are in the process of exiting the domain. |
2836 | */ | ||
2905 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, | 2837 | static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm, |
2906 | struct dlm_lock_resource *res) | 2838 | struct dlm_lock_resource *res) |
2907 | { | 2839 | { |
2908 | int i; | 2840 | enum dlm_lockres_list idx; |
2909 | struct list_head *queue = &res->granted; | 2841 | struct list_head *queue = &res->granted; |
2910 | struct dlm_lock *lock; | 2842 | struct dlm_lock *lock; |
2911 | int nodenum; | 2843 | int noderef; |
2844 | u8 nodenum = O2NM_MAX_NODES; | ||
2912 | 2845 | ||
2913 | assert_spin_locked(&dlm->spinlock); | 2846 | assert_spin_locked(&dlm->spinlock); |
2847 | assert_spin_locked(&res->spinlock); | ||
2914 | 2848 | ||
2915 | spin_lock(&res->spinlock); | 2849 | /* Go through all the locks */ |
2916 | for (i=0; i<3; i++) { | 2850 | for (idx = DLM_GRANTED_LIST; idx <= DLM_BLOCKED_LIST; idx++) { |
2851 | queue = dlm_list_idx_to_ptr(res, idx); | ||
2917 | list_for_each_entry(lock, queue, list) { | 2852 | list_for_each_entry(lock, queue, list) { |
2918 | /* up to the caller to make sure this node | 2853 | if (lock->ml.node == dlm->node_num) |
2919 | * is alive */ | 2854 | continue; |
2920 | if (lock->ml.node != dlm->node_num) { | 2855 | if (test_bit(lock->ml.node, dlm->exit_domain_map)) |
2921 | spin_unlock(&res->spinlock); | 2856 | continue; |
2922 | return lock->ml.node; | 2857 | nodenum = lock->ml.node; |
2923 | } | 2858 | goto bail; |
2924 | } | 2859 | } |
2925 | queue++; | ||
2926 | } | ||
2927 | |||
2928 | nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); | ||
2929 | if (nodenum < O2NM_MAX_NODES) { | ||
2930 | spin_unlock(&res->spinlock); | ||
2931 | return nodenum; | ||
2932 | } | 2860 | } |
2933 | spin_unlock(&res->spinlock); | ||
2934 | mlog(0, "have not found a suitable target yet! checking domain map\n"); | ||
2935 | 2861 | ||
2936 | /* ok now we're getting desperate. pick anyone alive. */ | 2862 | /* Go thru the refmap */ |
2937 | nodenum = -1; | 2863 | noderef = -1; |
2938 | while (1) { | 2864 | while (1) { |
2939 | nodenum = find_next_bit(dlm->domain_map, | 2865 | noderef = find_next_bit(res->refmap, O2NM_MAX_NODES, |
2940 | O2NM_MAX_NODES, nodenum+1); | 2866 | noderef + 1); |
2941 | mlog(0, "found %d in domain map\n", nodenum); | 2867 | if (noderef >= O2NM_MAX_NODES) |
2942 | if (nodenum >= O2NM_MAX_NODES) | ||
2943 | break; | 2868 | break; |
2944 | if (nodenum != dlm->node_num) { | 2869 | if (noderef == dlm->node_num) |
2945 | mlog(0, "picking %d\n", nodenum); | 2870 | continue; |
2946 | return nodenum; | 2871 | if (test_bit(noderef, dlm->exit_domain_map)) |
2947 | } | 2872 | continue; |
2873 | nodenum = noderef; | ||
2874 | goto bail; | ||
2948 | } | 2875 | } |
2949 | 2876 | ||
2950 | mlog(0, "giving up. no master to migrate to\n"); | 2877 | bail: |
2951 | return DLM_LOCK_RES_OWNER_UNKNOWN; | 2878 | return nodenum; |
2952 | } | 2879 | } |
2953 | 2880 | ||
2954 | |||
2955 | |||
2956 | /* this is called by the new master once all lockres | 2881 | /* this is called by the new master once all lockres |
2957 | * data has been received */ | 2882 | * data has been received */ |
2958 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, | 2883 | static int dlm_do_migrate_request(struct dlm_ctxt *dlm, |