diff options
author | Mark Fasheh <mark.fasheh@oracle.com> | 2007-09-24 18:56:19 -0400 |
---|---|---|
committer | Mark Fasheh <mark.fasheh@oracle.com> | 2008-01-25 17:45:34 -0500 |
commit | 34d024f84345807bf44163fac84e921513dde323 (patch) | |
tree | aef303ae5feeb42cb8791acc1c0b8a74f0a97674 /fs/ocfs2 | |
parent | 6f7b056ea9c6fa978c79ca626eff43549df94dbb (diff) |
ocfs2: Remove mount/unmount votes
The node maps that are set/unset by these votes are no longer relevant, thus
we can remove the mount and umount votes. Since those are the last two
remaining votes, we can also remove the entire vote infrastructure.
The vote thread has been renamed to the downconvert thread, and the small
amount of functionality related to managing it has been moved into
fs/ocfs2/dlmglue.c. All references to votes have been removed or updated.
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Diffstat (limited to 'fs/ocfs2')
-rw-r--r-- | fs/ocfs2/Makefile | 3 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp_internal.h | 5 | ||||
-rw-r--r-- | fs/ocfs2/dcache.c | 8 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.c | 164 | ||||
-rw-r--r-- | fs/ocfs2/dlmglue.h | 5 | ||||
-rw-r--r-- | fs/ocfs2/heartbeat.c | 7 | ||||
-rw-r--r-- | fs/ocfs2/inode.c | 36 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 15 | ||||
-rw-r--r-- | fs/ocfs2/namei.c | 10 | ||||
-rw-r--r-- | fs/ocfs2/ocfs2.h | 25 | ||||
-rw-r--r-- | fs/ocfs2/slot_map.c | 19 | ||||
-rw-r--r-- | fs/ocfs2/slot_map.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 43 | ||||
-rw-r--r-- | fs/ocfs2/vote.c | 756 | ||||
-rw-r--r-- | fs/ocfs2/vote.h | 48 |
15 files changed, 179 insertions, 967 deletions
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index 9fb8132f19b0..d2057e7fbda7 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
@@ -27,8 +27,7 @@ ocfs2-objs := \ | |||
27 | symlink.o \ | 27 | symlink.o \ |
28 | sysfile.o \ | 28 | sysfile.o \ |
29 | uptodate.o \ | 29 | uptodate.o \ |
30 | ver.o \ | 30 | ver.o |
31 | vote.o | ||
32 | 31 | ||
33 | obj-$(CONFIG_OCFS2_FS) += cluster/ | 32 | obj-$(CONFIG_OCFS2_FS) += cluster/ |
34 | obj-$(CONFIG_OCFS2_FS) += dlm/ | 33 | obj-$(CONFIG_OCFS2_FS) += dlm/ |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 9606111fe89d..79bd6665b3ca 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -38,6 +38,9 @@ | |||
38 | * locking semantics of the file system using the protocol. It should | 38 | * locking semantics of the file system using the protocol. It should |
39 | * be somewhere else, I'm sure, but right now it isn't. | 39 | * be somewhere else, I'm sure, but right now it isn't. |
40 | * | 40 | * |
41 | * New in version 9: | ||
42 | * - All votes removed | ||
43 | * | ||
41 | * New in version 8: | 44 | * New in version 8: |
42 | * - Replace delete inode votes with a cluster lock | 45 | * - Replace delete inode votes with a cluster lock |
43 | * | 46 | * |
@@ -60,7 +63,7 @@ | |||
60 | * - full 64 bit i_size in the metadata lock lvbs | 63 | * - full 64 bit i_size in the metadata lock lvbs |
61 | * - introduction of "rw" lock and pushing meta/data locking down | 64 | * - introduction of "rw" lock and pushing meta/data locking down |
62 | */ | 65 | */ |
63 | #define O2NET_PROTOCOL_VERSION 8ULL | 66 | #define O2NET_PROTOCOL_VERSION 9ULL |
64 | struct o2net_handshake { | 67 | struct o2net_handshake { |
65 | __be64 protocol_version; | 68 | __be64 protocol_version; |
66 | __be64 connector_id; | 69 | __be64 connector_id; |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 9923278ea6d4..b1cc7c381e88 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -128,9 +128,9 @@ static int ocfs2_match_dentry(struct dentry *dentry, | |||
128 | /* | 128 | /* |
129 | * Walk the inode alias list, and find a dentry which has a given | 129 | * Walk the inode alias list, and find a dentry which has a given |
130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it | 130 | * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it |
131 | * is looking for a dentry_lock reference. The vote thread is looking | 131 | * is looking for a dentry_lock reference. The downconvert thread is |
132 | * to unhash aliases, so we allow it to skip any that already have | 132 | * looking to unhash aliases, so we allow it to skip any that already |
133 | * that property. | 133 | * have that property. |
134 | */ | 134 | */ |
135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, | 135 | struct dentry *ocfs2_find_local_alias(struct inode *inode, |
136 | u64 parent_blkno, | 136 | u64 parent_blkno, |
@@ -266,7 +266,7 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
266 | dl->dl_count = 0; | 266 | dl->dl_count = 0; |
267 | /* | 267 | /* |
268 | * Does this have to happen below, for all attaches, in case | 268 | * Does this have to happen below, for all attaches, in case |
269 | * the struct inode gets blown away by votes? | 269 | * the struct inode gets blown away by the downconvert thread? |
270 | */ | 270 | */ |
271 | dl->dl_inode = igrab(inode); | 271 | dl->dl_inode = igrab(inode); |
272 | dl->dl_parent_blkno = parent_blkno; | 272 | dl->dl_parent_blkno = parent_blkno; |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4e97dcceaf8f..b3068ade3f7b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include "slot_map.h" | 55 | #include "slot_map.h" |
56 | #include "super.h" | 56 | #include "super.h" |
57 | #include "uptodate.h" | 57 | #include "uptodate.h" |
58 | #include "vote.h" | ||
59 | 58 | ||
60 | #include "buffer_head_io.h" | 59 | #include "buffer_head_io.h" |
61 | 60 | ||
@@ -153,10 +152,10 @@ struct ocfs2_lock_res_ops { | |||
153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 152 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); |
154 | 153 | ||
155 | /* | 154 | /* |
156 | * Optionally called in the downconvert (or "vote") thread | 155 | * Optionally called in the downconvert thread after a |
157 | * after a successful downconvert. The lockres will not be | 156 | * successful downconvert. The lockres will not be referenced |
158 | * referenced after this callback is called, so it is safe to | 157 | * after this callback is called, so it is safe to free |
159 | * free memory, etc. | 158 | * memory, etc. |
160 | * | 159 | * |
161 | * The exact semantics of when this is called are controlled | 160 | * The exact semantics of when this is called are controlled |
162 | * by ->downconvert_worker() | 161 | * by ->downconvert_worker() |
@@ -310,8 +309,9 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
310 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 309 | "resource %s: %s\n", dlm_errname(_stat), _func, \ |
311 | _lockres->l_name, dlm_errmsg(_stat)); \ | 310 | _lockres->l_name, dlm_errmsg(_stat)); \ |
312 | } while (0) | 311 | } while (0) |
313 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 312 | static int ocfs2_downconvert_thread(void *arg); |
314 | struct ocfs2_lock_res *lockres); | 313 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
314 | struct ocfs2_lock_res *lockres); | ||
315 | static int ocfs2_meta_lock_update(struct inode *inode, | 315 | static int ocfs2_meta_lock_update(struct inode *inode, |
316 | struct buffer_head **bh); | 316 | struct buffer_head **bh); |
317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
@@ -732,7 +732,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
732 | 732 | ||
733 | wake_up(&lockres->l_event); | 733 | wake_up(&lockres->l_event); |
734 | 734 | ||
735 | ocfs2_kick_vote_thread(osb); | 735 | ocfs2_wake_downconvert_thread(osb); |
736 | } | 736 | } |
737 | 737 | ||
738 | static void ocfs2_locking_ast(void *opaque) | 738 | static void ocfs2_locking_ast(void *opaque) |
@@ -1089,7 +1089,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
1089 | mlog_entry_void(); | 1089 | mlog_entry_void(); |
1090 | spin_lock_irqsave(&lockres->l_lock, flags); | 1090 | spin_lock_irqsave(&lockres->l_lock, flags); |
1091 | ocfs2_dec_holders(lockres, level); | 1091 | ocfs2_dec_holders(lockres, level); |
1092 | ocfs2_vote_on_unlock(osb, lockres); | 1092 | ocfs2_downconvert_on_unlock(osb, lockres); |
1093 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1093 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1094 | mlog_exit_void(); | 1094 | mlog_exit_void(); |
1095 | } | 1095 | } |
@@ -1372,15 +1372,15 @@ int ocfs2_data_lock_with_page(struct inode *inode, | |||
1372 | return ret; | 1372 | return ret; |
1373 | } | 1373 | } |
1374 | 1374 | ||
1375 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 1375 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
1376 | struct ocfs2_lock_res *lockres) | 1376 | struct ocfs2_lock_res *lockres) |
1377 | { | 1377 | { |
1378 | int kick = 0; | 1378 | int kick = 0; |
1379 | 1379 | ||
1380 | mlog_entry_void(); | 1380 | mlog_entry_void(); |
1381 | 1381 | ||
1382 | /* If we know that another node is waiting on our lock, kick | 1382 | /* If we know that another node is waiting on our lock, kick |
1383 | * the vote thread * pre-emptively when we reach a release | 1383 | * the downconvert thread * pre-emptively when we reach a release |
1384 | * condition. */ | 1384 | * condition. */ |
1385 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1385 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
1386 | switch(lockres->l_blocking) { | 1386 | switch(lockres->l_blocking) { |
@@ -1398,7 +1398,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | |||
1398 | } | 1398 | } |
1399 | 1399 | ||
1400 | if (kick) | 1400 | if (kick) |
1401 | ocfs2_kick_vote_thread(osb); | 1401 | ocfs2_wake_downconvert_thread(osb); |
1402 | 1402 | ||
1403 | mlog_exit_void(); | 1403 | mlog_exit_void(); |
1404 | } | 1404 | } |
@@ -1832,19 +1832,20 @@ bail: | |||
1832 | } | 1832 | } |
1833 | 1833 | ||
1834 | /* | 1834 | /* |
1835 | * This is working around a lock inversion between tasks acquiring DLM locks | 1835 | * This is working around a lock inversion between tasks acquiring DLM |
1836 | * while holding a page lock and the vote thread which blocks dlm lock acquiry | 1836 | * locks while holding a page lock and the downconvert thread which |
1837 | * while acquiring page locks. | 1837 | * blocks dlm lock acquiry while acquiring page locks. |
1838 | * | 1838 | * |
1839 | * ** These _with_page variantes are only intended to be called from aop | 1839 | * ** These _with_page variantes are only intended to be called from aop |
1840 | * methods that hold page locks and return a very specific *positive* error | 1840 | * methods that hold page locks and return a very specific *positive* error |
1841 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 1841 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** |
1842 | * | 1842 | * |
1843 | * The DLM is called such that it returns -EAGAIN if it would have blocked | 1843 | * The DLM is called such that it returns -EAGAIN if it would have |
1844 | * waiting for the vote thread. In that case we unlock our page so the vote | 1844 | * blocked waiting for the downconvert thread. In that case we unlock |
1845 | * thread can make progress. Once we've done this we have to return | 1845 | * our page so the downconvert thread can make progress. Once we've |
1846 | * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up | 1846 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method |
1847 | * into the VFS who will then immediately retry the aop call. | 1847 | * that called us can bubble that back up into the VFS who will then |
1848 | * immediately retry the aop call. | ||
1848 | * | 1849 | * |
1849 | * We do a blocking lock and immediate unlock before returning, though, so that | 1850 | * We do a blocking lock and immediate unlock before returning, though, so that |
1850 | * the lock has a great chance of being cached on this node by the time the VFS | 1851 | * the lock has a great chance of being cached on this node by the time the VFS |
@@ -2320,11 +2321,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2320 | goto bail; | 2321 | goto bail; |
2321 | } | 2322 | } |
2322 | 2323 | ||
2323 | /* launch vote thread */ | 2324 | /* launch downconvert thread */ |
2324 | osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); | 2325 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); |
2325 | if (IS_ERR(osb->vote_task)) { | 2326 | if (IS_ERR(osb->dc_task)) { |
2326 | status = PTR_ERR(osb->vote_task); | 2327 | status = PTR_ERR(osb->dc_task); |
2327 | osb->vote_task = NULL; | 2328 | osb->dc_task = NULL; |
2328 | mlog_errno(status); | 2329 | mlog_errno(status); |
2329 | goto bail; | 2330 | goto bail; |
2330 | } | 2331 | } |
@@ -2353,8 +2354,8 @@ local: | |||
2353 | bail: | 2354 | bail: |
2354 | if (status < 0) { | 2355 | if (status < 0) { |
2355 | ocfs2_dlm_shutdown_debug(osb); | 2356 | ocfs2_dlm_shutdown_debug(osb); |
2356 | if (osb->vote_task) | 2357 | if (osb->dc_task) |
2357 | kthread_stop(osb->vote_task); | 2358 | kthread_stop(osb->dc_task); |
2358 | } | 2359 | } |
2359 | 2360 | ||
2360 | mlog_exit(status); | 2361 | mlog_exit(status); |
@@ -2369,9 +2370,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
2369 | 2370 | ||
2370 | ocfs2_drop_osb_locks(osb); | 2371 | ocfs2_drop_osb_locks(osb); |
2371 | 2372 | ||
2372 | if (osb->vote_task) { | 2373 | if (osb->dc_task) { |
2373 | kthread_stop(osb->vote_task); | 2374 | kthread_stop(osb->dc_task); |
2374 | osb->vote_task = NULL; | 2375 | osb->dc_task = NULL; |
2375 | } | 2376 | } |
2376 | 2377 | ||
2377 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2378 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
@@ -2527,7 +2528,7 @@ out: | |||
2527 | 2528 | ||
2528 | /* Mark the lockres as being dropped. It will no longer be | 2529 | /* Mark the lockres as being dropped. It will no longer be |
2529 | * queued if blocking, but we still may have to wait on it | 2530 | * queued if blocking, but we still may have to wait on it |
2530 | * being dequeued from the vote thread before we can consider | 2531 | * being dequeued from the downconvert thread before we can consider |
2531 | * it safe to drop. | 2532 | * it safe to drop. |
2532 | * | 2533 | * |
2533 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 2534 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
@@ -2903,7 +2904,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) | |||
2903 | 2904 | ||
2904 | /* | 2905 | /* |
2905 | * Does the final reference drop on our dentry lock. Right now this | 2906 | * Does the final reference drop on our dentry lock. Right now this |
2906 | * happens in the vote thread, but we could choose to simplify the | 2907 | * happens in the downconvert thread, but we could choose to simplify the |
2907 | * dlmglue API and push these off to the ocfs2_wq in the future. | 2908 | * dlmglue API and push these off to the ocfs2_wq in the future. |
2908 | */ | 2909 | */ |
2909 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 2910 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, |
@@ -3042,7 +3043,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
3042 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 3043 | mlog(0, "lockres %s blocked.\n", lockres->l_name); |
3043 | 3044 | ||
3044 | /* Detect whether a lock has been marked as going away while | 3045 | /* Detect whether a lock has been marked as going away while |
3045 | * the vote thread was processing other things. A lock can | 3046 | * the downconvert thread was processing other things. A lock can |
3046 | * still be marked with OCFS2_LOCK_FREEING after this check, | 3047 | * still be marked with OCFS2_LOCK_FREEING after this check, |
3047 | * but short circuiting here will still save us some | 3048 | * but short circuiting here will still save us some |
3048 | * performance. */ | 3049 | * performance. */ |
@@ -3091,13 +3092,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
3091 | 3092 | ||
3092 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 3093 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); |
3093 | 3094 | ||
3094 | spin_lock(&osb->vote_task_lock); | 3095 | spin_lock(&osb->dc_task_lock); |
3095 | if (list_empty(&lockres->l_blocked_list)) { | 3096 | if (list_empty(&lockres->l_blocked_list)) { |
3096 | list_add_tail(&lockres->l_blocked_list, | 3097 | list_add_tail(&lockres->l_blocked_list, |
3097 | &osb->blocked_lock_list); | 3098 | &osb->blocked_lock_list); |
3098 | osb->blocked_lock_count++; | 3099 | osb->blocked_lock_count++; |
3099 | } | 3100 | } |
3100 | spin_unlock(&osb->vote_task_lock); | 3101 | spin_unlock(&osb->dc_task_lock); |
3101 | 3102 | ||
3102 | mlog_exit_void(); | 3103 | mlog_exit_void(); |
3103 | } | 3104 | } |
3105 | |||
3106 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | ||
3107 | { | ||
3108 | unsigned long processed; | ||
3109 | struct ocfs2_lock_res *lockres; | ||
3110 | |||
3111 | mlog_entry_void(); | ||
3112 | |||
3113 | spin_lock(&osb->dc_task_lock); | ||
3114 | /* grab this early so we know to try again if a state change and | ||
3115 | * wake happens part-way through our work */ | ||
3116 | osb->dc_work_sequence = osb->dc_wake_sequence; | ||
3117 | |||
3118 | processed = osb->blocked_lock_count; | ||
3119 | while (processed) { | ||
3120 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
3121 | |||
3122 | lockres = list_entry(osb->blocked_lock_list.next, | ||
3123 | struct ocfs2_lock_res, l_blocked_list); | ||
3124 | list_del_init(&lockres->l_blocked_list); | ||
3125 | osb->blocked_lock_count--; | ||
3126 | spin_unlock(&osb->dc_task_lock); | ||
3127 | |||
3128 | BUG_ON(!processed); | ||
3129 | processed--; | ||
3130 | |||
3131 | ocfs2_process_blocked_lock(osb, lockres); | ||
3132 | |||
3133 | spin_lock(&osb->dc_task_lock); | ||
3134 | } | ||
3135 | spin_unlock(&osb->dc_task_lock); | ||
3136 | |||
3137 | mlog_exit_void(); | ||
3138 | } | ||
3139 | |||
3140 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | ||
3141 | { | ||
3142 | int empty = 0; | ||
3143 | |||
3144 | spin_lock(&osb->dc_task_lock); | ||
3145 | if (list_empty(&osb->blocked_lock_list)) | ||
3146 | empty = 1; | ||
3147 | |||
3148 | spin_unlock(&osb->dc_task_lock); | ||
3149 | return empty; | ||
3150 | } | ||
3151 | |||
3152 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | ||
3153 | { | ||
3154 | int should_wake = 0; | ||
3155 | |||
3156 | spin_lock(&osb->dc_task_lock); | ||
3157 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | ||
3158 | should_wake = 1; | ||
3159 | spin_unlock(&osb->dc_task_lock); | ||
3160 | |||
3161 | return should_wake; | ||
3162 | } | ||
3163 | |||
3164 | int ocfs2_downconvert_thread(void *arg) | ||
3165 | { | ||
3166 | int status = 0; | ||
3167 | struct ocfs2_super *osb = arg; | ||
3168 | |||
3169 | /* only quit once we've been asked to stop and there is no more | ||
3170 | * work available */ | ||
3171 | while (!(kthread_should_stop() && | ||
3172 | ocfs2_downconvert_thread_lists_empty(osb))) { | ||
3173 | |||
3174 | wait_event_interruptible(osb->dc_event, | ||
3175 | ocfs2_downconvert_thread_should_wake(osb) || | ||
3176 | kthread_should_stop()); | ||
3177 | |||
3178 | mlog(0, "downconvert_thread: awoken\n"); | ||
3179 | |||
3180 | ocfs2_downconvert_thread_do_work(osb); | ||
3181 | } | ||
3182 | |||
3183 | osb->dc_task = NULL; | ||
3184 | return status; | ||
3185 | } | ||
3186 | |||
3187 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | ||
3188 | { | ||
3189 | spin_lock(&osb->dc_task_lock); | ||
3190 | /* make sure the voting thread gets a swipe at whatever changes | ||
3191 | * the caller may have made to the voting state */ | ||
3192 | osb->dc_wake_sequence++; | ||
3193 | spin_unlock(&osb->dc_task_lock); | ||
3194 | wake_up(&osb->dc_event); | ||
3195 | } | ||
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 87a785e41205..931f6ee55146 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -54,7 +54,7 @@ struct ocfs2_meta_lvb { | |||
54 | #define OCFS2_META_LOCK_RECOVERY (0x01) | 54 | #define OCFS2_META_LOCK_RECOVERY (0x01) |
55 | /* Instruct the dlm not to queue ourselves on the other node. */ | 55 | /* Instruct the dlm not to queue ourselves on the other node. */ |
56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) | 56 | #define OCFS2_META_LOCK_NOQUEUE (0x02) |
57 | /* don't block waiting for the vote thread, instead return -EAGAIN */ | 57 | /* don't block waiting for the downconvert thread, instead return -EAGAIN */ |
58 | #define OCFS2_LOCK_NONBLOCK (0x04) | 58 | #define OCFS2_LOCK_NONBLOCK (0x04) |
59 | 59 | ||
60 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 60 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
@@ -112,9 +112,10 @@ void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | |||
112 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | 112 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, |
113 | struct ocfs2_lock_res *lockres); | 113 | struct ocfs2_lock_res *lockres); |
114 | 114 | ||
115 | /* for the vote thread */ | 115 | /* for the downconvert thread */ |
116 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | 116 | void ocfs2_process_blocked_lock(struct ocfs2_super *osb, |
117 | struct ocfs2_lock_res *lockres); | 117 | struct ocfs2_lock_res *lockres); |
118 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb); | ||
118 | 119 | ||
119 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); | 120 | struct ocfs2_dlm_debug *ocfs2_new_dlm_debug(void); |
120 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); | 121 | void ocfs2_put_dlm_debug(struct ocfs2_dlm_debug *dlm_debug); |
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 6239fc52790c..c0efd9489fe8 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
@@ -41,7 +41,6 @@ | |||
41 | #include "heartbeat.h" | 41 | #include "heartbeat.h" |
42 | #include "inode.h" | 42 | #include "inode.h" |
43 | #include "journal.h" | 43 | #include "journal.h" |
44 | #include "vote.h" | ||
45 | 44 | ||
46 | #include "buffer_head_io.h" | 45 | #include "buffer_head_io.h" |
47 | 46 | ||
@@ -58,9 +57,7 @@ static void __ocfs2_node_map_set(struct ocfs2_node_map *target, | |||
58 | void ocfs2_init_node_maps(struct ocfs2_super *osb) | 57 | void ocfs2_init_node_maps(struct ocfs2_super *osb) |
59 | { | 58 | { |
60 | spin_lock_init(&osb->node_map_lock); | 59 | spin_lock_init(&osb->node_map_lock); |
61 | ocfs2_node_map_init(&osb->mounted_map); | ||
62 | ocfs2_node_map_init(&osb->recovery_map); | 60 | ocfs2_node_map_init(&osb->recovery_map); |
63 | ocfs2_node_map_init(&osb->umount_map); | ||
64 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | 61 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); |
65 | } | 62 | } |
66 | 63 | ||
@@ -82,8 +79,6 @@ static void ocfs2_do_node_down(int node_num, | |||
82 | } | 79 | } |
83 | 80 | ||
84 | ocfs2_recovery_thread(osb, node_num); | 81 | ocfs2_recovery_thread(osb, node_num); |
85 | |||
86 | ocfs2_remove_node_from_vote_queues(osb, node_num); | ||
87 | } | 82 | } |
88 | 83 | ||
89 | /* Called from the dlm when it's about to evict a node. We may also | 84 | /* Called from the dlm when it's about to evict a node. We may also |
@@ -268,8 +263,6 @@ int ocfs2_recovery_map_set(struct ocfs2_super *osb, | |||
268 | 263 | ||
269 | spin_lock(&osb->node_map_lock); | 264 | spin_lock(&osb->node_map_lock); |
270 | 265 | ||
271 | __ocfs2_node_map_clear_bit(&osb->mounted_map, num); | ||
272 | |||
273 | if (!test_bit(num, osb->recovery_map.map)) { | 266 | if (!test_bit(num, osb->recovery_map.map)) { |
274 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); | 267 | __ocfs2_node_map_set_bit(&osb->recovery_map, num); |
275 | set = 1; | 268 | set = 1; |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index ebb2bbe30f35..86cf073996b5 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -49,7 +49,6 @@ | |||
49 | #include "symlink.h" | 49 | #include "symlink.h" |
50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
51 | #include "uptodate.h" | 51 | #include "uptodate.h" |
52 | #include "vote.h" | ||
53 | 52 | ||
54 | #include "buffer_head_io.h" | 53 | #include "buffer_head_io.h" |
55 | 54 | ||
@@ -718,8 +717,8 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
718 | } | 717 | } |
719 | 718 | ||
720 | /* we do this while holding the orphan dir lock because we | 719 | /* we do this while holding the orphan dir lock because we |
721 | * don't want recovery being run from another node to vote for | 720 | * don't want recovery being run from another node to try an |
722 | * an inode delete on us -- this will result in two nodes | 721 | * inode delete underneath us -- this will result in two nodes |
723 | * truncating the same file! */ | 722 | * truncating the same file! */ |
724 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); | 723 | status = ocfs2_truncate_for_delete(osb, inode, di_bh); |
725 | if (status < 0) { | 724 | if (status < 0) { |
@@ -744,7 +743,7 @@ bail: | |||
744 | } | 743 | } |
745 | 744 | ||
746 | /* There is a series of simple checks that should be done before a | 745 | /* There is a series of simple checks that should be done before a |
747 | * vote is even considered. Encapsulate those in this function. */ | 746 | * trylock is even considered. Encapsulate those in this function. */ |
748 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | 747 | static int ocfs2_inode_is_valid_to_delete(struct inode *inode) |
749 | { | 748 | { |
750 | int ret = 0; | 749 | int ret = 0; |
@@ -758,14 +757,14 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
758 | goto bail; | 757 | goto bail; |
759 | } | 758 | } |
760 | 759 | ||
761 | /* If we're coming from process_vote we can't go into our own | 760 | /* If we're coming from downconvert_thread we can't go into our own |
762 | * voting [hello, deadlock city!], so unforuntately we just | 761 | * voting [hello, deadlock city!], so unforuntately we just |
763 | * have to skip deleting this guy. That's OK though because | 762 | * have to skip deleting this guy. That's OK though because |
764 | * the node who's doing the actual deleting should handle it | 763 | * the node who's doing the actual deleting should handle it |
765 | * anyway. */ | 764 | * anyway. */ |
766 | if (current == osb->vote_task) { | 765 | if (current == osb->dc_task) { |
767 | mlog(0, "Skipping delete of %lu because we're currently " | 766 | mlog(0, "Skipping delete of %lu because we're currently " |
768 | "in process_vote\n", inode->i_ino); | 767 | "in downconvert\n", inode->i_ino); |
769 | goto bail; | 768 | goto bail; |
770 | } | 769 | } |
771 | 770 | ||
@@ -779,10 +778,9 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
779 | goto bail_unlock; | 778 | goto bail_unlock; |
780 | } | 779 | } |
781 | 780 | ||
782 | /* If we have voted "yes" on the wipe of this inode for | 781 | /* If we have allowd wipe of this inode for another node, it |
783 | * another node, it will be marked here so we can safely skip | 782 | * will be marked here so we can safely skip it. Recovery will |
784 | * it. Recovery will cleanup any inodes we might inadvertantly | 783 | * cleanup any inodes we might inadvertantly skip here. */ |
785 | * skip here. */ | ||
786 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { | 784 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) { |
787 | mlog(0, "Skipping delete of %lu because another node " | 785 | mlog(0, "Skipping delete of %lu because another node " |
788 | "has done this for us.\n", inode->i_ino); | 786 | "has done this for us.\n", inode->i_ino); |
@@ -929,7 +927,7 @@ void ocfs2_delete_inode(struct inode *inode) | |||
929 | 927 | ||
930 | /* Lock down the inode. This gives us an up to date view of | 928 | /* Lock down the inode. This gives us an up to date view of |
931 | * it's metadata (for verification), and allows us to | 929 | * it's metadata (for verification), and allows us to |
932 | * serialize delete_inode votes. | 930 | * serialize delete_inode on multiple nodes. |
933 | * | 931 | * |
934 | * Even though we might be doing a truncate, we don't take the | 932 | * Even though we might be doing a truncate, we don't take the |
935 | * allocation lock here as it won't be needed - nobody will | 933 | * allocation lock here as it won't be needed - nobody will |
@@ -947,15 +945,15 @@ void ocfs2_delete_inode(struct inode *inode) | |||
947 | * before we go ahead and wipe the inode. */ | 945 | * before we go ahead and wipe the inode. */ |
948 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); | 946 | status = ocfs2_query_inode_wipe(inode, di_bh, &wipe); |
949 | if (!wipe || status < 0) { | 947 | if (!wipe || status < 0) { |
950 | /* Error and inode busy vote both mean we won't be | 948 | /* Error and remote inode busy both mean we won't be |
951 | * removing the inode, so they take almost the same | 949 | * removing the inode, so they take almost the same |
952 | * path. */ | 950 | * path. */ |
953 | if (status < 0) | 951 | if (status < 0) |
954 | mlog_errno(status); | 952 | mlog_errno(status); |
955 | 953 | ||
956 | /* Someone in the cluster has voted to not wipe this | 954 | /* Someone in the cluster has disallowed a wipe of |
957 | * inode, or it was never completely orphaned. Write | 955 | * this inode, or it was never completely |
958 | * out the pages and exit now. */ | 956 | * orphaned. Write out the pages and exit now. */ |
959 | ocfs2_cleanup_delete_inode(inode, 1); | 957 | ocfs2_cleanup_delete_inode(inode, 1); |
960 | goto bail_unlock_inode; | 958 | goto bail_unlock_inode; |
961 | } | 959 | } |
@@ -1008,12 +1006,12 @@ void ocfs2_clear_inode(struct inode *inode) | |||
1008 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, | 1006 | mlog_bug_on_msg(OCFS2_SB(inode->i_sb) == NULL, |
1009 | "Inode=%lu\n", inode->i_ino); | 1007 | "Inode=%lu\n", inode->i_ino); |
1010 | 1008 | ||
1011 | /* For remove delete_inode vote, we hold open lock before, | 1009 | /* To preven remote deletes we hold open lock before, now it |
1012 | * now it is time to unlock PR and EX open locks. */ | 1010 | * is time to unlock PR and EX open locks. */ |
1013 | ocfs2_open_unlock(inode); | 1011 | ocfs2_open_unlock(inode); |
1014 | 1012 | ||
1015 | /* Do these before all the other work so that we don't bounce | 1013 | /* Do these before all the other work so that we don't bounce |
1016 | * the vote thread while waiting to destroy the locks. */ | 1014 | * the downconvert thread while waiting to destroy the locks. */ |
1017 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 1015 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); |
1018 | ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); | 1016 | ocfs2_mark_lockres_freeing(&oi->ip_meta_lockres); |
1019 | ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); | 1017 | ocfs2_mark_lockres_freeing(&oi->ip_data_lockres); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 8d81f6c1b877..f2ebe2eb3c21 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include "localalloc.h" | 44 | #include "localalloc.h" |
45 | #include "slot_map.h" | 45 | #include "slot_map.h" |
46 | #include "super.h" | 46 | #include "super.h" |
47 | #include "vote.h" | ||
48 | #include "sysfile.h" | 47 | #include "sysfile.h" |
49 | 48 | ||
50 | #include "buffer_head_io.h" | 49 | #include "buffer_head_io.h" |
@@ -103,7 +102,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
103 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", | 102 | mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", |
104 | journal->j_trans_id, flushed); | 103 | journal->j_trans_id, flushed); |
105 | 104 | ||
106 | ocfs2_kick_vote_thread(osb); | 105 | ocfs2_wake_downconvert_thread(osb); |
107 | wake_up(&journal->j_checkpointed); | 106 | wake_up(&journal->j_checkpointed); |
108 | finally: | 107 | finally: |
109 | mlog_exit(status); | 108 | mlog_exit(status); |
@@ -883,8 +882,8 @@ restart: | |||
883 | ocfs2_super_unlock(osb, 1); | 882 | ocfs2_super_unlock(osb, 1); |
884 | 883 | ||
885 | /* We always run recovery on our own orphan dir - the dead | 884 | /* We always run recovery on our own orphan dir - the dead |
886 | * node(s) may have voted "no" on an inode delete earlier. A | 885 | * node(s) may have disallowed a previous inode delete. Re-processing |
887 | * revote is therefore required. */ | 886 | * is therefore required. */ |
888 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, | 887 | ocfs2_queue_recovery_completion(osb->journal, osb->slot_num, NULL, |
889 | NULL); | 888 | NULL); |
890 | 889 | ||
@@ -1380,10 +1379,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1380 | iter = oi->ip_next_orphan; | 1379 | iter = oi->ip_next_orphan; |
1381 | 1380 | ||
1382 | spin_lock(&oi->ip_lock); | 1381 | spin_lock(&oi->ip_lock); |
1383 | /* Delete voting may have set these on the assumption | 1382 | /* The remote delete code may have set these on the |
1384 | * that the other node would wipe them successfully. | 1383 | * assumption that the other node would wipe them |
1385 | * If they are still in the node's orphan dir, we need | 1384 | * successfully. If they are still in the node's |
1386 | * to reset that state. */ | 1385 | * orphan dir, we need to reset that state. */ |
1387 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | 1386 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); |
1388 | 1387 | ||
1389 | /* Set the proper information to get us going into | 1388 | /* Set the proper information to get us going into |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 989ac2718587..6295fd6ae469 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -60,7 +60,6 @@ | |||
60 | #include "symlink.h" | 60 | #include "symlink.h" |
61 | #include "sysfile.h" | 61 | #include "sysfile.h" |
62 | #include "uptodate.h" | 62 | #include "uptodate.h" |
63 | #include "vote.h" | ||
64 | 63 | ||
65 | #include "buffer_head_io.h" | 64 | #include "buffer_head_io.h" |
66 | 65 | ||
@@ -176,7 +175,7 @@ bail_unlock: | |||
176 | /* Don't drop the cluster lock until *after* the d_add -- | 175 | /* Don't drop the cluster lock until *after* the d_add -- |
177 | * unlink on another node will message us to remove that | 176 | * unlink on another node will message us to remove that |
178 | * dentry under this lock so otherwise we can race this with | 177 | * dentry under this lock so otherwise we can race this with |
179 | * the vote thread and have a stale dentry. */ | 178 | * the downconvert thread and have a stale dentry. */ |
180 | ocfs2_meta_unlock(dir, 0); | 179 | ocfs2_meta_unlock(dir, 0); |
181 | 180 | ||
182 | bail: | 181 | bail: |
@@ -765,7 +764,7 @@ static int ocfs2_unlink(struct inode *dir, | |||
765 | 764 | ||
766 | status = ocfs2_remote_dentry_delete(dentry); | 765 | status = ocfs2_remote_dentry_delete(dentry); |
767 | if (status < 0) { | 766 | if (status < 0) { |
768 | /* This vote should succeed under all normal | 767 | /* This remote delete should succeed under all normal |
769 | * circumstances. */ | 768 | * circumstances. */ |
770 | mlog_errno(status); | 769 | mlog_errno(status); |
771 | goto leave; | 770 | goto leave; |
@@ -1031,8 +1030,9 @@ static int ocfs2_rename(struct inode *old_dir, | |||
1031 | 1030 | ||
1032 | /* | 1031 | /* |
1033 | * Aside from allowing a meta data update, the locking here | 1032 | * Aside from allowing a meta data update, the locking here |
1034 | * also ensures that the vote thread on other nodes won't have | 1033 | * also ensures that the downconvert thread on other nodes |
1035 | * to concurrently downconvert the inode and the dentry locks. | 1034 | * won't have to concurrently downconvert the inode and the |
1035 | * dentry locks. | ||
1036 | */ | 1036 | */ |
1037 | status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); | 1037 | status = ocfs2_meta_lock(old_inode, &old_inode_bh, 1); |
1038 | if (status < 0) { | 1038 | if (status < 0) { |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 60a23e1906b0..f8f866144c6a 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -189,9 +189,7 @@ struct ocfs2_super | |||
189 | struct ocfs2_slot_info *slot_info; | 189 | struct ocfs2_slot_info *slot_info; |
190 | 190 | ||
191 | spinlock_t node_map_lock; | 191 | spinlock_t node_map_lock; |
192 | struct ocfs2_node_map mounted_map; | ||
193 | struct ocfs2_node_map recovery_map; | 192 | struct ocfs2_node_map recovery_map; |
194 | struct ocfs2_node_map umount_map; | ||
195 | 193 | ||
196 | u64 root_blkno; | 194 | u64 root_blkno; |
197 | u64 system_dir_blkno; | 195 | u64 system_dir_blkno; |
@@ -254,28 +252,15 @@ struct ocfs2_super | |||
254 | 252 | ||
255 | wait_queue_head_t recovery_event; | 253 | wait_queue_head_t recovery_event; |
256 | 254 | ||
257 | spinlock_t vote_task_lock; | 255 | spinlock_t dc_task_lock; |
258 | struct task_struct *vote_task; | 256 | struct task_struct *dc_task; |
259 | wait_queue_head_t vote_event; | 257 | wait_queue_head_t dc_event; |
260 | unsigned long vote_wake_sequence; | 258 | unsigned long dc_wake_sequence; |
261 | unsigned long vote_work_sequence; | 259 | unsigned long dc_work_sequence; |
262 | 260 | ||
263 | struct list_head blocked_lock_list; | 261 | struct list_head blocked_lock_list; |
264 | unsigned long blocked_lock_count; | 262 | unsigned long blocked_lock_count; |
265 | 263 | ||
266 | struct list_head vote_list; | ||
267 | int vote_count; | ||
268 | |||
269 | u32 net_key; | ||
270 | spinlock_t net_response_lock; | ||
271 | unsigned int net_response_ids; | ||
272 | struct list_head net_response_list; | ||
273 | |||
274 | struct o2hb_callback_func osb_hb_up; | ||
275 | struct o2hb_callback_func osb_hb_down; | ||
276 | |||
277 | struct list_head osb_net_handlers; | ||
278 | |||
279 | wait_queue_head_t osb_mount_event; | 264 | wait_queue_head_t osb_mount_event; |
280 | 265 | ||
281 | /* Truncate log info */ | 266 | /* Truncate log info */ |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index af4882b62cfa..3a50ce555e64 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -48,25 +48,6 @@ static void __ocfs2_fill_slot(struct ocfs2_slot_info *si, | |||
48 | s16 slot_num, | 48 | s16 slot_num, |
49 | s16 node_num); | 49 | s16 node_num); |
50 | 50 | ||
51 | /* Use the slot information we've collected to create a map of mounted | ||
52 | * nodes. Should be holding an EX on super block. assumes slot info is | ||
53 | * up to date. Note that we call this *after* we find a slot, so our | ||
54 | * own node should be set in the map too... */ | ||
55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb) | ||
56 | { | ||
57 | int i; | ||
58 | struct ocfs2_slot_info *si = osb->slot_info; | ||
59 | |||
60 | spin_lock(&si->si_lock); | ||
61 | |||
62 | for (i = 0; i < si->si_size; i++) | ||
63 | if (si->si_global_node_nums[i] != OCFS2_INVALID_SLOT) | ||
64 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, | ||
65 | si->si_global_node_nums[i]); | ||
66 | |||
67 | spin_unlock(&si->si_lock); | ||
68 | } | ||
69 | |||
70 | /* post the slot information on disk into our slot_info struct. */ | 51 | /* post the slot information on disk into our slot_info struct. */ |
71 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) | 52 | void ocfs2_update_slot_info(struct ocfs2_slot_info *si) |
72 | { | 53 | { |
diff --git a/fs/ocfs2/slot_map.h b/fs/ocfs2/slot_map.h index d8c8ceed031b..1025872aaade 100644 --- a/fs/ocfs2/slot_map.h +++ b/fs/ocfs2/slot_map.h | |||
@@ -52,8 +52,6 @@ s16 ocfs2_node_num_to_slot(struct ocfs2_slot_info *si, | |||
52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, | 52 | void ocfs2_clear_slot(struct ocfs2_slot_info *si, |
53 | s16 slot_num); | 53 | s16 slot_num); |
54 | 54 | ||
55 | void ocfs2_populate_mounted_map(struct ocfs2_super *osb); | ||
56 | |||
57 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, | 55 | static inline int ocfs2_is_empty_slot(struct ocfs2_slot_info *si, |
58 | int slot_num) | 56 | int slot_num) |
59 | { | 57 | { |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 64b81b341ece..1996820488cc 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -65,7 +65,6 @@ | |||
65 | #include "sysfile.h" | 65 | #include "sysfile.h" |
66 | #include "uptodate.h" | 66 | #include "uptodate.h" |
67 | #include "ver.h" | 67 | #include "ver.h" |
68 | #include "vote.h" | ||
69 | 68 | ||
70 | #include "buffer_head_io.h" | 69 | #include "buffer_head_io.h" |
71 | 70 | ||
@@ -1123,13 +1122,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1123 | goto leave; | 1122 | goto leave; |
1124 | } | 1123 | } |
1125 | 1124 | ||
1126 | /* requires vote_thread to be running. */ | ||
1127 | status = ocfs2_register_net_handlers(osb); | ||
1128 | if (status < 0) { | ||
1129 | mlog_errno(status); | ||
1130 | goto leave; | ||
1131 | } | ||
1132 | |||
1133 | status = ocfs2_super_lock(osb, 1); | 1125 | status = ocfs2_super_lock(osb, 1); |
1134 | if (status < 0) { | 1126 | if (status < 0) { |
1135 | mlog_errno(status); | 1127 | mlog_errno(status); |
@@ -1144,8 +1136,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1144 | goto leave; | 1136 | goto leave; |
1145 | } | 1137 | } |
1146 | 1138 | ||
1147 | ocfs2_populate_mounted_map(osb); | ||
1148 | |||
1149 | /* load all node-local system inodes */ | 1139 | /* load all node-local system inodes */ |
1150 | status = ocfs2_init_local_system_inodes(osb); | 1140 | status = ocfs2_init_local_system_inodes(osb); |
1151 | if (status < 0) { | 1141 | if (status < 0) { |
@@ -1168,15 +1158,6 @@ static int ocfs2_mount_volume(struct super_block *sb) | |||
1168 | if (ocfs2_mount_local(osb)) | 1158 | if (ocfs2_mount_local(osb)) |
1169 | goto leave; | 1159 | goto leave; |
1170 | 1160 | ||
1171 | /* This should be sent *after* we recovered our journal as it | ||
1172 | * will cause other nodes to unmark us as needing | ||
1173 | * recovery. However, we need to send it *before* dropping the | ||
1174 | * super block lock as otherwise their recovery threads might | ||
1175 | * try to clean us up while we're live! */ | ||
1176 | status = ocfs2_request_mount_vote(osb); | ||
1177 | if (status < 0) | ||
1178 | mlog_errno(status); | ||
1179 | |||
1180 | leave: | 1161 | leave: |
1181 | if (unlock_super) | 1162 | if (unlock_super) |
1182 | ocfs2_super_unlock(osb, 1); | 1163 | ocfs2_super_unlock(osb, 1); |
@@ -1234,10 +1215,6 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1234 | mlog_errno(tmp); | 1215 | mlog_errno(tmp); |
1235 | return; | 1216 | return; |
1236 | } | 1217 | } |
1237 | |||
1238 | tmp = ocfs2_request_umount_vote(osb); | ||
1239 | if (tmp < 0) | ||
1240 | mlog_errno(tmp); | ||
1241 | } | 1218 | } |
1242 | 1219 | ||
1243 | if (osb->slot_num != OCFS2_INVALID_SLOT) | 1220 | if (osb->slot_num != OCFS2_INVALID_SLOT) |
@@ -1248,11 +1225,8 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1248 | 1225 | ||
1249 | ocfs2_release_system_inodes(osb); | 1226 | ocfs2_release_system_inodes(osb); |
1250 | 1227 | ||
1251 | if (osb->dlm) { | 1228 | if (osb->dlm) |
1252 | ocfs2_unregister_net_handlers(osb); | ||
1253 | |||
1254 | ocfs2_dlm_shutdown(osb); | 1229 | ocfs2_dlm_shutdown(osb); |
1255 | } | ||
1256 | 1230 | ||
1257 | debugfs_remove(osb->osb_debug_root); | 1231 | debugfs_remove(osb->osb_debug_root); |
1258 | 1232 | ||
@@ -1336,19 +1310,13 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1336 | osb->s_sectsize_bits = blksize_bits(sector_size); | 1310 | osb->s_sectsize_bits = blksize_bits(sector_size); |
1337 | BUG_ON(!osb->s_sectsize_bits); | 1311 | BUG_ON(!osb->s_sectsize_bits); |
1338 | 1312 | ||
1339 | osb->net_response_ids = 0; | ||
1340 | spin_lock_init(&osb->net_response_lock); | ||
1341 | INIT_LIST_HEAD(&osb->net_response_list); | ||
1342 | |||
1343 | INIT_LIST_HEAD(&osb->osb_net_handlers); | ||
1344 | init_waitqueue_head(&osb->recovery_event); | 1313 | init_waitqueue_head(&osb->recovery_event); |
1345 | spin_lock_init(&osb->vote_task_lock); | 1314 | spin_lock_init(&osb->dc_task_lock); |
1346 | init_waitqueue_head(&osb->vote_event); | 1315 | init_waitqueue_head(&osb->dc_event); |
1347 | osb->vote_work_sequence = 0; | 1316 | osb->dc_work_sequence = 0; |
1348 | osb->vote_wake_sequence = 0; | 1317 | osb->dc_wake_sequence = 0; |
1349 | INIT_LIST_HEAD(&osb->blocked_lock_list); | 1318 | INIT_LIST_HEAD(&osb->blocked_lock_list); |
1350 | osb->blocked_lock_count = 0; | 1319 | osb->blocked_lock_count = 0; |
1351 | INIT_LIST_HEAD(&osb->vote_list); | ||
1352 | spin_lock_init(&osb->osb_lock); | 1320 | spin_lock_init(&osb->osb_lock); |
1353 | 1321 | ||
1354 | atomic_set(&osb->alloc_stats.moves, 0); | 1322 | atomic_set(&osb->alloc_stats.moves, 0); |
@@ -1488,7 +1456,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
1488 | } | 1456 | } |
1489 | 1457 | ||
1490 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); | 1458 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); |
1491 | osb->net_key = le32_to_cpu(uuid_net_key); | ||
1492 | 1459 | ||
1493 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); | 1460 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); |
1494 | osb->vol_label[63] = '\0'; | 1461 | osb->vol_label[63] = '\0'; |
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c deleted file mode 100644 index c05358538f2b..000000000000 --- a/fs/ocfs2/vote.c +++ /dev/null | |||
@@ -1,756 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * vote.c | ||
5 | * | ||
6 | * description here | ||
7 | * | ||
8 | * Copyright (C) 2003, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 021110-1307, USA. | ||
24 | */ | ||
25 | |||
26 | #include <linux/types.h> | ||
27 | #include <linux/slab.h> | ||
28 | #include <linux/highmem.h> | ||
29 | #include <linux/kthread.h> | ||
30 | |||
31 | #include <cluster/heartbeat.h> | ||
32 | #include <cluster/nodemanager.h> | ||
33 | #include <cluster/tcp.h> | ||
34 | |||
35 | #include <dlm/dlmapi.h> | ||
36 | |||
37 | #define MLOG_MASK_PREFIX ML_VOTE | ||
38 | #include <cluster/masklog.h> | ||
39 | |||
40 | #include "ocfs2.h" | ||
41 | |||
42 | #include "alloc.h" | ||
43 | #include "dlmglue.h" | ||
44 | #include "extent_map.h" | ||
45 | #include "heartbeat.h" | ||
46 | #include "inode.h" | ||
47 | #include "journal.h" | ||
48 | #include "slot_map.h" | ||
49 | #include "vote.h" | ||
50 | |||
51 | #include "buffer_head_io.h" | ||
52 | |||
53 | #define OCFS2_MESSAGE_TYPE_VOTE (0x1) | ||
54 | #define OCFS2_MESSAGE_TYPE_RESPONSE (0x2) | ||
55 | struct ocfs2_msg_hdr | ||
56 | { | ||
57 | __be32 h_response_id; /* used to lookup message handle on sending | ||
58 | * node. */ | ||
59 | __be32 h_request; | ||
60 | __be64 h_blkno; | ||
61 | __be32 h_generation; | ||
62 | __be32 h_node_num; /* node sending this particular message. */ | ||
63 | }; | ||
64 | |||
65 | struct ocfs2_vote_msg | ||
66 | { | ||
67 | struct ocfs2_msg_hdr v_hdr; | ||
68 | __be32 v_reserved1; | ||
69 | } __attribute__ ((packed)); | ||
70 | |||
71 | /* Responses are given these values to maintain backwards | ||
72 | * compatibility with older ocfs2 versions */ | ||
73 | #define OCFS2_RESPONSE_OK (0) | ||
74 | #define OCFS2_RESPONSE_BUSY (-16) | ||
75 | #define OCFS2_RESPONSE_BAD_MSG (-22) | ||
76 | |||
77 | struct ocfs2_response_msg | ||
78 | { | ||
79 | struct ocfs2_msg_hdr r_hdr; | ||
80 | __be32 r_response; | ||
81 | } __attribute__ ((packed)); | ||
82 | |||
83 | struct ocfs2_vote_work { | ||
84 | struct list_head w_list; | ||
85 | struct ocfs2_vote_msg w_msg; | ||
86 | }; | ||
87 | |||
88 | enum ocfs2_vote_request { | ||
89 | OCFS2_VOTE_REQ_INVALID = 0, | ||
90 | OCFS2_VOTE_REQ_MOUNT, | ||
91 | OCFS2_VOTE_REQ_UMOUNT, | ||
92 | OCFS2_VOTE_REQ_LAST | ||
93 | }; | ||
94 | |||
95 | static inline int ocfs2_is_valid_vote_request(int request) | ||
96 | { | ||
97 | return OCFS2_VOTE_REQ_INVALID < request && | ||
98 | request < OCFS2_VOTE_REQ_LAST; | ||
99 | } | ||
100 | |||
101 | typedef void (*ocfs2_net_response_callback)(void *priv, | ||
102 | struct ocfs2_response_msg *resp); | ||
103 | struct ocfs2_net_response_cb { | ||
104 | ocfs2_net_response_callback rc_cb; | ||
105 | void *rc_priv; | ||
106 | }; | ||
107 | |||
108 | struct ocfs2_net_wait_ctxt { | ||
109 | struct list_head n_list; | ||
110 | u32 n_response_id; | ||
111 | wait_queue_head_t n_event; | ||
112 | struct ocfs2_node_map n_node_map; | ||
113 | int n_response; /* an aggregate response. 0 if | ||
114 | * all nodes are go, < 0 on any | ||
115 | * negative response from any | ||
116 | * node or network error. */ | ||
117 | struct ocfs2_net_response_cb *n_callback; | ||
118 | }; | ||
119 | |||
120 | static void ocfs2_process_mount_request(struct ocfs2_super *osb, | ||
121 | unsigned int node_num) | ||
122 | { | ||
123 | mlog(0, "MOUNT vote from node %u\n", node_num); | ||
124 | /* The other node only sends us this message when he has an EX | ||
125 | * on the superblock, so our recovery threads (if having been | ||
126 | * launched) are waiting on it.*/ | ||
127 | ocfs2_recovery_map_clear(osb, node_num); | ||
128 | ocfs2_node_map_set_bit(osb, &osb->mounted_map, node_num); | ||
129 | |||
130 | /* We clear the umount map here because a node may have been | ||
131 | * previously mounted, safely unmounted but never stopped | ||
132 | * heartbeating - in which case we'd have a stale entry. */ | ||
133 | ocfs2_node_map_clear_bit(osb, &osb->umount_map, node_num); | ||
134 | } | ||
135 | |||
136 | static void ocfs2_process_umount_request(struct ocfs2_super *osb, | ||
137 | unsigned int node_num) | ||
138 | { | ||
139 | mlog(0, "UMOUNT vote from node %u\n", node_num); | ||
140 | ocfs2_node_map_clear_bit(osb, &osb->mounted_map, node_num); | ||
141 | ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num); | ||
142 | } | ||
143 | |||
144 | static void ocfs2_process_vote(struct ocfs2_super *osb, | ||
145 | struct ocfs2_vote_msg *msg) | ||
146 | { | ||
147 | int net_status, vote_response; | ||
148 | unsigned int node_num; | ||
149 | u64 blkno; | ||
150 | enum ocfs2_vote_request request; | ||
151 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; | ||
152 | struct ocfs2_response_msg response; | ||
153 | |||
154 | /* decode the network mumbo jumbo into local variables. */ | ||
155 | request = be32_to_cpu(hdr->h_request); | ||
156 | blkno = be64_to_cpu(hdr->h_blkno); | ||
157 | node_num = be32_to_cpu(hdr->h_node_num); | ||
158 | |||
159 | mlog(0, "processing vote: request = %u, blkno = %llu, node_num = %u\n", | ||
160 | request, (unsigned long long)blkno, node_num); | ||
161 | |||
162 | if (!ocfs2_is_valid_vote_request(request)) { | ||
163 | mlog(ML_ERROR, "Invalid vote request %d from node %u\n", | ||
164 | request, node_num); | ||
165 | vote_response = OCFS2_RESPONSE_BAD_MSG; | ||
166 | goto respond; | ||
167 | } | ||
168 | |||
169 | vote_response = OCFS2_RESPONSE_OK; | ||
170 | |||
171 | switch (request) { | ||
172 | case OCFS2_VOTE_REQ_UMOUNT: | ||
173 | ocfs2_process_umount_request(osb, node_num); | ||
174 | goto respond; | ||
175 | case OCFS2_VOTE_REQ_MOUNT: | ||
176 | ocfs2_process_mount_request(osb, node_num); | ||
177 | goto respond; | ||
178 | default: | ||
179 | /* avoids a gcc warning */ | ||
180 | break; | ||
181 | } | ||
182 | |||
183 | respond: | ||
184 | /* Response structure is small so we just put it on the stack | ||
185 | * and stuff it inline. */ | ||
186 | memset(&response, 0, sizeof(struct ocfs2_response_msg)); | ||
187 | response.r_hdr.h_response_id = hdr->h_response_id; | ||
188 | response.r_hdr.h_blkno = hdr->h_blkno; | ||
189 | response.r_hdr.h_generation = hdr->h_generation; | ||
190 | response.r_hdr.h_node_num = cpu_to_be32(osb->node_num); | ||
191 | response.r_response = cpu_to_be32(vote_response); | ||
192 | |||
193 | net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
194 | osb->net_key, | ||
195 | &response, | ||
196 | sizeof(struct ocfs2_response_msg), | ||
197 | node_num, | ||
198 | NULL); | ||
199 | /* We still want to error print for ENOPROTOOPT here. The | ||
200 | * sending node shouldn't have unregistered his net handler | ||
201 | * without sending an unmount vote 1st */ | ||
202 | if (net_status < 0 | ||
203 | && net_status != -ETIMEDOUT | ||
204 | && net_status != -ENOTCONN) | ||
205 | mlog(ML_ERROR, "message to node %u fails with error %d!\n", | ||
206 | node_num, net_status); | ||
207 | } | ||
208 | |||
209 | static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb) | ||
210 | { | ||
211 | unsigned long processed; | ||
212 | struct ocfs2_lock_res *lockres; | ||
213 | struct ocfs2_vote_work *work; | ||
214 | |||
215 | mlog_entry_void(); | ||
216 | |||
217 | spin_lock(&osb->vote_task_lock); | ||
218 | /* grab this early so we know to try again if a state change and | ||
219 | * wake happens part-way through our work */ | ||
220 | osb->vote_work_sequence = osb->vote_wake_sequence; | ||
221 | |||
222 | processed = osb->blocked_lock_count; | ||
223 | while (processed) { | ||
224 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
225 | |||
226 | lockres = list_entry(osb->blocked_lock_list.next, | ||
227 | struct ocfs2_lock_res, l_blocked_list); | ||
228 | list_del_init(&lockres->l_blocked_list); | ||
229 | osb->blocked_lock_count--; | ||
230 | spin_unlock(&osb->vote_task_lock); | ||
231 | |||
232 | BUG_ON(!processed); | ||
233 | processed--; | ||
234 | |||
235 | ocfs2_process_blocked_lock(osb, lockres); | ||
236 | |||
237 | spin_lock(&osb->vote_task_lock); | ||
238 | } | ||
239 | |||
240 | while (osb->vote_count) { | ||
241 | BUG_ON(list_empty(&osb->vote_list)); | ||
242 | work = list_entry(osb->vote_list.next, | ||
243 | struct ocfs2_vote_work, w_list); | ||
244 | list_del(&work->w_list); | ||
245 | osb->vote_count--; | ||
246 | spin_unlock(&osb->vote_task_lock); | ||
247 | |||
248 | ocfs2_process_vote(osb, &work->w_msg); | ||
249 | kfree(work); | ||
250 | |||
251 | spin_lock(&osb->vote_task_lock); | ||
252 | } | ||
253 | spin_unlock(&osb->vote_task_lock); | ||
254 | |||
255 | mlog_exit_void(); | ||
256 | } | ||
257 | |||
258 | static int ocfs2_vote_thread_lists_empty(struct ocfs2_super *osb) | ||
259 | { | ||
260 | int empty = 0; | ||
261 | |||
262 | spin_lock(&osb->vote_task_lock); | ||
263 | if (list_empty(&osb->blocked_lock_list) && | ||
264 | list_empty(&osb->vote_list)) | ||
265 | empty = 1; | ||
266 | |||
267 | spin_unlock(&osb->vote_task_lock); | ||
268 | return empty; | ||
269 | } | ||
270 | |||
271 | static int ocfs2_vote_thread_should_wake(struct ocfs2_super *osb) | ||
272 | { | ||
273 | int should_wake = 0; | ||
274 | |||
275 | spin_lock(&osb->vote_task_lock); | ||
276 | if (osb->vote_work_sequence != osb->vote_wake_sequence) | ||
277 | should_wake = 1; | ||
278 | spin_unlock(&osb->vote_task_lock); | ||
279 | |||
280 | return should_wake; | ||
281 | } | ||
282 | |||
283 | int ocfs2_vote_thread(void *arg) | ||
284 | { | ||
285 | int status = 0; | ||
286 | struct ocfs2_super *osb = arg; | ||
287 | |||
288 | /* only quit once we've been asked to stop and there is no more | ||
289 | * work available */ | ||
290 | while (!(kthread_should_stop() && | ||
291 | ocfs2_vote_thread_lists_empty(osb))) { | ||
292 | |||
293 | wait_event_interruptible(osb->vote_event, | ||
294 | ocfs2_vote_thread_should_wake(osb) || | ||
295 | kthread_should_stop()); | ||
296 | |||
297 | mlog(0, "vote_thread: awoken\n"); | ||
298 | |||
299 | ocfs2_vote_thread_do_work(osb); | ||
300 | } | ||
301 | |||
302 | osb->vote_task = NULL; | ||
303 | return status; | ||
304 | } | ||
305 | |||
306 | static struct ocfs2_net_wait_ctxt *ocfs2_new_net_wait_ctxt(unsigned int response_id) | ||
307 | { | ||
308 | struct ocfs2_net_wait_ctxt *w; | ||
309 | |||
310 | w = kzalloc(sizeof(*w), GFP_NOFS); | ||
311 | if (!w) { | ||
312 | mlog_errno(-ENOMEM); | ||
313 | goto bail; | ||
314 | } | ||
315 | |||
316 | INIT_LIST_HEAD(&w->n_list); | ||
317 | init_waitqueue_head(&w->n_event); | ||
318 | ocfs2_node_map_init(&w->n_node_map); | ||
319 | w->n_response_id = response_id; | ||
320 | w->n_callback = NULL; | ||
321 | bail: | ||
322 | return w; | ||
323 | } | ||
324 | |||
325 | static unsigned int ocfs2_new_response_id(struct ocfs2_super *osb) | ||
326 | { | ||
327 | unsigned int ret; | ||
328 | |||
329 | spin_lock(&osb->net_response_lock); | ||
330 | ret = ++osb->net_response_ids; | ||
331 | spin_unlock(&osb->net_response_lock); | ||
332 | |||
333 | return ret; | ||
334 | } | ||
335 | |||
336 | static void ocfs2_dequeue_net_wait_ctxt(struct ocfs2_super *osb, | ||
337 | struct ocfs2_net_wait_ctxt *w) | ||
338 | { | ||
339 | spin_lock(&osb->net_response_lock); | ||
340 | list_del(&w->n_list); | ||
341 | spin_unlock(&osb->net_response_lock); | ||
342 | } | ||
343 | |||
344 | static void ocfs2_queue_net_wait_ctxt(struct ocfs2_super *osb, | ||
345 | struct ocfs2_net_wait_ctxt *w) | ||
346 | { | ||
347 | spin_lock(&osb->net_response_lock); | ||
348 | list_add_tail(&w->n_list, | ||
349 | &osb->net_response_list); | ||
350 | spin_unlock(&osb->net_response_lock); | ||
351 | } | ||
352 | |||
353 | static void __ocfs2_mark_node_responded(struct ocfs2_super *osb, | ||
354 | struct ocfs2_net_wait_ctxt *w, | ||
355 | int node_num) | ||
356 | { | ||
357 | assert_spin_locked(&osb->net_response_lock); | ||
358 | |||
359 | ocfs2_node_map_clear_bit(osb, &w->n_node_map, node_num); | ||
360 | if (ocfs2_node_map_is_empty(osb, &w->n_node_map)) | ||
361 | wake_up(&w->n_event); | ||
362 | } | ||
363 | |||
364 | /* Intended to be called from the node down callback, we fake remove | ||
365 | * the node from all our response contexts */ | ||
366 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
367 | int node_num) | ||
368 | { | ||
369 | struct list_head *p; | ||
370 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
371 | |||
372 | spin_lock(&osb->net_response_lock); | ||
373 | |||
374 | list_for_each(p, &osb->net_response_list) { | ||
375 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
376 | |||
377 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
378 | } | ||
379 | |||
380 | spin_unlock(&osb->net_response_lock); | ||
381 | } | ||
382 | |||
383 | static int ocfs2_broadcast_vote(struct ocfs2_super *osb, | ||
384 | struct ocfs2_vote_msg *request, | ||
385 | unsigned int response_id, | ||
386 | int *response, | ||
387 | struct ocfs2_net_response_cb *callback) | ||
388 | { | ||
389 | int status, i, remote_err; | ||
390 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
391 | int dequeued = 0; | ||
392 | |||
393 | mlog_entry_void(); | ||
394 | |||
395 | w = ocfs2_new_net_wait_ctxt(response_id); | ||
396 | if (!w) { | ||
397 | status = -ENOMEM; | ||
398 | mlog_errno(status); | ||
399 | goto bail; | ||
400 | } | ||
401 | w->n_callback = callback; | ||
402 | |||
403 | /* we're pretty much ready to go at this point, and this fills | ||
404 | * in n_response which we need anyway... */ | ||
405 | ocfs2_queue_net_wait_ctxt(osb, w); | ||
406 | |||
407 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, 0); | ||
408 | |||
409 | while (i != O2NM_INVALID_NODE_NUM) { | ||
410 | if (i != osb->node_num) { | ||
411 | mlog(0, "trying to send request to node %i\n", i); | ||
412 | ocfs2_node_map_set_bit(osb, &w->n_node_map, i); | ||
413 | |||
414 | remote_err = 0; | ||
415 | status = o2net_send_message(OCFS2_MESSAGE_TYPE_VOTE, | ||
416 | osb->net_key, | ||
417 | request, | ||
418 | sizeof(*request), | ||
419 | i, | ||
420 | &remote_err); | ||
421 | if (status == -ETIMEDOUT) { | ||
422 | mlog(0, "remote node %d timed out!\n", i); | ||
423 | status = -EAGAIN; | ||
424 | goto bail; | ||
425 | } | ||
426 | if (remote_err < 0) { | ||
427 | status = remote_err; | ||
428 | mlog(0, "remote error %d on node %d!\n", | ||
429 | remote_err, i); | ||
430 | mlog_errno(status); | ||
431 | goto bail; | ||
432 | } | ||
433 | if (status < 0) { | ||
434 | mlog_errno(status); | ||
435 | goto bail; | ||
436 | } | ||
437 | } | ||
438 | i++; | ||
439 | i = ocfs2_node_map_iterate(osb, &osb->mounted_map, i); | ||
440 | mlog(0, "next is %d, i am %d\n", i, osb->node_num); | ||
441 | } | ||
442 | mlog(0, "done sending, now waiting on responses...\n"); | ||
443 | |||
444 | wait_event(w->n_event, ocfs2_node_map_is_empty(osb, &w->n_node_map)); | ||
445 | |||
446 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
447 | dequeued = 1; | ||
448 | |||
449 | *response = w->n_response; | ||
450 | status = 0; | ||
451 | bail: | ||
452 | if (w) { | ||
453 | if (!dequeued) | ||
454 | ocfs2_dequeue_net_wait_ctxt(osb, w); | ||
455 | kfree(w); | ||
456 | } | ||
457 | |||
458 | mlog_exit(status); | ||
459 | return status; | ||
460 | } | ||
461 | |||
462 | static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, | ||
463 | u64 blkno, | ||
464 | unsigned int generation, | ||
465 | enum ocfs2_vote_request type) | ||
466 | { | ||
467 | struct ocfs2_vote_msg *request; | ||
468 | struct ocfs2_msg_hdr *hdr; | ||
469 | |||
470 | BUG_ON(!ocfs2_is_valid_vote_request(type)); | ||
471 | |||
472 | request = kzalloc(sizeof(*request), GFP_NOFS); | ||
473 | if (!request) { | ||
474 | mlog_errno(-ENOMEM); | ||
475 | } else { | ||
476 | hdr = &request->v_hdr; | ||
477 | hdr->h_node_num = cpu_to_be32(osb->node_num); | ||
478 | hdr->h_request = cpu_to_be32(type); | ||
479 | hdr->h_blkno = cpu_to_be64(blkno); | ||
480 | hdr->h_generation = cpu_to_be32(generation); | ||
481 | } | ||
482 | |||
483 | return request; | ||
484 | } | ||
485 | |||
486 | /* Complete the buildup of a new vote request and process the | ||
487 | * broadcast return value. */ | ||
488 | static int ocfs2_do_request_vote(struct ocfs2_super *osb, | ||
489 | struct ocfs2_vote_msg *request, | ||
490 | struct ocfs2_net_response_cb *callback) | ||
491 | { | ||
492 | int status, response = -EBUSY; | ||
493 | unsigned int response_id; | ||
494 | struct ocfs2_msg_hdr *hdr; | ||
495 | |||
496 | response_id = ocfs2_new_response_id(osb); | ||
497 | |||
498 | hdr = &request->v_hdr; | ||
499 | hdr->h_response_id = cpu_to_be32(response_id); | ||
500 | |||
501 | status = ocfs2_broadcast_vote(osb, request, response_id, &response, | ||
502 | callback); | ||
503 | if (status < 0) { | ||
504 | mlog_errno(status); | ||
505 | goto bail; | ||
506 | } | ||
507 | |||
508 | status = response; | ||
509 | bail: | ||
510 | |||
511 | return status; | ||
512 | } | ||
513 | |||
514 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) | ||
515 | { | ||
516 | int status; | ||
517 | struct ocfs2_vote_msg *request = NULL; | ||
518 | |||
519 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_MOUNT); | ||
520 | if (!request) { | ||
521 | status = -ENOMEM; | ||
522 | goto bail; | ||
523 | } | ||
524 | |||
525 | status = -EAGAIN; | ||
526 | while (status == -EAGAIN) { | ||
527 | if (!(osb->s_mount_opt & OCFS2_MOUNT_NOINTR) && | ||
528 | signal_pending(current)) { | ||
529 | status = -ERESTARTSYS; | ||
530 | goto bail; | ||
531 | } | ||
532 | |||
533 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
534 | osb->node_num)) { | ||
535 | status = 0; | ||
536 | goto bail; | ||
537 | } | ||
538 | |||
539 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
540 | } | ||
541 | |||
542 | bail: | ||
543 | kfree(request); | ||
544 | return status; | ||
545 | } | ||
546 | |||
547 | int ocfs2_request_umount_vote(struct ocfs2_super *osb) | ||
548 | { | ||
549 | int status; | ||
550 | struct ocfs2_vote_msg *request = NULL; | ||
551 | |||
552 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_UMOUNT); | ||
553 | if (!request) { | ||
554 | status = -ENOMEM; | ||
555 | goto bail; | ||
556 | } | ||
557 | |||
558 | status = -EAGAIN; | ||
559 | while (status == -EAGAIN) { | ||
560 | /* Do not check signals on this vote... We really want | ||
561 | * this one to go all the way through. */ | ||
562 | |||
563 | if (ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
564 | osb->node_num)) { | ||
565 | status = 0; | ||
566 | goto bail; | ||
567 | } | ||
568 | |||
569 | status = ocfs2_do_request_vote(osb, request, NULL); | ||
570 | } | ||
571 | |||
572 | bail: | ||
573 | kfree(request); | ||
574 | return status; | ||
575 | } | ||
576 | |||
577 | /* TODO: This should eventually be a hash table! */ | ||
578 | static struct ocfs2_net_wait_ctxt * __ocfs2_find_net_wait_ctxt(struct ocfs2_super *osb, | ||
579 | u32 response_id) | ||
580 | { | ||
581 | struct list_head *p; | ||
582 | struct ocfs2_net_wait_ctxt *w = NULL; | ||
583 | |||
584 | list_for_each(p, &osb->net_response_list) { | ||
585 | w = list_entry(p, struct ocfs2_net_wait_ctxt, n_list); | ||
586 | if (response_id == w->n_response_id) | ||
587 | break; | ||
588 | w = NULL; | ||
589 | } | ||
590 | |||
591 | return w; | ||
592 | } | ||
593 | |||
594 | /* Translate response codes into local node errno values */ | ||
595 | static inline int ocfs2_translate_response(int response) | ||
596 | { | ||
597 | int ret; | ||
598 | |||
599 | switch (response) { | ||
600 | case OCFS2_RESPONSE_OK: | ||
601 | ret = 0; | ||
602 | break; | ||
603 | |||
604 | case OCFS2_RESPONSE_BUSY: | ||
605 | ret = -EBUSY; | ||
606 | break; | ||
607 | |||
608 | default: | ||
609 | ret = -EINVAL; | ||
610 | } | ||
611 | |||
612 | return ret; | ||
613 | } | ||
614 | |||
615 | static int ocfs2_handle_response_message(struct o2net_msg *msg, | ||
616 | u32 len, | ||
617 | void *data, void **ret_data) | ||
618 | { | ||
619 | unsigned int response_id, node_num; | ||
620 | int response_status; | ||
621 | struct ocfs2_super *osb = data; | ||
622 | struct ocfs2_response_msg *resp; | ||
623 | struct ocfs2_net_wait_ctxt * w; | ||
624 | struct ocfs2_net_response_cb *resp_cb; | ||
625 | |||
626 | resp = (struct ocfs2_response_msg *) msg->buf; | ||
627 | |||
628 | response_id = be32_to_cpu(resp->r_hdr.h_response_id); | ||
629 | node_num = be32_to_cpu(resp->r_hdr.h_node_num); | ||
630 | response_status = | ||
631 | ocfs2_translate_response(be32_to_cpu(resp->r_response)); | ||
632 | |||
633 | mlog(0, "received response message:\n"); | ||
634 | mlog(0, "h_response_id = %u\n", response_id); | ||
635 | mlog(0, "h_request = %u\n", be32_to_cpu(resp->r_hdr.h_request)); | ||
636 | mlog(0, "h_blkno = %llu\n", | ||
637 | (unsigned long long)be64_to_cpu(resp->r_hdr.h_blkno)); | ||
638 | mlog(0, "h_generation = %u\n", be32_to_cpu(resp->r_hdr.h_generation)); | ||
639 | mlog(0, "h_node_num = %u\n", node_num); | ||
640 | mlog(0, "r_response = %d\n", response_status); | ||
641 | |||
642 | spin_lock(&osb->net_response_lock); | ||
643 | w = __ocfs2_find_net_wait_ctxt(osb, response_id); | ||
644 | if (!w) { | ||
645 | mlog(0, "request not found!\n"); | ||
646 | goto bail; | ||
647 | } | ||
648 | resp_cb = w->n_callback; | ||
649 | |||
650 | if (response_status && (!w->n_response)) { | ||
651 | /* we only really need one negative response so don't | ||
652 | * set it twice. */ | ||
653 | w->n_response = response_status; | ||
654 | } | ||
655 | |||
656 | if (resp_cb) { | ||
657 | spin_unlock(&osb->net_response_lock); | ||
658 | |||
659 | resp_cb->rc_cb(resp_cb->rc_priv, resp); | ||
660 | |||
661 | spin_lock(&osb->net_response_lock); | ||
662 | } | ||
663 | |||
664 | __ocfs2_mark_node_responded(osb, w, node_num); | ||
665 | bail: | ||
666 | spin_unlock(&osb->net_response_lock); | ||
667 | |||
668 | return 0; | ||
669 | } | ||
670 | |||
671 | static int ocfs2_handle_vote_message(struct o2net_msg *msg, | ||
672 | u32 len, | ||
673 | void *data, void **ret_data) | ||
674 | { | ||
675 | int status; | ||
676 | struct ocfs2_super *osb = data; | ||
677 | struct ocfs2_vote_work *work; | ||
678 | |||
679 | work = kmalloc(sizeof(struct ocfs2_vote_work), GFP_NOFS); | ||
680 | if (!work) { | ||
681 | status = -ENOMEM; | ||
682 | mlog_errno(status); | ||
683 | goto bail; | ||
684 | } | ||
685 | |||
686 | INIT_LIST_HEAD(&work->w_list); | ||
687 | memcpy(&work->w_msg, msg->buf, sizeof(struct ocfs2_vote_msg)); | ||
688 | |||
689 | mlog(0, "scheduling vote request:\n"); | ||
690 | mlog(0, "h_response_id = %u\n", | ||
691 | be32_to_cpu(work->w_msg.v_hdr.h_response_id)); | ||
692 | mlog(0, "h_request = %u\n", be32_to_cpu(work->w_msg.v_hdr.h_request)); | ||
693 | mlog(0, "h_blkno = %llu\n", | ||
694 | (unsigned long long)be64_to_cpu(work->w_msg.v_hdr.h_blkno)); | ||
695 | mlog(0, "h_generation = %u\n", | ||
696 | be32_to_cpu(work->w_msg.v_hdr.h_generation)); | ||
697 | mlog(0, "h_node_num = %u\n", | ||
698 | be32_to_cpu(work->w_msg.v_hdr.h_node_num)); | ||
699 | |||
700 | spin_lock(&osb->vote_task_lock); | ||
701 | list_add_tail(&work->w_list, &osb->vote_list); | ||
702 | osb->vote_count++; | ||
703 | spin_unlock(&osb->vote_task_lock); | ||
704 | |||
705 | ocfs2_kick_vote_thread(osb); | ||
706 | |||
707 | status = 0; | ||
708 | bail: | ||
709 | return status; | ||
710 | } | ||
711 | |||
712 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb) | ||
713 | { | ||
714 | if (!osb->net_key) | ||
715 | return; | ||
716 | |||
717 | o2net_unregister_handler_list(&osb->osb_net_handlers); | ||
718 | |||
719 | if (!list_empty(&osb->net_response_list)) | ||
720 | mlog(ML_ERROR, "net response list not empty!\n"); | ||
721 | |||
722 | osb->net_key = 0; | ||
723 | } | ||
724 | |||
725 | int ocfs2_register_net_handlers(struct ocfs2_super *osb) | ||
726 | { | ||
727 | int status = 0; | ||
728 | |||
729 | if (ocfs2_mount_local(osb)) | ||
730 | return 0; | ||
731 | |||
732 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_RESPONSE, | ||
733 | osb->net_key, | ||
734 | sizeof(struct ocfs2_response_msg), | ||
735 | ocfs2_handle_response_message, | ||
736 | osb, NULL, &osb->osb_net_handlers); | ||
737 | if (status) { | ||
738 | mlog_errno(status); | ||
739 | goto bail; | ||
740 | } | ||
741 | |||
742 | status = o2net_register_handler(OCFS2_MESSAGE_TYPE_VOTE, | ||
743 | osb->net_key, | ||
744 | sizeof(struct ocfs2_vote_msg), | ||
745 | ocfs2_handle_vote_message, | ||
746 | osb, NULL, &osb->osb_net_handlers); | ||
747 | if (status) { | ||
748 | mlog_errno(status); | ||
749 | goto bail; | ||
750 | } | ||
751 | bail: | ||
752 | if (status < 0) | ||
753 | ocfs2_unregister_net_handlers(osb); | ||
754 | |||
755 | return status; | ||
756 | } | ||
diff --git a/fs/ocfs2/vote.h b/fs/ocfs2/vote.h deleted file mode 100644 index 9ea46f62de31..000000000000 --- a/fs/ocfs2/vote.h +++ /dev/null | |||
@@ -1,48 +0,0 @@ | |||
1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
3 | * | ||
4 | * vote.h | ||
5 | * | ||
6 | * Declarations for the OCFS2 vote thread, mount/umount vote requests and vote network handlers. | ||
7 | * | ||
8 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or | ||
11 | * modify it under the terms of the GNU General Public | ||
12 | * License as published by the Free Software Foundation; either | ||
13 | * version 2 of the License, or (at your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public | ||
21 | * License along with this program; if not, write to the | ||
22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
23 | * Boston, MA 02111-1307, USA. | ||
24 | */ | ||
25 | |||
26 | |||
27 | #ifndef VOTE_H | ||
28 | #define VOTE_H | ||
29 | |||
30 | int ocfs2_vote_thread(void *arg); | ||
31 | static inline void ocfs2_kick_vote_thread(struct ocfs2_super *osb) | ||
32 | { | ||
33 | spin_lock(&osb->vote_task_lock); | ||
34 | /* make sure the voting thread gets a swipe at whatever changes | ||
35 | * the caller may have made to the voting state */ | ||
36 | osb->vote_wake_sequence++; | ||
37 | spin_unlock(&osb->vote_task_lock); | ||
38 | wake_up(&osb->vote_event); | ||
39 | } | ||
40 | |||
41 | int ocfs2_request_mount_vote(struct ocfs2_super *osb); | ||
42 | int ocfs2_request_umount_vote(struct ocfs2_super *osb); | ||
43 | int ocfs2_register_net_handlers(struct ocfs2_super *osb); | ||
44 | void ocfs2_unregister_net_handlers(struct ocfs2_super *osb); | ||
45 | |||
46 | void ocfs2_remove_node_from_vote_queues(struct ocfs2_super *osb, | ||
47 | int node_num); | ||
48 | #endif | ||