diff options
Diffstat (limited to 'fs/ocfs2/vote.c')
-rw-r--r-- | fs/ocfs2/vote.c | 287 |
1 files changed, 7 insertions, 280 deletions
diff --git a/fs/ocfs2/vote.c b/fs/ocfs2/vote.c index 1552d6449478..4f82a2f0efef 100644 --- a/fs/ocfs2/vote.c +++ b/fs/ocfs2/vote.c | |||
@@ -63,17 +63,10 @@ struct ocfs2_msg_hdr | |||
63 | __be32 h_node_num; /* node sending this particular message. */ | 63 | __be32 h_node_num; /* node sending this particular message. */ |
64 | }; | 64 | }; |
65 | 65 | ||
66 | /* OCFS2_MAX_FILENAME_LEN is 255 characters, but we want to align this | ||
67 | * for the network. */ | ||
68 | #define OCFS2_VOTE_FILENAME_LEN 256 | ||
69 | struct ocfs2_vote_msg | 66 | struct ocfs2_vote_msg |
70 | { | 67 | { |
71 | struct ocfs2_msg_hdr v_hdr; | 68 | struct ocfs2_msg_hdr v_hdr; |
72 | union { | 69 | __be32 v_reserved1; |
73 | __be32 v_generic1; | ||
74 | __be32 v_orphaned_slot; /* Used during delete votes */ | ||
75 | __be32 v_nlink; /* Used during unlink votes */ | ||
76 | } md1; /* Message type dependant 1 */ | ||
77 | }; | 70 | }; |
78 | 71 | ||
79 | /* Responses are given these values to maintain backwards | 72 | /* Responses are given these values to maintain backwards |
@@ -86,7 +79,6 @@ struct ocfs2_response_msg | |||
86 | { | 79 | { |
87 | struct ocfs2_msg_hdr r_hdr; | 80 | struct ocfs2_msg_hdr r_hdr; |
88 | __be32 r_response; | 81 | __be32 r_response; |
89 | __be32 r_orphaned_slot; | ||
90 | }; | 82 | }; |
91 | 83 | ||
92 | struct ocfs2_vote_work { | 84 | struct ocfs2_vote_work { |
@@ -96,7 +88,6 @@ struct ocfs2_vote_work { | |||
96 | 88 | ||
97 | enum ocfs2_vote_request { | 89 | enum ocfs2_vote_request { |
98 | OCFS2_VOTE_REQ_INVALID = 0, | 90 | OCFS2_VOTE_REQ_INVALID = 0, |
99 | OCFS2_VOTE_REQ_DELETE, | ||
100 | OCFS2_VOTE_REQ_MOUNT, | 91 | OCFS2_VOTE_REQ_MOUNT, |
101 | OCFS2_VOTE_REQ_UMOUNT, | 92 | OCFS2_VOTE_REQ_UMOUNT, |
102 | OCFS2_VOTE_REQ_LAST | 93 | OCFS2_VOTE_REQ_LAST |
@@ -151,135 +142,23 @@ static void ocfs2_process_umount_request(struct ocfs2_super *osb, | |||
151 | ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num); | 142 | ocfs2_node_map_set_bit(osb, &osb->umount_map, node_num); |
152 | } | 143 | } |
153 | 144 | ||
154 | void ocfs2_mark_inode_remotely_deleted(struct inode *inode) | ||
155 | { | ||
156 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
157 | |||
158 | assert_spin_locked(&oi->ip_lock); | ||
159 | /* We set the SKIP_DELETE flag on the inode so we don't try to | ||
160 | * delete it in delete_inode ourselves, thus avoiding | ||
161 | * unecessary lock pinging. If the other node failed to wipe | ||
162 | * the inode as a result of a crash, then recovery will pick | ||
163 | * up the slack. */ | ||
164 | oi->ip_flags |= OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE; | ||
165 | } | ||
166 | |||
167 | static int ocfs2_process_delete_request(struct inode *inode, | ||
168 | int *orphaned_slot) | ||
169 | { | ||
170 | int response = OCFS2_RESPONSE_BUSY; | ||
171 | |||
172 | mlog(0, "DELETE vote on inode %lu, read lnk_cnt = %u, slot = %d\n", | ||
173 | inode->i_ino, inode->i_nlink, *orphaned_slot); | ||
174 | |||
175 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
176 | |||
177 | /* Whatever our vote response is, we want to make sure that | ||
178 | * the orphaned slot is recorded properly on this node *and* | ||
179 | * on the requesting node. Technically, if the requesting node | ||
180 | * did not know which slot the inode is orphaned in but we | ||
181 | * respond with BUSY he doesn't actually need the orphaned | ||
182 | * slot, but it doesn't hurt to do it here anyway. */ | ||
183 | if ((*orphaned_slot) != OCFS2_INVALID_SLOT) { | ||
184 | mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot != | ||
185 | OCFS2_INVALID_SLOT && | ||
186 | OCFS2_I(inode)->ip_orphaned_slot != | ||
187 | (*orphaned_slot), | ||
188 | "Inode %llu: This node thinks it's " | ||
189 | "orphaned in slot %d, messaged it's in %d\n", | ||
190 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
191 | OCFS2_I(inode)->ip_orphaned_slot, | ||
192 | *orphaned_slot); | ||
193 | |||
194 | mlog(0, "Setting orphaned slot for inode %llu to %d\n", | ||
195 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
196 | *orphaned_slot); | ||
197 | |||
198 | OCFS2_I(inode)->ip_orphaned_slot = *orphaned_slot; | ||
199 | } else { | ||
200 | mlog(0, "Sending back orphaned slot %d for inode %llu\n", | ||
201 | OCFS2_I(inode)->ip_orphaned_slot, | ||
202 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
203 | |||
204 | *orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; | ||
205 | } | ||
206 | |||
207 | /* vote no if the file is still open. */ | ||
208 | if (OCFS2_I(inode)->ip_open_count) { | ||
209 | mlog(0, "open count = %u\n", | ||
210 | OCFS2_I(inode)->ip_open_count); | ||
211 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
212 | goto done; | ||
213 | } | ||
214 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
215 | |||
216 | /* directories are a bit ugly... What if someone is sitting in | ||
217 | * it? We want to make sure the inode is removed completely as | ||
218 | * a result of the iput in process_vote. */ | ||
219 | if (S_ISDIR(inode->i_mode) && (atomic_read(&inode->i_count) != 1)) { | ||
220 | mlog(0, "i_count = %u\n", atomic_read(&inode->i_count)); | ||
221 | goto done; | ||
222 | } | ||
223 | |||
224 | if (filemap_fdatawrite(inode->i_mapping)) { | ||
225 | mlog(ML_ERROR, "Could not sync inode %llu for delete!\n", | ||
226 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
227 | goto done; | ||
228 | } | ||
229 | sync_mapping_buffers(inode->i_mapping); | ||
230 | truncate_inode_pages(inode->i_mapping, 0); | ||
231 | ocfs2_extent_map_trunc(inode, 0); | ||
232 | |||
233 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
234 | /* double check open count - someone might have raced this | ||
235 | * thread into ocfs2_file_open while we were writing out | ||
236 | * data. If we're to allow a wipe of this inode now, we *must* | ||
237 | * hold the spinlock until we've marked it. */ | ||
238 | if (OCFS2_I(inode)->ip_open_count) { | ||
239 | mlog(0, "Raced to wipe! open count = %u\n", | ||
240 | OCFS2_I(inode)->ip_open_count); | ||
241 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
242 | goto done; | ||
243 | } | ||
244 | |||
245 | /* Mark the inode as being wiped from disk. */ | ||
246 | ocfs2_mark_inode_remotely_deleted(inode); | ||
247 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
248 | |||
249 | /* Not sure this is necessary anymore. */ | ||
250 | d_prune_aliases(inode); | ||
251 | |||
252 | /* If we get here, then we're voting 'yes', so commit the | ||
253 | * delete on our side. */ | ||
254 | response = OCFS2_RESPONSE_OK; | ||
255 | done: | ||
256 | return response; | ||
257 | } | ||
258 | |||
259 | static void ocfs2_process_vote(struct ocfs2_super *osb, | 145 | static void ocfs2_process_vote(struct ocfs2_super *osb, |
260 | struct ocfs2_vote_msg *msg) | 146 | struct ocfs2_vote_msg *msg) |
261 | { | 147 | { |
262 | int net_status, vote_response; | 148 | int net_status, vote_response; |
263 | int orphaned_slot = 0; | 149 | unsigned int node_num; |
264 | unsigned int node_num, generation; | ||
265 | u64 blkno; | 150 | u64 blkno; |
266 | enum ocfs2_vote_request request; | 151 | enum ocfs2_vote_request request; |
267 | struct inode *inode = NULL; | ||
268 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; | 152 | struct ocfs2_msg_hdr *hdr = &msg->v_hdr; |
269 | struct ocfs2_response_msg response; | 153 | struct ocfs2_response_msg response; |
270 | 154 | ||
271 | /* decode the network mumbo jumbo into local variables. */ | 155 | /* decode the network mumbo jumbo into local variables. */ |
272 | request = be32_to_cpu(hdr->h_request); | 156 | request = be32_to_cpu(hdr->h_request); |
273 | blkno = be64_to_cpu(hdr->h_blkno); | 157 | blkno = be64_to_cpu(hdr->h_blkno); |
274 | generation = be32_to_cpu(hdr->h_generation); | ||
275 | node_num = be32_to_cpu(hdr->h_node_num); | 158 | node_num = be32_to_cpu(hdr->h_node_num); |
276 | if (request == OCFS2_VOTE_REQ_DELETE) | ||
277 | orphaned_slot = be32_to_cpu(msg->md1.v_orphaned_slot); | ||
278 | 159 | ||
279 | mlog(0, "processing vote: request = %u, blkno = %llu, " | 160 | mlog(0, "processing vote: request = %u, blkno = %llu, node_num = %u\n", |
280 | "generation = %u, node_num = %u, priv1 = %u\n", request, | 161 | request, (unsigned long long)blkno, node_num); |
281 | (unsigned long long)blkno, generation, node_num, | ||
282 | be32_to_cpu(msg->md1.v_generic1)); | ||
283 | 162 | ||
284 | if (!ocfs2_is_valid_vote_request(request)) { | 163 | if (!ocfs2_is_valid_vote_request(request)) { |
285 | mlog(ML_ERROR, "Invalid vote request %d from node %u\n", | 164 | mlog(ML_ERROR, "Invalid vote request %d from node %u\n", |
@@ -302,52 +181,6 @@ static void ocfs2_process_vote(struct ocfs2_super *osb, | |||
302 | break; | 181 | break; |
303 | } | 182 | } |
304 | 183 | ||
305 | /* We cannot process the remaining message types before we're | ||
306 | * fully mounted. It's perfectly safe however to send a 'yes' | ||
307 | * response as we can't possibly have any of the state they're | ||
308 | * asking us to modify yet. */ | ||
309 | if (atomic_read(&osb->vol_state) == VOLUME_INIT) | ||
310 | goto respond; | ||
311 | |||
312 | /* If we get here, then the request is against an inode. */ | ||
313 | inode = ocfs2_ilookup_for_vote(osb, blkno, | ||
314 | request == OCFS2_VOTE_REQ_DELETE); | ||
315 | |||
316 | /* Not finding the inode is perfectly valid - it means we're | ||
317 | * not interested in what the other node is about to do to it | ||
318 | * so in those cases we automatically respond with an | ||
319 | * affirmative. Cluster locking ensures that we won't race | ||
320 | * interest in the inode with this vote request. */ | ||
321 | if (!inode) | ||
322 | goto respond; | ||
323 | |||
324 | /* Check generation values. It's possible for us to get a | ||
325 | * request against a stale inode. If so then we proceed as if | ||
326 | * we had not found an inode in the first place. */ | ||
327 | if (inode->i_generation != generation) { | ||
328 | mlog(0, "generation passed %u != inode generation = %u, " | ||
329 | "ip_flags = %x, ip_blkno = %llu, msg %llu, i_count = %u, " | ||
330 | "message type = %u\n", generation, inode->i_generation, | ||
331 | OCFS2_I(inode)->ip_flags, | ||
332 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
333 | (unsigned long long)blkno, atomic_read(&inode->i_count), | ||
334 | request); | ||
335 | iput(inode); | ||
336 | inode = NULL; | ||
337 | goto respond; | ||
338 | } | ||
339 | |||
340 | switch (request) { | ||
341 | case OCFS2_VOTE_REQ_DELETE: | ||
342 | vote_response = ocfs2_process_delete_request(inode, | ||
343 | &orphaned_slot); | ||
344 | break; | ||
345 | default: | ||
346 | mlog(ML_ERROR, "node %u, invalid request: %u\n", | ||
347 | node_num, request); | ||
348 | vote_response = OCFS2_RESPONSE_BAD_MSG; | ||
349 | } | ||
350 | |||
351 | respond: | 184 | respond: |
352 | /* Response struture is small so we just put it on the stack | 185 | /* Response struture is small so we just put it on the stack |
353 | * and stuff it inline. */ | 186 | * and stuff it inline. */ |
@@ -357,7 +190,6 @@ respond: | |||
357 | response.r_hdr.h_generation = hdr->h_generation; | 190 | response.r_hdr.h_generation = hdr->h_generation; |
358 | response.r_hdr.h_node_num = cpu_to_be32(osb->node_num); | 191 | response.r_hdr.h_node_num = cpu_to_be32(osb->node_num); |
359 | response.r_response = cpu_to_be32(vote_response); | 192 | response.r_response = cpu_to_be32(vote_response); |
360 | response.r_orphaned_slot = cpu_to_be32(orphaned_slot); | ||
361 | 193 | ||
362 | net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE, | 194 | net_status = o2net_send_message(OCFS2_MESSAGE_TYPE_RESPONSE, |
363 | osb->net_key, | 195 | osb->net_key, |
@@ -373,9 +205,6 @@ respond: | |||
373 | && net_status != -ENOTCONN) | 205 | && net_status != -ENOTCONN) |
374 | mlog(ML_ERROR, "message to node %u fails with error %d!\n", | 206 | mlog(ML_ERROR, "message to node %u fails with error %d!\n", |
375 | node_num, net_status); | 207 | node_num, net_status); |
376 | |||
377 | if (inode) | ||
378 | iput(inode); | ||
379 | } | 208 | } |
380 | 209 | ||
381 | static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb) | 210 | static void ocfs2_vote_thread_do_work(struct ocfs2_super *osb) |
@@ -634,8 +463,7 @@ bail: | |||
634 | static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, | 463 | static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, |
635 | u64 blkno, | 464 | u64 blkno, |
636 | unsigned int generation, | 465 | unsigned int generation, |
637 | enum ocfs2_vote_request type, | 466 | enum ocfs2_vote_request type) |
638 | u32 priv) | ||
639 | { | 467 | { |
640 | struct ocfs2_vote_msg *request; | 468 | struct ocfs2_vote_msg *request; |
641 | struct ocfs2_msg_hdr *hdr; | 469 | struct ocfs2_msg_hdr *hdr; |
@@ -651,8 +479,6 @@ static struct ocfs2_vote_msg * ocfs2_new_vote_request(struct ocfs2_super *osb, | |||
651 | hdr->h_request = cpu_to_be32(type); | 479 | hdr->h_request = cpu_to_be32(type); |
652 | hdr->h_blkno = cpu_to_be64(blkno); | 480 | hdr->h_blkno = cpu_to_be64(blkno); |
653 | hdr->h_generation = cpu_to_be32(generation); | 481 | hdr->h_generation = cpu_to_be32(generation); |
654 | |||
655 | request->md1.v_generic1 = cpu_to_be32(priv); | ||
656 | } | 482 | } |
657 | 483 | ||
658 | return request; | 484 | return request; |
@@ -686,109 +512,12 @@ bail: | |||
686 | return status; | 512 | return status; |
687 | } | 513 | } |
688 | 514 | ||
689 | static int ocfs2_request_vote(struct inode *inode, | ||
690 | struct ocfs2_vote_msg *request, | ||
691 | struct ocfs2_net_response_cb *callback) | ||
692 | { | ||
693 | int status; | ||
694 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
695 | |||
696 | if (ocfs2_inode_is_new(inode)) | ||
697 | return 0; | ||
698 | |||
699 | status = -EAGAIN; | ||
700 | while (status == -EAGAIN) { | ||
701 | if (!(osb->s_mount_opt & OCFS2_MOUNT_NOINTR) && | ||
702 | signal_pending(current)) | ||
703 | return -ERESTARTSYS; | ||
704 | |||
705 | status = ocfs2_super_lock(osb, 0); | ||
706 | if (status < 0) { | ||
707 | mlog_errno(status); | ||
708 | break; | ||
709 | } | ||
710 | |||
711 | status = 0; | ||
712 | if (!ocfs2_node_map_is_only(osb, &osb->mounted_map, | ||
713 | osb->node_num)) | ||
714 | status = ocfs2_do_request_vote(osb, request, callback); | ||
715 | |||
716 | ocfs2_super_unlock(osb, 0); | ||
717 | } | ||
718 | return status; | ||
719 | } | ||
720 | |||
721 | static void ocfs2_delete_response_cb(void *priv, | ||
722 | struct ocfs2_response_msg *resp) | ||
723 | { | ||
724 | int orphaned_slot, node; | ||
725 | struct inode *inode = priv; | ||
726 | |||
727 | orphaned_slot = be32_to_cpu(resp->r_orphaned_slot); | ||
728 | node = be32_to_cpu(resp->r_hdr.h_node_num); | ||
729 | mlog(0, "node %d tells us that inode %llu is orphaned in slot %d\n", | ||
730 | node, (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
731 | orphaned_slot); | ||
732 | |||
733 | /* The other node may not actually know which slot the inode | ||
734 | * is orphaned in. */ | ||
735 | if (orphaned_slot == OCFS2_INVALID_SLOT) | ||
736 | return; | ||
737 | |||
738 | /* Ok, the responding node knows which slot this inode is | ||
739 | * orphaned in. We verify that the information is correct and | ||
740 | * then record this in the inode. ocfs2_delete_inode will use | ||
741 | * this information to determine which lock to take. */ | ||
742 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
743 | mlog_bug_on_msg(OCFS2_I(inode)->ip_orphaned_slot != orphaned_slot && | ||
744 | OCFS2_I(inode)->ip_orphaned_slot | ||
745 | != OCFS2_INVALID_SLOT, "Inode %llu: Node %d says it's " | ||
746 | "orphaned in slot %d, we think it's in %d\n", | ||
747 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
748 | be32_to_cpu(resp->r_hdr.h_node_num), | ||
749 | orphaned_slot, OCFS2_I(inode)->ip_orphaned_slot); | ||
750 | |||
751 | OCFS2_I(inode)->ip_orphaned_slot = orphaned_slot; | ||
752 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
753 | } | ||
754 | |||
755 | int ocfs2_request_delete_vote(struct inode *inode) | ||
756 | { | ||
757 | int orphaned_slot, status; | ||
758 | struct ocfs2_net_response_cb delete_cb; | ||
759 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
760 | struct ocfs2_vote_msg *request; | ||
761 | |||
762 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
763 | orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; | ||
764 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
765 | |||
766 | delete_cb.rc_cb = ocfs2_delete_response_cb; | ||
767 | delete_cb.rc_priv = inode; | ||
768 | |||
769 | mlog(0, "Inode %llu, we start thinking orphaned slot is %d\n", | ||
770 | (unsigned long long)OCFS2_I(inode)->ip_blkno, orphaned_slot); | ||
771 | |||
772 | status = -ENOMEM; | ||
773 | request = ocfs2_new_vote_request(osb, OCFS2_I(inode)->ip_blkno, | ||
774 | inode->i_generation, | ||
775 | OCFS2_VOTE_REQ_DELETE, orphaned_slot); | ||
776 | if (request) { | ||
777 | status = ocfs2_request_vote(inode, request, &delete_cb); | ||
778 | |||
779 | kfree(request); | ||
780 | } | ||
781 | |||
782 | return status; | ||
783 | } | ||
784 | |||
785 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) | 515 | int ocfs2_request_mount_vote(struct ocfs2_super *osb) |
786 | { | 516 | { |
787 | int status; | 517 | int status; |
788 | struct ocfs2_vote_msg *request = NULL; | 518 | struct ocfs2_vote_msg *request = NULL; |
789 | 519 | ||
790 | request = ocfs2_new_vote_request(osb, 0ULL, 0, | 520 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_MOUNT); |
791 | OCFS2_VOTE_REQ_MOUNT, 0); | ||
792 | if (!request) { | 521 | if (!request) { |
793 | status = -ENOMEM; | 522 | status = -ENOMEM; |
794 | goto bail; | 523 | goto bail; |
@@ -821,8 +550,7 @@ int ocfs2_request_umount_vote(struct ocfs2_super *osb) | |||
821 | int status; | 550 | int status; |
822 | struct ocfs2_vote_msg *request = NULL; | 551 | struct ocfs2_vote_msg *request = NULL; |
823 | 552 | ||
824 | request = ocfs2_new_vote_request(osb, 0ULL, 0, | 553 | request = ocfs2_new_vote_request(osb, 0ULL, 0, OCFS2_VOTE_REQ_UMOUNT); |
825 | OCFS2_VOTE_REQ_UMOUNT, 0); | ||
826 | if (!request) { | 554 | if (!request) { |
827 | status = -ENOMEM; | 555 | status = -ENOMEM; |
828 | goto bail; | 556 | goto bail; |
@@ -969,7 +697,6 @@ static int ocfs2_handle_vote_message(struct o2net_msg *msg, | |||
969 | be32_to_cpu(work->w_msg.v_hdr.h_generation)); | 697 | be32_to_cpu(work->w_msg.v_hdr.h_generation)); |
970 | mlog(0, "h_node_num = %u\n", | 698 | mlog(0, "h_node_num = %u\n", |
971 | be32_to_cpu(work->w_msg.v_hdr.h_node_num)); | 699 | be32_to_cpu(work->w_msg.v_hdr.h_node_num)); |
972 | mlog(0, "v_generic1 = %u\n", be32_to_cpu(work->w_msg.md1.v_generic1)); | ||
973 | 700 | ||
974 | spin_lock(&osb->vote_task_lock); | 701 | spin_lock(&osb->vote_task_lock); |
975 | list_add_tail(&work->w_list, &osb->vote_list); | 702 | list_add_tail(&work->w_list, &osb->vote_list); |