Diffstat (limited to 'fs/ocfs2')

 -rw-r--r--  fs/ocfs2/cluster/heartbeat.c  | 40
 -rw-r--r--  fs/ocfs2/dlm/userdlm.c        | 74
 -rw-r--r--  fs/ocfs2/file.c               | 19

 3 files changed, 99 insertions(+), 34 deletions(-)
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d06867..21f38accd039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
 struct o2hb_bio_wait_ctxt {
 	atomic_t          wc_num_reqs;
 	struct completion wc_io_complete;
+	int               wc_error;
 };
 
 static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
 {
 	atomic_set(&wc->wc_num_reqs, num_ios);
 	init_completion(&wc->wc_io_complete);
+	wc->wc_error = 0;
 }
 
 /* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
 {
 	struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
 
-	if (error)
+	if (error) {
 		mlog(ML_ERROR, "IO Error %d\n", error);
+		wc->wc_error = error;
+	}
 
 	if (bio->bi_size)
 		return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
 
 bail_and_wait:
 	o2hb_wait_on_io(reg, &wc);
+	if (wc.wc_error && !status)
+		status = wc.wc_error;
 
 	if (bios) {
 		for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
 	return highest;
 }
 
-static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
+static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 {
 	int i, ret, highest_node, change = 0;
 	unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
 	struct bio *write_bio;
 	struct o2hb_bio_wait_ctxt write_wc;
 
-	if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes)))
-		return;
+	ret = o2nm_configured_node_map(configured_nodes,
+				       sizeof(configured_nodes));
+	if (ret) {
+		mlog_errno(ret);
+		return ret;
+	}
 
 	highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
 	if (highest_node >= O2NM_MAX_NODES) {
 		mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
-		return;
+		return -EINVAL;
 	}
 
 	/* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	ret = o2hb_read_slots(reg, highest_node + 1);
 	if (ret < 0) {
 		mlog_errno(ret);
-		return;
+		return ret;
 	}
 
 	/* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
 	if (ret < 0) {
 		mlog_errno(ret);
-		return;
+		return ret;
 	}
 
 	i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 	 */
 	o2hb_wait_on_io(reg, &write_wc);
 	bio_put(write_bio);
+	if (write_wc.wc_error) {
+		/* Do not re-arm the write timeout on I/O error - we
+		 * can't be sure that the new block ever made it to
+		 * disk */
+		mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
+		     write_wc.wc_error, reg->hr_dev_name);
+		return write_wc.wc_error;
+	}
+
 	o2hb_arm_write_timeout(reg);
 
 	/* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
 		if (atomic_dec_and_test(&reg->hr_steady_iterations))
 			wake_up(&o2hb_steady_queue);
 	}
+
+	return 0;
 }
 
 /* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
 	 * likely to time itself out. */
 	do_gettimeofday(&before_hb);
 
-	o2hb_do_disk_heartbeat(reg);
+	i = 0;
+	do {
+		ret = o2hb_do_disk_heartbeat(reg);
+	} while (ret && ++i < 2);
 
 	do_gettimeofday(&after_hb);
 	elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4744ee..74ca4e5f9765 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
 		return;
 	}
 
+	mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
+			"Lockres %s, requested ivmode. flags 0x%x\n",
+			lockres->l_name, lockres->l_flags);
+
 	/* we're downconverting. */
 	if (lockres->l_requested < lockres->l_level) {
 		if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
 
 	mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
 
-	if (status != DLM_NORMAL)
+	if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
 		mlog(ML_ERROR, "Dlm returns status %d\n", status);
 
 	spin_lock(&lockres->l_lock);
-	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN)
+	/* The teardown flag gets set early during the unlock process,
+	 * so test the cancel flag to make sure that this ast isn't
+	 * for a concurrent cancel. */
+	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
+	    && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
 		lockres->l_level = LKM_IVMODE;
-	else {
+	} else if (status == DLM_CANCELGRANT) {
+		mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
+		     lockres->l_name, lockres->l_flags);
+		/* We tried to cancel a convert request, but it was
+		 * already granted. Don't clear the busy flag - the
+		 * ast should've done this already. */
+		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
+		goto out_noclear;
+	} else {
+		BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
+		/* Cancel succeeded, we want to re-queue */
+		mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
+		     lockres->l_name, lockres->l_flags);
 		lockres->l_requested = LKM_IVMODE; /* cancel an
 						    * upconvert
 						    * request. */
 		lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
 		/* we want the unblock thread to look at it again
 		 * now. */
-		__user_dlm_queue_lockres(lockres);
+		if (lockres->l_flags & USER_LOCK_BLOCKED)
+			__user_dlm_queue_lockres(lockres);
 	}
 
 	lockres->l_flags &= ~USER_LOCK_BUSY;
+out_noclear:
 	spin_unlock(&lockres->l_lock);
 
 	wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
 
 	spin_lock(&lockres->l_lock);
 
-	BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED));
-	BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED));
+	mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
+			"Lockres %s, flags 0x%x\n",
+			lockres->l_name, lockres->l_flags);
 
-	/* notice that we don't clear USER_LOCK_BLOCKED here. That's
-	 * for user_ast to do. */
+	/* notice that we don't clear USER_LOCK_BLOCKED here. If it's
+	 * set, we want user_ast to clear it. */
 	lockres->l_flags &= ~USER_LOCK_QUEUED;
 
+	/* It's valid to get here and no longer be blocked - if we get
+	 * several basts in a row, we might be queued by the first
+	 * one, the unblock thread might run and clear the queued
+	 * flag, and finally we might get another bast which re-queues
+	 * us before our ast for the downconvert is called. */
+	if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
+		mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
+		     lockres->l_name, lockres->l_flags);
+		spin_unlock(&lockres->l_lock);
+		goto drop_ref;
+	}
+
 	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
 		mlog(0, "lock is in teardown so we do nothing\n");
 		spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
 	}
 
 	if (lockres->l_flags & USER_LOCK_BUSY) {
-		mlog(0, "BUSY flag detected...\n");
+		mlog(0, "Cancel lock %s, flags 0x%x\n",
+		     lockres->l_name, lockres->l_flags);
+
 		if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
 			spin_unlock(&lockres->l_lock);
 			goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
 				   LKM_CANCEL,
 				   user_unlock_ast,
 				   lockres);
-		if (status == DLM_CANCELGRANT) {
-			/* If we got this, then the ast was fired
-			 * before we could cancel. We cleanup our
-			 * state, and restart the function. */
-			spin_lock(&lockres->l_lock);
-			lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
-			spin_unlock(&lockres->l_lock);
-		} else if (status != DLM_NORMAL)
+		if (status != DLM_NORMAL)
 			user_log_dlm_error("dlmunlock", status, lockres);
 		goto drop_ref;
 	}
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 	mlog(0, "asked to destroy %s\n", lockres->l_name);
 
 	spin_lock(&lockres->l_lock);
+	if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
+		mlog(0, "Lock is already torn down\n");
+		spin_unlock(&lockres->l_lock);
+		return 0;
+	}
+
+	lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
+
 	while (lockres->l_flags & USER_LOCK_BUSY) {
 		spin_unlock(&lockres->l_lock);
 
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
 
 	lockres->l_flags &= ~USER_LOCK_ATTACHED;
 	lockres->l_flags |= USER_LOCK_BUSY;
-	lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
 	spin_unlock(&lockres->l_lock);
 
 	mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a6a46b..581eb451a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
 	if (new_i_size == le64_to_cpu(fe->i_size))
 		goto bail;
 
+	/* This forces other nodes to sync and drop their pages. Do
+	 * this even if we have a truncate without allocation change -
+	 * ocfs2 cluster sizes can be much greater than page size, so
+	 * we have to truncate them anyway. */
+	status = ocfs2_data_lock(inode, 1);
+	if (status < 0) {
+		mlog_errno(status);
+		goto bail;
+	}
+	ocfs2_data_unlock(inode, 1);
+
 	if (le32_to_cpu(fe->i_clusters) ==
 	    ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
 		mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
 		goto bail;
 	}
 
-	/* This forces other nodes to sync and drop their pages */
-	status = ocfs2_data_lock(inode, 1);
-	if (status < 0) {
-		mlog_errno(status);
-		goto bail;
-	}
-	ocfs2_data_unlock(inode, 1);
-
 	/* alright, we're going to need to do a full blown alloc size
 	 * change. Orphan the inode so that recovery can complete the
 	 * truncate if necessary. This does the task of marking