aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@g5.osdl.org> 2006-04-10 19:44:09 -0400
committer Linus Torvalds <torvalds@g5.osdl.org> 2006-04-10 19:44:09 -0400
commit e38d557896c4213dd0919770feac0f4a8f60151b (patch)
tree 6f35cc88a0591a2195ce3e4a849d34c367288903
parent de12a7878c11f3b282d640888aa635e0711d0b5e (diff)
parent 65714b918415e06c92426f6544b2296dae694590 (diff)
Merge branch 'upstream-linus' of git://oss.oracle.com/home/sourcebo/git/ocfs2
* 'upstream-linus' of git://oss.oracle.com/home/sourcebo/git/ocfs2:
  [PATCH] CONFIGFS_FS must depend on SYSFS
  [PATCH] Bogus NULL pointer check in fs/configfs/dir.c
  ocfs2: Better I/O error handling in heartbeat
  ocfs2: test and set teardown flag early in user_dlm_destroy_lock()
  ocfs2: Handle the DLM_CANCELGRANT case in user_unlock_ast()
  ocfs2: catch an invalid ast case in dlmfs
  ocfs2: remove an overly aggressive BUG() in dlmfs
  ocfs2: multi node truncate fix
-rw-r--r-- fs/Kconfig 2
-rw-r--r-- fs/configfs/dir.c 2
-rw-r--r-- fs/ocfs2/cluster/heartbeat.c 40
-rw-r--r-- fs/ocfs2/dlm/userdlm.c 74
-rw-r--r-- fs/ocfs2/file.c 19
5 files changed, 101 insertions, 36 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index e207be68d4ca..97f317413122 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -861,7 +861,7 @@ config RAMFS
861 861
862config CONFIGFS_FS 862config CONFIGFS_FS
863 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)" 863 tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
864 depends on EXPERIMENTAL 864 depends on SYSFS && EXPERIMENTAL
865 help 865 help
866 configfs is a ram-based filesystem that provides the converse 866 configfs is a ram-based filesystem that provides the converse
867 of sysfs's functionality. Where sysfs is a filesystem-based 867 of sysfs's functionality. Where sysfs is a filesystem-based
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8ed9b06a9828..5638c8f9362f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -504,7 +504,7 @@ static int populate_groups(struct config_group *group)
504 int ret = 0; 504 int ret = 0;
505 int i; 505 int i;
506 506
507 if (group && group->default_groups) { 507 if (group->default_groups) {
508 /* FYI, we're faking mkdir here 508 /* FYI, we're faking mkdir here
509 * I'm not sure we need this semaphore, as we're called 509 * I'm not sure we need this semaphore, as we're called
510 * from our parent's mkdir. That holds our parent's 510 * from our parent's mkdir. That holds our parent's
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index bff0f0d06867..21f38accd039 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -153,6 +153,7 @@ struct o2hb_region {
153struct o2hb_bio_wait_ctxt { 153struct o2hb_bio_wait_ctxt {
154 atomic_t wc_num_reqs; 154 atomic_t wc_num_reqs;
155 struct completion wc_io_complete; 155 struct completion wc_io_complete;
156 int wc_error;
156}; 157};
157 158
158static void o2hb_write_timeout(void *arg) 159static void o2hb_write_timeout(void *arg)
@@ -186,6 +187,7 @@ static inline void o2hb_bio_wait_init(struct o2hb_bio_wait_ctxt *wc,
186{ 187{
187 atomic_set(&wc->wc_num_reqs, num_ios); 188 atomic_set(&wc->wc_num_reqs, num_ios);
188 init_completion(&wc->wc_io_complete); 189 init_completion(&wc->wc_io_complete);
190 wc->wc_error = 0;
189} 191}
190 192
191/* Used in error paths too */ 193/* Used in error paths too */
@@ -218,8 +220,10 @@ static int o2hb_bio_end_io(struct bio *bio,
218{ 220{
219 struct o2hb_bio_wait_ctxt *wc = bio->bi_private; 221 struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
220 222
221 if (error) 223 if (error) {
222 mlog(ML_ERROR, "IO Error %d\n", error); 224 mlog(ML_ERROR, "IO Error %d\n", error);
225 wc->wc_error = error;
226 }
223 227
224 if (bio->bi_size) 228 if (bio->bi_size)
225 return 1; 229 return 1;
@@ -390,6 +394,8 @@ static int o2hb_read_slots(struct o2hb_region *reg,
390 394
391bail_and_wait: 395bail_and_wait:
392 o2hb_wait_on_io(reg, &wc); 396 o2hb_wait_on_io(reg, &wc);
397 if (wc.wc_error && !status)
398 status = wc.wc_error;
393 399
394 if (bios) { 400 if (bios) {
395 for(i = 0; i < num_bios; i++) 401 for(i = 0; i < num_bios; i++)
@@ -790,20 +796,24 @@ static int o2hb_highest_node(unsigned long *nodes,
790 return highest; 796 return highest;
791} 797}
792 798
793static void o2hb_do_disk_heartbeat(struct o2hb_region *reg) 799static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
794{ 800{
795 int i, ret, highest_node, change = 0; 801 int i, ret, highest_node, change = 0;
796 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; 802 unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
797 struct bio *write_bio; 803 struct bio *write_bio;
798 struct o2hb_bio_wait_ctxt write_wc; 804 struct o2hb_bio_wait_ctxt write_wc;
799 805
800 if (o2nm_configured_node_map(configured_nodes, sizeof(configured_nodes))) 806 ret = o2nm_configured_node_map(configured_nodes,
801 return; 807 sizeof(configured_nodes));
808 if (ret) {
809 mlog_errno(ret);
810 return ret;
811 }
802 812
803 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); 813 highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
804 if (highest_node >= O2NM_MAX_NODES) { 814 if (highest_node >= O2NM_MAX_NODES) {
805 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); 815 mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
806 return; 816 return -EINVAL;
807 } 817 }
808 818
809 /* No sense in reading the slots of nodes that don't exist 819 /* No sense in reading the slots of nodes that don't exist
@@ -813,7 +823,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
813 ret = o2hb_read_slots(reg, highest_node + 1); 823 ret = o2hb_read_slots(reg, highest_node + 1);
814 if (ret < 0) { 824 if (ret < 0) {
815 mlog_errno(ret); 825 mlog_errno(ret);
816 return; 826 return ret;
817 } 827 }
818 828
819 /* With an up to date view of the slots, we can check that no 829 /* With an up to date view of the slots, we can check that no
@@ -831,7 +841,7 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
831 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc); 841 ret = o2hb_issue_node_write(reg, &write_bio, &write_wc);
832 if (ret < 0) { 842 if (ret < 0) {
833 mlog_errno(ret); 843 mlog_errno(ret);
834 return; 844 return ret;
835 } 845 }
836 846
837 i = -1; 847 i = -1;
@@ -847,6 +857,15 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
847 */ 857 */
848 o2hb_wait_on_io(reg, &write_wc); 858 o2hb_wait_on_io(reg, &write_wc);
849 bio_put(write_bio); 859 bio_put(write_bio);
860 if (write_wc.wc_error) {
861 /* Do not re-arm the write timeout on I/O error - we
862 * can't be sure that the new block ever made it to
863 * disk */
864 mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
865 write_wc.wc_error, reg->hr_dev_name);
866 return write_wc.wc_error;
867 }
868
850 o2hb_arm_write_timeout(reg); 869 o2hb_arm_write_timeout(reg);
851 870
852 /* let the person who launched us know when things are steady */ 871 /* let the person who launched us know when things are steady */
@@ -854,6 +873,8 @@ static void o2hb_do_disk_heartbeat(struct o2hb_region *reg)
854 if (atomic_dec_and_test(&reg->hr_steady_iterations)) 873 if (atomic_dec_and_test(&reg->hr_steady_iterations))
855 wake_up(&o2hb_steady_queue); 874 wake_up(&o2hb_steady_queue);
856 } 875 }
876
877 return 0;
857} 878}
858 879
859/* Subtract b from a, storing the result in a. a *must* have a larger 880/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -913,7 +934,10 @@ static int o2hb_thread(void *data)
913 * likely to time itself out. */ 934 * likely to time itself out. */
914 do_gettimeofday(&before_hb); 935 do_gettimeofday(&before_hb);
915 936
916 o2hb_do_disk_heartbeat(reg); 937 i = 0;
938 do {
939 ret = o2hb_do_disk_heartbeat(reg);
940 } while (ret && ++i < 2);
917 941
918 do_gettimeofday(&after_hb); 942 do_gettimeofday(&after_hb);
919 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb); 943 elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
diff --git a/fs/ocfs2/dlm/userdlm.c b/fs/ocfs2/dlm/userdlm.c
index c3764f4744ee..74ca4e5f9765 100644
--- a/fs/ocfs2/dlm/userdlm.c
+++ b/fs/ocfs2/dlm/userdlm.c
@@ -139,6 +139,10 @@ static void user_ast(void *opaque)
139 return; 139 return;
140 } 140 }
141 141
142 mlog_bug_on_msg(lockres->l_requested == LKM_IVMODE,
143 "Lockres %s, requested ivmode. flags 0x%x\n",
144 lockres->l_name, lockres->l_flags);
145
142 /* we're downconverting. */ 146 /* we're downconverting. */
143 if (lockres->l_requested < lockres->l_level) { 147 if (lockres->l_requested < lockres->l_level) {
144 if (lockres->l_requested <= 148 if (lockres->l_requested <=
@@ -229,23 +233,42 @@ static void user_unlock_ast(void *opaque, enum dlm_status status)
229 233
230 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name); 234 mlog(0, "UNLOCK AST called on lock %s\n", lockres->l_name);
231 235
232 if (status != DLM_NORMAL) 236 if (status != DLM_NORMAL && status != DLM_CANCELGRANT)
233 mlog(ML_ERROR, "Dlm returns status %d\n", status); 237 mlog(ML_ERROR, "Dlm returns status %d\n", status);
234 238
235 spin_lock(&lockres->l_lock); 239 spin_lock(&lockres->l_lock);
236 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) 240 /* The teardown flag gets set early during the unlock process,
241 * so test the cancel flag to make sure that this ast isn't
242 * for a concurrent cancel. */
243 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN
244 && !(lockres->l_flags & USER_LOCK_IN_CANCEL)) {
237 lockres->l_level = LKM_IVMODE; 245 lockres->l_level = LKM_IVMODE;
238 else { 246 } else if (status == DLM_CANCELGRANT) {
247 mlog(0, "Lock %s, cancel fails, flags 0x%x\n",
248 lockres->l_name, lockres->l_flags);
249 /* We tried to cancel a convert request, but it was
250 * already granted. Don't clear the busy flag - the
251 * ast should've done this already. */
252 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
253 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
254 goto out_noclear;
255 } else {
256 BUG_ON(!(lockres->l_flags & USER_LOCK_IN_CANCEL));
257 /* Cancel succeeded, we want to re-queue */
258 mlog(0, "Lock %s, cancel succeeds, flags 0x%x\n",
259 lockres->l_name, lockres->l_flags);
239 lockres->l_requested = LKM_IVMODE; /* cancel an 260 lockres->l_requested = LKM_IVMODE; /* cancel an
240 * upconvert 261 * upconvert
241 * request. */ 262 * request. */
242 lockres->l_flags &= ~USER_LOCK_IN_CANCEL; 263 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
243 /* we want the unblock thread to look at it again 264 /* we want the unblock thread to look at it again
244 * now. */ 265 * now. */
245 __user_dlm_queue_lockres(lockres); 266 if (lockres->l_flags & USER_LOCK_BLOCKED)
267 __user_dlm_queue_lockres(lockres);
246 } 268 }
247 269
248 lockres->l_flags &= ~USER_LOCK_BUSY; 270 lockres->l_flags &= ~USER_LOCK_BUSY;
271out_noclear:
249 spin_unlock(&lockres->l_lock); 272 spin_unlock(&lockres->l_lock);
250 273
251 wake_up(&lockres->l_event); 274 wake_up(&lockres->l_event);
@@ -268,13 +291,26 @@ static void user_dlm_unblock_lock(void *opaque)
268 291
269 spin_lock(&lockres->l_lock); 292 spin_lock(&lockres->l_lock);
270 293
271 BUG_ON(!(lockres->l_flags & USER_LOCK_BLOCKED)); 294 mlog_bug_on_msg(!(lockres->l_flags & USER_LOCK_QUEUED),
272 BUG_ON(!(lockres->l_flags & USER_LOCK_QUEUED)); 295 "Lockres %s, flags 0x%x\n",
296 lockres->l_name, lockres->l_flags);
273 297
274 /* notice that we don't clear USER_LOCK_BLOCKED here. That's 298 /* notice that we don't clear USER_LOCK_BLOCKED here. If it's
275 * for user_ast to do. */ 299 * set, we want user_ast clear it. */
276 lockres->l_flags &= ~USER_LOCK_QUEUED; 300 lockres->l_flags &= ~USER_LOCK_QUEUED;
277 301
302 /* It's valid to get here and no longer be blocked - if we get
303 * several basts in a row, we might be queued by the first
304 * one, the unblock thread might run and clear the queued
305 * flag, and finally we might get another bast which re-queues
306 * us before our ast for the downconvert is called. */
307 if (!(lockres->l_flags & USER_LOCK_BLOCKED)) {
308 mlog(0, "Lockres %s, flags 0x%x: queued but not blocking\n",
309 lockres->l_name, lockres->l_flags);
310 spin_unlock(&lockres->l_lock);
311 goto drop_ref;
312 }
313
278 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) { 314 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
279 mlog(0, "lock is in teardown so we do nothing\n"); 315 mlog(0, "lock is in teardown so we do nothing\n");
280 spin_unlock(&lockres->l_lock); 316 spin_unlock(&lockres->l_lock);
@@ -282,7 +318,9 @@ static void user_dlm_unblock_lock(void *opaque)
282 } 318 }
283 319
284 if (lockres->l_flags & USER_LOCK_BUSY) { 320 if (lockres->l_flags & USER_LOCK_BUSY) {
285 mlog(0, "BUSY flag detected...\n"); 321 mlog(0, "Cancel lock %s, flags 0x%x\n",
322 lockres->l_name, lockres->l_flags);
323
286 if (lockres->l_flags & USER_LOCK_IN_CANCEL) { 324 if (lockres->l_flags & USER_LOCK_IN_CANCEL) {
287 spin_unlock(&lockres->l_lock); 325 spin_unlock(&lockres->l_lock);
288 goto drop_ref; 326 goto drop_ref;
@@ -296,14 +334,7 @@ static void user_dlm_unblock_lock(void *opaque)
296 LKM_CANCEL, 334 LKM_CANCEL,
297 user_unlock_ast, 335 user_unlock_ast,
298 lockres); 336 lockres);
299 if (status == DLM_CANCELGRANT) { 337 if (status != DLM_NORMAL)
300 /* If we got this, then the ast was fired
301 * before we could cancel. We cleanup our
302 * state, and restart the function. */
303 spin_lock(&lockres->l_lock);
304 lockres->l_flags &= ~USER_LOCK_IN_CANCEL;
305 spin_unlock(&lockres->l_lock);
306 } else if (status != DLM_NORMAL)
307 user_log_dlm_error("dlmunlock", status, lockres); 338 user_log_dlm_error("dlmunlock", status, lockres);
308 goto drop_ref; 339 goto drop_ref;
309 } 340 }
@@ -581,6 +612,14 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
581 mlog(0, "asked to destroy %s\n", lockres->l_name); 612 mlog(0, "asked to destroy %s\n", lockres->l_name);
582 613
583 spin_lock(&lockres->l_lock); 614 spin_lock(&lockres->l_lock);
615 if (lockres->l_flags & USER_LOCK_IN_TEARDOWN) {
616 mlog(0, "Lock is already torn down\n");
617 spin_unlock(&lockres->l_lock);
618 return 0;
619 }
620
621 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
622
584 while (lockres->l_flags & USER_LOCK_BUSY) { 623 while (lockres->l_flags & USER_LOCK_BUSY) {
585 spin_unlock(&lockres->l_lock); 624 spin_unlock(&lockres->l_lock);
586 625
@@ -606,7 +645,6 @@ int user_dlm_destroy_lock(struct user_lock_res *lockres)
606 645
607 lockres->l_flags &= ~USER_LOCK_ATTACHED; 646 lockres->l_flags &= ~USER_LOCK_ATTACHED;
608 lockres->l_flags |= USER_LOCK_BUSY; 647 lockres->l_flags |= USER_LOCK_BUSY;
609 lockres->l_flags |= USER_LOCK_IN_TEARDOWN;
610 spin_unlock(&lockres->l_lock); 648 spin_unlock(&lockres->l_lock);
611 649
612 mlog(0, "unlocking lockres %s\n", lockres->l_name); 650 mlog(0, "unlocking lockres %s\n", lockres->l_name);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 34e903a6a46b..581eb451a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -260,6 +260,17 @@ static int ocfs2_truncate_file(struct inode *inode,
260 if (new_i_size == le64_to_cpu(fe->i_size)) 260 if (new_i_size == le64_to_cpu(fe->i_size))
261 goto bail; 261 goto bail;
262 262
263 /* This forces other nodes to sync and drop their pages. Do
264 * this even if we have a truncate without allocation change -
265 * ocfs2 cluster sizes can be much greater than page size, so
266 * we have to truncate them anyway. */
267 status = ocfs2_data_lock(inode, 1);
268 if (status < 0) {
269 mlog_errno(status);
270 goto bail;
271 }
272 ocfs2_data_unlock(inode, 1);
273
263 if (le32_to_cpu(fe->i_clusters) == 274 if (le32_to_cpu(fe->i_clusters) ==
264 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) { 275 ocfs2_clusters_for_bytes(osb->sb, new_i_size)) {
265 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n", 276 mlog(0, "fe->i_clusters = %u, so we do a simple truncate\n",
@@ -272,14 +283,6 @@ static int ocfs2_truncate_file(struct inode *inode,
272 goto bail; 283 goto bail;
273 } 284 }
274 285
275 /* This forces other nodes to sync and drop their pages */
276 status = ocfs2_data_lock(inode, 1);
277 if (status < 0) {
278 mlog_errno(status);
279 goto bail;
280 }
281 ocfs2_data_unlock(inode, 1);
282
283 /* alright, we're going to need to do a full blown alloc size 286 /* alright, we're going to need to do a full blown alloc size
284 * change. Orphan the inode so that recovery can complete the 287 * change. Orphan the inode so that recovery can complete the
285 * truncate if necessary. This does the task of marking 288 * truncate if necessary. This does the task of marking