Diffstat (limited to 'fs/ocfs2/dlmglue.c')
-rw-r--r-- | fs/ocfs2/dlmglue.c | 571
1 file changed, 413 insertions, 158 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 41c76ff2fcfb..3867244fb144 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -55,7 +55,6 @@ | |||
55 | #include "slot_map.h" | 55 | #include "slot_map.h" |
56 | #include "super.h" | 56 | #include "super.h" |
57 | #include "uptodate.h" | 57 | #include "uptodate.h" |
58 | #include "vote.h" | ||
59 | 58 | ||
60 | #include "buffer_head_io.h" | 59 | #include "buffer_head_io.h" |
61 | 60 | ||
@@ -69,6 +68,7 @@ struct ocfs2_mask_waiter { | |||
69 | 68 | ||
70 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 69 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 70 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); | ||
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Return value from ->downconvert_worker functions. | 74 | * Return value from ->downconvert_worker functions. |
@@ -153,10 +153,10 @@ struct ocfs2_lock_res_ops { | |||
153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Optionally called in the downconvert (or "vote") thread | 156 | * Optionally called in the downconvert thread after a |
157 | * after a successful downconvert. The lockres will not be | 157 | * successful downconvert. The lockres will not be referenced |
158 | * referenced after this callback is called, so it is safe to | 158 | * after this callback is called, so it is safe to free |
159 | * free memory, etc. | 159 | * memory, etc. |
160 | * | 160 | * |
161 | * The exact semantics of when this is called are controlled | 161 | * The exact semantics of when this is called are controlled |
162 | * by ->downconvert_worker() | 162 | * by ->downconvert_worker() |
@@ -225,17 +225,12 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | |||
225 | .flags = 0, | 225 | .flags = 0, |
226 | }; | 226 | }; |
227 | 227 | ||
228 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 228 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { |
229 | .get_osb = ocfs2_get_inode_osb, | 229 | .get_osb = ocfs2_get_inode_osb, |
230 | .check_downconvert = ocfs2_check_meta_downconvert, | 230 | .check_downconvert = ocfs2_check_meta_downconvert, |
231 | .set_lvb = ocfs2_set_meta_lvb, | 231 | .set_lvb = ocfs2_set_meta_lvb, |
232 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | ||
233 | }; | ||
234 | |||
235 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | ||
236 | .get_osb = ocfs2_get_inode_osb, | ||
237 | .downconvert_worker = ocfs2_data_convert_worker, | 232 | .downconvert_worker = ocfs2_data_convert_worker, |
238 | .flags = 0, | 233 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
239 | }; | 234 | }; |
240 | 235 | ||
241 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 236 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { |
@@ -258,10 +253,14 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | |||
258 | .flags = 0, | 253 | .flags = 0, |
259 | }; | 254 | }; |
260 | 255 | ||
256 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | ||
257 | .get_osb = ocfs2_get_file_osb, | ||
258 | .flags = 0, | ||
259 | }; | ||
260 | |||
261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
262 | { | 262 | { |
263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
264 | lockres->l_type == OCFS2_LOCK_TYPE_DATA || | ||
265 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 264 | lockres->l_type == OCFS2_LOCK_TYPE_RW || |
266 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 265 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; |
267 | } | 266 | } |
@@ -310,12 +309,24 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
310 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 309 | "resource %s: %s\n", dlm_errname(_stat), _func, \ |
311 | _lockres->l_name, dlm_errmsg(_stat)); \ | 310 | _lockres->l_name, dlm_errmsg(_stat)); \ |
312 | } while (0) | 311 | } while (0) |
313 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 312 | static int ocfs2_downconvert_thread(void *arg); |
314 | struct ocfs2_lock_res *lockres); | 313 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
315 | static int ocfs2_meta_lock_update(struct inode *inode, | 314 | struct ocfs2_lock_res *lockres); |
315 | static int ocfs2_inode_lock_update(struct inode *inode, | ||
316 | struct buffer_head **bh); | 316 | struct buffer_head **bh); |
317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
318 | static inline int ocfs2_highest_compat_lock_level(int level); | 318 | static inline int ocfs2_highest_compat_lock_level(int level); |
319 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | ||
320 | int new_level); | ||
321 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | ||
322 | struct ocfs2_lock_res *lockres, | ||
323 | int new_level, | ||
324 | int lvb); | ||
325 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | ||
326 | struct ocfs2_lock_res *lockres); | ||
327 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | ||
328 | struct ocfs2_lock_res *lockres); | ||
329 | |||
319 | 330 | ||
320 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 331 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
321 | u64 blkno, | 332 | u64 blkno, |
@@ -402,10 +413,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
402 | ops = &ocfs2_inode_rw_lops; | 413 | ops = &ocfs2_inode_rw_lops; |
403 | break; | 414 | break; |
404 | case OCFS2_LOCK_TYPE_META: | 415 | case OCFS2_LOCK_TYPE_META: |
405 | ops = &ocfs2_inode_meta_lops; | 416 | ops = &ocfs2_inode_inode_lops; |
406 | break; | ||
407 | case OCFS2_LOCK_TYPE_DATA: | ||
408 | ops = &ocfs2_inode_data_lops; | ||
409 | break; | 417 | break; |
410 | case OCFS2_LOCK_TYPE_OPEN: | 418 | case OCFS2_LOCK_TYPE_OPEN: |
411 | ops = &ocfs2_inode_open_lops; | 419 | ops = &ocfs2_inode_open_lops; |
@@ -428,6 +436,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | |||
428 | return OCFS2_SB(inode->i_sb); | 436 | return OCFS2_SB(inode->i_sb); |
429 | } | 437 | } |
430 | 438 | ||
439 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | ||
440 | { | ||
441 | struct ocfs2_file_private *fp = lockres->l_priv; | ||
442 | |||
443 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | ||
444 | } | ||
445 | |||
431 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 446 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) |
432 | { | 447 | { |
433 | __be64 inode_blkno_be; | 448 | __be64 inode_blkno_be; |
@@ -508,6 +523,21 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
508 | &ocfs2_rename_lops, osb); | 523 | &ocfs2_rename_lops, osb); |
509 | } | 524 | } |
510 | 525 | ||
526 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
527 | struct ocfs2_file_private *fp) | ||
528 | { | ||
529 | struct inode *inode = fp->fp_file->f_mapping->host; | ||
530 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
531 | |||
532 | ocfs2_lock_res_init_once(lockres); | ||
533 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | ||
534 | inode->i_generation, lockres->l_name); | ||
535 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
536 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | ||
537 | fp); | ||
538 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | ||
539 | } | ||
540 | |||
511 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 541 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) |
512 | { | 542 | { |
513 | mlog_entry_void(); | 543 | mlog_entry_void(); |
@@ -670,7 +700,7 @@ static inline void ocfs2_generic_handle_attach_action(struct ocfs2_lock_res *loc | |||
670 | { | 700 | { |
671 | mlog_entry_void(); | 701 | mlog_entry_void(); |
672 | 702 | ||
673 | BUG_ON((!lockres->l_flags & OCFS2_LOCK_BUSY)); | 703 | BUG_ON((!(lockres->l_flags & OCFS2_LOCK_BUSY))); |
674 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); | 704 | BUG_ON(lockres->l_flags & OCFS2_LOCK_ATTACHED); |
675 | 705 | ||
676 | if (lockres->l_requested > LKM_NLMODE && | 706 | if (lockres->l_requested > LKM_NLMODE && |
@@ -724,6 +754,13 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
724 | lockres->l_name, level, lockres->l_level, | 754 | lockres->l_name, level, lockres->l_level, |
725 | ocfs2_lock_type_string(lockres->l_type)); | 755 | ocfs2_lock_type_string(lockres->l_type)); |
726 | 756 | ||
757 | /* | ||
758 | * We can skip the bast for locks which don't enable caching - | ||
759 | * they'll be dropped at the earliest possible time anyway. | ||
760 | */ | ||
761 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | ||
762 | return; | ||
763 | |||
727 | spin_lock_irqsave(&lockres->l_lock, flags); | 764 | spin_lock_irqsave(&lockres->l_lock, flags); |
728 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 765 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
729 | if (needs_downconvert) | 766 | if (needs_downconvert) |
@@ -732,7 +769,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
732 | 769 | ||
733 | wake_up(&lockres->l_event); | 770 | wake_up(&lockres->l_event); |
734 | 771 | ||
735 | ocfs2_kick_vote_thread(osb); | 772 | ocfs2_wake_downconvert_thread(osb); |
736 | } | 773 | } |
737 | 774 | ||
738 | static void ocfs2_locking_ast(void *opaque) | 775 | static void ocfs2_locking_ast(void *opaque) |
@@ -935,6 +972,21 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | |||
935 | 972 | ||
936 | } | 973 | } |
937 | 974 | ||
975 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | ||
976 | struct ocfs2_lock_res *lockres) | ||
977 | { | ||
978 | int ret; | ||
979 | |||
980 | ret = wait_for_completion_interruptible(&mw->mw_complete); | ||
981 | if (ret) | ||
982 | lockres_remove_mask_waiter(lockres, mw); | ||
983 | else | ||
984 | ret = mw->mw_status; | ||
985 | /* Re-arm the completion in case we want to wait on it again */ | ||
986 | INIT_COMPLETION(mw->mw_complete); | ||
987 | return ret; | ||
988 | } | ||
989 | |||
938 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 990 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
939 | struct ocfs2_lock_res *lockres, | 991 | struct ocfs2_lock_res *lockres, |
940 | int level, | 992 | int level, |
@@ -980,18 +1032,6 @@ again: | |||
980 | goto unlock; | 1032 | goto unlock; |
981 | } | 1033 | } |
982 | 1034 | ||
983 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | ||
984 | /* lock has not been created yet. */ | ||
985 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
986 | |||
987 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | ||
988 | if (ret < 0) { | ||
989 | mlog_errno(ret); | ||
990 | goto out; | ||
991 | } | ||
992 | goto again; | ||
993 | } | ||
994 | |||
995 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && | 1035 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED && |
996 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { | 1036 | !ocfs2_may_continue_on_blocked_lock(lockres, level)) { |
997 | /* is the lock currently blocked on behalf of | 1037 | /* is the lock currently blocked on behalf of |
@@ -1006,7 +1046,14 @@ again: | |||
1006 | mlog(ML_ERROR, "lockres %s has action %u pending\n", | 1046 | mlog(ML_ERROR, "lockres %s has action %u pending\n", |
1007 | lockres->l_name, lockres->l_action); | 1047 | lockres->l_name, lockres->l_action); |
1008 | 1048 | ||
1009 | lockres->l_action = OCFS2_AST_CONVERT; | 1049 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { |
1050 | lockres->l_action = OCFS2_AST_ATTACH; | ||
1051 | lkm_flags &= ~LKM_CONVERT; | ||
1052 | } else { | ||
1053 | lockres->l_action = OCFS2_AST_CONVERT; | ||
1054 | lkm_flags |= LKM_CONVERT; | ||
1055 | } | ||
1056 | |||
1010 | lockres->l_requested = level; | 1057 | lockres->l_requested = level; |
1011 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | 1058 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); |
1012 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1059 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
@@ -1021,7 +1068,7 @@ again: | |||
1021 | status = dlmlock(osb->dlm, | 1068 | status = dlmlock(osb->dlm, |
1022 | level, | 1069 | level, |
1023 | &lockres->l_lksb, | 1070 | &lockres->l_lksb, |
1024 | lkm_flags|LKM_CONVERT, | 1071 | lkm_flags, |
1025 | lockres->l_name, | 1072 | lockres->l_name, |
1026 | OCFS2_LOCK_ID_MAX_LEN - 1, | 1073 | OCFS2_LOCK_ID_MAX_LEN - 1, |
1027 | ocfs2_locking_ast, | 1074 | ocfs2_locking_ast, |
@@ -1094,7 +1141,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
1094 | mlog_entry_void(); | 1141 | mlog_entry_void(); |
1095 | spin_lock_irqsave(&lockres->l_lock, flags); | 1142 | spin_lock_irqsave(&lockres->l_lock, flags); |
1096 | ocfs2_dec_holders(lockres, level); | 1143 | ocfs2_dec_holders(lockres, level); |
1097 | ocfs2_vote_on_unlock(osb, lockres); | 1144 | ocfs2_downconvert_on_unlock(osb, lockres); |
1098 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1145 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1099 | mlog_exit_void(); | 1146 | mlog_exit_void(); |
1100 | } | 1147 | } |
@@ -1152,13 +1199,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
1152 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1199 | * We don't want to use LKM_LOCAL on a meta data lock as they |
1153 | * don't use a generation in their lock names. | 1200 | * don't use a generation in their lock names. |
1154 | */ | 1201 | */ |
1155 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | 1202 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
1156 | if (ret) { | ||
1157 | mlog_errno(ret); | ||
1158 | goto bail; | ||
1159 | } | ||
1160 | |||
1161 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); | ||
1162 | if (ret) { | 1203 | if (ret) { |
1163 | mlog_errno(ret); | 1204 | mlog_errno(ret); |
1164 | goto bail; | 1205 | goto bail; |
@@ -1316,76 +1357,221 @@ out: | |||
1316 | mlog_exit_void(); | 1357 | mlog_exit_void(); |
1317 | } | 1358 | } |
1318 | 1359 | ||
1319 | int ocfs2_data_lock_full(struct inode *inode, | 1360 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, |
1320 | int write, | 1361 | int level) |
1321 | int arg_flags) | ||
1322 | { | 1362 | { |
1323 | int status = 0, level; | 1363 | int ret; |
1324 | struct ocfs2_lock_res *lockres; | 1364 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1325 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1365 | unsigned long flags; |
1366 | struct ocfs2_mask_waiter mw; | ||
1326 | 1367 | ||
1327 | BUG_ON(!inode); | 1368 | ocfs2_init_mask_waiter(&mw); |
1328 | 1369 | ||
1329 | mlog_entry_void(); | 1370 | retry_cancel: |
1371 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1372 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
1373 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
1374 | if (ret) { | ||
1375 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1376 | ret = ocfs2_cancel_convert(osb, lockres); | ||
1377 | if (ret < 0) { | ||
1378 | mlog_errno(ret); | ||
1379 | goto out; | ||
1380 | } | ||
1381 | goto retry_cancel; | ||
1382 | } | ||
1383 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1384 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1330 | 1385 | ||
1331 | mlog(0, "inode %llu take %s DATA lock\n", | 1386 | ocfs2_wait_for_mask(&mw); |
1332 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1387 | goto retry_cancel; |
1333 | write ? "EXMODE" : "PRMODE"); | 1388 | } |
1334 | 1389 | ||
1335 | /* We'll allow faking a readonly data lock for | 1390 | ret = -ERESTARTSYS; |
1336 | * rodevices. */ | 1391 | /* |
1337 | if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) { | 1392 | * We may still have gotten the lock, in which case there's no |
1338 | if (write) { | 1393 | * point to restarting the syscall. |
1339 | status = -EROFS; | 1394 | */ |
1340 | mlog_errno(status); | 1395 | if (lockres->l_level == level) |
1396 | ret = 0; | ||
1397 | |||
1398 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | ||
1399 | lockres->l_flags, lockres->l_level, lockres->l_action); | ||
1400 | |||
1401 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1402 | |||
1403 | out: | ||
1404 | return ret; | ||
1405 | } | ||
1406 | |||
1407 | /* | ||
1408 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | ||
1409 | * flock() calls. The locking approach this requires is sufficiently | ||
1410 | * different from all other cluster lock types that we implement a | ||
1411 | * separate path to the "low-level" dlm calls. In particular: | ||
1412 | * | ||
1413 | * - No optimization of lock levels is done - we take at exactly | ||
1414 | * what's been requested. | ||
1415 | * | ||
1416 | * - No lock caching is employed. We immediately downconvert to | ||
1417 | * no-lock at unlock time. This also means flock locks never go on | ||
1418 | * the blocking list. | ||
1419 | * | ||
1420 | * - Since userspace can trivially deadlock itself with flock, we make | ||
1421 | * sure to allow cancellation of a misbehaving application's flock() | ||
1422 | * request. | ||
1423 | * | ||
1424 | * - Access to any flock lockres doesn't require concurrency, so we | ||
1425 | * can simplify the code by requiring the caller to guarantee | ||
1426 | * serialization of dlmglue flock calls. | ||
1427 | */ | ||
1428 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | ||
1429 | { | ||
1430 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
1431 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | ||
1432 | unsigned long flags; | ||
1433 | struct ocfs2_file_private *fp = file->private_data; | ||
1434 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1435 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1436 | struct ocfs2_mask_waiter mw; | ||
1437 | |||
1438 | ocfs2_init_mask_waiter(&mw); | ||
1439 | |||
1440 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | ||
1441 | (lockres->l_level > LKM_NLMODE)) { | ||
1442 | mlog(ML_ERROR, | ||
1443 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | ||
1444 | "level: %u\n", lockres->l_name, lockres->l_flags, | ||
1445 | lockres->l_level); | ||
1446 | return -EINVAL; | ||
1447 | } | ||
1448 | |||
1449 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1450 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | ||
1451 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1452 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1453 | |||
1454 | /* | ||
1455 | * Get the lock at NLMODE to start - that way we | ||
1456 | * can cancel the upconvert request if need be. | ||
1457 | */ | ||
1458 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | ||
1459 | if (ret < 0) { | ||
1460 | mlog_errno(ret); | ||
1461 | goto out; | ||
1341 | } | 1462 | } |
1342 | goto out; | 1463 | |
1464 | ret = ocfs2_wait_for_mask(&mw); | ||
1465 | if (ret) { | ||
1466 | mlog_errno(ret); | ||
1467 | goto out; | ||
1468 | } | ||
1469 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1343 | } | 1470 | } |
1344 | 1471 | ||
1345 | if (ocfs2_mount_local(osb)) | 1472 | lockres->l_action = OCFS2_AST_CONVERT; |
1346 | goto out; | 1473 | lkm_flags |= LKM_CONVERT; |
1474 | lockres->l_requested = level; | ||
1475 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | ||
1347 | 1476 | ||
1348 | lockres = &OCFS2_I(inode)->ip_data_lockres; | 1477 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1478 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1349 | 1479 | ||
1350 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1480 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, |
1481 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | ||
1482 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | ||
1483 | if (ret != DLM_NORMAL) { | ||
1484 | if (trylock && ret == DLM_NOTQUEUED) | ||
1485 | ret = -EAGAIN; | ||
1486 | else { | ||
1487 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
1488 | ret = -EINVAL; | ||
1489 | } | ||
1351 | 1490 | ||
1352 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, | 1491 | ocfs2_recover_from_dlm_error(lockres, 1); |
1353 | 0, arg_flags); | 1492 | lockres_remove_mask_waiter(lockres, &mw); |
1354 | if (status < 0 && status != -EAGAIN) | 1493 | goto out; |
1355 | mlog_errno(status); | 1494 | } |
1495 | |||
1496 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | ||
1497 | if (ret == -ERESTARTSYS) { | ||
1498 | /* | ||
1499 | * Userspace can cause deadlock itself with | ||
1500 | * flock(). Current behavior locally is to allow the | ||
1501 | * deadlock, but abort the system call if a signal is | ||
1502 | * received. We follow this example, otherwise a | ||
1503 | * poorly written program could sit in kernel until | ||
1504 | * reboot. | ||
1505 | * | ||
1506 | * Handling this is a bit more complicated for Ocfs2 | ||
1507 | * though. We can't exit this function with an | ||
1508 | * outstanding lock request, so a cancel convert is | ||
1509 | * required. We intentionally overwrite 'ret' - if the | ||
1510 | * cancel fails and the lock was granted, it's easier | ||
1511 | * to just bubble success back up to the user. | ||
1512 | */ | ||
1513 | ret = ocfs2_flock_handle_signal(lockres, level); | ||
1514 | } | ||
1356 | 1515 | ||
1357 | out: | 1516 | out: |
1358 | mlog_exit(status); | 1517 | |
1359 | return status; | 1518 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", |
1519 | lockres->l_name, ex, trylock, ret); | ||
1520 | return ret; | ||
1360 | } | 1521 | } |
1361 | 1522 | ||
1362 | /* see ocfs2_meta_lock_with_page() */ | 1523 | void ocfs2_file_unlock(struct file *file) |
1363 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
1364 | int write, | ||
1365 | struct page *page) | ||
1366 | { | 1524 | { |
1367 | int ret; | 1525 | int ret; |
1526 | unsigned long flags; | ||
1527 | struct ocfs2_file_private *fp = file->private_data; | ||
1528 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1529 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1530 | struct ocfs2_mask_waiter mw; | ||
1368 | 1531 | ||
1369 | ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK); | 1532 | ocfs2_init_mask_waiter(&mw); |
1370 | if (ret == -EAGAIN) { | 1533 | |
1371 | unlock_page(page); | 1534 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
1372 | if (ocfs2_data_lock(inode, write) == 0) | 1535 | return; |
1373 | ocfs2_data_unlock(inode, write); | 1536 | |
1374 | ret = AOP_TRUNCATED_PAGE; | 1537 | if (lockres->l_level == LKM_NLMODE) |
1538 | return; | ||
1539 | |||
1540 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | ||
1541 | lockres->l_name, lockres->l_flags, lockres->l_level, | ||
1542 | lockres->l_action); | ||
1543 | |||
1544 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1545 | /* | ||
1546 | * Fake a blocking ast for the downconvert code. | ||
1547 | */ | ||
1548 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
1549 | lockres->l_blocking = LKM_EXMODE; | ||
1550 | |||
1551 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | ||
1552 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1553 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1554 | |||
1555 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | ||
1556 | if (ret) { | ||
1557 | mlog_errno(ret); | ||
1558 | return; | ||
1375 | } | 1559 | } |
1376 | 1560 | ||
1377 | return ret; | 1561 | ret = ocfs2_wait_for_mask(&mw); |
1562 | if (ret) | ||
1563 | mlog_errno(ret); | ||
1378 | } | 1564 | } |
1379 | 1565 | ||
1380 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 1566 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
1381 | struct ocfs2_lock_res *lockres) | 1567 | struct ocfs2_lock_res *lockres) |
1382 | { | 1568 | { |
1383 | int kick = 0; | 1569 | int kick = 0; |
1384 | 1570 | ||
1385 | mlog_entry_void(); | 1571 | mlog_entry_void(); |
1386 | 1572 | ||
1387 | /* If we know that another node is waiting on our lock, kick | 1573 | /* If we know that another node is waiting on our lock, kick |
1388 | * the vote thread pre-emptively when we reach a release | 1574 | * the downconvert thread pre-emptively when we reach a release |
1389 | * condition. */ | 1575 | * condition. */ |
1390 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1576 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
1391 | switch(lockres->l_blocking) { | 1577 | switch(lockres->l_blocking) { |
@@ -1403,27 +1589,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | |||
1403 | } | 1589 | } |
1404 | 1590 | ||
1405 | if (kick) | 1591 | if (kick) |
1406 | ocfs2_kick_vote_thread(osb); | 1592 | ocfs2_wake_downconvert_thread(osb); |
1407 | |||
1408 | mlog_exit_void(); | ||
1409 | } | ||
1410 | |||
1411 | void ocfs2_data_unlock(struct inode *inode, | ||
1412 | int write) | ||
1413 | { | ||
1414 | int level = write ? LKM_EXMODE : LKM_PRMODE; | ||
1415 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; | ||
1416 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1417 | |||
1418 | mlog_entry_void(); | ||
1419 | |||
1420 | mlog(0, "inode %llu drop %s DATA lock\n", | ||
1421 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1422 | write ? "EXMODE" : "PRMODE"); | ||
1423 | |||
1424 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | ||
1425 | !ocfs2_mount_local(osb)) | ||
1426 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | ||
1427 | 1593 | ||
1428 | mlog_exit_void(); | 1594 | mlog_exit_void(); |
1429 | } | 1595 | } |
@@ -1447,11 +1613,11 @@ static u64 ocfs2_pack_timespec(struct timespec *spec) | |||
1447 | 1613 | ||
1448 | /* Call this with the lockres locked. I am reasonably sure we don't | 1614 | /* Call this with the lockres locked. I am reasonably sure we don't |
1449 | * need ip_lock in this function as anyone who would be changing those | 1615 | * need ip_lock in this function as anyone who would be changing those |
1450 | * values is supposed to be blocked in ocfs2_meta_lock right now. */ | 1616 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ |
1451 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 1617 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) |
1452 | { | 1618 | { |
1453 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1619 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1454 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1620 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1455 | struct ocfs2_meta_lvb *lvb; | 1621 | struct ocfs2_meta_lvb *lvb; |
1456 | 1622 | ||
1457 | mlog_entry_void(); | 1623 | mlog_entry_void(); |
@@ -1501,7 +1667,7 @@ static void ocfs2_unpack_timespec(struct timespec *spec, | |||
1501 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 1667 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) |
1502 | { | 1668 | { |
1503 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1669 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1504 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1670 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1505 | struct ocfs2_meta_lvb *lvb; | 1671 | struct ocfs2_meta_lvb *lvb; |
1506 | 1672 | ||
1507 | mlog_entry_void(); | 1673 | mlog_entry_void(); |
@@ -1609,12 +1775,12 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre | |||
1609 | } | 1775 | } |
1610 | 1776 | ||
1611 | /* may or may not return a bh if it went to disk. */ | 1777 | /* may or may not return a bh if it went to disk. */ |
1612 | static int ocfs2_meta_lock_update(struct inode *inode, | 1778 | static int ocfs2_inode_lock_update(struct inode *inode, |
1613 | struct buffer_head **bh) | 1779 | struct buffer_head **bh) |
1614 | { | 1780 | { |
1615 | int status = 0; | 1781 | int status = 0; |
1616 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1782 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1617 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1783 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1618 | struct ocfs2_dinode *fe; | 1784 | struct ocfs2_dinode *fe; |
1619 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1785 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1620 | 1786 | ||
@@ -1726,7 +1892,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
1726 | * returns < 0 error if the callback will never be called, otherwise | 1892 | * returns < 0 error if the callback will never be called, otherwise |
1727 | * the result of the lock will be communicated via the callback. | 1893 | * the result of the lock will be communicated via the callback. |
1728 | */ | 1894 | */ |
1729 | int ocfs2_meta_lock_full(struct inode *inode, | 1895 | int ocfs2_inode_lock_full(struct inode *inode, |
1730 | struct buffer_head **ret_bh, | 1896 | struct buffer_head **ret_bh, |
1731 | int ex, | 1897 | int ex, |
1732 | int arg_flags) | 1898 | int arg_flags) |
@@ -1761,7 +1927,7 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
1761 | wait_event(osb->recovery_event, | 1927 | wait_event(osb->recovery_event, |
1762 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1928 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); |
1763 | 1929 | ||
1764 | lockres = &OCFS2_I(inode)->ip_meta_lockres; | 1930 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1765 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 1931 | level = ex ? LKM_EXMODE : LKM_PRMODE; |
1766 | dlm_flags = 0; | 1932 | dlm_flags = 0; |
1767 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 1933 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
@@ -1800,11 +1966,11 @@ local: | |||
1800 | } | 1966 | } |
1801 | 1967 | ||
1802 | /* This is fun. The caller may want a bh back, or it may | 1968 | /* This is fun. The caller may want a bh back, or it may |
1803 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1969 | * not. ocfs2_inode_lock_update definitely wants one in, but |
1804 | * may or may not read one, depending on what's in the | 1970 | * may or may not read one, depending on what's in the |
1805 | * LVB. The result of all of this is that we've *only* gone to | 1971 | * LVB. The result of all of this is that we've *only* gone to |
1806 | * disk if we have to, so the complexity is worthwhile. */ | 1972 | * disk if we have to, so the complexity is worthwhile. */ |
1807 | status = ocfs2_meta_lock_update(inode, &local_bh); | 1973 | status = ocfs2_inode_lock_update(inode, &local_bh); |
1808 | if (status < 0) { | 1974 | if (status < 0) { |
1809 | if (status != -ENOENT) | 1975 | if (status != -ENOENT) |
1810 | mlog_errno(status); | 1976 | mlog_errno(status); |
@@ -1826,7 +1992,7 @@ bail: | |||
1826 | *ret_bh = NULL; | 1992 | *ret_bh = NULL; |
1827 | } | 1993 | } |
1828 | if (acquired) | 1994 | if (acquired) |
1829 | ocfs2_meta_unlock(inode, ex); | 1995 | ocfs2_inode_unlock(inode, ex); |
1830 | } | 1996 | } |
1831 | 1997 | ||
1832 | if (local_bh) | 1998 | if (local_bh) |
@@ -1837,19 +2003,20 @@ bail: | |||
1837 | } | 2003 | } |
1838 | 2004 | ||
1839 | /* | 2005 | /* |
1840 | * This is working around a lock inversion between tasks acquiring DLM locks | 2006 | * This is working around a lock inversion between tasks acquiring DLM |
1841 | * while holding a page lock and the vote thread which blocks dlm lock acquiry | 2007 | * locks while holding a page lock and the downconvert thread which |
1842 | * while acquiring page locks. | 2008 | * blocks dlm lock acquiry while acquiring page locks. |
1843 | * | 2009 | * |
1844 | * ** These _with_page variants are only intended to be called from aop | 2010 | * ** These _with_page variants are only intended to be called from aop |
1845 | * methods that hold page locks and return a very specific *positive* error | 2011 | * methods that hold page locks and return a very specific *positive* error |
1846 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 2012 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** |
1847 | * | 2013 | * |
1848 | * The DLM is called such that it returns -EAGAIN if it would have blocked | 2014 | * The DLM is called such that it returns -EAGAIN if it would have |
1849 | * waiting for the vote thread. In that case we unlock our page so the vote | 2015 | * blocked waiting for the downconvert thread. In that case we unlock |
1850 | * thread can make progress. Once we've done this we have to return | 2016 | * our page so the downconvert thread can make progress. Once we've |
1851 | * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up | 2017 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method |
1852 | * into the VFS who will then immediately retry the aop call. | 2018 | * that called us can bubble that back up into the VFS who will then |
2019 | * immediately retry the aop call. | ||
1853 | * | 2020 | * |
1854 | * We do a blocking lock and immediate unlock before returning, though, so that | 2021 | * We do a blocking lock and immediate unlock before returning, though, so that |
1855 | * the lock has a great chance of being cached on this node by the time the VFS | 2022 | * the lock has a great chance of being cached on this node by the time the VFS |
@@ -1857,32 +2024,32 @@ bail: | |||
1857 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 2024 | * ping locks back and forth, but that's a risk we're willing to take to avoid |
1858 | * the lock inversion simply. | 2025 | * the lock inversion simply. |
1859 | */ | 2026 | */ |
1860 | int ocfs2_meta_lock_with_page(struct inode *inode, | 2027 | int ocfs2_inode_lock_with_page(struct inode *inode, |
1861 | struct buffer_head **ret_bh, | 2028 | struct buffer_head **ret_bh, |
1862 | int ex, | 2029 | int ex, |
1863 | struct page *page) | 2030 | struct page *page) |
1864 | { | 2031 | { |
1865 | int ret; | 2032 | int ret; |
1866 | 2033 | ||
1867 | ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 2034 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); |
1868 | if (ret == -EAGAIN) { | 2035 | if (ret == -EAGAIN) { |
1869 | unlock_page(page); | 2036 | unlock_page(page); |
1870 | if (ocfs2_meta_lock(inode, ret_bh, ex) == 0) | 2037 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) |
1871 | ocfs2_meta_unlock(inode, ex); | 2038 | ocfs2_inode_unlock(inode, ex); |
1872 | ret = AOP_TRUNCATED_PAGE; | 2039 | ret = AOP_TRUNCATED_PAGE; |
1873 | } | 2040 | } |
1874 | 2041 | ||
1875 | return ret; | 2042 | return ret; |
1876 | } | 2043 | } |
1877 | 2044 | ||
1878 | int ocfs2_meta_lock_atime(struct inode *inode, | 2045 | int ocfs2_inode_lock_atime(struct inode *inode, |
1879 | struct vfsmount *vfsmnt, | 2046 | struct vfsmount *vfsmnt, |
1880 | int *level) | 2047 | int *level) |
1881 | { | 2048 | { |
1882 | int ret; | 2049 | int ret; |
1883 | 2050 | ||
1884 | mlog_entry_void(); | 2051 | mlog_entry_void(); |
1885 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2052 | ret = ocfs2_inode_lock(inode, NULL, 0); |
1886 | if (ret < 0) { | 2053 | if (ret < 0) { |
1887 | mlog_errno(ret); | 2054 | mlog_errno(ret); |
1888 | return ret; | 2055 | return ret; |
@@ -1895,8 +2062,8 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1895 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 2062 | if (ocfs2_should_update_atime(inode, vfsmnt)) { |
1896 | struct buffer_head *bh = NULL; | 2063 | struct buffer_head *bh = NULL; |
1897 | 2064 | ||
1898 | ocfs2_meta_unlock(inode, 0); | 2065 | ocfs2_inode_unlock(inode, 0); |
1899 | ret = ocfs2_meta_lock(inode, &bh, 1); | 2066 | ret = ocfs2_inode_lock(inode, &bh, 1); |
1900 | if (ret < 0) { | 2067 | if (ret < 0) { |
1901 | mlog_errno(ret); | 2068 | mlog_errno(ret); |
1902 | return ret; | 2069 | return ret; |
@@ -1913,11 +2080,11 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1913 | return ret; | 2080 | return ret; |
1914 | } | 2081 | } |
1915 | 2082 | ||
1916 | void ocfs2_meta_unlock(struct inode *inode, | 2083 | void ocfs2_inode_unlock(struct inode *inode, |
1917 | int ex) | 2084 | int ex) |
1918 | { | 2085 | { |
1919 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2086 | int level = ex ? LKM_EXMODE : LKM_PRMODE; |
1920 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | 2087 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1921 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2088 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1922 | 2089 | ||
1923 | mlog_entry_void(); | 2090 | mlog_entry_void(); |
@@ -2325,11 +2492,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2325 | goto bail; | 2492 | goto bail; |
2326 | } | 2493 | } |
2327 | 2494 | ||
2328 | /* launch vote thread */ | 2495 | /* launch downconvert thread */ |
2329 | osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); | 2496 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); |
2330 | if (IS_ERR(osb->vote_task)) { | 2497 | if (IS_ERR(osb->dc_task)) { |
2331 | status = PTR_ERR(osb->vote_task); | 2498 | status = PTR_ERR(osb->dc_task); |
2332 | osb->vote_task = NULL; | 2499 | osb->dc_task = NULL; |
2333 | mlog_errno(status); | 2500 | mlog_errno(status); |
2334 | goto bail; | 2501 | goto bail; |
2335 | } | 2502 | } |
@@ -2358,8 +2525,8 @@ local: | |||
2358 | bail: | 2525 | bail: |
2359 | if (status < 0) { | 2526 | if (status < 0) { |
2360 | ocfs2_dlm_shutdown_debug(osb); | 2527 | ocfs2_dlm_shutdown_debug(osb); |
2361 | if (osb->vote_task) | 2528 | if (osb->dc_task) |
2362 | kthread_stop(osb->vote_task); | 2529 | kthread_stop(osb->dc_task); |
2363 | } | 2530 | } |
2364 | 2531 | ||
2365 | mlog_exit(status); | 2532 | mlog_exit(status); |
@@ -2374,9 +2541,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
2374 | 2541 | ||
2375 | ocfs2_drop_osb_locks(osb); | 2542 | ocfs2_drop_osb_locks(osb); |
2376 | 2543 | ||
2377 | if (osb->vote_task) { | 2544 | if (osb->dc_task) { |
2378 | kthread_stop(osb->vote_task); | 2545 | kthread_stop(osb->dc_task); |
2379 | osb->vote_task = NULL; | 2546 | osb->dc_task = NULL; |
2380 | } | 2547 | } |
2381 | 2548 | ||
2382 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2549 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
@@ -2532,7 +2699,7 @@ out: | |||
2532 | 2699 | ||
2533 | /* Mark the lockres as being dropped. It will no longer be | 2700 | /* Mark the lockres as being dropped. It will no longer be |
2534 | * queued if blocking, but we still may have to wait on it | 2701 | * queued if blocking, but we still may have to wait on it |
2535 | * being dequeued from the vote thread before we can consider | 2702 | * being dequeued from the downconvert thread before we can consider |
2536 | * it safe to drop. | 2703 | * it safe to drop. |
2537 | * | 2704 | * |
2538 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 2705 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
@@ -2595,14 +2762,7 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
2595 | status = err; | 2762 | status = err; |
2596 | 2763 | ||
2597 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2764 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
2598 | &OCFS2_I(inode)->ip_data_lockres); | 2765 | &OCFS2_I(inode)->ip_inode_lockres); |
2599 | if (err < 0) | ||
2600 | mlog_errno(err); | ||
2601 | if (err < 0 && !status) | ||
2602 | status = err; | ||
2603 | |||
2604 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | ||
2605 | &OCFS2_I(inode)->ip_meta_lockres); | ||
2606 | if (err < 0) | 2766 | if (err < 0) |
2607 | mlog_errno(err); | 2767 | mlog_errno(err); |
2608 | if (err < 0 && !status) | 2768 | if (err < 0 && !status) |
@@ -2855,6 +3015,9 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2855 | inode = ocfs2_lock_res_inode(lockres); | 3015 | inode = ocfs2_lock_res_inode(lockres); |
2856 | mapping = inode->i_mapping; | 3016 | mapping = inode->i_mapping; |
2857 | 3017 | ||
3018 | if (S_ISREG(inode->i_mode)) | ||
3019 | goto out; | ||
3020 | |||
2858 | /* | 3021 | /* |
2859 | * We need this before the filemap_fdatawrite() so that it can | 3022 | * We need this before the filemap_fdatawrite() so that it can |
2860 | * transfer the dirty bit from the PTE to the | 3023 | * transfer the dirty bit from the PTE to the |
@@ -2880,6 +3043,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2880 | filemap_fdatawait(mapping); | 3043 | filemap_fdatawait(mapping); |
2881 | } | 3044 | } |
2882 | 3045 | ||
3046 | out: | ||
2883 | return UNBLOCK_CONTINUE; | 3047 | return UNBLOCK_CONTINUE; |
2884 | } | 3048 | } |
2885 | 3049 | ||
@@ -2908,7 +3072,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) | |||
2908 | 3072 | ||
2909 | /* | 3073 | /* |
2910 | * Does the final reference drop on our dentry lock. Right now this | 3074 | * Does the final reference drop on our dentry lock. Right now this |
2911 | * happens in the vote thread, but we could choose to simplify the | 3075 | * happens in the downconvert thread, but we could choose to simplify the |
2912 | * dlmglue API and push these off to the ocfs2_wq in the future. | 3076 | * dlmglue API and push these off to the ocfs2_wq in the future. |
2913 | */ | 3077 | */ |
2914 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 3078 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, |
@@ -3047,7 +3211,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
3047 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 3211 | mlog(0, "lockres %s blocked.\n", lockres->l_name); |
3048 | 3212 | ||
3049 | /* Detect whether a lock has been marked as going away while | 3213 | /* Detect whether a lock has been marked as going away while |
3050 | * the vote thread was processing other things. A lock can | 3214 | * the downconvert thread was processing other things. A lock can |
3051 | * still be marked with OCFS2_LOCK_FREEING after this check, | 3215 | * still be marked with OCFS2_LOCK_FREEING after this check, |
3052 | * but short circuiting here will still save us some | 3216 | * but short circuiting here will still save us some |
3053 | * performance. */ | 3217 | * performance. */ |
@@ -3096,13 +3260,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
3096 | 3260 | ||
3097 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 3261 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); |
3098 | 3262 | ||
3099 | spin_lock(&osb->vote_task_lock); | 3263 | spin_lock(&osb->dc_task_lock); |
3100 | if (list_empty(&lockres->l_blocked_list)) { | 3264 | if (list_empty(&lockres->l_blocked_list)) { |
3101 | list_add_tail(&lockres->l_blocked_list, | 3265 | list_add_tail(&lockres->l_blocked_list, |
3102 | &osb->blocked_lock_list); | 3266 | &osb->blocked_lock_list); |
3103 | osb->blocked_lock_count++; | 3267 | osb->blocked_lock_count++; |
3104 | } | 3268 | } |
3105 | spin_unlock(&osb->vote_task_lock); | 3269 | spin_unlock(&osb->dc_task_lock); |
3106 | 3270 | ||
3107 | mlog_exit_void(); | 3271 | mlog_exit_void(); |
3108 | } | 3272 | } |
3273 | |||
3274 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | ||
3275 | { | ||
3276 | unsigned long processed; | ||
3277 | struct ocfs2_lock_res *lockres; | ||
3278 | |||
3279 | mlog_entry_void(); | ||
3280 | |||
3281 | spin_lock(&osb->dc_task_lock); | ||
3282 | /* grab this early so we know to try again if a state change and | ||
3283 | * wake happens part-way through our work */ | ||
3284 | osb->dc_work_sequence = osb->dc_wake_sequence; | ||
3285 | |||
3286 | processed = osb->blocked_lock_count; | ||
3287 | while (processed) { | ||
3288 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
3289 | |||
3290 | lockres = list_entry(osb->blocked_lock_list.next, | ||
3291 | struct ocfs2_lock_res, l_blocked_list); | ||
3292 | list_del_init(&lockres->l_blocked_list); | ||
3293 | osb->blocked_lock_count--; | ||
3294 | spin_unlock(&osb->dc_task_lock); | ||
3295 | |||
3296 | BUG_ON(!processed); | ||
3297 | processed--; | ||
3298 | |||
3299 | ocfs2_process_blocked_lock(osb, lockres); | ||
3300 | |||
3301 | spin_lock(&osb->dc_task_lock); | ||
3302 | } | ||
3303 | spin_unlock(&osb->dc_task_lock); | ||
3304 | |||
3305 | mlog_exit_void(); | ||
3306 | } | ||
3307 | |||
3308 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | ||
3309 | { | ||
3310 | int empty = 0; | ||
3311 | |||
3312 | spin_lock(&osb->dc_task_lock); | ||
3313 | if (list_empty(&osb->blocked_lock_list)) | ||
3314 | empty = 1; | ||
3315 | |||
3316 | spin_unlock(&osb->dc_task_lock); | ||
3317 | return empty; | ||
3318 | } | ||
3319 | |||
3320 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | ||
3321 | { | ||
3322 | int should_wake = 0; | ||
3323 | |||
3324 | spin_lock(&osb->dc_task_lock); | ||
3325 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | ||
3326 | should_wake = 1; | ||
3327 | spin_unlock(&osb->dc_task_lock); | ||
3328 | |||
3329 | return should_wake; | ||
3330 | } | ||
3331 | |||
3332 | int ocfs2_downconvert_thread(void *arg) | ||
3333 | { | ||
3334 | int status = 0; | ||
3335 | struct ocfs2_super *osb = arg; | ||
3336 | |||
3337 | /* only quit once we've been asked to stop and there is no more | ||
3338 | * work available */ | ||
3339 | while (!(kthread_should_stop() && | ||
3340 | ocfs2_downconvert_thread_lists_empty(osb))) { | ||
3341 | |||
3342 | wait_event_interruptible(osb->dc_event, | ||
3343 | ocfs2_downconvert_thread_should_wake(osb) || | ||
3344 | kthread_should_stop()); | ||
3345 | |||
3346 | mlog(0, "downconvert_thread: awoken\n"); | ||
3347 | |||
3348 | ocfs2_downconvert_thread_do_work(osb); | ||
3349 | } | ||
3350 | |||
3351 | osb->dc_task = NULL; | ||
3352 | return status; | ||
3353 | } | ||
3354 | |||
3355 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | ||
3356 | { | ||
3357 | spin_lock(&osb->dc_task_lock); | ||
3358 | /* make sure the downconvert thread gets a swipe at whatever changes | ||
3359 | * the caller may have made to the lock state */ | ||
3360 | osb->dc_wake_sequence++; | ||
3361 | spin_unlock(&osb->dc_task_lock); | ||
3362 | wake_up(&osb->dc_event); | ||
3363 | } | ||
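
The downconvert thread added above coordinates with its wakers through the dc_wake_sequence/dc_work_sequence pair rather than a simple flag: ocfs2_wake_downconvert_thread() bumps the wake sequence, and ocfs2_downconvert_thread_do_work() snapshots it before scanning the blocked-lock list, so a wakeup that races with an in-progress scan still forces another pass. The following minimal userspace sketch is illustrative only and not part of the patch; pthreads stand in for the kernel's kthread, spinlock and wait-queue primitives, and every name in it is a placeholder chosen to mirror dc_wake_sequence and dc_work_sequence.

/*
 * Illustrative sketch of the wake/work sequence-counter pattern used by
 * the ocfs2 downconvert thread: the waker bumps wake_sequence, the worker
 * snapshots it into work_sequence before processing, so a wake that
 * arrives during processing is never lost.
 */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static unsigned long wake_sequence;	/* bumped by wakers (cf. dc_wake_sequence) */
static unsigned long work_sequence;	/* worker's snapshot (cf. dc_work_sequence) */
static int stop;

static void wake_worker(void)
{
	pthread_mutex_lock(&lock);
	wake_sequence++;		/* record that new work may exist */
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&cond);
}

static void *worker(void *arg)
{
	pthread_mutex_lock(&lock);
	while (!stop) {
		/* sleep only while nothing has changed since our last pass */
		while (!stop && work_sequence == wake_sequence)
			pthread_cond_wait(&cond, &lock);

		/* snapshot first, so a wake during processing forces another pass */
		work_sequence = wake_sequence;
		pthread_mutex_unlock(&lock);

		printf("processing blocked locks...\n");	/* stand-in for the scan */

		pthread_mutex_lock(&lock);
	}
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, worker, NULL);
	wake_worker();

	pthread_mutex_lock(&lock);
	stop = 1;
	pthread_mutex_unlock(&lock);
	pthread_cond_signal(&cond);
	pthread_join(t, NULL);
	return 0;
}

The point mirrored from the patch is that the snapshot is taken before the scan, never after it, so a wakeup arriving mid-scan leaves the two counters unequal and the worker immediately runs again instead of going back to sleep.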