diff options
Diffstat (limited to 'fs/ocfs2/dlmglue.c')
-rw-r--r-- | fs/ocfs2/dlmglue.c | 546 |
1 files changed, 403 insertions, 143 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 4e97dcceaf8f..3867244fb144 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -55,7 +55,6 @@ | |||
55 | #include "slot_map.h" | 55 | #include "slot_map.h" |
56 | #include "super.h" | 56 | #include "super.h" |
57 | #include "uptodate.h" | 57 | #include "uptodate.h" |
58 | #include "vote.h" | ||
59 | 58 | ||
60 | #include "buffer_head_io.h" | 59 | #include "buffer_head_io.h" |
61 | 60 | ||
@@ -69,6 +68,7 @@ struct ocfs2_mask_waiter { | |||
69 | 68 | ||
70 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 69 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 70 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); | ||
72 | 72 | ||
73 | /* | 73 | /* |
74 | * Return value from ->downconvert_worker functions. | 74 | * Return value from ->downconvert_worker functions. |
@@ -153,10 +153,10 @@ struct ocfs2_lock_res_ops { | |||
153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); | 153 | struct ocfs2_super * (*get_osb)(struct ocfs2_lock_res *); |
154 | 154 | ||
155 | /* | 155 | /* |
156 | * Optionally called in the downconvert (or "vote") thread | 156 | * Optionally called in the downconvert thread after a |
157 | * after a successful downconvert. The lockres will not be | 157 | * successful downconvert. The lockres will not be referenced |
158 | * referenced after this callback is called, so it is safe to | 158 | * after this callback is called, so it is safe to free |
159 | * free memory, etc. | 159 | * memory, etc. |
160 | * | 160 | * |
161 | * The exact semantics of when this is called are controlled | 161 | * The exact semantics of when this is called are controlled |
162 | * by ->downconvert_worker() | 162 | * by ->downconvert_worker() |
@@ -225,17 +225,12 @@ static struct ocfs2_lock_res_ops ocfs2_inode_rw_lops = { | |||
225 | .flags = 0, | 225 | .flags = 0, |
226 | }; | 226 | }; |
227 | 227 | ||
228 | static struct ocfs2_lock_res_ops ocfs2_inode_meta_lops = { | 228 | static struct ocfs2_lock_res_ops ocfs2_inode_inode_lops = { |
229 | .get_osb = ocfs2_get_inode_osb, | 229 | .get_osb = ocfs2_get_inode_osb, |
230 | .check_downconvert = ocfs2_check_meta_downconvert, | 230 | .check_downconvert = ocfs2_check_meta_downconvert, |
231 | .set_lvb = ocfs2_set_meta_lvb, | 231 | .set_lvb = ocfs2_set_meta_lvb, |
232 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, | ||
233 | }; | ||
234 | |||
235 | static struct ocfs2_lock_res_ops ocfs2_inode_data_lops = { | ||
236 | .get_osb = ocfs2_get_inode_osb, | ||
237 | .downconvert_worker = ocfs2_data_convert_worker, | 232 | .downconvert_worker = ocfs2_data_convert_worker, |
238 | .flags = 0, | 233 | .flags = LOCK_TYPE_REQUIRES_REFRESH|LOCK_TYPE_USES_LVB, |
239 | }; | 234 | }; |
240 | 235 | ||
241 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { | 236 | static struct ocfs2_lock_res_ops ocfs2_super_lops = { |
@@ -258,10 +253,14 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | |||
258 | .flags = 0, | 253 | .flags = 0, |
259 | }; | 254 | }; |
260 | 255 | ||
256 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | ||
257 | .get_osb = ocfs2_get_file_osb, | ||
258 | .flags = 0, | ||
259 | }; | ||
260 | |||
261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
262 | { | 262 | { |
263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
264 | lockres->l_type == OCFS2_LOCK_TYPE_DATA || | ||
265 | lockres->l_type == OCFS2_LOCK_TYPE_RW || | 264 | lockres->l_type == OCFS2_LOCK_TYPE_RW || |
266 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; | 265 | lockres->l_type == OCFS2_LOCK_TYPE_OPEN; |
267 | } | 266 | } |
@@ -310,12 +309,24 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres, | |||
310 | "resource %s: %s\n", dlm_errname(_stat), _func, \ | 309 | "resource %s: %s\n", dlm_errname(_stat), _func, \ |
311 | _lockres->l_name, dlm_errmsg(_stat)); \ | 310 | _lockres->l_name, dlm_errmsg(_stat)); \ |
312 | } while (0) | 311 | } while (0) |
313 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 312 | static int ocfs2_downconvert_thread(void *arg); |
314 | struct ocfs2_lock_res *lockres); | 313 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
315 | static int ocfs2_meta_lock_update(struct inode *inode, | 314 | struct ocfs2_lock_res *lockres); |
315 | static int ocfs2_inode_lock_update(struct inode *inode, | ||
316 | struct buffer_head **bh); | 316 | struct buffer_head **bh); |
317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
318 | static inline int ocfs2_highest_compat_lock_level(int level); | 318 | static inline int ocfs2_highest_compat_lock_level(int level); |
319 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | ||
320 | int new_level); | ||
321 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | ||
322 | struct ocfs2_lock_res *lockres, | ||
323 | int new_level, | ||
324 | int lvb); | ||
325 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | ||
326 | struct ocfs2_lock_res *lockres); | ||
327 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | ||
328 | struct ocfs2_lock_res *lockres); | ||
329 | |||
319 | 330 | ||
320 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 331 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
321 | u64 blkno, | 332 | u64 blkno, |
@@ -402,10 +413,7 @@ void ocfs2_inode_lock_res_init(struct ocfs2_lock_res *res, | |||
402 | ops = &ocfs2_inode_rw_lops; | 413 | ops = &ocfs2_inode_rw_lops; |
403 | break; | 414 | break; |
404 | case OCFS2_LOCK_TYPE_META: | 415 | case OCFS2_LOCK_TYPE_META: |
405 | ops = &ocfs2_inode_meta_lops; | 416 | ops = &ocfs2_inode_inode_lops; |
406 | break; | ||
407 | case OCFS2_LOCK_TYPE_DATA: | ||
408 | ops = &ocfs2_inode_data_lops; | ||
409 | break; | 417 | break; |
410 | case OCFS2_LOCK_TYPE_OPEN: | 418 | case OCFS2_LOCK_TYPE_OPEN: |
411 | ops = &ocfs2_inode_open_lops; | 419 | ops = &ocfs2_inode_open_lops; |
@@ -428,6 +436,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | |||
428 | return OCFS2_SB(inode->i_sb); | 436 | return OCFS2_SB(inode->i_sb); |
429 | } | 437 | } |
430 | 438 | ||
439 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | ||
440 | { | ||
441 | struct ocfs2_file_private *fp = lockres->l_priv; | ||
442 | |||
443 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | ||
444 | } | ||
445 | |||
431 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 446 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) |
432 | { | 447 | { |
433 | __be64 inode_blkno_be; | 448 | __be64 inode_blkno_be; |
@@ -508,6 +523,21 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
508 | &ocfs2_rename_lops, osb); | 523 | &ocfs2_rename_lops, osb); |
509 | } | 524 | } |
510 | 525 | ||
526 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
527 | struct ocfs2_file_private *fp) | ||
528 | { | ||
529 | struct inode *inode = fp->fp_file->f_mapping->host; | ||
530 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
531 | |||
532 | ocfs2_lock_res_init_once(lockres); | ||
533 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | ||
534 | inode->i_generation, lockres->l_name); | ||
535 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
536 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | ||
537 | fp); | ||
538 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | ||
539 | } | ||
540 | |||
511 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 541 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) |
512 | { | 542 | { |
513 | mlog_entry_void(); | 543 | mlog_entry_void(); |
@@ -724,6 +754,13 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
724 | lockres->l_name, level, lockres->l_level, | 754 | lockres->l_name, level, lockres->l_level, |
725 | ocfs2_lock_type_string(lockres->l_type)); | 755 | ocfs2_lock_type_string(lockres->l_type)); |
726 | 756 | ||
757 | /* | ||
758 | * We can skip the bast for locks which don't enable caching - | ||
759 | * they'll be dropped at the earliest possible time anyway. | ||
760 | */ | ||
761 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | ||
762 | return; | ||
763 | |||
727 | spin_lock_irqsave(&lockres->l_lock, flags); | 764 | spin_lock_irqsave(&lockres->l_lock, flags); |
728 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 765 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
729 | if (needs_downconvert) | 766 | if (needs_downconvert) |
@@ -732,7 +769,7 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
732 | 769 | ||
733 | wake_up(&lockres->l_event); | 770 | wake_up(&lockres->l_event); |
734 | 771 | ||
735 | ocfs2_kick_vote_thread(osb); | 772 | ocfs2_wake_downconvert_thread(osb); |
736 | } | 773 | } |
737 | 774 | ||
738 | static void ocfs2_locking_ast(void *opaque) | 775 | static void ocfs2_locking_ast(void *opaque) |
@@ -935,6 +972,21 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | |||
935 | 972 | ||
936 | } | 973 | } |
937 | 974 | ||
975 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | ||
976 | struct ocfs2_lock_res *lockres) | ||
977 | { | ||
978 | int ret; | ||
979 | |||
980 | ret = wait_for_completion_interruptible(&mw->mw_complete); | ||
981 | if (ret) | ||
982 | lockres_remove_mask_waiter(lockres, mw); | ||
983 | else | ||
984 | ret = mw->mw_status; | ||
985 | /* Re-arm the completion in case we want to wait on it again */ | ||
986 | INIT_COMPLETION(mw->mw_complete); | ||
987 | return ret; | ||
988 | } | ||
989 | |||
938 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 990 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
939 | struct ocfs2_lock_res *lockres, | 991 | struct ocfs2_lock_res *lockres, |
940 | int level, | 992 | int level, |
@@ -1089,7 +1141,7 @@ static void ocfs2_cluster_unlock(struct ocfs2_super *osb, | |||
1089 | mlog_entry_void(); | 1141 | mlog_entry_void(); |
1090 | spin_lock_irqsave(&lockres->l_lock, flags); | 1142 | spin_lock_irqsave(&lockres->l_lock, flags); |
1091 | ocfs2_dec_holders(lockres, level); | 1143 | ocfs2_dec_holders(lockres, level); |
1092 | ocfs2_vote_on_unlock(osb, lockres); | 1144 | ocfs2_downconvert_on_unlock(osb, lockres); |
1093 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 1145 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
1094 | mlog_exit_void(); | 1146 | mlog_exit_void(); |
1095 | } | 1147 | } |
@@ -1147,13 +1199,7 @@ int ocfs2_create_new_inode_locks(struct inode *inode) | |||
1147 | * We don't want to use LKM_LOCAL on a meta data lock as they | 1199 | * We don't want to use LKM_LOCAL on a meta data lock as they |
1148 | * don't use a generation in their lock names. | 1200 | * don't use a generation in their lock names. |
1149 | */ | 1201 | */ |
1150 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_meta_lockres, 1, 0); | 1202 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_inode_lockres, 1, 0); |
1151 | if (ret) { | ||
1152 | mlog_errno(ret); | ||
1153 | goto bail; | ||
1154 | } | ||
1155 | |||
1156 | ret = ocfs2_create_new_lock(osb, &OCFS2_I(inode)->ip_data_lockres, 1, 1); | ||
1157 | if (ret) { | 1203 | if (ret) { |
1158 | mlog_errno(ret); | 1204 | mlog_errno(ret); |
1159 | goto bail; | 1205 | goto bail; |
@@ -1311,76 +1357,221 @@ out: | |||
1311 | mlog_exit_void(); | 1357 | mlog_exit_void(); |
1312 | } | 1358 | } |
1313 | 1359 | ||
1314 | int ocfs2_data_lock_full(struct inode *inode, | 1360 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, |
1315 | int write, | 1361 | int level) |
1316 | int arg_flags) | ||
1317 | { | 1362 | { |
1318 | int status = 0, level; | 1363 | int ret; |
1319 | struct ocfs2_lock_res *lockres; | 1364 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); |
1320 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1365 | unsigned long flags; |
1366 | struct ocfs2_mask_waiter mw; | ||
1321 | 1367 | ||
1322 | BUG_ON(!inode); | 1368 | ocfs2_init_mask_waiter(&mw); |
1323 | 1369 | ||
1324 | mlog_entry_void(); | 1370 | retry_cancel: |
1371 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1372 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
1373 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
1374 | if (ret) { | ||
1375 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1376 | ret = ocfs2_cancel_convert(osb, lockres); | ||
1377 | if (ret < 0) { | ||
1378 | mlog_errno(ret); | ||
1379 | goto out; | ||
1380 | } | ||
1381 | goto retry_cancel; | ||
1382 | } | ||
1383 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1384 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1325 | 1385 | ||
1326 | mlog(0, "inode %llu take %s DATA lock\n", | 1386 | ocfs2_wait_for_mask(&mw); |
1327 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1387 | goto retry_cancel; |
1328 | write ? "EXMODE" : "PRMODE"); | 1388 | } |
1329 | 1389 | ||
1330 | /* We'll allow faking a readonly data lock for | 1390 | ret = -ERESTARTSYS; |
1331 | * rodevices. */ | 1391 | /* |
1332 | if (ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb))) { | 1392 | * We may still have gotten the lock, in which case there's no |
1333 | if (write) { | 1393 | * point to restarting the syscall. |
1334 | status = -EROFS; | 1394 | */ |
1335 | mlog_errno(status); | 1395 | if (lockres->l_level == level) |
1396 | ret = 0; | ||
1397 | |||
1398 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | ||
1399 | lockres->l_flags, lockres->l_level, lockres->l_action); | ||
1400 | |||
1401 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1402 | |||
1403 | out: | ||
1404 | return ret; | ||
1405 | } | ||
1406 | |||
1407 | /* | ||
1408 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | ||
1409 | * flock() calls. The locking approach this requires is sufficiently | ||
1410 | * different from all other cluster lock types that we implement a | ||
1411 | * separate path to the "low-level" dlm calls. In particular: | ||
1412 | * | ||
1413 | * - No optimization of lock levels is done - we take at exactly | ||
1414 | * what's been requested. | ||
1415 | * | ||
1416 | * - No lock caching is employed. We immediately downconvert to | ||
1417 | * no-lock at unlock time. This also means flock locks never go on | ||
1418 | * the blocking list). | ||
1419 | * | ||
1420 | * - Since userspace can trivially deadlock itself with flock, we make | ||
1421 | * sure to allow cancellation of a misbehaving applications flock() | ||
1422 | * request. | ||
1423 | * | ||
1424 | * - Access to any flock lockres doesn't require concurrency, so we | ||
1425 | * can simplify the code by requiring the caller to guarantee | ||
1426 | * serialization of dlmglue flock calls. | ||
1427 | */ | ||
1428 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | ||
1429 | { | ||
1430 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
1431 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | ||
1432 | unsigned long flags; | ||
1433 | struct ocfs2_file_private *fp = file->private_data; | ||
1434 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1435 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1436 | struct ocfs2_mask_waiter mw; | ||
1437 | |||
1438 | ocfs2_init_mask_waiter(&mw); | ||
1439 | |||
1440 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | ||
1441 | (lockres->l_level > LKM_NLMODE)) { | ||
1442 | mlog(ML_ERROR, | ||
1443 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | ||
1444 | "level: %u\n", lockres->l_name, lockres->l_flags, | ||
1445 | lockres->l_level); | ||
1446 | return -EINVAL; | ||
1447 | } | ||
1448 | |||
1449 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1450 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | ||
1451 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1452 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1453 | |||
1454 | /* | ||
1455 | * Get the lock at NLMODE to start - that way we | ||
1456 | * can cancel the upconvert request if need be. | ||
1457 | */ | ||
1458 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | ||
1459 | if (ret < 0) { | ||
1460 | mlog_errno(ret); | ||
1461 | goto out; | ||
1336 | } | 1462 | } |
1337 | goto out; | 1463 | |
1464 | ret = ocfs2_wait_for_mask(&mw); | ||
1465 | if (ret) { | ||
1466 | mlog_errno(ret); | ||
1467 | goto out; | ||
1468 | } | ||
1469 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1338 | } | 1470 | } |
1339 | 1471 | ||
1340 | if (ocfs2_mount_local(osb)) | 1472 | lockres->l_action = OCFS2_AST_CONVERT; |
1341 | goto out; | 1473 | lkm_flags |= LKM_CONVERT; |
1474 | lockres->l_requested = level; | ||
1475 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | ||
1342 | 1476 | ||
1343 | lockres = &OCFS2_I(inode)->ip_data_lockres; | 1477 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); |
1478 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1344 | 1479 | ||
1345 | level = write ? LKM_EXMODE : LKM_PRMODE; | 1480 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, |
1481 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | ||
1482 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | ||
1483 | if (ret != DLM_NORMAL) { | ||
1484 | if (trylock && ret == DLM_NOTQUEUED) | ||
1485 | ret = -EAGAIN; | ||
1486 | else { | ||
1487 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
1488 | ret = -EINVAL; | ||
1489 | } | ||
1346 | 1490 | ||
1347 | status = ocfs2_cluster_lock(OCFS2_SB(inode->i_sb), lockres, level, | 1491 | ocfs2_recover_from_dlm_error(lockres, 1); |
1348 | 0, arg_flags); | 1492 | lockres_remove_mask_waiter(lockres, &mw); |
1349 | if (status < 0 && status != -EAGAIN) | 1493 | goto out; |
1350 | mlog_errno(status); | 1494 | } |
1495 | |||
1496 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | ||
1497 | if (ret == -ERESTARTSYS) { | ||
1498 | /* | ||
1499 | * Userspace can cause deadlock itself with | ||
1500 | * flock(). Current behavior locally is to allow the | ||
1501 | * deadlock, but abort the system call if a signal is | ||
1502 | * received. We follow this example, otherwise a | ||
1503 | * poorly written program could sit in kernel until | ||
1504 | * reboot. | ||
1505 | * | ||
1506 | * Handling this is a bit more complicated for Ocfs2 | ||
1507 | * though. We can't exit this function with an | ||
1508 | * outstanding lock request, so a cancel convert is | ||
1509 | * required. We intentionally overwrite 'ret' - if the | ||
1510 | * cancel fails and the lock was granted, it's easier | ||
1511 | * to just bubble success back up to the user. | ||
1512 | */ | ||
1513 | ret = ocfs2_flock_handle_signal(lockres, level); | ||
1514 | } | ||
1351 | 1515 | ||
1352 | out: | 1516 | out: |
1353 | mlog_exit(status); | 1517 | |
1354 | return status; | 1518 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", |
1519 | lockres->l_name, ex, trylock, ret); | ||
1520 | return ret; | ||
1355 | } | 1521 | } |
1356 | 1522 | ||
1357 | /* see ocfs2_meta_lock_with_page() */ | 1523 | void ocfs2_file_unlock(struct file *file) |
1358 | int ocfs2_data_lock_with_page(struct inode *inode, | ||
1359 | int write, | ||
1360 | struct page *page) | ||
1361 | { | 1524 | { |
1362 | int ret; | 1525 | int ret; |
1526 | unsigned long flags; | ||
1527 | struct ocfs2_file_private *fp = file->private_data; | ||
1528 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1529 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1530 | struct ocfs2_mask_waiter mw; | ||
1363 | 1531 | ||
1364 | ret = ocfs2_data_lock_full(inode, write, OCFS2_LOCK_NONBLOCK); | 1532 | ocfs2_init_mask_waiter(&mw); |
1365 | if (ret == -EAGAIN) { | 1533 | |
1366 | unlock_page(page); | 1534 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) |
1367 | if (ocfs2_data_lock(inode, write) == 0) | 1535 | return; |
1368 | ocfs2_data_unlock(inode, write); | 1536 | |
1369 | ret = AOP_TRUNCATED_PAGE; | 1537 | if (lockres->l_level == LKM_NLMODE) |
1538 | return; | ||
1539 | |||
1540 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | ||
1541 | lockres->l_name, lockres->l_flags, lockres->l_level, | ||
1542 | lockres->l_action); | ||
1543 | |||
1544 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1545 | /* | ||
1546 | * Fake a blocking ast for the downconvert code. | ||
1547 | */ | ||
1548 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
1549 | lockres->l_blocking = LKM_EXMODE; | ||
1550 | |||
1551 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | ||
1552 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1553 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1554 | |||
1555 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | ||
1556 | if (ret) { | ||
1557 | mlog_errno(ret); | ||
1558 | return; | ||
1370 | } | 1559 | } |
1371 | 1560 | ||
1372 | return ret; | 1561 | ret = ocfs2_wait_for_mask(&mw); |
1562 | if (ret) | ||
1563 | mlog_errno(ret); | ||
1373 | } | 1564 | } |
1374 | 1565 | ||
1375 | static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | 1566 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
1376 | struct ocfs2_lock_res *lockres) | 1567 | struct ocfs2_lock_res *lockres) |
1377 | { | 1568 | { |
1378 | int kick = 0; | 1569 | int kick = 0; |
1379 | 1570 | ||
1380 | mlog_entry_void(); | 1571 | mlog_entry_void(); |
1381 | 1572 | ||
1382 | /* If we know that another node is waiting on our lock, kick | 1573 | /* If we know that another node is waiting on our lock, kick |
1383 | * the vote thread * pre-emptively when we reach a release | 1574 | * the downconvert thread * pre-emptively when we reach a release |
1384 | * condition. */ | 1575 | * condition. */ |
1385 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { | 1576 | if (lockres->l_flags & OCFS2_LOCK_BLOCKED) { |
1386 | switch(lockres->l_blocking) { | 1577 | switch(lockres->l_blocking) { |
@@ -1398,27 +1589,7 @@ static void ocfs2_vote_on_unlock(struct ocfs2_super *osb, | |||
1398 | } | 1589 | } |
1399 | 1590 | ||
1400 | if (kick) | 1591 | if (kick) |
1401 | ocfs2_kick_vote_thread(osb); | 1592 | ocfs2_wake_downconvert_thread(osb); |
1402 | |||
1403 | mlog_exit_void(); | ||
1404 | } | ||
1405 | |||
1406 | void ocfs2_data_unlock(struct inode *inode, | ||
1407 | int write) | ||
1408 | { | ||
1409 | int level = write ? LKM_EXMODE : LKM_PRMODE; | ||
1410 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_data_lockres; | ||
1411 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1412 | |||
1413 | mlog_entry_void(); | ||
1414 | |||
1415 | mlog(0, "inode %llu drop %s DATA lock\n", | ||
1416 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
1417 | write ? "EXMODE" : "PRMODE"); | ||
1418 | |||
1419 | if (!ocfs2_is_hard_readonly(OCFS2_SB(inode->i_sb)) && | ||
1420 | !ocfs2_mount_local(osb)) | ||
1421 | ocfs2_cluster_unlock(OCFS2_SB(inode->i_sb), lockres, level); | ||
1422 | 1593 | ||
1423 | mlog_exit_void(); | 1594 | mlog_exit_void(); |
1424 | } | 1595 | } |
@@ -1442,11 +1613,11 @@ static u64 ocfs2_pack_timespec(struct timespec *spec) | |||
1442 | 1613 | ||
1443 | /* Call this with the lockres locked. I am reasonably sure we don't | 1614 | /* Call this with the lockres locked. I am reasonably sure we don't |
1444 | * need ip_lock in this function as anyone who would be changing those | 1615 | * need ip_lock in this function as anyone who would be changing those |
1445 | * values is supposed to be blocked in ocfs2_meta_lock right now. */ | 1616 | * values is supposed to be blocked in ocfs2_inode_lock right now. */ |
1446 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) | 1617 | static void __ocfs2_stuff_meta_lvb(struct inode *inode) |
1447 | { | 1618 | { |
1448 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1619 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1449 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1620 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1450 | struct ocfs2_meta_lvb *lvb; | 1621 | struct ocfs2_meta_lvb *lvb; |
1451 | 1622 | ||
1452 | mlog_entry_void(); | 1623 | mlog_entry_void(); |
@@ -1496,7 +1667,7 @@ static void ocfs2_unpack_timespec(struct timespec *spec, | |||
1496 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) | 1667 | static void ocfs2_refresh_inode_from_lvb(struct inode *inode) |
1497 | { | 1668 | { |
1498 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1669 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1499 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1670 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1500 | struct ocfs2_meta_lvb *lvb; | 1671 | struct ocfs2_meta_lvb *lvb; |
1501 | 1672 | ||
1502 | mlog_entry_void(); | 1673 | mlog_entry_void(); |
@@ -1604,12 +1775,12 @@ static inline void ocfs2_complete_lock_res_refresh(struct ocfs2_lock_res *lockre | |||
1604 | } | 1775 | } |
1605 | 1776 | ||
1606 | /* may or may not return a bh if it went to disk. */ | 1777 | /* may or may not return a bh if it went to disk. */ |
1607 | static int ocfs2_meta_lock_update(struct inode *inode, | 1778 | static int ocfs2_inode_lock_update(struct inode *inode, |
1608 | struct buffer_head **bh) | 1779 | struct buffer_head **bh) |
1609 | { | 1780 | { |
1610 | int status = 0; | 1781 | int status = 0; |
1611 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1782 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1612 | struct ocfs2_lock_res *lockres = &oi->ip_meta_lockres; | 1783 | struct ocfs2_lock_res *lockres = &oi->ip_inode_lockres; |
1613 | struct ocfs2_dinode *fe; | 1784 | struct ocfs2_dinode *fe; |
1614 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1785 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1615 | 1786 | ||
@@ -1721,7 +1892,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
1721 | * returns < 0 error if the callback will never be called, otherwise | 1892 | * returns < 0 error if the callback will never be called, otherwise |
1722 | * the result of the lock will be communicated via the callback. | 1893 | * the result of the lock will be communicated via the callback. |
1723 | */ | 1894 | */ |
1724 | int ocfs2_meta_lock_full(struct inode *inode, | 1895 | int ocfs2_inode_lock_full(struct inode *inode, |
1725 | struct buffer_head **ret_bh, | 1896 | struct buffer_head **ret_bh, |
1726 | int ex, | 1897 | int ex, |
1727 | int arg_flags) | 1898 | int arg_flags) |
@@ -1756,7 +1927,7 @@ int ocfs2_meta_lock_full(struct inode *inode, | |||
1756 | wait_event(osb->recovery_event, | 1927 | wait_event(osb->recovery_event, |
1757 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); | 1928 | ocfs2_node_map_is_empty(osb, &osb->recovery_map)); |
1758 | 1929 | ||
1759 | lockres = &OCFS2_I(inode)->ip_meta_lockres; | 1930 | lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1760 | level = ex ? LKM_EXMODE : LKM_PRMODE; | 1931 | level = ex ? LKM_EXMODE : LKM_PRMODE; |
1761 | dlm_flags = 0; | 1932 | dlm_flags = 0; |
1762 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) | 1933 | if (arg_flags & OCFS2_META_LOCK_NOQUEUE) |
@@ -1795,11 +1966,11 @@ local: | |||
1795 | } | 1966 | } |
1796 | 1967 | ||
1797 | /* This is fun. The caller may want a bh back, or it may | 1968 | /* This is fun. The caller may want a bh back, or it may |
1798 | * not. ocfs2_meta_lock_update definitely wants one in, but | 1969 | * not. ocfs2_inode_lock_update definitely wants one in, but |
1799 | * may or may not read one, depending on what's in the | 1970 | * may or may not read one, depending on what's in the |
1800 | * LVB. The result of all of this is that we've *only* gone to | 1971 | * LVB. The result of all of this is that we've *only* gone to |
1801 | * disk if we have to, so the complexity is worthwhile. */ | 1972 | * disk if we have to, so the complexity is worthwhile. */ |
1802 | status = ocfs2_meta_lock_update(inode, &local_bh); | 1973 | status = ocfs2_inode_lock_update(inode, &local_bh); |
1803 | if (status < 0) { | 1974 | if (status < 0) { |
1804 | if (status != -ENOENT) | 1975 | if (status != -ENOENT) |
1805 | mlog_errno(status); | 1976 | mlog_errno(status); |
@@ -1821,7 +1992,7 @@ bail: | |||
1821 | *ret_bh = NULL; | 1992 | *ret_bh = NULL; |
1822 | } | 1993 | } |
1823 | if (acquired) | 1994 | if (acquired) |
1824 | ocfs2_meta_unlock(inode, ex); | 1995 | ocfs2_inode_unlock(inode, ex); |
1825 | } | 1996 | } |
1826 | 1997 | ||
1827 | if (local_bh) | 1998 | if (local_bh) |
@@ -1832,19 +2003,20 @@ bail: | |||
1832 | } | 2003 | } |
1833 | 2004 | ||
1834 | /* | 2005 | /* |
1835 | * This is working around a lock inversion between tasks acquiring DLM locks | 2006 | * This is working around a lock inversion between tasks acquiring DLM |
1836 | * while holding a page lock and the vote thread which blocks dlm lock acquiry | 2007 | * locks while holding a page lock and the downconvert thread which |
1837 | * while acquiring page locks. | 2008 | * blocks dlm lock acquiry while acquiring page locks. |
1838 | * | 2009 | * |
1839 | * ** These _with_page variants are only intended to be called from aop | 2011 | * ** These _with_page variants are only intended to be called from aop |
1840 | * methods that hold page locks and return a very specific *positive* error | 2011 | * methods that hold page locks and return a very specific *positive* error |
1841 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** | 2012 | * code that aop methods pass up to the VFS -- test for errors with != 0. ** |
1842 | * | 2013 | * |
1843 | * The DLM is called such that it returns -EAGAIN if it would have blocked | 2014 | * The DLM is called such that it returns -EAGAIN if it would have |
1844 | * waiting for the vote thread. In that case we unlock our page so the vote | 2015 | * blocked waiting for the downconvert thread. In that case we unlock |
1845 | * thread can make progress. Once we've done this we have to return | 2016 | * our page so the downconvert thread can make progress. Once we've |
1846 | * AOP_TRUNCATED_PAGE so the aop method that called us can bubble that back up | 2017 | * done this we have to return AOP_TRUNCATED_PAGE so the aop method |
1847 | * into the VFS who will then immediately retry the aop call. | 2018 | * that called us can bubble that back up into the VFS who will then |
2019 | * immediately retry the aop call. | ||
1848 | * | 2020 | * |
1849 | * We do a blocking lock and immediate unlock before returning, though, so that | 2021 | * We do a blocking lock and immediate unlock before returning, though, so that |
1850 | * the lock has a great chance of being cached on this node by the time the VFS | 2022 | * the lock has a great chance of being cached on this node by the time the VFS |
@@ -1852,32 +2024,32 @@ bail: | |||
1852 | * ping locks back and forth, but that's a risk we're willing to take to avoid | 2024 | * ping locks back and forth, but that's a risk we're willing to take to avoid |
1853 | * the lock inversion simply. | 2025 | * the lock inversion simply. |
1854 | */ | 2026 | */ |
1855 | int ocfs2_meta_lock_with_page(struct inode *inode, | 2027 | int ocfs2_inode_lock_with_page(struct inode *inode, |
1856 | struct buffer_head **ret_bh, | 2028 | struct buffer_head **ret_bh, |
1857 | int ex, | 2029 | int ex, |
1858 | struct page *page) | 2030 | struct page *page) |
1859 | { | 2031 | { |
1860 | int ret; | 2032 | int ret; |
1861 | 2033 | ||
1862 | ret = ocfs2_meta_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); | 2034 | ret = ocfs2_inode_lock_full(inode, ret_bh, ex, OCFS2_LOCK_NONBLOCK); |
1863 | if (ret == -EAGAIN) { | 2035 | if (ret == -EAGAIN) { |
1864 | unlock_page(page); | 2036 | unlock_page(page); |
1865 | if (ocfs2_meta_lock(inode, ret_bh, ex) == 0) | 2037 | if (ocfs2_inode_lock(inode, ret_bh, ex) == 0) |
1866 | ocfs2_meta_unlock(inode, ex); | 2038 | ocfs2_inode_unlock(inode, ex); |
1867 | ret = AOP_TRUNCATED_PAGE; | 2039 | ret = AOP_TRUNCATED_PAGE; |
1868 | } | 2040 | } |
1869 | 2041 | ||
1870 | return ret; | 2042 | return ret; |
1871 | } | 2043 | } |
1872 | 2044 | ||
1873 | int ocfs2_meta_lock_atime(struct inode *inode, | 2045 | int ocfs2_inode_lock_atime(struct inode *inode, |
1874 | struct vfsmount *vfsmnt, | 2046 | struct vfsmount *vfsmnt, |
1875 | int *level) | 2047 | int *level) |
1876 | { | 2048 | { |
1877 | int ret; | 2049 | int ret; |
1878 | 2050 | ||
1879 | mlog_entry_void(); | 2051 | mlog_entry_void(); |
1880 | ret = ocfs2_meta_lock(inode, NULL, 0); | 2052 | ret = ocfs2_inode_lock(inode, NULL, 0); |
1881 | if (ret < 0) { | 2053 | if (ret < 0) { |
1882 | mlog_errno(ret); | 2054 | mlog_errno(ret); |
1883 | return ret; | 2055 | return ret; |
@@ -1890,8 +2062,8 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1890 | if (ocfs2_should_update_atime(inode, vfsmnt)) { | 2062 | if (ocfs2_should_update_atime(inode, vfsmnt)) { |
1891 | struct buffer_head *bh = NULL; | 2063 | struct buffer_head *bh = NULL; |
1892 | 2064 | ||
1893 | ocfs2_meta_unlock(inode, 0); | 2065 | ocfs2_inode_unlock(inode, 0); |
1894 | ret = ocfs2_meta_lock(inode, &bh, 1); | 2066 | ret = ocfs2_inode_lock(inode, &bh, 1); |
1895 | if (ret < 0) { | 2067 | if (ret < 0) { |
1896 | mlog_errno(ret); | 2068 | mlog_errno(ret); |
1897 | return ret; | 2069 | return ret; |
@@ -1908,11 +2080,11 @@ int ocfs2_meta_lock_atime(struct inode *inode, | |||
1908 | return ret; | 2080 | return ret; |
1909 | } | 2081 | } |
1910 | 2082 | ||
1911 | void ocfs2_meta_unlock(struct inode *inode, | 2083 | void ocfs2_inode_unlock(struct inode *inode, |
1912 | int ex) | 2084 | int ex) |
1913 | { | 2085 | { |
1914 | int level = ex ? LKM_EXMODE : LKM_PRMODE; | 2086 | int level = ex ? LKM_EXMODE : LKM_PRMODE; |
1915 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_meta_lockres; | 2087 | struct ocfs2_lock_res *lockres = &OCFS2_I(inode)->ip_inode_lockres; |
1916 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2088 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1917 | 2089 | ||
1918 | mlog_entry_void(); | 2090 | mlog_entry_void(); |
@@ -2320,11 +2492,11 @@ int ocfs2_dlm_init(struct ocfs2_super *osb) | |||
2320 | goto bail; | 2492 | goto bail; |
2321 | } | 2493 | } |
2322 | 2494 | ||
2323 | /* launch vote thread */ | 2495 | /* launch downconvert thread */ |
2324 | osb->vote_task = kthread_run(ocfs2_vote_thread, osb, "ocfs2vote"); | 2496 | osb->dc_task = kthread_run(ocfs2_downconvert_thread, osb, "ocfs2dc"); |
2325 | if (IS_ERR(osb->vote_task)) { | 2497 | if (IS_ERR(osb->dc_task)) { |
2326 | status = PTR_ERR(osb->vote_task); | 2498 | status = PTR_ERR(osb->dc_task); |
2327 | osb->vote_task = NULL; | 2499 | osb->dc_task = NULL; |
2328 | mlog_errno(status); | 2500 | mlog_errno(status); |
2329 | goto bail; | 2501 | goto bail; |
2330 | } | 2502 | } |
@@ -2353,8 +2525,8 @@ local: | |||
2353 | bail: | 2525 | bail: |
2354 | if (status < 0) { | 2526 | if (status < 0) { |
2355 | ocfs2_dlm_shutdown_debug(osb); | 2527 | ocfs2_dlm_shutdown_debug(osb); |
2356 | if (osb->vote_task) | 2528 | if (osb->dc_task) |
2357 | kthread_stop(osb->vote_task); | 2529 | kthread_stop(osb->dc_task); |
2358 | } | 2530 | } |
2359 | 2531 | ||
2360 | mlog_exit(status); | 2532 | mlog_exit(status); |
@@ -2369,9 +2541,9 @@ void ocfs2_dlm_shutdown(struct ocfs2_super *osb) | |||
2369 | 2541 | ||
2370 | ocfs2_drop_osb_locks(osb); | 2542 | ocfs2_drop_osb_locks(osb); |
2371 | 2543 | ||
2372 | if (osb->vote_task) { | 2544 | if (osb->dc_task) { |
2373 | kthread_stop(osb->vote_task); | 2545 | kthread_stop(osb->dc_task); |
2374 | osb->vote_task = NULL; | 2546 | osb->dc_task = NULL; |
2375 | } | 2547 | } |
2376 | 2548 | ||
2377 | ocfs2_lock_res_free(&osb->osb_super_lockres); | 2549 | ocfs2_lock_res_free(&osb->osb_super_lockres); |
@@ -2527,7 +2699,7 @@ out: | |||
2527 | 2699 | ||
2528 | /* Mark the lockres as being dropped. It will no longer be | 2700 | /* Mark the lockres as being dropped. It will no longer be |
2529 | * queued if blocking, but we still may have to wait on it | 2701 | * queued if blocking, but we still may have to wait on it |
2530 | * being dequeued from the vote thread before we can consider | 2702 | * being dequeued from the downconvert thread before we can consider |
2531 | * it safe to drop. | 2703 | * it safe to drop. |
2532 | * | 2704 | * |
2533 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 2705 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
@@ -2590,14 +2762,7 @@ int ocfs2_drop_inode_locks(struct inode *inode) | |||
2590 | status = err; | 2762 | status = err; |
2591 | 2763 | ||
2592 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | 2764 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), |
2593 | &OCFS2_I(inode)->ip_data_lockres); | 2765 | &OCFS2_I(inode)->ip_inode_lockres); |
2594 | if (err < 0) | ||
2595 | mlog_errno(err); | ||
2596 | if (err < 0 && !status) | ||
2597 | status = err; | ||
2598 | |||
2599 | err = ocfs2_drop_lock(OCFS2_SB(inode->i_sb), | ||
2600 | &OCFS2_I(inode)->ip_meta_lockres); | ||
2601 | if (err < 0) | 2766 | if (err < 0) |
2602 | mlog_errno(err); | 2767 | mlog_errno(err); |
2603 | if (err < 0 && !status) | 2768 | if (err < 0 && !status) |
@@ -2850,6 +3015,9 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2850 | inode = ocfs2_lock_res_inode(lockres); | 3015 | inode = ocfs2_lock_res_inode(lockres); |
2851 | mapping = inode->i_mapping; | 3016 | mapping = inode->i_mapping; |
2852 | 3017 | ||
3018 | if (S_ISREG(inode->i_mode)) | ||
3019 | goto out; | ||
3020 | |||
2853 | /* | 3021 | /* |
2854 | * We need this before the filemap_fdatawrite() so that it can | 3022 | * We need this before the filemap_fdatawrite() so that it can |
2855 | * transfer the dirty bit from the PTE to the | 3023 | * transfer the dirty bit from the PTE to the |
@@ -2875,6 +3043,7 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
2875 | filemap_fdatawait(mapping); | 3043 | filemap_fdatawait(mapping); |
2876 | } | 3044 | } |
2877 | 3045 | ||
3046 | out: | ||
2878 | return UNBLOCK_CONTINUE; | 3047 | return UNBLOCK_CONTINUE; |
2879 | } | 3048 | } |
2880 | 3049 | ||
@@ -2903,7 +3072,7 @@ static void ocfs2_set_meta_lvb(struct ocfs2_lock_res *lockres) | |||
2903 | 3072 | ||
2904 | /* | 3073 | /* |
2905 | * Does the final reference drop on our dentry lock. Right now this | 3074 | * Does the final reference drop on our dentry lock. Right now this |
2906 | * happens in the vote thread, but we could choose to simplify the | 3075 | * happens in the downconvert thread, but we could choose to simplify the |
2907 | * dlmglue API and push these off to the ocfs2_wq in the future. | 3076 | * dlmglue API and push these off to the ocfs2_wq in the future. |
2908 | */ | 3077 | */ |
2909 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, | 3078 | static void ocfs2_dentry_post_unlock(struct ocfs2_super *osb, |
@@ -3042,7 +3211,7 @@ void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | |||
3042 | mlog(0, "lockres %s blocked.\n", lockres->l_name); | 3211 | mlog(0, "lockres %s blocked.\n", lockres->l_name); |
3043 | 3212 | ||
3044 | /* Detect whether a lock has been marked as going away while | 3213 | /* Detect whether a lock has been marked as going away while |
3045 | * the vote thread was processing other things. A lock can | 3214 | * the downconvert thread was processing other things. A lock can |
3046 | * still be marked with OCFS2_LOCK_FREEING after this check, | 3215 | * still be marked with OCFS2_LOCK_FREEING after this check, |
3047 | * but short circuiting here will still save us some | 3216 | * but short circuiting here will still save us some |
3048 | * performance. */ | 3217 | * performance. */ |
@@ -3091,13 +3260,104 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, | |||
3091 | 3260 | ||
3092 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); | 3261 | lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); |
3093 | 3262 | ||
3094 | spin_lock(&osb->vote_task_lock); | 3263 | spin_lock(&osb->dc_task_lock); |
3095 | if (list_empty(&lockres->l_blocked_list)) { | 3264 | if (list_empty(&lockres->l_blocked_list)) { |
3096 | list_add_tail(&lockres->l_blocked_list, | 3265 | list_add_tail(&lockres->l_blocked_list, |
3097 | &osb->blocked_lock_list); | 3266 | &osb->blocked_lock_list); |
3098 | osb->blocked_lock_count++; | 3267 | osb->blocked_lock_count++; |
3099 | } | 3268 | } |
3100 | spin_unlock(&osb->vote_task_lock); | 3269 | spin_unlock(&osb->dc_task_lock); |
3270 | |||
3271 | mlog_exit_void(); | ||
3272 | } | ||
3273 | |||
3274 | static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) | ||
3275 | { | ||
3276 | unsigned long processed; | ||
3277 | struct ocfs2_lock_res *lockres; | ||
3278 | |||
3279 | mlog_entry_void(); | ||
3280 | |||
3281 | spin_lock(&osb->dc_task_lock); | ||
3282 | /* grab this early so we know to try again if a state change and | ||
3283 | * wake happens part-way through our work */ | ||
3284 | osb->dc_work_sequence = osb->dc_wake_sequence; | ||
3285 | |||
3286 | processed = osb->blocked_lock_count; | ||
3287 | while (processed) { | ||
3288 | BUG_ON(list_empty(&osb->blocked_lock_list)); | ||
3289 | |||
3290 | lockres = list_entry(osb->blocked_lock_list.next, | ||
3291 | struct ocfs2_lock_res, l_blocked_list); | ||
3292 | list_del_init(&lockres->l_blocked_list); | ||
3293 | osb->blocked_lock_count--; | ||
3294 | spin_unlock(&osb->dc_task_lock); | ||
3295 | |||
3296 | BUG_ON(!processed); | ||
3297 | processed--; | ||
3298 | |||
3299 | ocfs2_process_blocked_lock(osb, lockres); | ||
3300 | |||
3301 | spin_lock(&osb->dc_task_lock); | ||
3302 | } | ||
3303 | spin_unlock(&osb->dc_task_lock); | ||
3101 | 3304 | ||
3102 | mlog_exit_void(); | 3305 | mlog_exit_void(); |
3103 | } | 3306 | } |
3307 | |||
3308 | static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) | ||
3309 | { | ||
3310 | int empty = 0; | ||
3311 | |||
3312 | spin_lock(&osb->dc_task_lock); | ||
3313 | if (list_empty(&osb->blocked_lock_list)) | ||
3314 | empty = 1; | ||
3315 | |||
3316 | spin_unlock(&osb->dc_task_lock); | ||
3317 | return empty; | ||
3318 | } | ||
3319 | |||
3320 | static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) | ||
3321 | { | ||
3322 | int should_wake = 0; | ||
3323 | |||
3324 | spin_lock(&osb->dc_task_lock); | ||
3325 | if (osb->dc_work_sequence != osb->dc_wake_sequence) | ||
3326 | should_wake = 1; | ||
3327 | spin_unlock(&osb->dc_task_lock); | ||
3328 | |||
3329 | return should_wake; | ||
3330 | } | ||
3331 | |||
3332 | int ocfs2_downconvert_thread(void *arg) | ||
3333 | { | ||
3334 | int status = 0; | ||
3335 | struct ocfs2_super *osb = arg; | ||
3336 | |||
3337 | /* only quit once we've been asked to stop and there is no more | ||
3338 | * work available */ | ||
3339 | while (!(kthread_should_stop() && | ||
3340 | ocfs2_downconvert_thread_lists_empty(osb))) { | ||
3341 | |||
3342 | wait_event_interruptible(osb->dc_event, | ||
3343 | ocfs2_downconvert_thread_should_wake(osb) || | ||
3344 | kthread_should_stop()); | ||
3345 | |||
3346 | mlog(0, "downconvert_thread: awoken\n"); | ||
3347 | |||
3348 | ocfs2_downconvert_thread_do_work(osb); | ||
3349 | } | ||
3350 | |||
3351 | osb->dc_task = NULL; | ||
3352 | return status; | ||
3353 | } | ||
3354 | |||
3355 | void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) | ||
3356 | { | ||
3357 | spin_lock(&osb->dc_task_lock); | ||
3358 | /* make sure the voting thread gets a swipe at whatever changes | ||
3359 | * the caller may have made to the voting state */ | ||
3360 | osb->dc_wake_sequence++; | ||
3361 | spin_unlock(&osb->dc_task_lock); | ||
3362 | wake_up(&osb->dc_event); | ||
3363 | } | ||