diff options
Diffstat (limited to 'fs/ocfs2/dlmglue.c')
-rw-r--r-- | fs/ocfs2/dlmglue.c | 267 |
1 files changed, 267 insertions, 0 deletions
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index fa5e3bdc295..3867244fb14 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -68,6 +68,7 @@ struct ocfs2_mask_waiter { | |||
68 | 68 | ||
69 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); | 69 | static struct ocfs2_super *ocfs2_get_dentry_osb(struct ocfs2_lock_res *lockres); |
70 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); | 70 | static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres); |
71 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres); | ||
71 | 72 | ||
72 | /* | 73 | /* |
73 | * Return value from ->downconvert_worker functions. | 74 | * Return value from ->downconvert_worker functions. |
@@ -252,6 +253,11 @@ static struct ocfs2_lock_res_ops ocfs2_inode_open_lops = { | |||
252 | .flags = 0, | 253 | .flags = 0, |
253 | }; | 254 | }; |
254 | 255 | ||
256 | static struct ocfs2_lock_res_ops ocfs2_flock_lops = { | ||
257 | .get_osb = ocfs2_get_file_osb, | ||
258 | .flags = 0, | ||
259 | }; | ||
260 | |||
255 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) | 261 | static inline int ocfs2_is_inode_lock(struct ocfs2_lock_res *lockres) |
256 | { | 262 | { |
257 | return lockres->l_type == OCFS2_LOCK_TYPE_META || | 263 | return lockres->l_type == OCFS2_LOCK_TYPE_META || |
@@ -310,6 +316,17 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
310 | struct buffer_head **bh); | 316 | struct buffer_head **bh); |
311 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); | 317 | static void ocfs2_drop_osb_locks(struct ocfs2_super *osb); |
312 | static inline int ocfs2_highest_compat_lock_level(int level); | 318 | static inline int ocfs2_highest_compat_lock_level(int level); |
319 | static void ocfs2_prepare_downconvert(struct ocfs2_lock_res *lockres, | ||
320 | int new_level); | ||
321 | static int ocfs2_downconvert_lock(struct ocfs2_super *osb, | ||
322 | struct ocfs2_lock_res *lockres, | ||
323 | int new_level, | ||
324 | int lvb); | ||
325 | static int ocfs2_prepare_cancel_convert(struct ocfs2_super *osb, | ||
326 | struct ocfs2_lock_res *lockres); | ||
327 | static int ocfs2_cancel_convert(struct ocfs2_super *osb, | ||
328 | struct ocfs2_lock_res *lockres); | ||
329 | |||
313 | 330 | ||
314 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, | 331 | static void ocfs2_build_lock_name(enum ocfs2_lock_type type, |
315 | u64 blkno, | 332 | u64 blkno, |
@@ -419,6 +436,13 @@ static struct ocfs2_super *ocfs2_get_inode_osb(struct ocfs2_lock_res *lockres) | |||
419 | return OCFS2_SB(inode->i_sb); | 436 | return OCFS2_SB(inode->i_sb); |
420 | } | 437 | } |
421 | 438 | ||
439 | static struct ocfs2_super *ocfs2_get_file_osb(struct ocfs2_lock_res *lockres) | ||
440 | { | ||
441 | struct ocfs2_file_private *fp = lockres->l_priv; | ||
442 | |||
443 | return OCFS2_SB(fp->fp_file->f_mapping->host->i_sb); | ||
444 | } | ||
445 | |||
422 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) | 446 | static __u64 ocfs2_get_dentry_lock_ino(struct ocfs2_lock_res *lockres) |
423 | { | 447 | { |
424 | __be64 inode_blkno_be; | 448 | __be64 inode_blkno_be; |
@@ -499,6 +523,21 @@ static void ocfs2_rename_lock_res_init(struct ocfs2_lock_res *res, | |||
499 | &ocfs2_rename_lops, osb); | 523 | &ocfs2_rename_lops, osb); |
500 | } | 524 | } |
501 | 525 | ||
526 | void ocfs2_file_lock_res_init(struct ocfs2_lock_res *lockres, | ||
527 | struct ocfs2_file_private *fp) | ||
528 | { | ||
529 | struct inode *inode = fp->fp_file->f_mapping->host; | ||
530 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
531 | |||
532 | ocfs2_lock_res_init_once(lockres); | ||
533 | ocfs2_build_lock_name(OCFS2_LOCK_TYPE_FLOCK, oi->ip_blkno, | ||
534 | inode->i_generation, lockres->l_name); | ||
535 | ocfs2_lock_res_init_common(OCFS2_SB(inode->i_sb), lockres, | ||
536 | OCFS2_LOCK_TYPE_FLOCK, &ocfs2_flock_lops, | ||
537 | fp); | ||
538 | lockres->l_flags |= OCFS2_LOCK_NOCACHE; | ||
539 | } | ||
540 | |||
502 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) | 541 | void ocfs2_lock_res_free(struct ocfs2_lock_res *res) |
503 | { | 542 | { |
504 | mlog_entry_void(); | 543 | mlog_entry_void(); |
@@ -715,6 +754,13 @@ static void ocfs2_blocking_ast(void *opaque, int level) | |||
715 | lockres->l_name, level, lockres->l_level, | 754 | lockres->l_name, level, lockres->l_level, |
716 | ocfs2_lock_type_string(lockres->l_type)); | 755 | ocfs2_lock_type_string(lockres->l_type)); |
717 | 756 | ||
757 | /* | ||
758 | * We can skip the bast for locks which don't enable caching - | ||
759 | * they'll be dropped at the earliest possible time anyway. | ||
760 | */ | ||
761 | if (lockres->l_flags & OCFS2_LOCK_NOCACHE) | ||
762 | return; | ||
763 | |||
718 | spin_lock_irqsave(&lockres->l_lock, flags); | 764 | spin_lock_irqsave(&lockres->l_lock, flags); |
719 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); | 765 | needs_downconvert = ocfs2_generic_handle_bast(lockres, level); |
720 | if (needs_downconvert) | 766 | if (needs_downconvert) |
@@ -926,6 +972,21 @@ static int lockres_remove_mask_waiter(struct ocfs2_lock_res *lockres, | |||
926 | 972 | ||
927 | } | 973 | } |
928 | 974 | ||
975 | static int ocfs2_wait_for_mask_interruptible(struct ocfs2_mask_waiter *mw, | ||
976 | struct ocfs2_lock_res *lockres) | ||
977 | { | ||
978 | int ret; | ||
979 | |||
980 | ret = wait_for_completion_interruptible(&mw->mw_complete); | ||
981 | if (ret) | ||
982 | lockres_remove_mask_waiter(lockres, mw); | ||
983 | else | ||
984 | ret = mw->mw_status; | ||
985 | /* Re-arm the completion in case we want to wait on it again */ | ||
986 | INIT_COMPLETION(mw->mw_complete); | ||
987 | return ret; | ||
988 | } | ||
989 | |||
929 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, | 990 | static int ocfs2_cluster_lock(struct ocfs2_super *osb, |
930 | struct ocfs2_lock_res *lockres, | 991 | struct ocfs2_lock_res *lockres, |
931 | int level, | 992 | int level, |
@@ -1296,6 +1357,212 @@ out: | |||
1296 | mlog_exit_void(); | 1357 | mlog_exit_void(); |
1297 | } | 1358 | } |
1298 | 1359 | ||
1360 | static int ocfs2_flock_handle_signal(struct ocfs2_lock_res *lockres, | ||
1361 | int level) | ||
1362 | { | ||
1363 | int ret; | ||
1364 | struct ocfs2_super *osb = ocfs2_get_lockres_osb(lockres); | ||
1365 | unsigned long flags; | ||
1366 | struct ocfs2_mask_waiter mw; | ||
1367 | |||
1368 | ocfs2_init_mask_waiter(&mw); | ||
1369 | |||
1370 | retry_cancel: | ||
1371 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1372 | if (lockres->l_flags & OCFS2_LOCK_BUSY) { | ||
1373 | ret = ocfs2_prepare_cancel_convert(osb, lockres); | ||
1374 | if (ret) { | ||
1375 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1376 | ret = ocfs2_cancel_convert(osb, lockres); | ||
1377 | if (ret < 0) { | ||
1378 | mlog_errno(ret); | ||
1379 | goto out; | ||
1380 | } | ||
1381 | goto retry_cancel; | ||
1382 | } | ||
1383 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1384 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1385 | |||
1386 | ocfs2_wait_for_mask(&mw); | ||
1387 | goto retry_cancel; | ||
1388 | } | ||
1389 | |||
1390 | ret = -ERESTARTSYS; | ||
1391 | /* | ||
1392 | * We may still have gotten the lock, in which case there's no | ||
1393 | * point to restarting the syscall. | ||
1394 | */ | ||
1395 | if (lockres->l_level == level) | ||
1396 | ret = 0; | ||
1397 | |||
1398 | mlog(0, "Cancel returning %d. flags: 0x%lx, level: %d, act: %d\n", ret, | ||
1399 | lockres->l_flags, lockres->l_level, lockres->l_action); | ||
1400 | |||
1401 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1402 | |||
1403 | out: | ||
1404 | return ret; | ||
1405 | } | ||
1406 | |||
1407 | /* | ||
1408 | * ocfs2_file_lock() and ocfs2_file_unlock() map to a single pair of | ||
1409 | * flock() calls. The locking approach this requires is sufficiently | ||
1410 | * different from all other cluster lock types that we implement a | ||
1411 | * separate path to the "low-level" dlm calls. In particular: | ||
1412 | * | ||
1413 | * - No optimization of lock levels is done - we take at exactly | ||
1414 | * what's been requested. | ||
1415 | * | ||
1416 | * - No lock caching is employed. We immediately downconvert to | ||
1417 | * no-lock at unlock time. This also means flock locks never go on | ||
1418 | * the blocking list. | ||
1419 | * | ||
1420 | * - Since userspace can trivially deadlock itself with flock, we make | ||
1421 | * sure to allow cancellation of a misbehaving application's flock() | ||
1422 | * request. | ||
1423 | * | ||
1424 | * - Access to any flock lockres doesn't require concurrency, so we | ||
1425 | * can simplify the code by requiring the caller to guarantee | ||
1426 | * serialization of dlmglue flock calls. | ||
1427 | */ | ||
1428 | int ocfs2_file_lock(struct file *file, int ex, int trylock) | ||
1429 | { | ||
1430 | int ret, level = ex ? LKM_EXMODE : LKM_PRMODE; | ||
1431 | unsigned int lkm_flags = trylock ? LKM_NOQUEUE : 0; | ||
1432 | unsigned long flags; | ||
1433 | struct ocfs2_file_private *fp = file->private_data; | ||
1434 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1435 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1436 | struct ocfs2_mask_waiter mw; | ||
1437 | |||
1438 | ocfs2_init_mask_waiter(&mw); | ||
1439 | |||
1440 | if ((lockres->l_flags & OCFS2_LOCK_BUSY) || | ||
1441 | (lockres->l_level > LKM_NLMODE)) { | ||
1442 | mlog(ML_ERROR, | ||
1443 | "File lock \"%s\" has busy or locked state: flags: 0x%lx, " | ||
1444 | "level: %u\n", lockres->l_name, lockres->l_flags, | ||
1445 | lockres->l_level); | ||
1446 | return -EINVAL; | ||
1447 | } | ||
1448 | |||
1449 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1450 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) { | ||
1451 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1452 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1453 | |||
1454 | /* | ||
1455 | * Get the lock at NLMODE to start - that way we | ||
1456 | * can cancel the upconvert request if need be. | ||
1457 | */ | ||
1458 | ret = ocfs2_lock_create(osb, lockres, LKM_NLMODE, 0); | ||
1459 | if (ret < 0) { | ||
1460 | mlog_errno(ret); | ||
1461 | goto out; | ||
1462 | } | ||
1463 | |||
1464 | ret = ocfs2_wait_for_mask(&mw); | ||
1465 | if (ret) { | ||
1466 | mlog_errno(ret); | ||
1467 | goto out; | ||
1468 | } | ||
1469 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1470 | } | ||
1471 | |||
1472 | lockres->l_action = OCFS2_AST_CONVERT; | ||
1473 | lkm_flags |= LKM_CONVERT; | ||
1474 | lockres->l_requested = level; | ||
1475 | lockres_or_flags(lockres, OCFS2_LOCK_BUSY); | ||
1476 | |||
1477 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1478 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1479 | |||
1480 | ret = dlmlock(osb->dlm, level, &lockres->l_lksb, lkm_flags, | ||
1481 | lockres->l_name, OCFS2_LOCK_ID_MAX_LEN - 1, | ||
1482 | ocfs2_locking_ast, lockres, ocfs2_blocking_ast); | ||
1483 | if (ret != DLM_NORMAL) { | ||
1484 | if (trylock && ret == DLM_NOTQUEUED) | ||
1485 | ret = -EAGAIN; | ||
1486 | else { | ||
1487 | ocfs2_log_dlm_error("dlmlock", ret, lockres); | ||
1488 | ret = -EINVAL; | ||
1489 | } | ||
1490 | |||
1491 | ocfs2_recover_from_dlm_error(lockres, 1); | ||
1492 | lockres_remove_mask_waiter(lockres, &mw); | ||
1493 | goto out; | ||
1494 | } | ||
1495 | |||
1496 | ret = ocfs2_wait_for_mask_interruptible(&mw, lockres); | ||
1497 | if (ret == -ERESTARTSYS) { | ||
1498 | /* | ||
1499 | * Userspace can deadlock itself with | ||
1500 | * flock(). Current behavior locally is to allow the | ||
1501 | * deadlock, but abort the system call if a signal is | ||
1502 | * received. We follow this example, otherwise a | ||
1503 | * poorly written program could sit in kernel until | ||
1504 | * reboot. | ||
1505 | * | ||
1506 | * Handling this is a bit more complicated for Ocfs2 | ||
1507 | * though. We can't exit this function with an | ||
1508 | * outstanding lock request, so a cancel convert is | ||
1509 | * required. We intentionally overwrite 'ret' - if the | ||
1510 | * cancel fails and the lock was granted, it's easier | ||
1511 | * to just bubble success back up to the user. | ||
1512 | */ | ||
1513 | ret = ocfs2_flock_handle_signal(lockres, level); | ||
1514 | } | ||
1515 | |||
1516 | out: | ||
1517 | |||
1518 | mlog(0, "Lock: \"%s\" ex: %d, trylock: %d, returns: %d\n", | ||
1519 | lockres->l_name, ex, trylock, ret); | ||
1520 | return ret; | ||
1521 | } | ||
1522 | |||
1523 | void ocfs2_file_unlock(struct file *file) | ||
1524 | { | ||
1525 | int ret; | ||
1526 | unsigned long flags; | ||
1527 | struct ocfs2_file_private *fp = file->private_data; | ||
1528 | struct ocfs2_lock_res *lockres = &fp->fp_flock; | ||
1529 | struct ocfs2_super *osb = OCFS2_SB(file->f_mapping->host->i_sb); | ||
1530 | struct ocfs2_mask_waiter mw; | ||
1531 | |||
1532 | ocfs2_init_mask_waiter(&mw); | ||
1533 | |||
1534 | if (!(lockres->l_flags & OCFS2_LOCK_ATTACHED)) | ||
1535 | return; | ||
1536 | |||
1537 | if (lockres->l_level == LKM_NLMODE) | ||
1538 | return; | ||
1539 | |||
1540 | mlog(0, "Unlock: \"%s\" flags: 0x%lx, level: %d, act: %d\n", | ||
1541 | lockres->l_name, lockres->l_flags, lockres->l_level, | ||
1542 | lockres->l_action); | ||
1543 | |||
1544 | spin_lock_irqsave(&lockres->l_lock, flags); | ||
1545 | /* | ||
1546 | * Fake a blocking ast for the downconvert code. | ||
1547 | */ | ||
1548 | lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED); | ||
1549 | lockres->l_blocking = LKM_EXMODE; | ||
1550 | |||
1551 | ocfs2_prepare_downconvert(lockres, LKM_NLMODE); | ||
1552 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_BUSY, 0); | ||
1553 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
1554 | |||
1555 | ret = ocfs2_downconvert_lock(osb, lockres, LKM_NLMODE, 0); | ||
1556 | if (ret) { | ||
1557 | mlog_errno(ret); | ||
1558 | return; | ||
1559 | } | ||
1560 | |||
1561 | ret = ocfs2_wait_for_mask(&mw); | ||
1562 | if (ret) | ||
1563 | mlog_errno(ret); | ||
1564 | } | ||
1565 | |||
1299 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, | 1566 | static void ocfs2_downconvert_on_unlock(struct ocfs2_super *osb, |
1300 | struct ocfs2_lock_res *lockres) | 1567 | struct ocfs2_lock_res *lockres) |
1301 | { | 1568 | { |