diff options
Diffstat (limited to 'fs/ocfs2/journal.c')
-rw-r--r-- | fs/ocfs2/journal.c | 163 |
1 files changed, 112 insertions, 51 deletions
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 303c8d96457f..4be801f4559b 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -147,8 +147,7 @@ struct ocfs2_journal_handle *ocfs2_start_trans(struct ocfs2_super *osb, | |||
147 | 147 | ||
148 | mlog_entry("(max_buffs = %d)\n", max_buffs); | 148 | mlog_entry("(max_buffs = %d)\n", max_buffs); |
149 | 149 | ||
150 | if (!osb || !osb->journal->j_journal) | 150 | BUG_ON(!osb || !osb->journal->j_journal); |
151 | BUG(); | ||
152 | 151 | ||
153 | if (ocfs2_is_hard_readonly(osb)) { | 152 | if (ocfs2_is_hard_readonly(osb)) { |
154 | ret = -EROFS; | 153 | ret = -EROFS; |
@@ -401,7 +400,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle, | |||
401 | * j_trans_barrier for us. */ | 400 | * j_trans_barrier for us. */ |
402 | ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); | 401 | ocfs2_set_inode_lock_trans(OCFS2_SB(inode->i_sb)->journal, inode); |
403 | 402 | ||
404 | down(&OCFS2_I(inode)->ip_io_sem); | 403 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); |
405 | switch (type) { | 404 | switch (type) { |
406 | case OCFS2_JOURNAL_ACCESS_CREATE: | 405 | case OCFS2_JOURNAL_ACCESS_CREATE: |
407 | case OCFS2_JOURNAL_ACCESS_WRITE: | 406 | case OCFS2_JOURNAL_ACCESS_WRITE: |
@@ -416,7 +415,7 @@ int ocfs2_journal_access(struct ocfs2_journal_handle *handle, | |||
416 | status = -EINVAL; | 415 | status = -EINVAL; |
417 | mlog(ML_ERROR, "Uknown access type!\n"); | 416 | mlog(ML_ERROR, "Uknown access type!\n"); |
418 | } | 417 | } |
419 | up(&OCFS2_I(inode)->ip_io_sem); | 418 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
420 | 419 | ||
421 | if (status < 0) | 420 | if (status < 0) |
422 | mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", | 421 | mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", |
@@ -561,7 +560,11 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
561 | SET_INODE_JOURNAL(inode); | 560 | SET_INODE_JOURNAL(inode); |
562 | OCFS2_I(inode)->ip_open_count++; | 561 | OCFS2_I(inode)->ip_open_count++; |
563 | 562 | ||
564 | status = ocfs2_meta_lock(inode, NULL, &bh, 1); | 563 | /* Skip recovery waits here - journal inode metadata never |
564 | * changes in a live cluster so it can be considered an | ||
565 | * exception to the rule. */ | ||
566 | status = ocfs2_meta_lock_full(inode, NULL, &bh, 1, | ||
567 | OCFS2_META_LOCK_RECOVERY); | ||
565 | if (status < 0) { | 568 | if (status < 0) { |
566 | if (status != -ERESTARTSYS) | 569 | if (status != -ERESTARTSYS) |
567 | mlog(ML_ERROR, "Could not get lock on journal!\n"); | 570 | mlog(ML_ERROR, "Could not get lock on journal!\n"); |
@@ -672,8 +675,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
672 | 675 | ||
673 | mlog_entry_void(); | 676 | mlog_entry_void(); |
674 | 677 | ||
675 | if (!osb) | 678 | BUG_ON(!osb); |
676 | BUG(); | ||
677 | 679 | ||
678 | journal = osb->journal; | 680 | journal = osb->journal; |
679 | if (!journal) | 681 | if (!journal) |
@@ -805,8 +807,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) | |||
805 | 807 | ||
806 | mlog_entry_void(); | 808 | mlog_entry_void(); |
807 | 809 | ||
808 | if (!journal) | 810 | BUG_ON(!journal); |
809 | BUG(); | ||
810 | 811 | ||
811 | status = journal_wipe(journal->j_journal, full); | 812 | status = journal_wipe(journal->j_journal, full); |
812 | if (status < 0) { | 813 | if (status < 0) { |
@@ -1072,10 +1073,10 @@ restart: | |||
1072 | NULL); | 1073 | NULL); |
1073 | 1074 | ||
1074 | bail: | 1075 | bail: |
1075 | down(&osb->recovery_lock); | 1076 | mutex_lock(&osb->recovery_lock); |
1076 | if (!status && | 1077 | if (!status && |
1077 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { | 1078 | !ocfs2_node_map_is_empty(osb, &osb->recovery_map)) { |
1078 | up(&osb->recovery_lock); | 1079 | mutex_unlock(&osb->recovery_lock); |
1079 | goto restart; | 1080 | goto restart; |
1080 | } | 1081 | } |
1081 | 1082 | ||
@@ -1083,7 +1084,7 @@ bail: | |||
1083 | mb(); /* sync with ocfs2_recovery_thread_running */ | 1084 | mb(); /* sync with ocfs2_recovery_thread_running */ |
1084 | wake_up(&osb->recovery_event); | 1085 | wake_up(&osb->recovery_event); |
1085 | 1086 | ||
1086 | up(&osb->recovery_lock); | 1087 | mutex_unlock(&osb->recovery_lock); |
1087 | 1088 | ||
1088 | mlog_exit(status); | 1089 | mlog_exit(status); |
1089 | /* no one is callint kthread_stop() for us so the kthread() api | 1090 | /* no one is callint kthread_stop() for us so the kthread() api |
@@ -1098,7 +1099,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | |||
1098 | mlog_entry("(node_num=%d, osb->node_num = %d)\n", | 1099 | mlog_entry("(node_num=%d, osb->node_num = %d)\n", |
1099 | node_num, osb->node_num); | 1100 | node_num, osb->node_num); |
1100 | 1101 | ||
1101 | down(&osb->recovery_lock); | 1102 | mutex_lock(&osb->recovery_lock); |
1102 | if (osb->disable_recovery) | 1103 | if (osb->disable_recovery) |
1103 | goto out; | 1104 | goto out; |
1104 | 1105 | ||
@@ -1120,7 +1121,7 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) | |||
1120 | } | 1121 | } |
1121 | 1122 | ||
1122 | out: | 1123 | out: |
1123 | up(&osb->recovery_lock); | 1124 | mutex_unlock(&osb->recovery_lock); |
1124 | wake_up(&osb->recovery_event); | 1125 | wake_up(&osb->recovery_event); |
1125 | 1126 | ||
1126 | mlog_exit_void(); | 1127 | mlog_exit_void(); |
@@ -1271,8 +1272,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, | |||
1271 | 1272 | ||
1272 | /* Should not ever be called to recover ourselves -- in that | 1273 | /* Should not ever be called to recover ourselves -- in that |
1273 | * case we should've called ocfs2_journal_load instead. */ | 1274 | * case we should've called ocfs2_journal_load instead. */ |
1274 | if (osb->node_num == node_num) | 1275 | BUG_ON(osb->node_num == node_num); |
1275 | BUG(); | ||
1276 | 1276 | ||
1277 | slot_num = ocfs2_node_num_to_slot(si, node_num); | 1277 | slot_num = ocfs2_node_num_to_slot(si, node_num); |
1278 | if (slot_num == OCFS2_INVALID_SLOT) { | 1278 | if (slot_num == OCFS2_INVALID_SLOT) { |
@@ -1408,21 +1408,17 @@ bail: | |||
1408 | return status; | 1408 | return status; |
1409 | } | 1409 | } |
1410 | 1410 | ||
1411 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 1411 | static int ocfs2_queue_orphans(struct ocfs2_super *osb, |
1412 | int slot) | 1412 | int slot, |
1413 | struct inode **head) | ||
1413 | { | 1414 | { |
1414 | int status = 0; | 1415 | int status; |
1415 | int have_disk_lock = 0; | ||
1416 | struct inode *inode = NULL; | ||
1417 | struct inode *iter; | ||
1418 | struct inode *orphan_dir_inode = NULL; | 1416 | struct inode *orphan_dir_inode = NULL; |
1417 | struct inode *iter; | ||
1419 | unsigned long offset, blk, local; | 1418 | unsigned long offset, blk, local; |
1420 | struct buffer_head *bh = NULL; | 1419 | struct buffer_head *bh = NULL; |
1421 | struct ocfs2_dir_entry *de; | 1420 | struct ocfs2_dir_entry *de; |
1422 | struct super_block *sb = osb->sb; | 1421 | struct super_block *sb = osb->sb; |
1423 | struct ocfs2_inode_info *oi; | ||
1424 | |||
1425 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); | ||
1426 | 1422 | ||
1427 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1423 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
1428 | ORPHAN_DIR_SYSTEM_INODE, | 1424 | ORPHAN_DIR_SYSTEM_INODE, |
@@ -1430,17 +1426,15 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1430 | if (!orphan_dir_inode) { | 1426 | if (!orphan_dir_inode) { |
1431 | status = -ENOENT; | 1427 | status = -ENOENT; |
1432 | mlog_errno(status); | 1428 | mlog_errno(status); |
1433 | goto out; | 1429 | return status; |
1434 | } | 1430 | } |
1435 | 1431 | ||
1436 | mutex_lock(&orphan_dir_inode->i_mutex); | 1432 | mutex_lock(&orphan_dir_inode->i_mutex); |
1437 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); | 1433 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); |
1438 | if (status < 0) { | 1434 | if (status < 0) { |
1439 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
1440 | mlog_errno(status); | 1435 | mlog_errno(status); |
1441 | goto out; | 1436 | goto out; |
1442 | } | 1437 | } |
1443 | have_disk_lock = 1; | ||
1444 | 1438 | ||
1445 | offset = 0; | 1439 | offset = 0; |
1446 | iter = NULL; | 1440 | iter = NULL; |
@@ -1451,11 +1445,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1451 | if (!bh) | 1445 | if (!bh) |
1452 | status = -EINVAL; | 1446 | status = -EINVAL; |
1453 | if (status < 0) { | 1447 | if (status < 0) { |
1454 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
1455 | if (bh) | 1448 | if (bh) |
1456 | brelse(bh); | 1449 | brelse(bh); |
1457 | mlog_errno(status); | 1450 | mlog_errno(status); |
1458 | goto out; | 1451 | goto out_unlock; |
1459 | } | 1452 | } |
1460 | 1453 | ||
1461 | local = 0; | 1454 | local = 0; |
@@ -1465,11 +1458,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1465 | 1458 | ||
1466 | if (!ocfs2_check_dir_entry(orphan_dir_inode, | 1459 | if (!ocfs2_check_dir_entry(orphan_dir_inode, |
1467 | de, bh, local)) { | 1460 | de, bh, local)) { |
1468 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
1469 | status = -EINVAL; | 1461 | status = -EINVAL; |
1470 | mlog_errno(status); | 1462 | mlog_errno(status); |
1471 | brelse(bh); | 1463 | brelse(bh); |
1472 | goto out; | 1464 | goto out_unlock; |
1473 | } | 1465 | } |
1474 | 1466 | ||
1475 | local += le16_to_cpu(de->rec_len); | 1467 | local += le16_to_cpu(de->rec_len); |
@@ -1504,18 +1496,95 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1504 | 1496 | ||
1505 | mlog(0, "queue orphan %"MLFu64"\n", | 1497 | mlog(0, "queue orphan %"MLFu64"\n", |
1506 | OCFS2_I(iter)->ip_blkno); | 1498 | OCFS2_I(iter)->ip_blkno); |
1507 | OCFS2_I(iter)->ip_next_orphan = inode; | 1499 | /* No locking is required for the next_orphan |
1508 | inode = iter; | 1500 | * queue as there is only ever a single |
1501 | * process doing orphan recovery. */ | ||
1502 | OCFS2_I(iter)->ip_next_orphan = *head; | ||
1503 | *head = iter; | ||
1509 | } | 1504 | } |
1510 | brelse(bh); | 1505 | brelse(bh); |
1511 | } | 1506 | } |
1512 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
1513 | 1507 | ||
1508 | out_unlock: | ||
1514 | ocfs2_meta_unlock(orphan_dir_inode, 0); | 1509 | ocfs2_meta_unlock(orphan_dir_inode, 0); |
1515 | have_disk_lock = 0; | 1510 | out: |
1516 | 1511 | mutex_unlock(&orphan_dir_inode->i_mutex); | |
1517 | iput(orphan_dir_inode); | 1512 | iput(orphan_dir_inode); |
1518 | orphan_dir_inode = NULL; | 1513 | return status; |
1514 | } | ||
1515 | |||
1516 | static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, | ||
1517 | int slot) | ||
1518 | { | ||
1519 | int ret; | ||
1520 | |||
1521 | spin_lock(&osb->osb_lock); | ||
1522 | ret = !osb->osb_orphan_wipes[slot]; | ||
1523 | spin_unlock(&osb->osb_lock); | ||
1524 | return ret; | ||
1525 | } | ||
1526 | |||
1527 | static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb, | ||
1528 | int slot) | ||
1529 | { | ||
1530 | spin_lock(&osb->osb_lock); | ||
1531 | /* Mark ourselves such that new processes in delete_inode() | ||
1532 | * know to quit early. */ | ||
1533 | ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | ||
1534 | while (osb->osb_orphan_wipes[slot]) { | ||
1535 | /* If any processes are already in the middle of an | ||
1536 | * orphan wipe on this dir, then we need to wait for | ||
1537 | * them. */ | ||
1538 | spin_unlock(&osb->osb_lock); | ||
1539 | wait_event_interruptible(osb->osb_wipe_event, | ||
1540 | ocfs2_orphan_recovery_can_continue(osb, slot)); | ||
1541 | spin_lock(&osb->osb_lock); | ||
1542 | } | ||
1543 | spin_unlock(&osb->osb_lock); | ||
1544 | } | ||
1545 | |||
1546 | static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, | ||
1547 | int slot) | ||
1548 | { | ||
1549 | ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | ||
1550 | } | ||
1551 | |||
1552 | /* | ||
1553 | * Orphan recovery. Each mounted node has it's own orphan dir which we | ||
1554 | * must run during recovery. Our strategy here is to build a list of | ||
1555 | * the inodes in the orphan dir and iget/iput them. The VFS does | ||
1556 | * (most) of the rest of the work. | ||
1557 | * | ||
1558 | * Orphan recovery can happen at any time, not just mount so we have a | ||
1559 | * couple of extra considerations. | ||
1560 | * | ||
1561 | * - We grab as many inodes as we can under the orphan dir lock - | ||
1562 | * doing iget() outside the orphan dir risks getting a reference on | ||
1563 | * an invalid inode. | ||
1564 | * - We must be sure not to deadlock with other processes on the | ||
1565 | * system wanting to run delete_inode(). This can happen when they go | ||
1566 | * to lock the orphan dir and the orphan recovery process attempts to | ||
1567 | * iget() inside the orphan dir lock. This can be avoided by | ||
1568 | * advertising our state to ocfs2_delete_inode(). | ||
1569 | */ | ||
1570 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | ||
1571 | int slot) | ||
1572 | { | ||
1573 | int ret = 0; | ||
1574 | struct inode *inode = NULL; | ||
1575 | struct inode *iter; | ||
1576 | struct ocfs2_inode_info *oi; | ||
1577 | |||
1578 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); | ||
1579 | |||
1580 | ocfs2_mark_recovering_orphan_dir(osb, slot); | ||
1581 | ret = ocfs2_queue_orphans(osb, slot, &inode); | ||
1582 | ocfs2_clear_recovering_orphan_dir(osb, slot); | ||
1583 | |||
1584 | /* Error here should be noted, but we want to continue with as | ||
1585 | * many queued inodes as we've got. */ | ||
1586 | if (ret) | ||
1587 | mlog_errno(ret); | ||
1519 | 1588 | ||
1520 | while (inode) { | 1589 | while (inode) { |
1521 | oi = OCFS2_I(inode); | 1590 | oi = OCFS2_I(inode); |
@@ -1541,14 +1610,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
1541 | inode = iter; | 1610 | inode = iter; |
1542 | } | 1611 | } |
1543 | 1612 | ||
1544 | out: | 1613 | return ret; |
1545 | if (have_disk_lock) | ||
1546 | ocfs2_meta_unlock(orphan_dir_inode, 0); | ||
1547 | |||
1548 | if (orphan_dir_inode) | ||
1549 | iput(orphan_dir_inode); | ||
1550 | |||
1551 | return status; | ||
1552 | } | 1614 | } |
1553 | 1615 | ||
1554 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) | 1616 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) |
@@ -1584,10 +1646,9 @@ static int ocfs2_commit_thread(void *arg) | |||
1584 | while (!(kthread_should_stop() && | 1646 | while (!(kthread_should_stop() && |
1585 | atomic_read(&journal->j_num_trans) == 0)) { | 1647 | atomic_read(&journal->j_num_trans) == 0)) { |
1586 | 1648 | ||
1587 | wait_event_interruptible_timeout(osb->checkpoint_event, | 1649 | wait_event_interruptible(osb->checkpoint_event, |
1588 | atomic_read(&journal->j_num_trans) | 1650 | atomic_read(&journal->j_num_trans) |
1589 | || kthread_should_stop(), | 1651 | || kthread_should_stop()); |
1590 | OCFS2_CHECKPOINT_INTERVAL); | ||
1591 | 1652 | ||
1592 | status = ocfs2_commit_cache(osb); | 1653 | status = ocfs2_commit_cache(osb); |
1593 | if (status < 0) | 1654 | if (status < 0) |