diff options
| -rw-r--r-- | fs/ocfs2/heartbeat.c | 1 | ||||
| -rw-r--r-- | fs/ocfs2/inode.c | 46 | ||||
| -rw-r--r-- | fs/ocfs2/journal.c | 124 | ||||
| -rw-r--r-- | fs/ocfs2/ocfs2.h | 4 | ||||
| -rw-r--r-- | fs/ocfs2/super.c | 11 |
5 files changed, 154 insertions, 32 deletions
diff --git a/fs/ocfs2/heartbeat.c b/fs/ocfs2/heartbeat.c index 0bbd22f46c80..cbfd45a97a63 100644 --- a/fs/ocfs2/heartbeat.c +++ b/fs/ocfs2/heartbeat.c | |||
| @@ -67,6 +67,7 @@ void ocfs2_init_node_maps(struct ocfs2_super *osb) | |||
| 67 | ocfs2_node_map_init(&osb->mounted_map); | 67 | ocfs2_node_map_init(&osb->mounted_map); |
| 68 | ocfs2_node_map_init(&osb->recovery_map); | 68 | ocfs2_node_map_init(&osb->recovery_map); |
| 69 | ocfs2_node_map_init(&osb->umount_map); | 69 | ocfs2_node_map_init(&osb->umount_map); |
| 70 | ocfs2_node_map_init(&osb->osb_recovering_orphan_dirs); | ||
| 70 | } | 71 | } |
| 71 | 72 | ||
| 72 | static void ocfs2_do_node_down(int node_num, | 73 | static void ocfs2_do_node_down(int node_num, |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 8122489c5762..315472a5c192 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -41,6 +41,7 @@ | |||
| 41 | #include "dlmglue.h" | 41 | #include "dlmglue.h" |
| 42 | #include "extent_map.h" | 42 | #include "extent_map.h" |
| 43 | #include "file.h" | 43 | #include "file.h" |
| 44 | #include "heartbeat.h" | ||
| 44 | #include "inode.h" | 45 | #include "inode.h" |
| 45 | #include "journal.h" | 46 | #include "journal.h" |
| 46 | #include "namei.h" | 47 | #include "namei.h" |
| @@ -544,6 +545,42 @@ bail: | |||
| 544 | return status; | 545 | return status; |
| 545 | } | 546 | } |
| 546 | 547 | ||
| 548 | /* | ||
| 549 | * Serialize with orphan dir recovery. If the process doing | ||
| 550 | * recovery on this orphan dir does an iget() with the dir | ||
| 551 | * i_mutex held, we'll deadlock here. Instead we detect this | ||
| 552 | * and exit early - recovery will wipe this inode for us. | ||
| 553 | */ | ||
| 554 | static int ocfs2_check_orphan_recovery_state(struct ocfs2_super *osb, | ||
| 555 | int slot) | ||
| 556 | { | ||
| 557 | int ret = 0; | ||
| 558 | |||
| 559 | spin_lock(&osb->osb_lock); | ||
| 560 | if (ocfs2_node_map_test_bit(osb, &osb->osb_recovering_orphan_dirs, slot)) { | ||
| 561 | mlog(0, "Recovery is happening on orphan dir %d, will skip " | ||
| 562 | "this inode\n", slot); | ||
| 563 | ret = -EDEADLK; | ||
| 564 | goto out; | ||
| 565 | } | ||
| 566 | /* This signals to the orphan recovery process that it should | ||
| 567 | * wait for us to handle the wipe. */ | ||
| 568 | osb->osb_orphan_wipes[slot]++; | ||
| 569 | out: | ||
| 570 | spin_unlock(&osb->osb_lock); | ||
| 571 | return ret; | ||
| 572 | } | ||
| 573 | |||
| 574 | static void ocfs2_signal_wipe_completion(struct ocfs2_super *osb, | ||
| 575 | int slot) | ||
| 576 | { | ||
| 577 | spin_lock(&osb->osb_lock); | ||
| 578 | osb->osb_orphan_wipes[slot]--; | ||
| 579 | spin_unlock(&osb->osb_lock); | ||
| 580 | |||
| 581 | wake_up(&osb->osb_wipe_event); | ||
| 582 | } | ||
| 583 | |||
| 547 | static int ocfs2_wipe_inode(struct inode *inode, | 584 | static int ocfs2_wipe_inode(struct inode *inode, |
| 548 | struct buffer_head *di_bh) | 585 | struct buffer_head *di_bh) |
| 549 | { | 586 | { |
| @@ -555,6 +592,11 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 555 | /* We've already voted on this so it should be readonly - no | 592 | /* We've already voted on this so it should be readonly - no |
| 556 | * spinlock needed. */ | 593 | * spinlock needed. */ |
| 557 | orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; | 594 | orphaned_slot = OCFS2_I(inode)->ip_orphaned_slot; |
| 595 | |||
| 596 | status = ocfs2_check_orphan_recovery_state(osb, orphaned_slot); | ||
| 597 | if (status) | ||
| 598 | return status; | ||
| 599 | |||
| 558 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 600 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
| 559 | ORPHAN_DIR_SYSTEM_INODE, | 601 | ORPHAN_DIR_SYSTEM_INODE, |
| 560 | orphaned_slot); | 602 | orphaned_slot); |
| @@ -597,6 +639,7 @@ bail_unlock_dir: | |||
| 597 | brelse(orphan_dir_bh); | 639 | brelse(orphan_dir_bh); |
| 598 | bail: | 640 | bail: |
| 599 | iput(orphan_dir_inode); | 641 | iput(orphan_dir_inode); |
| 642 | ocfs2_signal_wipe_completion(osb, orphaned_slot); | ||
| 600 | 643 | ||
| 601 | return status; | 644 | return status; |
| 602 | } | 645 | } |
| @@ -822,7 +865,8 @@ void ocfs2_delete_inode(struct inode *inode) | |||
| 822 | 865 | ||
| 823 | status = ocfs2_wipe_inode(inode, di_bh); | 866 | status = ocfs2_wipe_inode(inode, di_bh); |
| 824 | if (status < 0) { | 867 | if (status < 0) { |
| 825 | mlog_errno(status); | 868 | if (status != -EDEADLK) |
| 869 | mlog_errno(status); | ||
| 826 | goto bail_unlock_inode; | 870 | goto bail_unlock_inode; |
| 827 | } | 871 | } |
| 828 | 872 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index d329c9df90ae..4be801f4559b 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -1408,21 +1408,17 @@ bail: | |||
| 1408 | return status; | 1408 | return status; |
| 1409 | } | 1409 | } |
| 1410 | 1410 | ||
| 1411 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | 1411 | static int ocfs2_queue_orphans(struct ocfs2_super *osb, |
| 1412 | int slot) | 1412 | int slot, |
| 1413 | struct inode **head) | ||
| 1413 | { | 1414 | { |
| 1414 | int status = 0; | 1415 | int status; |
| 1415 | int have_disk_lock = 0; | ||
| 1416 | struct inode *inode = NULL; | ||
| 1417 | struct inode *iter; | ||
| 1418 | struct inode *orphan_dir_inode = NULL; | 1416 | struct inode *orphan_dir_inode = NULL; |
| 1417 | struct inode *iter; | ||
| 1419 | unsigned long offset, blk, local; | 1418 | unsigned long offset, blk, local; |
| 1420 | struct buffer_head *bh = NULL; | 1419 | struct buffer_head *bh = NULL; |
| 1421 | struct ocfs2_dir_entry *de; | 1420 | struct ocfs2_dir_entry *de; |
| 1422 | struct super_block *sb = osb->sb; | 1421 | struct super_block *sb = osb->sb; |
| 1423 | struct ocfs2_inode_info *oi; | ||
| 1424 | |||
| 1425 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); | ||
| 1426 | 1422 | ||
| 1427 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1423 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
| 1428 | ORPHAN_DIR_SYSTEM_INODE, | 1424 | ORPHAN_DIR_SYSTEM_INODE, |
| @@ -1430,17 +1426,15 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1430 | if (!orphan_dir_inode) { | 1426 | if (!orphan_dir_inode) { |
| 1431 | status = -ENOENT; | 1427 | status = -ENOENT; |
| 1432 | mlog_errno(status); | 1428 | mlog_errno(status); |
| 1433 | goto out; | 1429 | return status; |
| 1434 | } | 1430 | } |
| 1435 | 1431 | ||
| 1436 | mutex_lock(&orphan_dir_inode->i_mutex); | 1432 | mutex_lock(&orphan_dir_inode->i_mutex); |
| 1437 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); | 1433 | status = ocfs2_meta_lock(orphan_dir_inode, NULL, NULL, 0); |
| 1438 | if (status < 0) { | 1434 | if (status < 0) { |
| 1439 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1440 | mlog_errno(status); | 1435 | mlog_errno(status); |
| 1441 | goto out; | 1436 | goto out; |
| 1442 | } | 1437 | } |
| 1443 | have_disk_lock = 1; | ||
| 1444 | 1438 | ||
| 1445 | offset = 0; | 1439 | offset = 0; |
| 1446 | iter = NULL; | 1440 | iter = NULL; |
| @@ -1451,11 +1445,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1451 | if (!bh) | 1445 | if (!bh) |
| 1452 | status = -EINVAL; | 1446 | status = -EINVAL; |
| 1453 | if (status < 0) { | 1447 | if (status < 0) { |
| 1454 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1455 | if (bh) | 1448 | if (bh) |
| 1456 | brelse(bh); | 1449 | brelse(bh); |
| 1457 | mlog_errno(status); | 1450 | mlog_errno(status); |
| 1458 | goto out; | 1451 | goto out_unlock; |
| 1459 | } | 1452 | } |
| 1460 | 1453 | ||
| 1461 | local = 0; | 1454 | local = 0; |
| @@ -1465,11 +1458,10 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1465 | 1458 | ||
| 1466 | if (!ocfs2_check_dir_entry(orphan_dir_inode, | 1459 | if (!ocfs2_check_dir_entry(orphan_dir_inode, |
| 1467 | de, bh, local)) { | 1460 | de, bh, local)) { |
| 1468 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1469 | status = -EINVAL; | 1461 | status = -EINVAL; |
| 1470 | mlog_errno(status); | 1462 | mlog_errno(status); |
| 1471 | brelse(bh); | 1463 | brelse(bh); |
| 1472 | goto out; | 1464 | goto out_unlock; |
| 1473 | } | 1465 | } |
| 1474 | 1466 | ||
| 1475 | local += le16_to_cpu(de->rec_len); | 1467 | local += le16_to_cpu(de->rec_len); |
| @@ -1504,18 +1496,95 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1504 | 1496 | ||
| 1505 | mlog(0, "queue orphan %"MLFu64"\n", | 1497 | mlog(0, "queue orphan %"MLFu64"\n", |
| 1506 | OCFS2_I(iter)->ip_blkno); | 1498 | OCFS2_I(iter)->ip_blkno); |
| 1507 | OCFS2_I(iter)->ip_next_orphan = inode; | 1499 | /* No locking is required for the next_orphan |
| 1508 | inode = iter; | 1500 | * queue as there is only ever a single |
| 1501 | * process doing orphan recovery. */ | ||
| 1502 | OCFS2_I(iter)->ip_next_orphan = *head; | ||
| 1503 | *head = iter; | ||
| 1509 | } | 1504 | } |
| 1510 | brelse(bh); | 1505 | brelse(bh); |
| 1511 | } | 1506 | } |
| 1512 | mutex_unlock(&orphan_dir_inode->i_mutex); | ||
| 1513 | 1507 | ||
| 1508 | out_unlock: | ||
| 1514 | ocfs2_meta_unlock(orphan_dir_inode, 0); | 1509 | ocfs2_meta_unlock(orphan_dir_inode, 0); |
| 1515 | have_disk_lock = 0; | 1510 | out: |
| 1516 | 1511 | mutex_unlock(&orphan_dir_inode->i_mutex); | |
| 1517 | iput(orphan_dir_inode); | 1512 | iput(orphan_dir_inode); |
| 1518 | orphan_dir_inode = NULL; | 1513 | return status; |
| 1514 | } | ||
| 1515 | |||
| 1516 | static int ocfs2_orphan_recovery_can_continue(struct ocfs2_super *osb, | ||
| 1517 | int slot) | ||
| 1518 | { | ||
| 1519 | int ret; | ||
| 1520 | |||
| 1521 | spin_lock(&osb->osb_lock); | ||
| 1522 | ret = !osb->osb_orphan_wipes[slot]; | ||
| 1523 | spin_unlock(&osb->osb_lock); | ||
| 1524 | return ret; | ||
| 1525 | } | ||
| 1526 | |||
| 1527 | static void ocfs2_mark_recovering_orphan_dir(struct ocfs2_super *osb, | ||
| 1528 | int slot) | ||
| 1529 | { | ||
| 1530 | spin_lock(&osb->osb_lock); | ||
| 1531 | /* Mark ourselves such that new processes in delete_inode() | ||
| 1532 | * know to quit early. */ | ||
| 1533 | ocfs2_node_map_set_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | ||
| 1534 | while (osb->osb_orphan_wipes[slot]) { | ||
| 1535 | /* If any processes are already in the middle of an | ||
| 1536 | * orphan wipe on this dir, then we need to wait for | ||
| 1537 | * them. */ | ||
| 1538 | spin_unlock(&osb->osb_lock); | ||
| 1539 | wait_event_interruptible(osb->osb_wipe_event, | ||
| 1540 | ocfs2_orphan_recovery_can_continue(osb, slot)); | ||
| 1541 | spin_lock(&osb->osb_lock); | ||
| 1542 | } | ||
| 1543 | spin_unlock(&osb->osb_lock); | ||
| 1544 | } | ||
| 1545 | |||
| 1546 | static void ocfs2_clear_recovering_orphan_dir(struct ocfs2_super *osb, | ||
| 1547 | int slot) | ||
| 1548 | { | ||
| 1549 | ocfs2_node_map_clear_bit(osb, &osb->osb_recovering_orphan_dirs, slot); | ||
| 1550 | } | ||
| 1551 | |||
| 1552 | /* | ||
| 1553 | * Orphan recovery. Each mounted node has its own orphan dir which we | ||
| 1554 | * must run during recovery. Our strategy here is to build a list of | ||
| 1555 | * the inodes in the orphan dir and iget/iput them. The VFS does | ||
| 1556 | * (most) of the rest of the work. | ||
| 1557 | * | ||
| 1558 | * Orphan recovery can happen at any time, not just mount so we have a | ||
| 1559 | * couple of extra considerations. | ||
| 1560 | * | ||
| 1561 | * - We grab as many inodes as we can under the orphan dir lock - | ||
| 1562 | * doing iget() outside the orphan dir risks getting a reference on | ||
| 1563 | * an invalid inode. | ||
| 1564 | * - We must be sure not to deadlock with other processes on the | ||
| 1565 | * system wanting to run delete_inode(). This can happen when they go | ||
| 1566 | * to lock the orphan dir and the orphan recovery process attempts to | ||
| 1567 | * iget() inside the orphan dir lock. This can be avoided by | ||
| 1568 | * advertising our state to ocfs2_delete_inode(). | ||
| 1569 | */ | ||
| 1570 | static int ocfs2_recover_orphans(struct ocfs2_super *osb, | ||
| 1571 | int slot) | ||
| 1572 | { | ||
| 1573 | int ret = 0; | ||
| 1574 | struct inode *inode = NULL; | ||
| 1575 | struct inode *iter; | ||
| 1576 | struct ocfs2_inode_info *oi; | ||
| 1577 | |||
| 1578 | mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); | ||
| 1579 | |||
| 1580 | ocfs2_mark_recovering_orphan_dir(osb, slot); | ||
| 1581 | ret = ocfs2_queue_orphans(osb, slot, &inode); | ||
| 1582 | ocfs2_clear_recovering_orphan_dir(osb, slot); | ||
| 1583 | |||
| 1584 | /* Error here should be noted, but we want to continue with as | ||
| 1585 | * many queued inodes as we've got. */ | ||
| 1586 | if (ret) | ||
| 1587 | mlog_errno(ret); | ||
| 1519 | 1588 | ||
| 1520 | while (inode) { | 1589 | while (inode) { |
| 1521 | oi = OCFS2_I(inode); | 1590 | oi = OCFS2_I(inode); |
| @@ -1541,14 +1610,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
| 1541 | inode = iter; | 1610 | inode = iter; |
| 1542 | } | 1611 | } |
| 1543 | 1612 | ||
| 1544 | out: | 1613 | return ret; |
| 1545 | if (have_disk_lock) | ||
| 1546 | ocfs2_meta_unlock(orphan_dir_inode, 0); | ||
| 1547 | |||
| 1548 | if (orphan_dir_inode) | ||
| 1549 | iput(orphan_dir_inode); | ||
| 1550 | |||
| 1551 | return status; | ||
| 1552 | } | 1614 | } |
| 1553 | 1615 | ||
| 1554 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) | 1616 | static int ocfs2_wait_on_mount(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 19360e3d842e..e89de9b6e491 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -287,6 +287,10 @@ struct ocfs2_super | |||
| 287 | struct inode *osb_tl_inode; | 287 | struct inode *osb_tl_inode; |
| 288 | struct buffer_head *osb_tl_bh; | 288 | struct buffer_head *osb_tl_bh; |
| 289 | struct work_struct osb_truncate_log_wq; | 289 | struct work_struct osb_truncate_log_wq; |
| 290 | |||
| 291 | struct ocfs2_node_map osb_recovering_orphan_dirs; | ||
| 292 | unsigned int *osb_orphan_wipes; | ||
| 293 | wait_queue_head_t osb_wipe_event; | ||
| 290 | }; | 294 | }; |
| 291 | 295 | ||
| 292 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 296 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 046824b6b625..8dd3aafec499 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -1325,6 +1325,16 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1325 | } | 1325 | } |
| 1326 | mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots); | 1326 | mlog(ML_NOTICE, "max_slots for this device: %u\n", osb->max_slots); |
| 1327 | 1327 | ||
| 1328 | init_waitqueue_head(&osb->osb_wipe_event); | ||
| 1329 | osb->osb_orphan_wipes = kcalloc(osb->max_slots, | ||
| 1330 | sizeof(*osb->osb_orphan_wipes), | ||
| 1331 | GFP_KERNEL); | ||
| 1332 | if (!osb->osb_orphan_wipes) { | ||
| 1333 | status = -ENOMEM; | ||
| 1334 | mlog_errno(status); | ||
| 1335 | goto bail; | ||
| 1336 | } | ||
| 1337 | |||
| 1328 | osb->s_feature_compat = | 1338 | osb->s_feature_compat = |
| 1329 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); | 1339 | le32_to_cpu(OCFS2_RAW_SB(di)->s_feature_compat); |
| 1330 | osb->s_feature_ro_compat = | 1340 | osb->s_feature_ro_compat = |
| @@ -1638,6 +1648,7 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb) | |||
| 1638 | if (osb->slot_info) | 1648 | if (osb->slot_info) |
| 1639 | ocfs2_free_slot_info(osb->slot_info); | 1649 | ocfs2_free_slot_info(osb->slot_info); |
| 1640 | 1650 | ||
| 1651 | kfree(osb->osb_orphan_wipes); | ||
| 1641 | /* FIXME | 1652 | /* FIXME |
| 1642 | * This belongs in journal shutdown, but because we have to | 1653 | * This belongs in journal shutdown, but because we have to |
| 1643 | * allocate osb->journal at the start of ocfs2_initalize_osb(), | 1654 | * allocate osb->journal at the start of ocfs2_initalize_osb(), |
