diff options
author | Yongqiang Yang <xiaoqiangnk@gmail.com> | 2012-01-04 17:09:50 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-01-04 17:09:50 -0500 |
commit | 61f296cc49751f1dc992039229d12b0de7e0c2ae (patch) | |
tree | 4dc7991db75379b3770f19c573751a588960c407 /fs | |
parent | d89651c8e222b2d2797bf66d4eb7064459f4f4f4 (diff) |
ext4: let ext4_group_add() use common code
This patch lets ext4_group_add() call ext4_flex_group_add().
Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/resize.c | 309 |
1 files changed, 10 insertions, 299 deletions
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c | |||
index eba706d9276a..f9d948f0eb86 100644 | |||
--- a/fs/ext4/resize.c | |||
+++ b/fs/ext4/resize.c | |||
@@ -595,137 +595,6 @@ out: | |||
595 | } | 595 | } |
596 | 596 | ||
597 | /* | 597 | /* |
598 | * Set up the block and inode bitmaps, and the inode table for the new group. | ||
599 | * This doesn't need to be part of the main transaction, since we are only | ||
600 | * changing blocks outside the actual filesystem. We still do journaling to | ||
601 | * ensure the recovery is correct in case of a failure just after resize. | ||
602 | * If any part of this fails, we simply abort the resize. | ||
603 | */ | ||
604 | static int setup_new_group_blocks(struct super_block *sb, | ||
605 | struct ext4_new_group_data *input) | ||
606 | { | ||
607 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
608 | ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); | ||
609 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | ||
610 | le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; | ||
611 | unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); | ||
612 | struct buffer_head *bh; | ||
613 | handle_t *handle; | ||
614 | ext4_fsblk_t block; | ||
615 | ext4_grpblk_t bit; | ||
616 | int i; | ||
617 | int err = 0, err2; | ||
618 | |||
619 | /* This transaction may be extended/restarted along the way */ | ||
620 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); | ||
621 | |||
622 | if (IS_ERR(handle)) | ||
623 | return PTR_ERR(handle); | ||
624 | |||
625 | BUG_ON(input->group != sbi->s_groups_count); | ||
626 | |||
627 | /* Copy all of the GDT blocks into the backup in this group */ | ||
628 | for (i = 0, bit = 1, block = start + 1; | ||
629 | i < gdblocks; i++, block++, bit++) { | ||
630 | struct buffer_head *gdb; | ||
631 | |||
632 | ext4_debug("update backup group %#04llx (+%d)\n", block, bit); | ||
633 | err = extend_or_restart_transaction(handle, 1); | ||
634 | if (err) | ||
635 | goto exit_journal; | ||
636 | |||
637 | gdb = sb_getblk(sb, block); | ||
638 | if (!gdb) { | ||
639 | err = -EIO; | ||
640 | goto exit_journal; | ||
641 | } | ||
642 | if ((err = ext4_journal_get_write_access(handle, gdb))) { | ||
643 | brelse(gdb); | ||
644 | goto exit_journal; | ||
645 | } | ||
646 | memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); | ||
647 | set_buffer_uptodate(gdb); | ||
648 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); | ||
649 | if (unlikely(err)) { | ||
650 | brelse(gdb); | ||
651 | goto exit_journal; | ||
652 | } | ||
653 | brelse(gdb); | ||
654 | } | ||
655 | |||
656 | /* Zero out all of the reserved backup group descriptor table blocks */ | ||
657 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", | ||
658 | block, sbi->s_itb_per_group); | ||
659 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, | ||
660 | GFP_NOFS); | ||
661 | if (err) | ||
662 | goto exit_journal; | ||
663 | |||
664 | err = extend_or_restart_transaction(handle, 2); | ||
665 | if (err) | ||
666 | goto exit_journal; | ||
667 | |||
668 | bh = bclean(handle, sb, input->block_bitmap); | ||
669 | if (IS_ERR(bh)) { | ||
670 | err = PTR_ERR(bh); | ||
671 | goto exit_journal; | ||
672 | } | ||
673 | |||
674 | if (ext4_bg_has_super(sb, input->group)) { | ||
675 | ext4_debug("mark backup group tables %#04llx (+0)\n", start); | ||
676 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); | ||
677 | } | ||
678 | |||
679 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | ||
680 | input->block_bitmap - start); | ||
681 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | ||
682 | ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, | ||
683 | input->inode_bitmap - start); | ||
684 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | ||
685 | |||
686 | /* Zero out all of the inode table blocks */ | ||
687 | block = input->inode_table; | ||
688 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", | ||
689 | block, sbi->s_itb_per_group); | ||
690 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | ||
691 | if (err) | ||
692 | goto exit_bh; | ||
693 | ext4_set_bits(bh->b_data, input->inode_table - start, | ||
694 | sbi->s_itb_per_group); | ||
695 | |||
696 | |||
697 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, | ||
698 | bh->b_data); | ||
699 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
700 | if (unlikely(err)) { | ||
701 | ext4_std_error(sb, err); | ||
702 | goto exit_bh; | ||
703 | } | ||
704 | brelse(bh); | ||
705 | /* Mark unused entries in inode bitmap used */ | ||
706 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", | ||
707 | input->inode_bitmap, input->inode_bitmap - start); | ||
708 | if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { | ||
709 | err = PTR_ERR(bh); | ||
710 | goto exit_journal; | ||
711 | } | ||
712 | |||
713 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | ||
714 | bh->b_data); | ||
715 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
716 | if (unlikely(err)) | ||
717 | ext4_std_error(sb, err); | ||
718 | exit_bh: | ||
719 | brelse(bh); | ||
720 | |||
721 | exit_journal: | ||
722 | if ((err2 = ext4_journal_stop(handle)) && !err) | ||
723 | err = err2; | ||
724 | |||
725 | return err; | ||
726 | } | ||
727 | |||
728 | /* | ||
729 | * Iterate through the groups which hold BACKUP superblock/GDT copies in an | 598 | * Iterate through the groups which hold BACKUP superblock/GDT copies in an |
730 | * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before | 599 | * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before |
731 | * calling this for the first time. In a sparse filesystem it will be the | 600 | * calling this for the first time. In a sparse filesystem it will be the |
@@ -1509,16 +1378,15 @@ static int ext4_setup_next_flex_gd(struct super_block *sb, | |||
1509 | */ | 1378 | */ |
1510 | int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | 1379 | int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
1511 | { | 1380 | { |
1381 | struct ext4_new_flex_group_data flex_gd; | ||
1512 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1382 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
1513 | struct ext4_super_block *es = sbi->s_es; | 1383 | struct ext4_super_block *es = sbi->s_es; |
1514 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 1384 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? |
1515 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1385 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
1516 | struct buffer_head *primary = NULL; | ||
1517 | struct ext4_group_desc *gdp; | ||
1518 | struct inode *inode = NULL; | 1386 | struct inode *inode = NULL; |
1519 | handle_t *handle; | ||
1520 | int gdb_off, gdb_num; | 1387 | int gdb_off, gdb_num; |
1521 | int err, err2; | 1388 | int err; |
1389 | __u16 bg_flags = 0; | ||
1522 | 1390 | ||
1523 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 1391 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
1524 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); | 1392 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); |
@@ -1557,172 +1425,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
1557 | } | 1425 | } |
1558 | 1426 | ||
1559 | 1427 | ||
1560 | if ((err = verify_group_input(sb, input))) | 1428 | err = verify_group_input(sb, input); |
1561 | goto exit_put; | ||
1562 | |||
1563 | if ((err = setup_new_group_blocks(sb, input))) | ||
1564 | goto exit_put; | ||
1565 | |||
1566 | /* | ||
1567 | * We will always be modifying at least the superblock and a GDT | ||
1568 | * block. If we are adding a group past the last current GDT block, | ||
1569 | * we will also modify the inode and the dindirect block. If we | ||
1570 | * are adding a group with superblock/GDT backups we will also | ||
1571 | * modify each of the reserved GDT dindirect blocks. | ||
1572 | */ | ||
1573 | handle = ext4_journal_start_sb(sb, | ||
1574 | ext4_bg_has_super(sb, input->group) ? | ||
1575 | 3 + reserved_gdb : 4); | ||
1576 | if (IS_ERR(handle)) { | ||
1577 | err = PTR_ERR(handle); | ||
1578 | goto exit_put; | ||
1579 | } | ||
1580 | |||
1581 | if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) | ||
1582 | goto exit_journal; | ||
1583 | |||
1584 | /* | ||
1585 | * We will only either add reserved group blocks to a backup group | ||
1586 | * or remove reserved blocks for the first group in a new group block. | ||
1587 | * Doing both would be mean more complex code, and sane people don't | ||
1588 | * use non-sparse filesystems anymore. This is already checked above. | ||
1589 | */ | ||
1590 | if (gdb_off) { | ||
1591 | primary = sbi->s_group_desc[gdb_num]; | ||
1592 | if ((err = ext4_journal_get_write_access(handle, primary))) | ||
1593 | goto exit_journal; | ||
1594 | |||
1595 | if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { | ||
1596 | err = reserve_backup_gdb(handle, inode, input->group); | ||
1597 | if (err) | ||
1598 | goto exit_journal; | ||
1599 | } | ||
1600 | } else { | ||
1601 | /* | ||
1602 | * Note that we can access new group descriptor block safely | ||
1603 | * only if add_new_gdb() succeeds. | ||
1604 | */ | ||
1605 | err = add_new_gdb(handle, inode, input->group); | ||
1606 | if (err) | ||
1607 | goto exit_journal; | ||
1608 | primary = sbi->s_group_desc[gdb_num]; | ||
1609 | } | ||
1610 | |||
1611 | /* | ||
1612 | * OK, now we've set up the new group. Time to make it active. | ||
1613 | * | ||
1614 | * so we have to be safe wrt. concurrent accesses the group | ||
1615 | * data. So we need to be careful to set all of the relevant | ||
1616 | * group descriptor data etc. *before* we enable the group. | ||
1617 | * | ||
1618 | * The key field here is sbi->s_groups_count: as long as | ||
1619 | * that retains its old value, nobody is going to access the new | ||
1620 | * group. | ||
1621 | * | ||
1622 | * So first we update all the descriptor metadata for the new | ||
1623 | * group; then we update the total disk blocks count; then we | ||
1624 | * update the groups count to enable the group; then finally we | ||
1625 | * update the free space counts so that the system can start | ||
1626 | * using the new disk blocks. | ||
1627 | */ | ||
1628 | |||
1629 | /* Update group descriptor block for new group */ | ||
1630 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | ||
1631 | gdb_off * EXT4_DESC_SIZE(sb)); | ||
1632 | |||
1633 | memset(gdp, 0, EXT4_DESC_SIZE(sb)); | ||
1634 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ | ||
1635 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ | ||
1636 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ | ||
1637 | ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); | ||
1638 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | ||
1639 | gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
1640 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | ||
1641 | |||
1642 | /* | ||
1643 | * We can allocate memory for mb_alloc based on the new group | ||
1644 | * descriptor | ||
1645 | */ | ||
1646 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); | ||
1647 | if (err) | 1429 | if (err) |
1648 | goto exit_journal; | 1430 | goto out; |
1649 | |||
1650 | /* | ||
1651 | * Make the new blocks and inodes valid next. We do this before | ||
1652 | * increasing the group count so that once the group is enabled, | ||
1653 | * all of its blocks and inodes are already valid. | ||
1654 | * | ||
1655 | * We always allocate group-by-group, then block-by-block or | ||
1656 | * inode-by-inode within a group, so enabling these | ||
1657 | * blocks/inodes before the group is live won't actually let us | ||
1658 | * allocate the new space yet. | ||
1659 | */ | ||
1660 | ext4_blocks_count_set(es, ext4_blocks_count(es) + | ||
1661 | input->blocks_count); | ||
1662 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); | ||
1663 | |||
1664 | /* | ||
1665 | * We need to protect s_groups_count against other CPUs seeing | ||
1666 | * inconsistent state in the superblock. | ||
1667 | * | ||
1668 | * The precise rules we use are: | ||
1669 | * | ||
1670 | * * Writers must perform a smp_wmb() after updating all dependent | ||
1671 | * data and before modifying the groups count | ||
1672 | * | ||
1673 | * * Readers must perform an smp_rmb() after reading the groups count | ||
1674 | * and before reading any dependent data. | ||
1675 | * | ||
1676 | * NB. These rules can be relaxed when checking the group count | ||
1677 | * while freeing data, as we can only allocate from a block | ||
1678 | * group after serialising against the group count, and we can | ||
1679 | * only then free after serialising in turn against that | ||
1680 | * allocation. | ||
1681 | */ | ||
1682 | smp_wmb(); | ||
1683 | |||
1684 | /* Update the global fs size fields */ | ||
1685 | sbi->s_groups_count++; | ||
1686 | |||
1687 | err = ext4_handle_dirty_metadata(handle, NULL, primary); | ||
1688 | if (unlikely(err)) { | ||
1689 | ext4_std_error(sb, err); | ||
1690 | goto exit_journal; | ||
1691 | } | ||
1692 | |||
1693 | /* Update the reserved block counts only once the new group is | ||
1694 | * active. */ | ||
1695 | ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + | ||
1696 | input->reserved_blocks); | ||
1697 | |||
1698 | /* Update the free space counts */ | ||
1699 | percpu_counter_add(&sbi->s_freeclusters_counter, | ||
1700 | EXT4_B2C(sbi, input->free_blocks_count)); | ||
1701 | percpu_counter_add(&sbi->s_freeinodes_counter, | ||
1702 | EXT4_INODES_PER_GROUP(sb)); | ||
1703 | |||
1704 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && | ||
1705 | sbi->s_log_groups_per_flex) { | ||
1706 | ext4_group_t flex_group; | ||
1707 | flex_group = ext4_flex_group(sbi, input->group); | ||
1708 | atomic_add(EXT4_B2C(sbi, input->free_blocks_count), | ||
1709 | &sbi->s_flex_groups[flex_group].free_clusters); | ||
1710 | atomic_add(EXT4_INODES_PER_GROUP(sb), | ||
1711 | &sbi->s_flex_groups[flex_group].free_inodes); | ||
1712 | } | ||
1713 | |||
1714 | ext4_handle_dirty_super(handle, sb); | ||
1715 | 1431 | ||
1716 | exit_journal: | 1432 | flex_gd.count = 1; |
1717 | if ((err2 = ext4_journal_stop(handle)) && !err) | 1433 | flex_gd.groups = input; |
1718 | err = err2; | 1434 | flex_gd.bg_flags = &bg_flags; |
1719 | if (!err && primary) { | 1435 | err = ext4_flex_group_add(sb, inode, &flex_gd); |
1720 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | 1436 | out: |
1721 | sizeof(struct ext4_super_block)); | ||
1722 | update_backups(sb, primary->b_blocknr, primary->b_data, | ||
1723 | primary->b_size); | ||
1724 | } | ||
1725 | exit_put: | ||
1726 | iput(inode); | 1437 | iput(inode); |
1727 | return err; | 1438 | return err; |
1728 | } /* ext4_group_add */ | 1439 | } /* ext4_group_add */ |