diff options
Diffstat (limited to 'fs/ext4/ialloc.c')
-rw-r--r-- | fs/ext4/ialloc.c | 146 |
1 files changed, 137 insertions, 9 deletions
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 38e9a0a705df..c61f37fd3f05 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -28,6 +28,7 @@ | |||
28 | 28 | ||
29 | #include "xattr.h" | 29 | #include "xattr.h" |
30 | #include "acl.h" | 30 | #include "acl.h" |
31 | #include "group.h" | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * ialloc.c contains the inodes allocation and deallocation routines | 34 | * ialloc.c contains the inodes allocation and deallocation routines |
@@ -43,6 +44,52 @@ | |||
43 | * the free blocks count in the block. | 44 | * the free blocks count in the block. |
44 | */ | 45 | */ |
45 | 46 | ||
47 | /* | ||
48 | * To avoid calling the atomic setbit hundreds or thousands of times, we only | ||
49 | * need to use it within a single byte (to ensure we get endianness right). | ||
50 | * We can use memset for the rest of the bitmap as there are no other users. | ||
51 | */ | ||
52 | void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) | ||
53 | { | ||
54 | int i; | ||
55 | |||
56 | if (start_bit >= end_bit) | ||
57 | return; | ||
58 | |||
59 | ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); | ||
60 | for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) | ||
61 | ext4_set_bit(i, bitmap); | ||
62 | if (i < end_bit) | ||
63 | memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); | ||
64 | } | ||
65 | |||
66 | /* Initializes an uninitialized inode bitmap */ | ||
67 | unsigned ext4_init_inode_bitmap(struct super_block *sb, | ||
68 | struct buffer_head *bh, int block_group, | ||
69 | struct ext4_group_desc *gdp) | ||
70 | { | ||
71 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
72 | |||
73 | J_ASSERT_BH(bh, buffer_locked(bh)); | ||
74 | |||
75 | /* If checksum is bad mark all blocks and inodes in use to prevent | |
76 | * allocation, essentially implementing a per-group read-only flag. */ | ||
77 | if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) { | ||
78 | ext4_error(sb, __FUNCTION__, "Checksum bad for group %u\n", | ||
79 | block_group); | ||
80 | gdp->bg_free_blocks_count = 0; | ||
81 | gdp->bg_free_inodes_count = 0; | ||
82 | gdp->bg_itable_unused = 0; | ||
83 | memset(bh->b_data, 0xff, sb->s_blocksize); | ||
84 | return 0; | ||
85 | } | ||
86 | |||
87 | memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); | ||
88 | mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), | ||
89 | bh->b_data); | ||
90 | |||
91 | return EXT4_INODES_PER_GROUP(sb); | ||
92 | } | ||
46 | 93 | ||
47 | /* | 94 | /* |
48 | * Read the inode allocation bitmap for a given block_group, reading | 95 | * Read the inode allocation bitmap for a given block_group, reading |
@@ -59,8 +106,20 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group) | |||
59 | desc = ext4_get_group_desc(sb, block_group, NULL); | 106 | desc = ext4_get_group_desc(sb, block_group, NULL); |
60 | if (!desc) | 107 | if (!desc) |
61 | goto error_out; | 108 | goto error_out; |
62 | 109 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | |
63 | bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); | 110 | bh = sb_getblk(sb, ext4_inode_bitmap(sb, desc)); |
111 | if (!buffer_uptodate(bh)) { | ||
112 | lock_buffer(bh); | ||
113 | if (!buffer_uptodate(bh)) { | ||
114 | ext4_init_inode_bitmap(sb, bh, block_group, | ||
115 | desc); | ||
116 | set_buffer_uptodate(bh); | ||
117 | } | ||
118 | unlock_buffer(bh); | ||
119 | } | ||
120 | } else { | ||
121 | bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); | ||
122 | } | ||
64 | if (!bh) | 123 | if (!bh) |
65 | ext4_error(sb, "read_inode_bitmap", | 124 | ext4_error(sb, "read_inode_bitmap", |
66 | "Cannot read inode bitmap - " | 125 | "Cannot read inode bitmap - " |
@@ -169,6 +228,8 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
169 | if (is_directory) | 228 | if (is_directory) |
170 | gdp->bg_used_dirs_count = cpu_to_le16( | 229 | gdp->bg_used_dirs_count = cpu_to_le16( |
171 | le16_to_cpu(gdp->bg_used_dirs_count) - 1); | 230 | le16_to_cpu(gdp->bg_used_dirs_count) - 1); |
231 | gdp->bg_checksum = ext4_group_desc_csum(sbi, | ||
232 | block_group, gdp); | ||
172 | spin_unlock(sb_bgl_lock(sbi, block_group)); | 233 | spin_unlock(sb_bgl_lock(sbi, block_group)); |
173 | percpu_counter_inc(&sbi->s_freeinodes_counter); | 234 | percpu_counter_inc(&sbi->s_freeinodes_counter); |
174 | if (is_directory) | 235 | if (is_directory) |
@@ -435,7 +496,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
435 | struct ext4_sb_info *sbi; | 496 | struct ext4_sb_info *sbi; |
436 | int err = 0; | 497 | int err = 0; |
437 | struct inode *ret; | 498 | struct inode *ret; |
438 | int i; | 499 | int i, free = 0; |
439 | 500 | ||
440 | /* Cannot create files in a deleted directory */ | 501 | /* Cannot create files in a deleted directory */ |
441 | if (!dir || !dir->i_nlink) | 502 | if (!dir || !dir->i_nlink) |
@@ -517,11 +578,13 @@ repeat_in_this_group: | |||
517 | goto out; | 578 | goto out; |
518 | 579 | ||
519 | got: | 580 | got: |
520 | ino += group * EXT4_INODES_PER_GROUP(sb) + 1; | 581 | ino++; |
521 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 582 | if ((group == 0 && ino < EXT4_FIRST_INO(sb)) || |
522 | ext4_error (sb, "ext4_new_inode", | 583 | ino > EXT4_INODES_PER_GROUP(sb)) { |
523 | "reserved inode or inode > inodes count - " | 584 | ext4_error(sb, __FUNCTION__, |
524 | "block_group = %d, inode=%lu", group, ino); | 585 | "reserved inode or inode > inodes count - " |
586 | "block_group = %d, inode=%lu", group, | ||
587 | ino + group * EXT4_INODES_PER_GROUP(sb)); | ||
525 | err = -EIO; | 588 | err = -EIO; |
526 | goto fail; | 589 | goto fail; |
527 | } | 590 | } |
@@ -529,13 +592,78 @@ got: | |||
529 | BUFFER_TRACE(bh2, "get_write_access"); | 592 | BUFFER_TRACE(bh2, "get_write_access"); |
530 | err = ext4_journal_get_write_access(handle, bh2); | 593 | err = ext4_journal_get_write_access(handle, bh2); |
531 | if (err) goto fail; | 594 | if (err) goto fail; |
595 | |||
596 | /* We may have to initialize the block bitmap if it isn't already */ | ||
597 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) && | ||
598 | gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
599 | struct buffer_head *block_bh = read_block_bitmap(sb, group); | ||
600 | |||
601 | BUFFER_TRACE(block_bh, "get block bitmap access"); | ||
602 | err = ext4_journal_get_write_access(handle, block_bh); | ||
603 | if (err) { | ||
604 | brelse(block_bh); | ||
605 | goto fail; | ||
606 | } | ||
607 | |||
608 | free = 0; | ||
609 | spin_lock(sb_bgl_lock(sbi, group)); | ||
610 | /* recheck and clear flag under lock if we still need to */ | ||
611 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | ||
612 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | ||
613 | free = ext4_free_blocks_after_init(sb, group, gdp); | ||
614 | gdp->bg_free_blocks_count = cpu_to_le16(free); | ||
615 | } | ||
616 | spin_unlock(sb_bgl_lock(sbi, group)); | ||
617 | |||
618 | /* Don't need to dirty bitmap block if we didn't change it */ | ||
619 | if (free) { | ||
620 | BUFFER_TRACE(block_bh, "dirty block bitmap"); | ||
621 | err = ext4_journal_dirty_metadata(handle, block_bh); | ||
622 | } | ||
623 | |||
624 | brelse(block_bh); | ||
625 | if (err) | ||
626 | goto fail; | ||
627 | } | ||
628 | |||
532 | spin_lock(sb_bgl_lock(sbi, group)); | 629 | spin_lock(sb_bgl_lock(sbi, group)); |
630 | /* If we didn't allocate from within the initialized part of the inode | ||
631 | * table then we need to initialize up to this inode. */ | ||
632 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { | ||
633 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | ||
634 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); | ||
635 | |||
636 | /* When marking the block group with | ||
637 | * ~EXT4_BG_INODE_UNINIT we don't want to depend | ||
638 | * on the value of bg_itable_unused even though | |
639 | * mke2fs could have initialized the same for us. | |
640 | * Instead we calculate the value below. | |
641 | */ | ||
642 | |||
643 | free = 0; | ||
644 | } else { | ||
645 | free = EXT4_INODES_PER_GROUP(sb) - | ||
646 | le16_to_cpu(gdp->bg_itable_unused); | ||
647 | } | ||
648 | |||
649 | /* | ||
650 | * Check the relative inode number against the last used | ||
651 | * relative inode number in this group. If it is greater | |
652 | * we need to update the bg_itable_unused count | ||
653 | * | ||
654 | */ | ||
655 | if (ino > free) | ||
656 | gdp->bg_itable_unused = | ||
657 | cpu_to_le16(EXT4_INODES_PER_GROUP(sb) - ino); | ||
658 | } | ||
659 | |||
533 | gdp->bg_free_inodes_count = | 660 | gdp->bg_free_inodes_count = |
534 | cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); | 661 | cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); |
535 | if (S_ISDIR(mode)) { | 662 | if (S_ISDIR(mode)) { |
536 | gdp->bg_used_dirs_count = | 663 | gdp->bg_used_dirs_count = |
537 | cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); | 664 | cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); |
538 | } | 665 | } |
666 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
539 | spin_unlock(sb_bgl_lock(sbi, group)); | 667 | spin_unlock(sb_bgl_lock(sbi, group)); |
540 | BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); | 668 | BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); |
541 | err = ext4_journal_dirty_metadata(handle, bh2); | 669 | err = ext4_journal_dirty_metadata(handle, bh2); |
@@ -557,7 +685,7 @@ got: | |||
557 | inode->i_gid = current->fsgid; | 685 | inode->i_gid = current->fsgid; |
558 | inode->i_mode = mode; | 686 | inode->i_mode = mode; |
559 | 687 | ||
560 | inode->i_ino = ino; | 688 | inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb); |
561 | /* This is the optimal IO size (for stat), not the fs block size */ | 689 | /* This is the optimal IO size (for stat), not the fs block size */ |
562 | inode->i_blocks = 0; | 690 | inode->i_blocks = 0; |
563 | inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = | 691 | inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = |