diff options
author | Theodore Ts'o <tytso@mit.edu> | 2012-01-10 11:54:07 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-01-10 11:54:07 -0500 |
commit | ff9cb1c4eead5e4c292e75cd3170a82d66944101 (patch) | |
tree | cdb132a39e550a9b7b28ea67544cb86cd6ebdb6e /fs | |
parent | e4e11180dfa545233e5145919b75b7fac88638df (diff) | |
parent | d50f2ab6f050311dbf7b8f5501b25f0bf64a439b (diff) |
Merge branch 'for_linus' into for_linus_merged
Conflicts:
fs/ext4/ioctl.c
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/balloc.c | 4 | ||||
-rw-r--r-- | fs/ext4/ext4.h | 29 | ||||
-rw-r--r-- | fs/ext4/extents.c | 10 | ||||
-rw-r--r-- | fs/ext4/ialloc.c | 18 | ||||
-rw-r--r-- | fs/ext4/inode.c | 143 | ||||
-rw-r--r-- | fs/ext4/ioctl.c | 86 | ||||
-rw-r--r-- | fs/ext4/mballoc.c | 2 | ||||
-rw-r--r-- | fs/ext4/resize.c | 1175 | ||||
-rw-r--r-- | fs/ext4/super.c | 11 | ||||
-rw-r--r-- | fs/ext4/xattr_security.c | 5 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 6 | ||||
-rw-r--r-- | fs/jbd2/revoke.c | 34 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 5 |
13 files changed, 1068 insertions, 460 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 12ccacda44e0..f9e2cd8cf711 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -23,6 +23,8 @@ | |||
23 | 23 | ||
24 | #include <trace/events/ext4.h> | 24 | #include <trace/events/ext4.h> |
25 | 25 | ||
26 | static unsigned ext4_num_base_meta_clusters(struct super_block *sb, | ||
27 | ext4_group_t block_group); | ||
26 | /* | 28 | /* |
27 | * balloc.c contains the blocks allocation and deallocation routines | 29 | * balloc.c contains the blocks allocation and deallocation routines |
28 | */ | 30 | */ |
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
668 | * This function returns the number of file system metadata clusters at | 670 | * This function returns the number of file system metadata clusters at |
669 | * the beginning of a block group, including the reserved gdt blocks. | 671 | * the beginning of a block group, including the reserved gdt blocks. |
670 | */ | 672 | */ |
671 | unsigned ext4_num_base_meta_clusters(struct super_block *sb, | 673 | static unsigned ext4_num_base_meta_clusters(struct super_block *sb, |
672 | ext4_group_t block_group) | 674 | ext4_group_t block_group) |
673 | { | 675 | { |
674 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 676 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1554b15f91bc..513004fc3d84 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -511,6 +511,14 @@ struct ext4_new_group_data { | |||
511 | __u32 free_blocks_count; | 511 | __u32 free_blocks_count; |
512 | }; | 512 | }; |
513 | 513 | ||
514 | /* Indexes used to index group tables in ext4_new_group_data */ | ||
515 | enum { | ||
516 | BLOCK_BITMAP = 0, /* block bitmap */ | ||
517 | INODE_BITMAP, /* inode bitmap */ | ||
518 | INODE_TABLE, /* inode tables */ | ||
519 | GROUP_TABLE_COUNT, | ||
520 | }; | ||
521 | |||
514 | /* | 522 | /* |
515 | * Flags used by ext4_map_blocks() | 523 | * Flags used by ext4_map_blocks() |
516 | */ | 524 | */ |
@@ -575,6 +583,7 @@ struct ext4_new_group_data { | |||
575 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | 583 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ |
576 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) | 584 | #define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) |
577 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) | 585 | #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) |
586 | #define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64) | ||
578 | 587 | ||
579 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) | 588 | #if defined(__KERNEL__) && defined(CONFIG_COMPAT) |
580 | /* | 589 | /* |
@@ -957,12 +966,13 @@ struct ext4_inode_info { | |||
957 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ | 966 | #define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ |
958 | EXT4_MOUNT2_##opt) | 967 | EXT4_MOUNT2_##opt) |
959 | 968 | ||
960 | #define ext4_set_bit __test_and_set_bit_le | 969 | #define ext4_test_and_set_bit __test_and_set_bit_le |
970 | #define ext4_set_bit __set_bit_le | ||
961 | #define ext4_set_bit_atomic ext2_set_bit_atomic | 971 | #define ext4_set_bit_atomic ext2_set_bit_atomic |
962 | #define ext4_clear_bit __test_and_clear_bit_le | 972 | #define ext4_test_and_clear_bit __test_and_clear_bit_le |
973 | #define ext4_clear_bit __clear_bit_le | ||
963 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic | 974 | #define ext4_clear_bit_atomic ext2_clear_bit_atomic |
964 | #define ext4_test_bit test_bit_le | 975 | #define ext4_test_bit test_bit_le |
965 | #define ext4_find_first_zero_bit find_first_zero_bit_le | ||
966 | #define ext4_find_next_zero_bit find_next_zero_bit_le | 976 | #define ext4_find_next_zero_bit find_next_zero_bit_le |
967 | #define ext4_find_next_bit find_next_bit_le | 977 | #define ext4_find_next_bit find_next_bit_le |
968 | 978 | ||
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1397 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 | 1407 | #define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 |
1398 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 | 1408 | #define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 |
1399 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 | 1409 | #define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 |
1410 | #define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400 | ||
1400 | 1411 | ||
1401 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 | 1412 | #define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 |
1402 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 | 1413 | #define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 |
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei) | |||
1409 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 | 1420 | #define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 |
1410 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ | 1421 | #define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ |
1411 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ | 1422 | #define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ |
1423 | #define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */ | ||
1424 | #define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */ | ||
1412 | 1425 | ||
1413 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR | 1426 | #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR |
1414 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ | 1427 | #define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ |
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb, | |||
1790 | extern unsigned ext4_free_clusters_after_init(struct super_block *sb, | 1803 | extern unsigned ext4_free_clusters_after_init(struct super_block *sb, |
1791 | ext4_group_t block_group, | 1804 | ext4_group_t block_group, |
1792 | struct ext4_group_desc *gdp); | 1805 | struct ext4_group_desc *gdp); |
1793 | extern unsigned ext4_num_base_meta_clusters(struct super_block *sb, | ||
1794 | ext4_group_t block_group); | ||
1795 | extern unsigned ext4_num_overhead_clusters(struct super_block *sb, | 1806 | extern unsigned ext4_num_overhead_clusters(struct super_block *sb, |
1796 | ext4_group_t block_group, | 1807 | ext4_group_t block_group, |
1797 | struct ext4_group_desc *gdp); | 1808 | struct ext4_group_desc *gdp); |
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode); | |||
1880 | extern void ext4_set_aops(struct inode *inode); | 1891 | extern void ext4_set_aops(struct inode *inode); |
1881 | extern int ext4_writepage_trans_blocks(struct inode *); | 1892 | extern int ext4_writepage_trans_blocks(struct inode *); |
1882 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 1893 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
1883 | extern int ext4_block_truncate_page(handle_t *handle, | ||
1884 | struct address_space *mapping, loff_t from); | ||
1885 | extern int ext4_block_zero_page_range(handle_t *handle, | ||
1886 | struct address_space *mapping, loff_t from, loff_t length); | ||
1887 | extern int ext4_discard_partial_page_buffers(handle_t *handle, | 1894 | extern int ext4_discard_partial_page_buffers(handle_t *handle, |
1888 | struct address_space *mapping, loff_t from, | 1895 | struct address_space *mapping, loff_t from, |
1889 | loff_t length, int flags); | 1896 | loff_t length, int flags); |
1890 | extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
1891 | struct inode *inode, struct page *page, loff_t from, | ||
1892 | loff_t length, int flags); | ||
1893 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 1897 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
1894 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 1898 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
1895 | extern void ext4_da_update_reserve_space(struct inode *inode, | 1899 | extern void ext4_da_update_reserve_space(struct inode *inode, |
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb, | |||
1924 | extern int ext4_group_extend(struct super_block *sb, | 1928 | extern int ext4_group_extend(struct super_block *sb, |
1925 | struct ext4_super_block *es, | 1929 | struct ext4_super_block *es, |
1926 | ext4_fsblk_t n_blocks_count); | 1930 | ext4_fsblk_t n_blocks_count); |
1931 | extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count); | ||
1927 | 1932 | ||
1928 | /* super.c */ | 1933 | /* super.c */ |
1929 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); | 1934 | extern void *ext4_kvmalloc(size_t size, gfp_t flags); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 841faf5fb785..74f23c292e1b 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode, | |||
3280 | ext4_lblk_t i, pg_lblk; | 3280 | ext4_lblk_t i, pg_lblk; |
3281 | pgoff_t index; | 3281 | pgoff_t index; |
3282 | 3282 | ||
3283 | if (!test_opt(inode->i_sb, DELALLOC)) | ||
3284 | return 0; | ||
3285 | |||
3283 | /* reverse search wont work if fs block size is less than page size */ | 3286 | /* reverse search wont work if fs block size is less than page size */ |
3284 | if (inode->i_blkbits < PAGE_CACHE_SHIFT) | 3287 | if (inode->i_blkbits < PAGE_CACHE_SHIFT) |
3285 | search_hint_reverse = 0; | 3288 | search_hint_reverse = 0; |
@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3452 | int err = 0; | 3455 | int err = 0; |
3453 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; | 3456 | ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; |
3454 | 3457 | ||
3455 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" | 3458 | ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " |
3456 | "block %llu, max_blocks %u, flags %d, allocated %u", | 3459 | "block %llu, max_blocks %u, flags %x, allocated %u\n", |
3457 | inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, | 3460 | inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, |
3458 | flags, allocated); | 3461 | flags, allocated); |
3459 | ext4_ext_show_leaf(inode, path); | 3462 | ext4_ext_show_leaf(inode, path); |
@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
3624 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 3627 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
3625 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); | 3628 | ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); |
3626 | ext4_lblk_t ex_cluster_start, ex_cluster_end; | 3629 | ext4_lblk_t ex_cluster_start, ex_cluster_end; |
3627 | ext4_lblk_t rr_cluster_start, rr_cluster_end; | 3630 | ext4_lblk_t rr_cluster_start; |
3628 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); | 3631 | ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); |
3629 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); | 3632 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
3630 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 3633 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb, | |||
3635 | 3638 | ||
3636 | /* The requested region passed into ext4_map_blocks() */ | 3639 | /* The requested region passed into ext4_map_blocks() */ |
3637 | rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); | 3640 | rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); |
3638 | rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1); | ||
3639 | 3641 | ||
3640 | if ((rr_cluster_start == ex_cluster_end) || | 3642 | if ((rr_cluster_start == ex_cluster_end) || |
3641 | (rr_cluster_start == ex_cluster_start)) { | 3643 | (rr_cluster_start == ex_cluster_start)) { |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 4637af036d9c..25d8c9781ad9 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode) | |||
252 | fatal = ext4_journal_get_write_access(handle, bh2); | 252 | fatal = ext4_journal_get_write_access(handle, bh2); |
253 | } | 253 | } |
254 | ext4_lock_group(sb, block_group); | 254 | ext4_lock_group(sb, block_group); |
255 | cleared = ext4_clear_bit(bit, bitmap_bh->b_data); | 255 | cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data); |
256 | if (fatal || !cleared) { | 256 | if (fatal || !cleared) { |
257 | ext4_unlock_group(sb, block_group); | 257 | ext4_unlock_group(sb, block_group); |
258 | goto out; | 258 | goto out; |
@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent, | |||
358 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 358 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
359 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); | 359 | ext4_group_t real_ngroups = ext4_get_groups_count(sb); |
360 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); | 360 | int inodes_per_group = EXT4_INODES_PER_GROUP(sb); |
361 | unsigned int freei, avefreei; | 361 | unsigned int freei, avefreei, grp_free; |
362 | ext4_fsblk_t freeb, avefreec; | 362 | ext4_fsblk_t freeb, avefreec; |
363 | unsigned int ndirs; | 363 | unsigned int ndirs; |
364 | int max_dirs, min_inodes; | 364 | int max_dirs, min_inodes; |
@@ -477,8 +477,8 @@ fallback_retry: | |||
477 | for (i = 0; i < ngroups; i++) { | 477 | for (i = 0; i < ngroups; i++) { |
478 | grp = (parent_group + i) % ngroups; | 478 | grp = (parent_group + i) % ngroups; |
479 | desc = ext4_get_group_desc(sb, grp, NULL); | 479 | desc = ext4_get_group_desc(sb, grp, NULL); |
480 | if (desc && ext4_free_inodes_count(sb, desc) && | 480 | grp_free = ext4_free_inodes_count(sb, desc); |
481 | ext4_free_inodes_count(sb, desc) >= avefreei) { | 481 | if (desc && grp_free && grp_free >= avefreei) { |
482 | *group = grp; | 482 | *group = grp; |
483 | return 0; | 483 | return 0; |
484 | } | 484 | } |
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb, | |||
618 | */ | 618 | */ |
619 | down_read(&grp->alloc_sem); | 619 | down_read(&grp->alloc_sem); |
620 | ext4_lock_group(sb, group); | 620 | ext4_lock_group(sb, group); |
621 | if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { | 621 | if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) { |
622 | /* not a free inode */ | 622 | /* not a free inode */ |
623 | retval = 1; | 623 | retval = 1; |
624 | goto err_ret; | 624 | goto err_ret; |
@@ -885,8 +885,12 @@ got: | |||
885 | if (IS_DIRSYNC(inode)) | 885 | if (IS_DIRSYNC(inode)) |
886 | ext4_handle_sync(handle); | 886 | ext4_handle_sync(handle); |
887 | if (insert_inode_locked(inode) < 0) { | 887 | if (insert_inode_locked(inode) < 0) { |
888 | err = -EINVAL; | 888 | /* |
889 | goto fail_drop; | 889 | * Likely a bitmap corruption causing inode to be allocated |
890 | * twice. | ||
891 | */ | ||
892 | err = -EIO; | ||
893 | goto fail; | ||
890 | } | 894 | } |
891 | spin_lock(&sbi->s_next_gen_lock); | 895 | spin_lock(&sbi->s_next_gen_lock); |
892 | inode->i_generation = sbi->s_next_generation++; | 896 | inode->i_generation = sbi->s_next_generation++; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index aa8efa6572d6..feaa82fe629d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode); | |||
71 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); | 71 | static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); |
72 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 72 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
73 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 73 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
74 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | ||
75 | struct inode *inode, struct page *page, loff_t from, | ||
76 | loff_t length, int flags); | ||
74 | 77 | ||
75 | /* | 78 | /* |
76 | * Test whether an inode is a fast symlink. | 79 | * Test whether an inode is a fast symlink. |
@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
2759 | if (!io_end || !size) | 2762 | if (!io_end || !size) |
2760 | goto out; | 2763 | goto out; |
2761 | 2764 | ||
2762 | ext_debug("ext4_end_io_dio(): io_end 0x%p" | 2765 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
2763 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", | 2766 | "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", |
2764 | iocb->private, io_end->inode->i_ino, iocb, offset, | 2767 | iocb->private, io_end->inode->i_ino, iocb, offset, |
2765 | size); | 2768 | size); |
@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle, | |||
3160 | * | 3163 | * |
3161 | * Returns zero on sucess or negative on failure. | 3164 | * Returns zero on sucess or negative on failure. |
3162 | */ | 3165 | */ |
3163 | int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 3166 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, |
3164 | struct inode *inode, struct page *page, loff_t from, | 3167 | struct inode *inode, struct page *page, loff_t from, |
3165 | loff_t length, int flags) | 3168 | loff_t length, int flags) |
3166 | { | 3169 | { |
@@ -3300,126 +3303,6 @@ next: | |||
3300 | return err; | 3303 | return err; |
3301 | } | 3304 | } |
3302 | 3305 | ||
3303 | /* | ||
3304 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | ||
3305 | * up to the end of the block which corresponds to `from'. | ||
3306 | * This required during truncate. We need to physically zero the tail end | ||
3307 | * of that block so it doesn't yield old data if the file is later grown. | ||
3308 | */ | ||
3309 | int ext4_block_truncate_page(handle_t *handle, | ||
3310 | struct address_space *mapping, loff_t from) | ||
3311 | { | ||
3312 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3313 | unsigned length; | ||
3314 | unsigned blocksize; | ||
3315 | struct inode *inode = mapping->host; | ||
3316 | |||
3317 | blocksize = inode->i_sb->s_blocksize; | ||
3318 | length = blocksize - (offset & (blocksize - 1)); | ||
3319 | |||
3320 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
3321 | } | ||
3322 | |||
3323 | /* | ||
3324 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | ||
3325 | * starting from file offset 'from'. The range to be zero'd must | ||
3326 | * be contained with in one block. If the specified range exceeds | ||
3327 | * the end of the block it will be shortened to end of the block | ||
3328 | * that cooresponds to 'from' | ||
3329 | */ | ||
3330 | int ext4_block_zero_page_range(handle_t *handle, | ||
3331 | struct address_space *mapping, loff_t from, loff_t length) | ||
3332 | { | ||
3333 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | ||
3334 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3335 | unsigned blocksize, max, pos; | ||
3336 | ext4_lblk_t iblock; | ||
3337 | struct inode *inode = mapping->host; | ||
3338 | struct buffer_head *bh; | ||
3339 | struct page *page; | ||
3340 | int err = 0; | ||
3341 | |||
3342 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | ||
3343 | mapping_gfp_mask(mapping) & ~__GFP_FS); | ||
3344 | if (!page) | ||
3345 | return -ENOMEM; | ||
3346 | |||
3347 | blocksize = inode->i_sb->s_blocksize; | ||
3348 | max = blocksize - (offset & (blocksize - 1)); | ||
3349 | |||
3350 | /* | ||
3351 | * correct length if it does not fall between | ||
3352 | * 'from' and the end of the block | ||
3353 | */ | ||
3354 | if (length > max || length < 0) | ||
3355 | length = max; | ||
3356 | |||
3357 | iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); | ||
3358 | |||
3359 | if (!page_has_buffers(page)) | ||
3360 | create_empty_buffers(page, blocksize, 0); | ||
3361 | |||
3362 | /* Find the buffer that contains "offset" */ | ||
3363 | bh = page_buffers(page); | ||
3364 | pos = blocksize; | ||
3365 | while (offset >= pos) { | ||
3366 | bh = bh->b_this_page; | ||
3367 | iblock++; | ||
3368 | pos += blocksize; | ||
3369 | } | ||
3370 | |||
3371 | err = 0; | ||
3372 | if (buffer_freed(bh)) { | ||
3373 | BUFFER_TRACE(bh, "freed: skip"); | ||
3374 | goto unlock; | ||
3375 | } | ||
3376 | |||
3377 | if (!buffer_mapped(bh)) { | ||
3378 | BUFFER_TRACE(bh, "unmapped"); | ||
3379 | ext4_get_block(inode, iblock, bh, 0); | ||
3380 | /* unmapped? It's a hole - nothing to do */ | ||
3381 | if (!buffer_mapped(bh)) { | ||
3382 | BUFFER_TRACE(bh, "still unmapped"); | ||
3383 | goto unlock; | ||
3384 | } | ||
3385 | } | ||
3386 | |||
3387 | /* Ok, it's mapped. Make sure it's up-to-date */ | ||
3388 | if (PageUptodate(page)) | ||
3389 | set_buffer_uptodate(bh); | ||
3390 | |||
3391 | if (!buffer_uptodate(bh)) { | ||
3392 | err = -EIO; | ||
3393 | ll_rw_block(READ, 1, &bh); | ||
3394 | wait_on_buffer(bh); | ||
3395 | /* Uhhuh. Read error. Complain and punt. */ | ||
3396 | if (!buffer_uptodate(bh)) | ||
3397 | goto unlock; | ||
3398 | } | ||
3399 | |||
3400 | if (ext4_should_journal_data(inode)) { | ||
3401 | BUFFER_TRACE(bh, "get write access"); | ||
3402 | err = ext4_journal_get_write_access(handle, bh); | ||
3403 | if (err) | ||
3404 | goto unlock; | ||
3405 | } | ||
3406 | |||
3407 | zero_user(page, offset, length); | ||
3408 | |||
3409 | BUFFER_TRACE(bh, "zeroed end of block"); | ||
3410 | |||
3411 | err = 0; | ||
3412 | if (ext4_should_journal_data(inode)) { | ||
3413 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
3414 | } else | ||
3415 | mark_buffer_dirty(bh); | ||
3416 | |||
3417 | unlock: | ||
3418 | unlock_page(page); | ||
3419 | page_cache_release(page); | ||
3420 | return err; | ||
3421 | } | ||
3422 | |||
3423 | int ext4_can_truncate(struct inode *inode) | 3306 | int ext4_can_truncate(struct inode *inode) |
3424 | { | 3307 | { |
3425 | if (S_ISREG(inode->i_mode)) | 3308 | if (S_ISREG(inode->i_mode)) |
@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4646 | return 0; | 4529 | return 0; |
4647 | if (is_journal_aborted(journal)) | 4530 | if (is_journal_aborted(journal)) |
4648 | return -EROFS; | 4531 | return -EROFS; |
4532 | /* We have to allocate physical blocks for delalloc blocks | ||
4533 | * before flushing journal. otherwise delalloc blocks can not | ||
4534 | * be allocated any more. even more truncate on delalloc blocks | ||
4535 | * could trigger BUG by flushing delalloc blocks in journal. | ||
4536 | * There is no delalloc block in non-journal data mode. | ||
4537 | */ | ||
4538 | if (val && test_opt(inode->i_sb, DELALLOC)) { | ||
4539 | err = ext4_alloc_da_blocks(inode); | ||
4540 | if (err < 0) | ||
4541 | return err; | ||
4542 | } | ||
4649 | 4543 | ||
4650 | jbd2_journal_lock_updates(journal); | 4544 | jbd2_journal_lock_updates(journal); |
4651 | jbd2_journal_flush(journal); | ||
4652 | 4545 | ||
4653 | /* | 4546 | /* |
4654 | * OK, there are no updates running now, and all cached data is | 4547 | * OK, there are no updates running now, and all cached data is |
@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) | |||
4660 | 4553 | ||
4661 | if (val) | 4554 | if (val) |
4662 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4555 | ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
4663 | else | 4556 | else { |
4557 | jbd2_journal_flush(journal); | ||
4664 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); | 4558 | ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); |
4559 | } | ||
4665 | ext4_set_aops(inode); | 4560 | ext4_set_aops(inode); |
4666 | 4561 | ||
4667 | jbd2_journal_unlock_updates(journal); | 4562 | jbd2_journal_unlock_updates(journal); |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index e87a932b073b..6eee25591b81 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include "ext4_jbd2.h" | 18 | #include "ext4_jbd2.h" |
19 | #include "ext4.h" | 19 | #include "ext4.h" |
20 | 20 | ||
21 | #define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1) | ||
22 | |||
21 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 23 | long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
22 | { | 24 | { |
23 | struct inode *inode = filp->f_dentry->d_inode; | 25 | struct inode *inode = filp->f_dentry->d_inode; |
@@ -186,19 +188,22 @@ setversion_out: | |||
186 | if (err) | 188 | if (err) |
187 | return err; | 189 | return err; |
188 | 190 | ||
189 | if (get_user(n_blocks_count, (__u32 __user *)arg)) | 191 | if (get_user(n_blocks_count, (__u32 __user *)arg)) { |
190 | return -EFAULT; | 192 | err = -EFAULT; |
193 | goto group_extend_out; | ||
194 | } | ||
191 | 195 | ||
192 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 196 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
193 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | 197 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { |
194 | ext4_msg(sb, KERN_ERR, | 198 | ext4_msg(sb, KERN_ERR, |
195 | "Online resizing not supported with bigalloc"); | 199 | "Online resizing not supported with bigalloc"); |
196 | return -EOPNOTSUPP; | 200 | err = -EOPNOTSUPP; |
201 | goto group_extend_out; | ||
197 | } | 202 | } |
198 | 203 | ||
199 | err = mnt_want_write_file(filp); | 204 | err = mnt_want_write_file(filp); |
200 | if (err) | 205 | if (err) |
201 | return err; | 206 | goto group_extend_out; |
202 | 207 | ||
203 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); | 208 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); |
204 | if (EXT4_SB(sb)->s_journal) { | 209 | if (EXT4_SB(sb)->s_journal) { |
@@ -209,8 +214,8 @@ setversion_out: | |||
209 | if (err == 0) | 214 | if (err == 0) |
210 | err = err2; | 215 | err = err2; |
211 | mnt_drop_write_file(filp); | 216 | mnt_drop_write_file(filp); |
217 | group_extend_out: | ||
212 | ext4_resize_end(sb); | 218 | ext4_resize_end(sb); |
213 | |||
214 | return err; | 219 | return err; |
215 | } | 220 | } |
216 | 221 | ||
@@ -251,8 +256,7 @@ setversion_out: | |||
251 | err = ext4_move_extents(filp, donor_filp, me.orig_start, | 256 | err = ext4_move_extents(filp, donor_filp, me.orig_start, |
252 | me.donor_start, me.len, &me.moved_len); | 257 | me.donor_start, me.len, &me.moved_len); |
253 | mnt_drop_write_file(filp); | 258 | mnt_drop_write_file(filp); |
254 | if (me.moved_len > 0) | 259 | mnt_drop_write(filp->f_path.mnt); |
255 | file_remove_suid(donor_filp); | ||
256 | 260 | ||
257 | if (copy_to_user((struct move_extent __user *)arg, | 261 | if (copy_to_user((struct move_extent __user *)arg, |
258 | &me, sizeof(me))) | 262 | &me, sizeof(me))) |
@@ -271,19 +275,22 @@ mext_out: | |||
271 | return err; | 275 | return err; |
272 | 276 | ||
273 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, | 277 | if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, |
274 | sizeof(input))) | 278 | sizeof(input))) { |
275 | return -EFAULT; | 279 | err = -EFAULT; |
280 | goto group_add_out; | ||
281 | } | ||
276 | 282 | ||
277 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | 283 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, |
278 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | 284 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { |
279 | ext4_msg(sb, KERN_ERR, | 285 | ext4_msg(sb, KERN_ERR, |
280 | "Online resizing not supported with bigalloc"); | 286 | "Online resizing not supported with bigalloc"); |
281 | return -EOPNOTSUPP; | 287 | err = -EOPNOTSUPP; |
288 | goto group_add_out; | ||
282 | } | 289 | } |
283 | 290 | ||
284 | err = mnt_want_write_file(filp); | 291 | err = mnt_want_write_file(filp); |
285 | if (err) | 292 | if (err) |
286 | return err; | 293 | goto group_add_out; |
287 | 294 | ||
288 | err = ext4_group_add(sb, &input); | 295 | err = ext4_group_add(sb, &input); |
289 | if (EXT4_SB(sb)->s_journal) { | 296 | if (EXT4_SB(sb)->s_journal) { |
@@ -294,8 +301,8 @@ mext_out: | |||
294 | if (err == 0) | 301 | if (err == 0) |
295 | err = err2; | 302 | err = err2; |
296 | mnt_drop_write_file(filp); | 303 | mnt_drop_write_file(filp); |
304 | group_add_out: | ||
297 | ext4_resize_end(sb); | 305 | ext4_resize_end(sb); |
298 | |||
299 | return err; | 306 | return err; |
300 | } | 307 | } |
301 | 308 | ||
@@ -335,6 +342,60 @@ mext_out: | |||
335 | return err; | 342 | return err; |
336 | } | 343 | } |
337 | 344 | ||
345 | case EXT4_IOC_RESIZE_FS: { | ||
346 | ext4_fsblk_t n_blocks_count; | ||
347 | struct super_block *sb = inode->i_sb; | ||
348 | int err = 0, err2 = 0; | ||
349 | |||
350 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
351 | EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { | ||
352 | ext4_msg(sb, KERN_ERR, | ||
353 | "Online resizing not (yet) supported with bigalloc"); | ||
354 | return -EOPNOTSUPP; | ||
355 | } | ||
356 | |||
357 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
358 | EXT4_FEATURE_INCOMPAT_META_BG)) { | ||
359 | ext4_msg(sb, KERN_ERR, | ||
360 | "Online resizing not (yet) supported with meta_bg"); | ||
361 | return -EOPNOTSUPP; | ||
362 | } | ||
363 | |||
364 | if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, | ||
365 | sizeof(__u64))) { | ||
366 | return -EFAULT; | ||
367 | } | ||
368 | |||
369 | if (n_blocks_count > MAX_32_NUM && | ||
370 | !EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
371 | EXT4_FEATURE_INCOMPAT_64BIT)) { | ||
372 | ext4_msg(sb, KERN_ERR, | ||
373 | "File system only supports 32-bit block numbers"); | ||
374 | return -EOPNOTSUPP; | ||
375 | } | ||
376 | |||
377 | err = ext4_resize_begin(sb); | ||
378 | if (err) | ||
379 | return err; | ||
380 | |||
381 | err = mnt_want_write(filp->f_path.mnt); | ||
382 | if (err) | ||
383 | goto resizefs_out; | ||
384 | |||
385 | err = ext4_resize_fs(sb, n_blocks_count); | ||
386 | if (EXT4_SB(sb)->s_journal) { | ||
387 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | ||
388 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); | ||
389 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | ||
390 | } | ||
391 | if (err == 0) | ||
392 | err = err2; | ||
393 | mnt_drop_write(filp->f_path.mnt); | ||
394 | resizefs_out: | ||
395 | ext4_resize_end(sb); | ||
396 | return err; | ||
397 | } | ||
398 | |||
338 | case FITRIM: | 399 | case FITRIM: |
339 | { | 400 | { |
340 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | 401 | struct request_queue *q = bdev_get_queue(sb->s_bdev); |
@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
433 | } | 494 | } |
434 | case EXT4_IOC_MOVE_EXT: | 495 | case EXT4_IOC_MOVE_EXT: |
435 | case FITRIM: | 496 | case FITRIM: |
497 | case EXT4_IOC_RESIZE_FS: | ||
436 | break; | 498 | break; |
437 | default: | 499 | default: |
438 | return -ENOIOCTLCMD; | 500 | return -ENOIOCTLCMD; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e2d8be8f28bf..cb990b21c698 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b, | |||
3671 | ext4_group_t group; | 3671 | ext4_group_t group; |
3672 | ext4_grpblk_t bit; | 3672 | ext4_grpblk_t bit; |
3673 | 3673 | ||
3674 | trace_ext4_mb_release_group_pa(pa); | 3674 | trace_ext4_mb_release_group_pa(sb, pa); |
3675 | BUG_ON(pa->pa_deleted == 0); | 3675 | BUG_ON(pa->pa_deleted == 0); |
3676 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); | 3676 | ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); |
3677 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); | 3677 | BUG_ON(group != e4b->bd_group && pa->pa_len != 0); |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 996780ab4f4e..f9d948f0eb86 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb, | |||
134 | return err; | 134 | return err; |
135 | } | 135 | } |
136 | 136 | ||
137 | /* | ||
138 | * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex | ||
139 | * group each time. | ||
140 | */ | ||
141 | struct ext4_new_flex_group_data { | ||
142 | struct ext4_new_group_data *groups; /* new_group_data for groups | ||
143 | in the flex group */ | ||
144 | __u16 *bg_flags; /* block group flags of groups | ||
145 | in @groups */ | ||
146 | ext4_group_t count; /* number of groups in @groups | ||
147 | */ | ||
148 | }; | ||
149 | |||
150 | /* | ||
151 | * alloc_flex_gd() allocates an ext4_new_flex_group_data with size of | ||
152 | * @flexbg_size. | ||
153 | * | ||
154 | * Returns NULL on failure otherwise address of the allocated structure. | ||
155 | */ | ||
156 | static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size) | ||
157 | { | ||
158 | struct ext4_new_flex_group_data *flex_gd; | ||
159 | |||
160 | flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS); | ||
161 | if (flex_gd == NULL) | ||
162 | goto out3; | ||
163 | |||
164 | flex_gd->count = flexbg_size; | ||
165 | |||
166 | flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) * | ||
167 | flexbg_size, GFP_NOFS); | ||
168 | if (flex_gd->groups == NULL) | ||
169 | goto out2; | ||
170 | |||
171 | flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS); | ||
172 | if (flex_gd->bg_flags == NULL) | ||
173 | goto out1; | ||
174 | |||
175 | return flex_gd; | ||
176 | |||
177 | out1: | ||
178 | kfree(flex_gd->groups); | ||
179 | out2: | ||
180 | kfree(flex_gd); | ||
181 | out3: | ||
182 | return NULL; | ||
183 | } | ||
184 | |||
185 | static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd) | ||
186 | { | ||
187 | kfree(flex_gd->bg_flags); | ||
188 | kfree(flex_gd->groups); | ||
189 | kfree(flex_gd); | ||
190 | } | ||
191 | |||
192 | /* | ||
193 | * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps | ||
194 | * and inode tables for a flex group. | ||
195 | * | ||
196 | * This function is used by 64bit-resize. Note that this function allocates | ||
197 | * group tables from the 1st group of groups contained by @flexgd, which may | ||
198 | * be only part of a flex group. | ||
199 | * | ||
200 | * @sb: super block of fs to which the groups belongs | ||
201 | */ | ||
202 | static void ext4_alloc_group_tables(struct super_block *sb, | ||
203 | struct ext4_new_flex_group_data *flex_gd, | ||
204 | int flexbg_size) | ||
205 | { | ||
206 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
207 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
208 | ext4_fsblk_t start_blk; | ||
209 | ext4_fsblk_t last_blk; | ||
210 | ext4_group_t src_group; | ||
211 | ext4_group_t bb_index = 0; | ||
212 | ext4_group_t ib_index = 0; | ||
213 | ext4_group_t it_index = 0; | ||
214 | ext4_group_t group; | ||
215 | ext4_group_t last_group; | ||
216 | unsigned overhead; | ||
217 | |||
218 | BUG_ON(flex_gd->count == 0 || group_data == NULL); | ||
219 | |||
220 | src_group = group_data[0].group; | ||
221 | last_group = src_group + flex_gd->count - 1; | ||
222 | |||
223 | BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) != | ||
224 | (last_group & ~(flexbg_size - 1)))); | ||
225 | next_group: | ||
226 | group = group_data[0].group; | ||
227 | start_blk = ext4_group_first_block_no(sb, src_group); | ||
228 | last_blk = start_blk + group_data[src_group - group].blocks_count; | ||
229 | |||
230 | overhead = ext4_bg_has_super(sb, src_group) ? | ||
231 | (1 + ext4_bg_num_gdb(sb, src_group) + | ||
232 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
233 | |||
234 | start_blk += overhead; | ||
235 | |||
236 | BUG_ON(src_group >= group_data[0].group + flex_gd->count); | ||
237 | /* We collect contiguous blocks as much as possible. */ | ||
238 | src_group++; | ||
239 | for (; src_group <= last_group; src_group++) | ||
240 | if (!ext4_bg_has_super(sb, src_group)) | ||
241 | last_blk += group_data[src_group - group].blocks_count; | ||
242 | else | ||
243 | break; | ||
244 | |||
245 | /* Allocate block bitmaps */ | ||
246 | for (; bb_index < flex_gd->count; bb_index++) { | ||
247 | if (start_blk >= last_blk) | ||
248 | goto next_group; | ||
249 | group_data[bb_index].block_bitmap = start_blk++; | ||
250 | ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); | ||
251 | group -= group_data[0].group; | ||
252 | group_data[group].free_blocks_count--; | ||
253 | if (flexbg_size > 1) | ||
254 | flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; | ||
255 | } | ||
256 | |||
257 | /* Allocate inode bitmaps */ | ||
258 | for (; ib_index < flex_gd->count; ib_index++) { | ||
259 | if (start_blk >= last_blk) | ||
260 | goto next_group; | ||
261 | group_data[ib_index].inode_bitmap = start_blk++; | ||
262 | ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL); | ||
263 | group -= group_data[0].group; | ||
264 | group_data[group].free_blocks_count--; | ||
265 | if (flexbg_size > 1) | ||
266 | flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; | ||
267 | } | ||
268 | |||
269 | /* Allocate inode tables */ | ||
270 | for (; it_index < flex_gd->count; it_index++) { | ||
271 | if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk) | ||
272 | goto next_group; | ||
273 | group_data[it_index].inode_table = start_blk; | ||
274 | ext4_get_group_no_and_offset(sb, start_blk, &group, NULL); | ||
275 | group -= group_data[0].group; | ||
276 | group_data[group].free_blocks_count -= | ||
277 | EXT4_SB(sb)->s_itb_per_group; | ||
278 | if (flexbg_size > 1) | ||
279 | flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT; | ||
280 | |||
281 | start_blk += EXT4_SB(sb)->s_itb_per_group; | ||
282 | } | ||
283 | |||
284 | if (test_opt(sb, DEBUG)) { | ||
285 | int i; | ||
286 | group = group_data[0].group; | ||
287 | |||
288 | printk(KERN_DEBUG "EXT4-fs: adding a flex group with " | ||
289 | "%d groups, flexbg size is %d:\n", flex_gd->count, | ||
290 | flexbg_size); | ||
291 | |||
292 | for (i = 0; i < flex_gd->count; i++) { | ||
293 | printk(KERN_DEBUG "adding %s group %u: %u " | ||
294 | "blocks (%d free)\n", | ||
295 | ext4_bg_has_super(sb, group + i) ? "normal" : | ||
296 | "no-super", group + i, | ||
297 | group_data[i].blocks_count, | ||
298 | group_data[i].free_blocks_count); | ||
299 | } | ||
300 | } | ||
301 | } | ||
302 | |||
137 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, | 303 | static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, |
138 | ext4_fsblk_t blk) | 304 | ext4_fsblk_t blk) |
139 | { | 305 | { |
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh) | |||
179 | } | 345 | } |
180 | 346 | ||
181 | /* | 347 | /* |
182 | * Set up the block and inode bitmaps, and the inode table for the new group. | 348 | * set_flexbg_block_bitmap() marks @count blocks starting from @block as used. |
349 | * | ||
350 | * Helper function for setup_new_flex_group_blocks() which sets bits in the block bitmaps of the new groups. | ||
351 | * | ||
352 | * @sb: super block | ||
353 | * @handle: journal handle | ||
354 | * @flex_gd: flex group data | ||
355 | */ | ||
356 | static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, | ||
357 | struct ext4_new_flex_group_data *flex_gd, | ||
358 | ext4_fsblk_t block, ext4_group_t count) | ||
359 | { | ||
360 | ext4_group_t count2; | ||
361 | |||
362 | ext4_debug("mark blocks [%llu/%u] used\n", block, count); | ||
363 | for (count2 = count; count > 0; count -= count2, block += count2) { | ||
364 | ext4_fsblk_t start; | ||
365 | struct buffer_head *bh; | ||
366 | ext4_group_t group; | ||
367 | int err; | ||
368 | |||
369 | ext4_get_group_no_and_offset(sb, block, &group, NULL); | ||
370 | start = ext4_group_first_block_no(sb, group); | ||
371 | group -= flex_gd->groups[0].group; | ||
372 | |||
373 | count2 = sb->s_blocksize * 8 - (block - start); | ||
374 | if (count2 > count) | ||
375 | count2 = count; | ||
376 | |||
377 | if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) { | ||
378 | BUG_ON(flex_gd->count > 1); | ||
379 | continue; | ||
380 | } | ||
381 | |||
382 | err = extend_or_restart_transaction(handle, 1); | ||
383 | if (err) | ||
384 | return err; | ||
385 | |||
386 | bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap); | ||
387 | if (!bh) | ||
388 | return -EIO; | ||
389 | |||
390 | err = ext4_journal_get_write_access(handle, bh); | ||
391 | if (err) | ||
392 | return err; | ||
393 | ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, | ||
394 | block - start, count2); | ||
395 | ext4_set_bits(bh->b_data, block - start, count2); | ||
396 | |||
397 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
398 | if (unlikely(err)) | ||
399 | return err; | ||
400 | brelse(bh); | ||
401 | } | ||
402 | |||
403 | return 0; | ||
404 | } | ||
405 | |||
406 | /* | ||
407 | * Set up the block and inode bitmaps, and the inode table for the new groups. | ||
183 | * This doesn't need to be part of the main transaction, since we are only | 408 | * This doesn't need to be part of the main transaction, since we are only |
184 | * changing blocks outside the actual filesystem. We still do journaling to | 409 | * changing blocks outside the actual filesystem. We still do journaling to |
185 | * ensure the recovery is correct in case of a failure just after resize. | 410 | * ensure the recovery is correct in case of a failure just after resize. |
186 | * If any part of this fails, we simply abort the resize. | 411 | * If any part of this fails, we simply abort the resize. |
412 | * | ||
413 | * setup_new_flex_group_blocks handles a flex group as follows: | ||
414 | * 1. copy super block and GDT, and initialize group tables if necessary. | ||
415 | * In this step, we only set bits in blocks bitmaps for blocks taken by | ||
416 | * super block and GDT. | ||
417 | * 2. allocate group tables in block bitmaps, that is, set bits in block | ||
418 | * bitmap for blocks taken by group tables. | ||
187 | */ | 419 | */ |
188 | static int setup_new_group_blocks(struct super_block *sb, | 420 | static int setup_new_flex_group_blocks(struct super_block *sb, |
189 | struct ext4_new_group_data *input) | 421 | struct ext4_new_flex_group_data *flex_gd) |
190 | { | 422 | { |
423 | int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group}; | ||
424 | ext4_fsblk_t start; | ||
425 | ext4_fsblk_t block; | ||
191 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 426 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
192 | ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); | 427 | struct ext4_super_block *es = sbi->s_es; |
193 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 428 | struct ext4_new_group_data *group_data = flex_gd->groups; |
194 | le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; | 429 | __u16 *bg_flags = flex_gd->bg_flags; |
195 | unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); | ||
196 | struct buffer_head *bh; | ||
197 | handle_t *handle; | 430 | handle_t *handle; |
198 | ext4_fsblk_t block; | 431 | ext4_group_t group, count; |
199 | ext4_grpblk_t bit; | 432 | struct buffer_head *bh = NULL; |
200 | int i; | 433 | int reserved_gdb, i, j, err = 0, err2; |
201 | int err = 0, err2; | 434 | |
435 | BUG_ON(!flex_gd->count || !group_data || | ||
436 | group_data[0].group != sbi->s_groups_count); | ||
437 | |||
438 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); | ||
202 | 439 | ||
203 | /* This transaction may be extended/restarted along the way */ | 440 | /* This transaction may be extended/restarted along the way */ |
204 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); | 441 | handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); |
205 | |||
206 | if (IS_ERR(handle)) | 442 | if (IS_ERR(handle)) |
207 | return PTR_ERR(handle); | 443 | return PTR_ERR(handle); |
208 | 444 | ||
209 | BUG_ON(input->group != sbi->s_groups_count); | 445 | group = group_data[0].group; |
446 | for (i = 0; i < flex_gd->count; i++, group++) { | ||
447 | unsigned long gdblocks; | ||
210 | 448 | ||
211 | /* Copy all of the GDT blocks into the backup in this group */ | 449 | gdblocks = ext4_bg_num_gdb(sb, group); |
212 | for (i = 0, bit = 1, block = start + 1; | 450 | start = ext4_group_first_block_no(sb, group); |
213 | i < gdblocks; i++, block++, bit++) { | ||
214 | struct buffer_head *gdb; | ||
215 | 451 | ||
216 | ext4_debug("update backup group %#04llx (+%d)\n", block, bit); | 452 | /* Copy all of the GDT blocks into the backup in this group */ |
217 | err = extend_or_restart_transaction(handle, 1); | 453 | for (j = 0, block = start + 1; j < gdblocks; j++, block++) { |
218 | if (err) | 454 | struct buffer_head *gdb; |
219 | goto exit_journal; | ||
220 | 455 | ||
221 | gdb = sb_getblk(sb, block); | 456 | ext4_debug("update backup group %#04llx\n", block); |
222 | if (!gdb) { | 457 | err = extend_or_restart_transaction(handle, 1); |
223 | err = -EIO; | 458 | if (err) |
224 | goto exit_journal; | 459 | goto out; |
225 | } | 460 | |
226 | if ((err = ext4_journal_get_write_access(handle, gdb))) { | 461 | gdb = sb_getblk(sb, block); |
462 | if (!gdb) { | ||
463 | err = -EIO; | ||
464 | goto out; | ||
465 | } | ||
466 | |||
467 | err = ext4_journal_get_write_access(handle, gdb); | ||
468 | if (err) { | ||
469 | brelse(gdb); | ||
470 | goto out; | ||
471 | } | ||
472 | memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data, | ||
473 | gdb->b_size); | ||
474 | set_buffer_uptodate(gdb); | ||
475 | |||
476 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); | ||
477 | if (unlikely(err)) { | ||
478 | brelse(gdb); | ||
479 | goto out; | ||
480 | } | ||
227 | brelse(gdb); | 481 | brelse(gdb); |
228 | goto exit_journal; | ||
229 | } | 482 | } |
230 | memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); | 483 | |
231 | set_buffer_uptodate(gdb); | 484 | /* Zero out all of the reserved backup group descriptor |
232 | err = ext4_handle_dirty_metadata(handle, NULL, gdb); | 485 | * table blocks |
233 | if (unlikely(err)) { | 486 | */ |
234 | brelse(gdb); | 487 | if (ext4_bg_has_super(sb, group)) { |
235 | goto exit_journal; | 488 | err = sb_issue_zeroout(sb, gdblocks + start + 1, |
489 | reserved_gdb, GFP_NOFS); | ||
490 | if (err) | ||
491 | goto out; | ||
236 | } | 492 | } |
237 | brelse(gdb); | ||
238 | } | ||
239 | 493 | ||
240 | /* Zero out all of the reserved backup group descriptor table blocks */ | 494 | /* Initialize group tables of the group @group */ |
241 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", | 495 | if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) |
242 | block, sbi->s_itb_per_group); | 496 | goto handle_bb; |
243 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, | ||
244 | GFP_NOFS); | ||
245 | if (err) | ||
246 | goto exit_journal; | ||
247 | 497 | ||
248 | err = extend_or_restart_transaction(handle, 2); | 498 | /* Zero out all of the inode table blocks */ |
249 | if (err) | 499 | block = group_data[i].inode_table; |
250 | goto exit_journal; | 500 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
501 | block, sbi->s_itb_per_group); | ||
502 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, | ||
503 | GFP_NOFS); | ||
504 | if (err) | ||
505 | goto out; | ||
251 | 506 | ||
252 | bh = bclean(handle, sb, input->block_bitmap); | 507 | handle_bb: |
253 | if (IS_ERR(bh)) { | 508 | if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT) |
254 | err = PTR_ERR(bh); | 509 | goto handle_ib; |
255 | goto exit_journal; | ||
256 | } | ||
257 | 510 | ||
258 | if (ext4_bg_has_super(sb, input->group)) { | 511 | /* Initialize block bitmap of the @group */ |
259 | ext4_debug("mark backup group tables %#04llx (+0)\n", start); | 512 | block = group_data[i].block_bitmap; |
260 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); | 513 | err = extend_or_restart_transaction(handle, 1); |
261 | } | 514 | if (err) |
515 | goto out; | ||
262 | 516 | ||
263 | ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, | 517 | bh = bclean(handle, sb, block); |
264 | input->block_bitmap - start); | 518 | if (IS_ERR(bh)) { |
265 | ext4_set_bit(input->block_bitmap - start, bh->b_data); | 519 | err = PTR_ERR(bh); |
266 | ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, | 520 | goto out; |
267 | input->inode_bitmap - start); | 521 | } |
268 | ext4_set_bit(input->inode_bitmap - start, bh->b_data); | 522 | if (ext4_bg_has_super(sb, group)) { |
269 | 523 | ext4_debug("mark backup superblock %#04llx (+0)\n", | |
270 | /* Zero out all of the inode table blocks */ | 524 | start); |
271 | block = input->inode_table; | 525 | ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + |
272 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", | 526 | 1); |
273 | block, sbi->s_itb_per_group); | 527 | } |
274 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | 528 | ext4_mark_bitmap_end(group_data[i].blocks_count, |
275 | if (err) | 529 | sb->s_blocksize * 8, bh->b_data); |
276 | goto exit_bh; | 530 | err = ext4_handle_dirty_metadata(handle, NULL, bh); |
277 | ext4_set_bits(bh->b_data, input->inode_table - start, | 531 | if (err) |
278 | sbi->s_itb_per_group); | 532 | goto out; |
533 | brelse(bh); | ||
279 | 534 | ||
535 | handle_ib: | ||
536 | if (bg_flags[i] & EXT4_BG_INODE_UNINIT) | ||
537 | continue; | ||
280 | 538 | ||
281 | ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, | 539 | /* Initialize inode bitmap of the @group */ |
282 | bh->b_data); | 540 | block = group_data[i].inode_bitmap; |
283 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | 541 | err = extend_or_restart_transaction(handle, 1); |
284 | if (unlikely(err)) { | 542 | if (err) |
285 | ext4_std_error(sb, err); | 543 | goto out; |
286 | goto exit_bh; | 544 | /* Mark unused entries in inode bitmap used */ |
545 | bh = bclean(handle, sb, block); | ||
546 | if (IS_ERR(bh)) { | ||
547 | err = PTR_ERR(bh); | ||
548 | goto out; | ||
549 | } | ||
550 | |||
551 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), | ||
552 | sb->s_blocksize * 8, bh->b_data); | ||
553 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
554 | if (err) | ||
555 | goto out; | ||
556 | brelse(bh); | ||
287 | } | 557 | } |
288 | brelse(bh); | 558 | bh = NULL; |
289 | /* Mark unused entries in inode bitmap used */ | 559 | |
290 | ext4_debug("clear inode bitmap %#04llx (+%llu)\n", | 560 | /* Mark group tables in block bitmap */ |
291 | input->inode_bitmap, input->inode_bitmap - start); | 561 | for (j = 0; j < GROUP_TABLE_COUNT; j++) { |
292 | if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { | 562 | count = group_table_count[j]; |
293 | err = PTR_ERR(bh); | 563 | start = (&group_data[0].block_bitmap)[j]; |
294 | goto exit_journal; | 564 | block = start; |
565 | for (i = 1; i < flex_gd->count; i++) { | ||
566 | block += group_table_count[j]; | ||
567 | if (block == (&group_data[i].block_bitmap)[j]) { | ||
568 | count += group_table_count[j]; | ||
569 | continue; | ||
570 | } | ||
571 | err = set_flexbg_block_bitmap(sb, handle, | ||
572 | flex_gd, start, count); | ||
573 | if (err) | ||
574 | goto out; | ||
575 | count = group_table_count[j]; | ||
576 | start = group_data[i].block_bitmap; | ||
577 | block = start; | ||
578 | } | ||
579 | |||
580 | if (count) { | ||
581 | err = set_flexbg_block_bitmap(sb, handle, | ||
582 | flex_gd, start, count); | ||
583 | if (err) | ||
584 | goto out; | ||
585 | } | ||
295 | } | 586 | } |
296 | 587 | ||
297 | ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, | 588 | out: |
298 | bh->b_data); | ||
299 | err = ext4_handle_dirty_metadata(handle, NULL, bh); | ||
300 | if (unlikely(err)) | ||
301 | ext4_std_error(sb, err); | ||
302 | exit_bh: | ||
303 | brelse(bh); | 589 | brelse(bh); |
304 | 590 | err2 = ext4_journal_stop(handle); | |
305 | exit_journal: | 591 | if (err2 && !err) |
306 | if ((err2 = ext4_journal_stop(handle)) && !err) | ||
307 | err = err2; | 592 | err = err2; |
308 | 593 | ||
309 | return err; | 594 | return err; |
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, | |||
351 | * groups in current filesystem that have BACKUPS, or -ve error code. | 636 | * groups in current filesystem that have BACKUPS, or -ve error code. |
352 | */ | 637 | */ |
353 | static int verify_reserved_gdb(struct super_block *sb, | 638 | static int verify_reserved_gdb(struct super_block *sb, |
639 | ext4_group_t end, | ||
354 | struct buffer_head *primary) | 640 | struct buffer_head *primary) |
355 | { | 641 | { |
356 | const ext4_fsblk_t blk = primary->b_blocknr; | 642 | const ext4_fsblk_t blk = primary->b_blocknr; |
357 | const ext4_group_t end = EXT4_SB(sb)->s_groups_count; | ||
358 | unsigned three = 1; | 643 | unsigned three = 1; |
359 | unsigned five = 5; | 644 | unsigned five = 5; |
360 | unsigned seven = 7; | 645 | unsigned seven = 7; |
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
429 | if (!gdb_bh) | 714 | if (!gdb_bh) |
430 | return -EIO; | 715 | return -EIO; |
431 | 716 | ||
432 | gdbackups = verify_reserved_gdb(sb, gdb_bh); | 717 | gdbackups = verify_reserved_gdb(sb, group, gdb_bh); |
433 | if (gdbackups < 0) { | 718 | if (gdbackups < 0) { |
434 | err = gdbackups; | 719 | err = gdbackups; |
435 | goto exit_bh; | 720 | goto exit_bh; |
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
592 | err = -EIO; | 877 | err = -EIO; |
593 | goto exit_bh; | 878 | goto exit_bh; |
594 | } | 879 | } |
595 | if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { | 880 | gdbackups = verify_reserved_gdb(sb, group, primary[res]); |
881 | if (gdbackups < 0) { | ||
596 | brelse(primary[res]); | 882 | brelse(primary[res]); |
597 | err = gdbackups; | 883 | err = gdbackups; |
598 | goto exit_bh; | 884 | goto exit_bh; |
@@ -735,6 +1021,348 @@ exit_err: | |||
735 | } | 1021 | } |
736 | } | 1022 | } |
737 | 1023 | ||
1024 | /* | ||
1025 | * ext4_add_new_descs() adds @count group descriptors of groups | ||
1026 | * starting at @group | ||
1027 | * | ||
1028 | * @handle: journal handle | ||
1029 | * @sb: super block | ||
1030 | * @group: the group no. of the first group desc to be added | ||
1031 | * @resize_inode: the resize inode | ||
1032 | * @count: number of group descriptors to be added | ||
1033 | */ | ||
1034 | static int ext4_add_new_descs(handle_t *handle, struct super_block *sb, | ||
1035 | ext4_group_t group, struct inode *resize_inode, | ||
1036 | ext4_group_t count) | ||
1037 | { | ||
1038 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1039 | struct ext4_super_block *es = sbi->s_es; | ||
1040 | struct buffer_head *gdb_bh; | ||
1041 | int i, gdb_off, gdb_num, err = 0; | ||
1042 | |||
1043 | for (i = 0; i < count; i++, group++) { | ||
1044 | int reserved_gdb = ext4_bg_has_super(sb, group) ? | ||
1045 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | ||
1046 | |||
1047 | gdb_off = group % EXT4_DESC_PER_BLOCK(sb); | ||
1048 | gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | ||
1049 | |||
1050 | /* | ||
1051 | * We will only either add reserved group blocks to a backup group | ||
1052 | * or remove reserved blocks for the first group in a new group block. | ||
1053 | * Doing both would mean more complex code, and sane people don't | ||
1054 | * use non-sparse filesystems anymore. This is already checked above. | ||
1055 | */ | ||
1056 | if (gdb_off) { | ||
1057 | gdb_bh = sbi->s_group_desc[gdb_num]; | ||
1058 | err = ext4_journal_get_write_access(handle, gdb_bh); | ||
1059 | |||
1060 | if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) | ||
1061 | err = reserve_backup_gdb(handle, resize_inode, group); | ||
1062 | } else | ||
1063 | err = add_new_gdb(handle, resize_inode, group); | ||
1064 | if (err) | ||
1065 | break; | ||
1066 | } | ||
1067 | return err; | ||
1068 | } | ||
1069 | |||
1070 | /* | ||
1071 | * ext4_setup_new_descs() will set up the group descriptors of a flex bg | ||
1072 | */ | ||
1073 | static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb, | ||
1074 | struct ext4_new_flex_group_data *flex_gd) | ||
1075 | { | ||
1076 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
1077 | struct ext4_group_desc *gdp; | ||
1078 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1079 | struct buffer_head *gdb_bh; | ||
1080 | ext4_group_t group; | ||
1081 | __u16 *bg_flags = flex_gd->bg_flags; | ||
1082 | int i, gdb_off, gdb_num, err = 0; | ||
1083 | |||
1084 | |||
1085 | for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { | ||
1086 | group = group_data->group; | ||
1087 | |||
1088 | gdb_off = group % EXT4_DESC_PER_BLOCK(sb); | ||
1089 | gdb_num = group / EXT4_DESC_PER_BLOCK(sb); | ||
1090 | |||
1091 | /* | ||
1092 | * get_write_access() has been called on gdb_bh by ext4_add_new_descs(). | ||
1093 | */ | ||
1094 | gdb_bh = sbi->s_group_desc[gdb_num]; | ||
1095 | /* Update group descriptor block for new group */ | ||
1096 | gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data + | ||
1097 | gdb_off * EXT4_DESC_SIZE(sb)); | ||
1098 | |||
1099 | memset(gdp, 0, EXT4_DESC_SIZE(sb)); | ||
1100 | ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap); | ||
1101 | ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap); | ||
1102 | ext4_inode_table_set(sb, gdp, group_data->inode_table); | ||
1103 | ext4_free_group_clusters_set(sb, gdp, | ||
1104 | EXT4_B2C(sbi, group_data->free_blocks_count)); | ||
1105 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | ||
1106 | gdp->bg_flags = cpu_to_le16(*bg_flags); | ||
1107 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp); | ||
1108 | |||
1109 | err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); | ||
1110 | if (unlikely(err)) { | ||
1111 | ext4_std_error(sb, err); | ||
1112 | break; | ||
1113 | } | ||
1114 | |||
1115 | /* | ||
1116 | * We can allocate memory for mb_alloc based on the new group | ||
1117 | * descriptor | ||
1118 | */ | ||
1119 | err = ext4_mb_add_groupinfo(sb, group, gdp); | ||
1120 | if (err) | ||
1121 | break; | ||
1122 | } | ||
1123 | return err; | ||
1124 | } | ||
1125 | |||
1126 | /* | ||
1127 | * ext4_update_super() updates the super block so that the newly added | ||
1128 | * groups can be seen by the filesystem. | ||
1129 | * | ||
1130 | * @sb: super block | ||
1131 | * @flex_gd: new added groups | ||
1132 | */ | ||
1133 | static void ext4_update_super(struct super_block *sb, | ||
1134 | struct ext4_new_flex_group_data *flex_gd) | ||
1135 | { | ||
1136 | ext4_fsblk_t blocks_count = 0; | ||
1137 | ext4_fsblk_t free_blocks = 0; | ||
1138 | ext4_fsblk_t reserved_blocks = 0; | ||
1139 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
1140 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1141 | struct ext4_super_block *es = sbi->s_es; | ||
1142 | int i; | ||
1143 | |||
1144 | BUG_ON(flex_gd->count == 0 || group_data == NULL); | ||
1145 | /* | ||
1146 | * Make the new blocks and inodes valid next. We do this before | ||
1147 | * increasing the group count so that once the group is enabled, | ||
1148 | * all of its blocks and inodes are already valid. | ||
1149 | * | ||
1150 | * We always allocate group-by-group, then block-by-block or | ||
1151 | * inode-by-inode within a group, so enabling these | ||
1152 | * blocks/inodes before the group is live won't actually let us | ||
1153 | * allocate the new space yet. | ||
1154 | */ | ||
1155 | for (i = 0; i < flex_gd->count; i++) { | ||
1156 | blocks_count += group_data[i].blocks_count; | ||
1157 | free_blocks += group_data[i].free_blocks_count; | ||
1158 | } | ||
1159 | |||
1160 | reserved_blocks = ext4_r_blocks_count(es) * 100; | ||
1161 | do_div(reserved_blocks, ext4_blocks_count(es)); | ||
1162 | reserved_blocks *= blocks_count; | ||
1163 | do_div(reserved_blocks, 100); | ||
1164 | |||
1165 | ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count); | ||
1166 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) * | ||
1167 | flex_gd->count); | ||
1168 | |||
1169 | /* | ||
1170 | * We need to protect s_groups_count against other CPUs seeing | ||
1171 | * inconsistent state in the superblock. | ||
1172 | * | ||
1173 | * The precise rules we use are: | ||
1174 | * | ||
1175 | * * Writers must perform a smp_wmb() after updating all | ||
1176 | * dependent data and before modifying the groups count | ||
1177 | * | ||
1178 | * * Readers must perform an smp_rmb() after reading the groups | ||
1179 | * count and before reading any dependent data. | ||
1180 | * | ||
1181 | * NB. These rules can be relaxed when checking the group count | ||
1182 | * while freeing data, as we can only allocate from a block | ||
1183 | * group after serialising against the group count, and we can | ||
1184 | * only then free after serialising in turn against that | ||
1185 | * allocation. | ||
1186 | */ | ||
1187 | smp_wmb(); | ||
1188 | |||
1189 | /* Update the global fs size fields */ | ||
1190 | sbi->s_groups_count += flex_gd->count; | ||
1191 | |||
1192 | /* Update the reserved block counts only once the new group is | ||
1193 | * active. */ | ||
1194 | ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + | ||
1195 | reserved_blocks); | ||
1196 | |||
1197 | /* Update the free space counts */ | ||
1198 | percpu_counter_add(&sbi->s_freeclusters_counter, | ||
1199 | EXT4_B2C(sbi, free_blocks)); | ||
1200 | percpu_counter_add(&sbi->s_freeinodes_counter, | ||
1201 | EXT4_INODES_PER_GROUP(sb) * flex_gd->count); | ||
1202 | |||
1203 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
1204 | EXT4_FEATURE_INCOMPAT_FLEX_BG) && | ||
1205 | sbi->s_log_groups_per_flex) { | ||
1206 | ext4_group_t flex_group; | ||
1207 | flex_group = ext4_flex_group(sbi, group_data[0].group); | ||
1208 | atomic_add(EXT4_B2C(sbi, free_blocks), | ||
1209 | &sbi->s_flex_groups[flex_group].free_clusters); | ||
1210 | atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, | ||
1211 | &sbi->s_flex_groups[flex_group].free_inodes); | ||
1212 | } | ||
1213 | |||
1214 | if (test_opt(sb, DEBUG)) | ||
1215 | printk(KERN_DEBUG "EXT4-fs: added group %u:" | ||
1216 | "%llu blocks(%llu free %llu reserved)\n", flex_gd->count, | ||
1217 | blocks_count, free_blocks, reserved_blocks); | ||
1218 | } | ||
1219 | |||
1220 | /* Add a flex group to an fs. Ensure we handle all possible error conditions | ||
1221 | * _before_ we start modifying the filesystem, because we cannot abort the | ||
1222 | * transaction and not have it write the data to disk. | ||
1223 | */ | ||
1224 | static int ext4_flex_group_add(struct super_block *sb, | ||
1225 | struct inode *resize_inode, | ||
1226 | struct ext4_new_flex_group_data *flex_gd) | ||
1227 | { | ||
1228 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1229 | struct ext4_super_block *es = sbi->s_es; | ||
1230 | ext4_fsblk_t o_blocks_count; | ||
1231 | ext4_grpblk_t last; | ||
1232 | ext4_group_t group; | ||
1233 | handle_t *handle; | ||
1234 | unsigned reserved_gdb; | ||
1235 | int err = 0, err2 = 0, credit; | ||
1236 | |||
1237 | BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags); | ||
1238 | |||
1239 | reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); | ||
1240 | o_blocks_count = ext4_blocks_count(es); | ||
1241 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); | ||
1242 | BUG_ON(last); | ||
1243 | |||
1244 | err = setup_new_flex_group_blocks(sb, flex_gd); | ||
1245 | if (err) | ||
1246 | goto exit; | ||
1247 | /* | ||
1248 | * We will always be modifying at least the superblock and GDT | ||
1249 | * block. If we are adding a group past the last current GDT block, | ||
1250 | * we will also modify the inode and the dindirect block. If we | ||
1251 | * are adding a group with superblock/GDT backups we will also | ||
1252 | * modify each of the reserved GDT dindirect blocks. | ||
1253 | */ | ||
1254 | credit = flex_gd->count * 4 + reserved_gdb; | ||
1255 | handle = ext4_journal_start_sb(sb, credit); | ||
1256 | if (IS_ERR(handle)) { | ||
1257 | err = PTR_ERR(handle); | ||
1258 | goto exit; | ||
1259 | } | ||
1260 | |||
1261 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); | ||
1262 | if (err) | ||
1263 | goto exit_journal; | ||
1264 | |||
1265 | group = flex_gd->groups[0].group; | ||
1266 | BUG_ON(group != EXT4_SB(sb)->s_groups_count); | ||
1267 | err = ext4_add_new_descs(handle, sb, group, | ||
1268 | resize_inode, flex_gd->count); | ||
1269 | if (err) | ||
1270 | goto exit_journal; | ||
1271 | |||
1272 | err = ext4_setup_new_descs(handle, sb, flex_gd); | ||
1273 | if (err) | ||
1274 | goto exit_journal; | ||
1275 | |||
1276 | ext4_update_super(sb, flex_gd); | ||
1277 | |||
1278 | err = ext4_handle_dirty_super(handle, sb); | ||
1279 | |||
1280 | exit_journal: | ||
1281 | err2 = ext4_journal_stop(handle); | ||
1282 | if (!err) | ||
1283 | err = err2; | ||
1284 | |||
1285 | if (!err) { | ||
1286 | int i; | ||
1287 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | ||
1288 | sizeof(struct ext4_super_block)); | ||
1289 | for (i = 0; i < flex_gd->count; i++, group++) { | ||
1290 | struct buffer_head *gdb_bh; | ||
1291 | int gdb_num; | ||
1292 | gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb); | ||
1293 | gdb_bh = sbi->s_group_desc[gdb_num]; | ||
1294 | update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, | ||
1295 | gdb_bh->b_size); | ||
1296 | } | ||
1297 | } | ||
1298 | exit: | ||
1299 | return err; | ||
1300 | } | ||
1301 | |||
1302 | static int ext4_setup_next_flex_gd(struct super_block *sb, | ||
1303 | struct ext4_new_flex_group_data *flex_gd, | ||
1304 | ext4_fsblk_t n_blocks_count, | ||
1305 | unsigned long flexbg_size) | ||
1306 | { | ||
1307 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
1308 | struct ext4_new_group_data *group_data = flex_gd->groups; | ||
1309 | ext4_fsblk_t o_blocks_count; | ||
1310 | ext4_group_t n_group; | ||
1311 | ext4_group_t group; | ||
1312 | ext4_group_t last_group; | ||
1313 | ext4_grpblk_t last; | ||
1314 | ext4_grpblk_t blocks_per_group; | ||
1315 | unsigned long i; | ||
1316 | |||
1317 | blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); | ||
1318 | |||
1319 | o_blocks_count = ext4_blocks_count(es); | ||
1320 | |||
1321 | if (o_blocks_count == n_blocks_count) | ||
1322 | return 0; | ||
1323 | |||
1324 | ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last); | ||
1325 | BUG_ON(last); | ||
1326 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last); | ||
1327 | |||
1328 | last_group = group | (flexbg_size - 1); | ||
1329 | if (last_group > n_group) | ||
1330 | last_group = n_group; | ||
1331 | |||
1332 | flex_gd->count = last_group - group + 1; | ||
1333 | |||
1334 | for (i = 0; i < flex_gd->count; i++) { | ||
1335 | int overhead; | ||
1336 | |||
1337 | group_data[i].group = group + i; | ||
1338 | group_data[i].blocks_count = blocks_per_group; | ||
1339 | overhead = ext4_bg_has_super(sb, group + i) ? | ||
1340 | (1 + ext4_bg_num_gdb(sb, group + i) + | ||
1341 | le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; | ||
1342 | group_data[i].free_blocks_count = blocks_per_group - overhead; | ||
1343 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
1344 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
1345 | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | | ||
1346 | EXT4_BG_INODE_UNINIT; | ||
1347 | else | ||
1348 | flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED; | ||
1349 | } | ||
1350 | |||
1351 | if (last_group == n_group && | ||
1352 | EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
1353 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
1354 | /* We need to initialize block bitmap of last group. */ | ||
1355 | flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; | ||
1356 | |||
1357 | if ((last_group == n_group) && (last != blocks_per_group - 1)) { | ||
1358 | group_data[i - 1].blocks_count = last + 1; | ||
1359 | group_data[i - 1].free_blocks_count -= blocks_per_group- | ||
1360 | last - 1; | ||
1361 | } | ||
1362 | |||
1363 | return 1; | ||
1364 | } | ||
1365 | |||
738 | /* Add group descriptor data to an existing or new group descriptor block. | 1366 | /* Add group descriptor data to an existing or new group descriptor block. |
739 | * Ensure we handle all possible error conditions _before_ we start modifying | 1367 | * Ensure we handle all possible error conditions _before_ we start modifying |
740 | * the filesystem, because we cannot abort the transaction and not have it | 1368 | * the filesystem, because we cannot abort the transaction and not have it |
@@ -750,16 +1378,15 @@ exit_err: | |||
750 | */ | 1378 | */ |
751 | int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | 1379 | int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) |
752 | { | 1380 | { |
1381 | struct ext4_new_flex_group_data flex_gd; | ||
753 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1382 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
754 | struct ext4_super_block *es = sbi->s_es; | 1383 | struct ext4_super_block *es = sbi->s_es; |
755 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 1384 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? |
756 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1385 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
757 | struct buffer_head *primary = NULL; | ||
758 | struct ext4_group_desc *gdp; | ||
759 | struct inode *inode = NULL; | 1386 | struct inode *inode = NULL; |
760 | handle_t *handle; | ||
761 | int gdb_off, gdb_num; | 1387 | int gdb_off, gdb_num; |
762 | int err, err2; | 1388 | int err; |
1389 | __u16 bg_flags = 0; | ||
763 | 1390 | ||
764 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | 1391 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); |
765 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); | 1392 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); |
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
798 | } | 1425 | } |
799 | 1426 | ||
800 | 1427 | ||
801 | if ((err = verify_group_input(sb, input))) | 1428 | err = verify_group_input(sb, input); |
802 | goto exit_put; | 1429 | if (err) |
1430 | goto out; | ||
803 | 1431 | ||
804 | if ((err = setup_new_group_blocks(sb, input))) | 1432 | flex_gd.count = 1; |
805 | goto exit_put; | 1433 | flex_gd.groups = input; |
1434 | flex_gd.bg_flags = &bg_flags; | ||
1435 | err = ext4_flex_group_add(sb, inode, &flex_gd); | ||
1436 | out: | ||
1437 | iput(inode); | ||
1438 | return err; | ||
1439 | } /* ext4_group_add */ | ||
806 | 1440 | ||
807 | /* | 1441 | /* |
808 | * We will always be modifying at least the superblock and a GDT | 1442 | * extend a group without checking assuming that checking has been done. |
809 | * block. If we are adding a group past the last current GDT block, | 1443 | */ |
810 | * we will also modify the inode and the dindirect block. If we | 1444 | static int ext4_group_extend_no_check(struct super_block *sb, |
811 | * are adding a group with superblock/GDT backups we will also | 1445 | ext4_fsblk_t o_blocks_count, ext4_grpblk_t add) |
812 | * modify each of the reserved GDT dindirect blocks. | 1446 | { |
1447 | struct ext4_super_block *es = EXT4_SB(sb)->s_es; | ||
1448 | handle_t *handle; | ||
1449 | int err = 0, err2; | ||
1450 | |||
1451 | /* We will update the superblock, one block bitmap, and | ||
1452 | * one group descriptor via ext4_group_add_blocks(). | ||
813 | */ | 1453 | */ |
814 | handle = ext4_journal_start_sb(sb, | 1454 | handle = ext4_journal_start_sb(sb, 3); |
815 | ext4_bg_has_super(sb, input->group) ? | ||
816 | 3 + reserved_gdb : 4); | ||
817 | if (IS_ERR(handle)) { | 1455 | if (IS_ERR(handle)) { |
818 | err = PTR_ERR(handle); | 1456 | err = PTR_ERR(handle); |
819 | goto exit_put; | 1457 | ext4_warning(sb, "error %d on journal start", err); |
1458 | return err; | ||
820 | } | 1459 | } |
821 | 1460 | ||
822 | if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) | 1461 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
823 | goto exit_journal; | 1462 | if (err) { |
824 | 1463 | ext4_warning(sb, "error %d on journal write access", err); | |
825 | /* | 1464 | goto errout; |
826 | * We will only either add reserved group blocks to a backup group | ||
827 | * or remove reserved blocks for the first group in a new group block. | ||
828 | * Doing both would be mean more complex code, and sane people don't | ||
829 | * use non-sparse filesystems anymore. This is already checked above. | ||
830 | */ | ||
831 | if (gdb_off) { | ||
832 | primary = sbi->s_group_desc[gdb_num]; | ||
833 | if ((err = ext4_journal_get_write_access(handle, primary))) | ||
834 | goto exit_journal; | ||
835 | |||
836 | if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) { | ||
837 | err = reserve_backup_gdb(handle, inode, input->group); | ||
838 | if (err) | ||
839 | goto exit_journal; | ||
840 | } | ||
841 | } else { | ||
842 | /* | ||
843 | * Note that we can access new group descriptor block safely | ||
844 | * only if add_new_gdb() succeeds. | ||
845 | */ | ||
846 | err = add_new_gdb(handle, inode, input->group); | ||
847 | if (err) | ||
848 | goto exit_journal; | ||
849 | primary = sbi->s_group_desc[gdb_num]; | ||
850 | } | 1465 | } |
851 | 1466 | ||
852 | /* | 1467 | ext4_blocks_count_set(es, o_blocks_count + add); |
853 | * OK, now we've set up the new group. Time to make it active. | 1468 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, |
854 | * | 1469 | o_blocks_count + add); |
855 | * so we have to be safe wrt. concurrent accesses the group | 1470 | /* We add the blocks to the bitmap and set the group need init bit */ |
856 | * data. So we need to be careful to set all of the relevant | 1471 | err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); |
857 | * group descriptor data etc. *before* we enable the group. | ||
858 | * | ||
859 | * The key field here is sbi->s_groups_count: as long as | ||
860 | * that retains its old value, nobody is going to access the new | ||
861 | * group. | ||
862 | * | ||
863 | * So first we update all the descriptor metadata for the new | ||
864 | * group; then we update the total disk blocks count; then we | ||
865 | * update the groups count to enable the group; then finally we | ||
866 | * update the free space counts so that the system can start | ||
867 | * using the new disk blocks. | ||
868 | */ | ||
869 | |||
870 | /* Update group descriptor block for new group */ | ||
871 | gdp = (struct ext4_group_desc *)((char *)primary->b_data + | ||
872 | gdb_off * EXT4_DESC_SIZE(sb)); | ||
873 | |||
874 | memset(gdp, 0, EXT4_DESC_SIZE(sb)); | ||
875 | ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ | ||
876 | ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ | ||
877 | ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ | ||
878 | ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count); | ||
879 | ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); | ||
880 | gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED); | ||
881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp); | ||
882 | |||
883 | /* | ||
884 | * We can allocate memory for mb_alloc based on the new group | ||
885 | * descriptor | ||
886 | */ | ||
887 | err = ext4_mb_add_groupinfo(sb, input->group, gdp); | ||
888 | if (err) | 1472 | if (err) |
889 | goto exit_journal; | 1473 | goto errout; |
890 | |||
891 | /* | ||
892 | * Make the new blocks and inodes valid next. We do this before | ||
893 | * increasing the group count so that once the group is enabled, | ||
894 | * all of its blocks and inodes are already valid. | ||
895 | * | ||
896 | * We always allocate group-by-group, then block-by-block or | ||
897 | * inode-by-inode within a group, so enabling these | ||
898 | * blocks/inodes before the group is live won't actually let us | ||
899 | * allocate the new space yet. | ||
900 | */ | ||
901 | ext4_blocks_count_set(es, ext4_blocks_count(es) + | ||
902 | input->blocks_count); | ||
903 | le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb)); | ||
904 | |||
905 | /* | ||
906 | * We need to protect s_groups_count against other CPUs seeing | ||
907 | * inconsistent state in the superblock. | ||
908 | * | ||
909 | * The precise rules we use are: | ||
910 | * | ||
911 | * * Writers must perform a smp_wmb() after updating all dependent | ||
912 | * data and before modifying the groups count | ||
913 | * | ||
914 | * * Readers must perform an smp_rmb() after reading the groups count | ||
915 | * and before reading any dependent data. | ||
916 | * | ||
917 | * NB. These rules can be relaxed when checking the group count | ||
918 | * while freeing data, as we can only allocate from a block | ||
919 | * group after serialising against the group count, and we can | ||
920 | * only then free after serialising in turn against that | ||
921 | * allocation. | ||
922 | */ | ||
923 | smp_wmb(); | ||
924 | |||
925 | /* Update the global fs size fields */ | ||
926 | sbi->s_groups_count++; | ||
927 | |||
928 | err = ext4_handle_dirty_metadata(handle, NULL, primary); | ||
929 | if (unlikely(err)) { | ||
930 | ext4_std_error(sb, err); | ||
931 | goto exit_journal; | ||
932 | } | ||
933 | |||
934 | /* Update the reserved block counts only once the new group is | ||
935 | * active. */ | ||
936 | ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + | ||
937 | input->reserved_blocks); | ||
938 | |||
939 | /* Update the free space counts */ | ||
940 | percpu_counter_add(&sbi->s_freeclusters_counter, | ||
941 | EXT4_B2C(sbi, input->free_blocks_count)); | ||
942 | percpu_counter_add(&sbi->s_freeinodes_counter, | ||
943 | EXT4_INODES_PER_GROUP(sb)); | ||
944 | |||
945 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && | ||
946 | sbi->s_log_groups_per_flex) { | ||
947 | ext4_group_t flex_group; | ||
948 | flex_group = ext4_flex_group(sbi, input->group); | ||
949 | atomic_add(EXT4_B2C(sbi, input->free_blocks_count), | ||
950 | &sbi->s_flex_groups[flex_group].free_clusters); | ||
951 | atomic_add(EXT4_INODES_PER_GROUP(sb), | ||
952 | &sbi->s_flex_groups[flex_group].free_inodes); | ||
953 | } | ||
954 | |||
955 | ext4_handle_dirty_super(handle, sb); | 1474 | ext4_handle_dirty_super(handle, sb); |
956 | 1475 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | |
957 | exit_journal: | 1476 | o_blocks_count + add); |
958 | if ((err2 = ext4_journal_stop(handle)) && !err) | 1477 | errout: |
1478 | err2 = ext4_journal_stop(handle); | ||
1479 | if (err2 && !err) | ||
959 | err = err2; | 1480 | err = err2; |
960 | if (!err && primary) { | 1481 | |
961 | update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, | 1482 | if (!err) { |
1483 | if (test_opt(sb, DEBUG)) | ||
1484 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | ||
1485 | "blocks\n", ext4_blocks_count(es)); | ||
1486 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, | ||
962 | sizeof(struct ext4_super_block)); | 1487 | sizeof(struct ext4_super_block)); |
963 | update_backups(sb, primary->b_blocknr, primary->b_data, | ||
964 | primary->b_size); | ||
965 | } | 1488 | } |
966 | exit_put: | ||
967 | iput(inode); | ||
968 | return err; | 1489 | return err; |
969 | } /* ext4_group_add */ | 1490 | } |
970 | 1491 | ||
971 | /* | 1492 | /* |
972 | * Extend the filesystem to the new number of blocks specified. This entry | 1493 | * Extend the filesystem to the new number of blocks specified. This entry |
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
985 | ext4_grpblk_t last; | 1506 | ext4_grpblk_t last; |
986 | ext4_grpblk_t add; | 1507 | ext4_grpblk_t add; |
987 | struct buffer_head *bh; | 1508 | struct buffer_head *bh; |
988 | handle_t *handle; | 1509 | int err; |
989 | int err, err2; | ||
990 | ext4_group_t group; | 1510 | ext4_group_t group; |
991 | 1511 | ||
992 | o_blocks_count = ext4_blocks_count(es); | 1512 | o_blocks_count = ext4_blocks_count(es); |
@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
1042 | } | 1562 | } |
1043 | brelse(bh); | 1563 | brelse(bh); |
1044 | 1564 | ||
1045 | /* We will update the superblock, one block bitmap, and | 1565 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); |
1046 | * one group descriptor via ext4_free_blocks(). | 1566 | return err; |
1047 | */ | 1567 | } /* ext4_group_extend */ |
1048 | handle = ext4_journal_start_sb(sb, 3); | 1568 | |
1049 | if (IS_ERR(handle)) { | 1569 | /* |
1050 | err = PTR_ERR(handle); | 1570 | * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count |
1051 | ext4_warning(sb, "error %d on journal start", err); | 1571 | * |
1052 | goto exit_put; | 1572 | * @sb: super block of the fs to be resized |
1573 | * @n_blocks_count: the number of blocks resides in the resized fs | ||
1574 | */ | ||
1575 | int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count) | ||
1576 | { | ||
1577 | struct ext4_new_flex_group_data *flex_gd = NULL; | ||
1578 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
1579 | struct ext4_super_block *es = sbi->s_es; | ||
1580 | struct buffer_head *bh; | ||
1581 | struct inode *resize_inode; | ||
1582 | ext4_fsblk_t o_blocks_count; | ||
1583 | ext4_group_t o_group; | ||
1584 | ext4_group_t n_group; | ||
1585 | ext4_grpblk_t offset; | ||
1586 | unsigned long n_desc_blocks; | ||
1587 | unsigned long o_desc_blocks; | ||
1588 | unsigned long desc_blocks; | ||
1589 | int err = 0, flexbg_size = 1; | ||
1590 | |||
1591 | o_blocks_count = ext4_blocks_count(es); | ||
1592 | |||
1593 | if (test_opt(sb, DEBUG)) | ||
1594 | printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu " | ||
1595 | "upto %llu blocks\n", o_blocks_count, n_blocks_count); | ||
1596 | |||
1597 | if (n_blocks_count < o_blocks_count) { | ||
1598 | /* On-line shrinking not supported */ | ||
1599 | ext4_warning(sb, "can't shrink FS - resize aborted"); | ||
1600 | return -EINVAL; | ||
1053 | } | 1601 | } |
1054 | 1602 | ||
1055 | if ((err = ext4_journal_get_write_access(handle, | 1603 | if (n_blocks_count == o_blocks_count) |
1056 | EXT4_SB(sb)->s_sbh))) { | 1604 | /* Nothing need to do */ |
1057 | ext4_warning(sb, "error %d on journal write access", err); | 1605 | return 0; |
1058 | ext4_journal_stop(handle); | 1606 | |
1059 | goto exit_put; | 1607 | ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); |
1608 | ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset); | ||
1609 | |||
1610 | n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / | ||
1611 | EXT4_DESC_PER_BLOCK(sb); | ||
1612 | o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / | ||
1613 | EXT4_DESC_PER_BLOCK(sb); | ||
1614 | desc_blocks = n_desc_blocks - o_desc_blocks; | ||
1615 | |||
1616 | if (desc_blocks && | ||
1617 | (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) || | ||
1618 | le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) { | ||
1619 | ext4_warning(sb, "No reserved GDT blocks, can't resize"); | ||
1620 | return -EPERM; | ||
1060 | } | 1621 | } |
1061 | ext4_blocks_count_set(es, o_blocks_count + add); | ||
1062 | ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count, | ||
1063 | o_blocks_count + add); | ||
1064 | /* We add the blocks to the bitmap and set the group need init bit */ | ||
1065 | err = ext4_group_add_blocks(handle, sb, o_blocks_count, add); | ||
1066 | ext4_handle_dirty_super(handle, sb); | ||
1067 | ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, | ||
1068 | o_blocks_count + add); | ||
1069 | err2 = ext4_journal_stop(handle); | ||
1070 | if (!err && err2) | ||
1071 | err = err2; | ||
1072 | 1622 | ||
1073 | if (err) | 1623 | resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); |
1074 | goto exit_put; | 1624 | if (IS_ERR(resize_inode)) { |
1625 | ext4_warning(sb, "Error opening resize inode"); | ||
1626 | return PTR_ERR(resize_inode); | ||
1627 | } | ||
1075 | 1628 | ||
1629 | /* See if the device is actually as big as what was requested */ | ||
1630 | bh = sb_bread(sb, n_blocks_count - 1); | ||
1631 | if (!bh) { | ||
1632 | ext4_warning(sb, "can't read last block, resize aborted"); | ||
1633 | return -ENOSPC; | ||
1634 | } | ||
1635 | brelse(bh); | ||
1636 | |||
1637 | if (offset != 0) { | ||
1638 | /* extend the last group */ | ||
1639 | ext4_grpblk_t add; | ||
1640 | add = EXT4_BLOCKS_PER_GROUP(sb) - offset; | ||
1641 | err = ext4_group_extend_no_check(sb, o_blocks_count, add); | ||
1642 | if (err) | ||
1643 | goto out; | ||
1644 | } | ||
1645 | |||
1646 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && | ||
1647 | es->s_log_groups_per_flex) | ||
1648 | flexbg_size = 1 << es->s_log_groups_per_flex; | ||
1649 | |||
1650 | o_blocks_count = ext4_blocks_count(es); | ||
1651 | if (o_blocks_count == n_blocks_count) | ||
1652 | goto out; | ||
1653 | |||
1654 | flex_gd = alloc_flex_gd(flexbg_size); | ||
1655 | if (flex_gd == NULL) { | ||
1656 | err = -ENOMEM; | ||
1657 | goto out; | ||
1658 | } | ||
1659 | |||
1660 | /* Add flex groups. Note that a regular group is a | ||
1661 | * flex group with 1 group. | ||
1662 | */ | ||
1663 | while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, | ||
1664 | flexbg_size)) { | ||
1665 | ext4_alloc_group_tables(sb, flex_gd, flexbg_size); | ||
1666 | err = ext4_flex_group_add(sb, resize_inode, flex_gd); | ||
1667 | if (unlikely(err)) | ||
1668 | break; | ||
1669 | } | ||
1670 | |||
1671 | out: | ||
1672 | if (flex_gd) | ||
1673 | free_flex_gd(flex_gd); | ||
1674 | |||
1675 | iput(resize_inode); | ||
1076 | if (test_opt(sb, DEBUG)) | 1676 | if (test_opt(sb, DEBUG)) |
1077 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", | 1677 | printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu " |
1078 | ext4_blocks_count(es)); | 1678 | "upto %llu blocks\n", o_blocks_count, n_blocks_count); |
1079 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, | ||
1080 | sizeof(struct ext4_super_block)); | ||
1081 | exit_put: | ||
1082 | return err; | 1679 | return err; |
1083 | } /* ext4_group_extend */ | 1680 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ed3ce82e2de4..502c61fd7392 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root) | |||
1095 | } | 1095 | } |
1096 | if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { | 1096 | if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { |
1097 | seq_printf(seq, ",max_batch_time=%u", | 1097 | seq_printf(seq, ",max_batch_time=%u", |
1098 | (unsigned) sbi->s_min_batch_time); | 1098 | (unsigned) sbi->s_max_batch_time); |
1099 | } | 1099 | } |
1100 | 1100 | ||
1101 | /* | 1101 | /* |
@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
2005 | struct ext4_group_desc *gdp = NULL; | 2005 | struct ext4_group_desc *gdp = NULL; |
2006 | ext4_group_t flex_group_count; | 2006 | ext4_group_t flex_group_count; |
2007 | ext4_group_t flex_group; | 2007 | ext4_group_t flex_group; |
2008 | int groups_per_flex = 0; | 2008 | unsigned int groups_per_flex = 0; |
2009 | size_t size; | 2009 | size_t size; |
2010 | int i; | 2010 | int i; |
2011 | 2011 | ||
2012 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 2012 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
2013 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 2013 | if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { |
2014 | |||
2015 | if (groups_per_flex < 2) { | ||
2016 | sbi->s_log_groups_per_flex = 0; | 2014 | sbi->s_log_groups_per_flex = 0; |
2017 | return 1; | 2015 | return 1; |
2018 | } | 2016 | } |
2017 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | ||
2019 | 2018 | ||
2020 | /* We allocate both existing and potentially added groups */ | 2019 | /* We allocate both existing and potentially added groups */ |
2021 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + | 2020 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3506 | * of the filesystem. | 3505 | * of the filesystem. |
3507 | */ | 3506 | */ |
3508 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { | 3507 | if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { |
3509 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data" | 3508 | ext4_msg(sb, KERN_WARNING, "bad geometry: first data " |
3510 | "block %u is beyond end of filesystem (%llu)", | 3509 | "block %u is beyond end of filesystem (%llu)", |
3511 | le32_to_cpu(es->s_first_data_block), | 3510 | le32_to_cpu(es->s_first_data_block), |
3512 | ext4_blocks_count(es)); | 3511 | ext4_blocks_count(es)); |
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index b60f9f81e33c..d2a200624af5 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c | |||
@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name, | |||
47 | name, value, size, flags); | 47 | name, value, size, flags); |
48 | } | 48 | } |
49 | 49 | ||
50 | int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, | 50 | static int |
51 | void *fs_info) | 51 | ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
52 | void *fs_info) | ||
52 | { | 53 | { |
53 | const struct xattr *xattr; | 54 | const struct xattr *xattr; |
54 | handle_t *handle = fs_info; | 55 | handle_t *handle = fs_info; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 68d704db787f..5069b8475150 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -430,6 +430,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
430 | jbd_debug(3, "JBD2: commit phase 1\n"); | 430 | jbd_debug(3, "JBD2: commit phase 1\n"); |
431 | 431 | ||
432 | /* | 432 | /* |
433 | * Clear revoked flag to reflect there is no revoked buffers | ||
434 | * in the next transaction which is going to be started. | ||
435 | */ | ||
436 | jbd2_clear_buffer_revoked_flags(journal); | ||
437 | |||
438 | /* | ||
433 | * Switch to a new revoke table. | 439 | * Switch to a new revoke table. |
434 | */ | 440 | */ |
435 | jbd2_journal_switch_revoke_table(journal); | 441 | jbd2_journal_switch_revoke_table(journal); |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 69fd93588118..30b2867d6cc9 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
@@ -47,6 +47,10 @@ | |||
47 | * overwriting the new data. We don't even need to clear the revoke | 47 | * overwriting the new data. We don't even need to clear the revoke |
48 | * bit here. | 48 | * bit here. |
49 | * | 49 | * |
50 | * We cache revoke status of a buffer in the current transaction in b_states | ||
51 | * bits. As the name says, revokevalid flag indicates that the cached revoke | ||
52 | * status of a buffer is valid and we can rely on the cached status. | ||
53 | * | ||
50 | * Revoke information on buffers is a tri-state value: | 54 | * Revoke information on buffers is a tri-state value: |
51 | * | 55 | * |
52 | * RevokeValid clear: no cached revoke status, need to look it up | 56 | * RevokeValid clear: no cached revoke status, need to look it up |
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) | |||
478 | return did_revoke; | 482 | return did_revoke; |
479 | } | 483 | } |
480 | 484 | ||
485 | /* | ||
486 | * journal_clear_revoked_flag clears revoked flag of buffers in | ||
487 | * revoke table to reflect there is no revoked buffers in the next | ||
488 | * transaction which is going to be started. | ||
489 | */ | ||
490 | void jbd2_clear_buffer_revoked_flags(journal_t *journal) | ||
491 | { | ||
492 | struct jbd2_revoke_table_s *revoke = journal->j_revoke; | ||
493 | int i = 0; | ||
494 | |||
495 | for (i = 0; i < revoke->hash_size; i++) { | ||
496 | struct list_head *hash_list; | ||
497 | struct list_head *list_entry; | ||
498 | hash_list = &revoke->hash_table[i]; | ||
499 | |||
500 | list_for_each(list_entry, hash_list) { | ||
501 | struct jbd2_revoke_record_s *record; | ||
502 | struct buffer_head *bh; | ||
503 | record = (struct jbd2_revoke_record_s *)list_entry; | ||
504 | bh = __find_get_block(journal->j_fs_dev, | ||
505 | record->blocknr, | ||
506 | journal->j_blocksize); | ||
507 | if (bh) { | ||
508 | clear_buffer_revoked(bh); | ||
509 | __brelse(bh); | ||
510 | } | ||
511 | } | ||
512 | } | ||
513 | } | ||
514 | |||
481 | /* journal_switch_revoke table select j_revoke for next transaction | 515 | /* journal_switch_revoke table select j_revoke for next transaction |
482 | * we do not want to suspend any processing until all revokes are | 516 | * we do not want to suspend any processing until all revokes are |
483 | * written -bzzz | 517 | * written -bzzz |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index a0e41a4c080e..35ae096bed5d 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
517 | break; | 517 | break; |
518 | 518 | ||
519 | spin_lock(&transaction->t_handle_lock); | 519 | spin_lock(&transaction->t_handle_lock); |
520 | prepare_to_wait(&journal->j_wait_updates, &wait, | ||
521 | TASK_UNINTERRUPTIBLE); | ||
520 | if (!atomic_read(&transaction->t_updates)) { | 522 | if (!atomic_read(&transaction->t_updates)) { |
521 | spin_unlock(&transaction->t_handle_lock); | 523 | spin_unlock(&transaction->t_handle_lock); |
524 | finish_wait(&journal->j_wait_updates, &wait); | ||
522 | break; | 525 | break; |
523 | } | 526 | } |
524 | prepare_to_wait(&journal->j_wait_updates, &wait, | ||
525 | TASK_UNINTERRUPTIBLE); | ||
526 | spin_unlock(&transaction->t_handle_lock); | 527 | spin_unlock(&transaction->t_handle_lock); |
527 | write_unlock(&journal->j_state_lock); | 528 | write_unlock(&journal->j_state_lock); |
528 | schedule(); | 529 | schedule(); |