aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/ext4.h29
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/ialloc.c18
-rw-r--r--fs/ext4/inode.c143
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/resize.c1175
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/ext4/xattr_security.c5
10 files changed, 1025 insertions, 458 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 12ccacda44e0..f9e2cd8cf711 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -23,6 +23,8 @@
23 23
24#include <trace/events/ext4.h> 24#include <trace/events/ext4.h>
25 25
26static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
27 ext4_group_t block_group);
26/* 28/*
27 * balloc.c contains the blocks allocation and deallocation routines 29 * balloc.c contains the blocks allocation and deallocation routines
28 */ 30 */
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
668 * This function returns the number of file system metadata clusters at 670 * This function returns the number of file system metadata clusters at
669 * the beginning of a block group, including the reserved gdt blocks. 671 * the beginning of a block group, including the reserved gdt blocks.
670 */ 672 */
671unsigned ext4_num_base_meta_clusters(struct super_block *sb, 673static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
672 ext4_group_t block_group) 674 ext4_group_t block_group)
673{ 675{
674 struct ext4_sb_info *sbi = EXT4_SB(sb); 676 struct ext4_sb_info *sbi = EXT4_SB(sb);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1554b15f91bc..513004fc3d84 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,6 +511,14 @@ struct ext4_new_group_data {
511 __u32 free_blocks_count; 511 __u32 free_blocks_count;
512}; 512};
513 513
514/* Indexes used to index group tables in ext4_new_group_data */
515enum {
516 BLOCK_BITMAP = 0, /* block bitmap */
517 INODE_BITMAP, /* inode bitmap */
518 INODE_TABLE, /* inode tables */
519 GROUP_TABLE_COUNT,
520};
521
514/* 522/*
515 * Flags used by ext4_map_blocks() 523 * Flags used by ext4_map_blocks()
516 */ 524 */
@@ -575,6 +583,7 @@ struct ext4_new_group_data {
575 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ 583 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
576#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) 584#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
577#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) 585#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
586#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
578 587
579#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 588#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
580/* 589/*
@@ -957,12 +966,13 @@ struct ext4_inode_info {
957#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 966#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
958 EXT4_MOUNT2_##opt) 967 EXT4_MOUNT2_##opt)
959 968
960#define ext4_set_bit __test_and_set_bit_le 969#define ext4_test_and_set_bit __test_and_set_bit_le
970#define ext4_set_bit __set_bit_le
961#define ext4_set_bit_atomic ext2_set_bit_atomic 971#define ext4_set_bit_atomic ext2_set_bit_atomic
962#define ext4_clear_bit __test_and_clear_bit_le 972#define ext4_test_and_clear_bit __test_and_clear_bit_le
973#define ext4_clear_bit __clear_bit_le
963#define ext4_clear_bit_atomic ext2_clear_bit_atomic 974#define ext4_clear_bit_atomic ext2_clear_bit_atomic
964#define ext4_test_bit test_bit_le 975#define ext4_test_bit test_bit_le
965#define ext4_find_first_zero_bit find_first_zero_bit_le
966#define ext4_find_next_zero_bit find_next_zero_bit_le 976#define ext4_find_next_zero_bit find_next_zero_bit_le
967#define ext4_find_next_bit find_next_bit_le 977#define ext4_find_next_bit find_next_bit_le
968 978
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1397#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1407#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
1398#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1408#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
1399#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 1409#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
1410#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
1400 1411
1401#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1412#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1402#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1413#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1409#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1420#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
1410#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ 1421#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
1411#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ 1422#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
1423#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */
1424#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
1412 1425
1413#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1426#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
1414#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1427#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb,
1790extern unsigned ext4_free_clusters_after_init(struct super_block *sb, 1803extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
1791 ext4_group_t block_group, 1804 ext4_group_t block_group,
1792 struct ext4_group_desc *gdp); 1805 struct ext4_group_desc *gdp);
1793extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
1794 ext4_group_t block_group);
1795extern unsigned ext4_num_overhead_clusters(struct super_block *sb, 1806extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
1796 ext4_group_t block_group, 1807 ext4_group_t block_group,
1797 struct ext4_group_desc *gdp); 1808 struct ext4_group_desc *gdp);
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
1880extern void ext4_set_aops(struct inode *inode); 1891extern void ext4_set_aops(struct inode *inode);
1881extern int ext4_writepage_trans_blocks(struct inode *); 1892extern int ext4_writepage_trans_blocks(struct inode *);
1882extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1893extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1883extern int ext4_block_truncate_page(handle_t *handle,
1884 struct address_space *mapping, loff_t from);
1885extern int ext4_block_zero_page_range(handle_t *handle,
1886 struct address_space *mapping, loff_t from, loff_t length);
1887extern int ext4_discard_partial_page_buffers(handle_t *handle, 1894extern int ext4_discard_partial_page_buffers(handle_t *handle,
1888 struct address_space *mapping, loff_t from, 1895 struct address_space *mapping, loff_t from,
1889 loff_t length, int flags); 1896 loff_t length, int flags);
1890extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
1891 struct inode *inode, struct page *page, loff_t from,
1892 loff_t length, int flags);
1893extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1897extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1894extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1898extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1895extern void ext4_da_update_reserve_space(struct inode *inode, 1899extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb,
1924extern int ext4_group_extend(struct super_block *sb, 1928extern int ext4_group_extend(struct super_block *sb,
1925 struct ext4_super_block *es, 1929 struct ext4_super_block *es,
1926 ext4_fsblk_t n_blocks_count); 1930 ext4_fsblk_t n_blocks_count);
1931extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
1927 1932
1928/* super.c */ 1933/* super.c */
1929extern void *ext4_kvmalloc(size_t size, gfp_t flags); 1934extern void *ext4_kvmalloc(size_t size, gfp_t flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 841faf5fb785..74f23c292e1b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode,
3280 ext4_lblk_t i, pg_lblk; 3280 ext4_lblk_t i, pg_lblk;
3281 pgoff_t index; 3281 pgoff_t index;
3282 3282
3283 if (!test_opt(inode->i_sb, DELALLOC))
3284 return 0;
3285
3283 /* reverse search wont work if fs block size is less than page size */ 3286 /* reverse search wont work if fs block size is less than page size */
3284 if (inode->i_blkbits < PAGE_CACHE_SHIFT) 3287 if (inode->i_blkbits < PAGE_CACHE_SHIFT)
3285 search_hint_reverse = 0; 3288 search_hint_reverse = 0;
@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3452 int err = 0; 3455 int err = 0;
3453 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3456 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3454 3457
3455 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3458 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
3456 "block %llu, max_blocks %u, flags %d, allocated %u", 3459 "block %llu, max_blocks %u, flags %x, allocated %u\n",
3457 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3460 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
3458 flags, allocated); 3461 flags, allocated);
3459 ext4_ext_show_leaf(inode, path); 3462 ext4_ext_show_leaf(inode, path);
@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
3624 struct ext4_sb_info *sbi = EXT4_SB(sb); 3627 struct ext4_sb_info *sbi = EXT4_SB(sb);
3625 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 3628 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3626 ext4_lblk_t ex_cluster_start, ex_cluster_end; 3629 ext4_lblk_t ex_cluster_start, ex_cluster_end;
3627 ext4_lblk_t rr_cluster_start, rr_cluster_end; 3630 ext4_lblk_t rr_cluster_start;
3628 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3631 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3629 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 3632 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3630 unsigned short ee_len = ext4_ext_get_actual_len(ex); 3633 unsigned short ee_len = ext4_ext_get_actual_len(ex);
@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb,
3635 3638
3636 /* The requested region passed into ext4_map_blocks() */ 3639 /* The requested region passed into ext4_map_blocks() */
3637 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); 3640 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
3638 rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
3639 3641
3640 if ((rr_cluster_start == ex_cluster_end) || 3642 if ((rr_cluster_start == ex_cluster_end) ||
3641 (rr_cluster_start == ex_cluster_start)) { 3643 (rr_cluster_start == ex_cluster_start)) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4637af036d9c..25d8c9781ad9 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
252 fatal = ext4_journal_get_write_access(handle, bh2); 252 fatal = ext4_journal_get_write_access(handle, bh2);
253 } 253 }
254 ext4_lock_group(sb, block_group); 254 ext4_lock_group(sb, block_group);
255 cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 255 cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
256 if (fatal || !cleared) { 256 if (fatal || !cleared) {
257 ext4_unlock_group(sb, block_group); 257 ext4_unlock_group(sb, block_group);
258 goto out; 258 goto out;
@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
358 struct ext4_sb_info *sbi = EXT4_SB(sb); 358 struct ext4_sb_info *sbi = EXT4_SB(sb);
359 ext4_group_t real_ngroups = ext4_get_groups_count(sb); 359 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
361 unsigned int freei, avefreei; 361 unsigned int freei, avefreei, grp_free;
362 ext4_fsblk_t freeb, avefreec; 362 ext4_fsblk_t freeb, avefreec;
363 unsigned int ndirs; 363 unsigned int ndirs;
364 int max_dirs, min_inodes; 364 int max_dirs, min_inodes;
@@ -477,8 +477,8 @@ fallback_retry:
477 for (i = 0; i < ngroups; i++) { 477 for (i = 0; i < ngroups; i++) {
478 grp = (parent_group + i) % ngroups; 478 grp = (parent_group + i) % ngroups;
479 desc = ext4_get_group_desc(sb, grp, NULL); 479 desc = ext4_get_group_desc(sb, grp, NULL);
480 if (desc && ext4_free_inodes_count(sb, desc) && 480 grp_free = ext4_free_inodes_count(sb, desc);
481 ext4_free_inodes_count(sb, desc) >= avefreei) { 481 if (desc && grp_free && grp_free >= avefreei) {
482 *group = grp; 482 *group = grp;
483 return 0; 483 return 0;
484 } 484 }
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb,
618 */ 618 */
619 down_read(&grp->alloc_sem); 619 down_read(&grp->alloc_sem);
620 ext4_lock_group(sb, group); 620 ext4_lock_group(sb, group);
621 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 621 if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
622 /* not a free inode */ 622 /* not a free inode */
623 retval = 1; 623 retval = 1;
624 goto err_ret; 624 goto err_ret;
@@ -885,8 +885,12 @@ got:
885 if (IS_DIRSYNC(inode)) 885 if (IS_DIRSYNC(inode))
886 ext4_handle_sync(handle); 886 ext4_handle_sync(handle);
887 if (insert_inode_locked(inode) < 0) { 887 if (insert_inode_locked(inode) < 0) {
888 err = -EINVAL; 888 /*
889 goto fail_drop; 889 * Likely a bitmap corruption causing inode to be allocated
890 * twice.
891 */
892 err = -EIO;
893 goto fail;
890 } 894 }
891 spin_lock(&sbi->s_next_gen_lock); 895 spin_lock(&sbi->s_next_gen_lock);
892 inode->i_generation = sbi->s_next_generation++; 896 inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aa8efa6572d6..feaa82fe629d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
71static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); 71static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
72static int __ext4_journalled_writepage(struct page *page, unsigned int len); 72static int __ext4_journalled_writepage(struct page *page, unsigned int len);
73static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 73static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
74static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
75 struct inode *inode, struct page *page, loff_t from,
76 loff_t length, int flags);
74 77
75/* 78/*
76 * Test whether an inode is a fast symlink. 79 * Test whether an inode is a fast symlink.
@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2759 if (!io_end || !size) 2762 if (!io_end || !size)
2760 goto out; 2763 goto out;
2761 2764
2762 ext_debug("ext4_end_io_dio(): io_end 0x%p" 2765 ext_debug("ext4_end_io_dio(): io_end 0x%p "
2763 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 2766 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
2764 iocb->private, io_end->inode->i_ino, iocb, offset, 2767 iocb->private, io_end->inode->i_ino, iocb, offset,
2765 size); 2768 size);
@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
3160 * 3163 *
3161 * Returns zero on sucess or negative on failure. 3164 * Returns zero on sucess or negative on failure.
3162 */ 3165 */
3163int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 3166static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3164 struct inode *inode, struct page *page, loff_t from, 3167 struct inode *inode, struct page *page, loff_t from,
3165 loff_t length, int flags) 3168 loff_t length, int flags)
3166{ 3169{
@@ -3300,126 +3303,6 @@ next:
3300 return err; 3303 return err;
3301} 3304}
3302 3305
3303/*
3304 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3305 * up to the end of the block which corresponds to `from'.
3306 * This required during truncate. We need to physically zero the tail end
3307 * of that block so it doesn't yield old data if the file is later grown.
3308 */
3309int ext4_block_truncate_page(handle_t *handle,
3310 struct address_space *mapping, loff_t from)
3311{
3312 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3313 unsigned length;
3314 unsigned blocksize;
3315 struct inode *inode = mapping->host;
3316
3317 blocksize = inode->i_sb->s_blocksize;
3318 length = blocksize - (offset & (blocksize - 1));
3319
3320 return ext4_block_zero_page_range(handle, mapping, from, length);
3321}
3322
3323/*
3324 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3325 * starting from file offset 'from'. The range to be zero'd must
3326 * be contained with in one block. If the specified range exceeds
3327 * the end of the block it will be shortened to end of the block
3328 * that cooresponds to 'from'
3329 */
3330int ext4_block_zero_page_range(handle_t *handle,
3331 struct address_space *mapping, loff_t from, loff_t length)
3332{
3333 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3334 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3335 unsigned blocksize, max, pos;
3336 ext4_lblk_t iblock;
3337 struct inode *inode = mapping->host;
3338 struct buffer_head *bh;
3339 struct page *page;
3340 int err = 0;
3341
3342 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3343 mapping_gfp_mask(mapping) & ~__GFP_FS);
3344 if (!page)
3345 return -ENOMEM;
3346
3347 blocksize = inode->i_sb->s_blocksize;
3348 max = blocksize - (offset & (blocksize - 1));
3349
3350 /*
3351 * correct length if it does not fall between
3352 * 'from' and the end of the block
3353 */
3354 if (length > max || length < 0)
3355 length = max;
3356
3357 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3358
3359 if (!page_has_buffers(page))
3360 create_empty_buffers(page, blocksize, 0);
3361
3362 /* Find the buffer that contains "offset" */
3363 bh = page_buffers(page);
3364 pos = blocksize;
3365 while (offset >= pos) {
3366 bh = bh->b_this_page;
3367 iblock++;
3368 pos += blocksize;
3369 }
3370
3371 err = 0;
3372 if (buffer_freed(bh)) {
3373 BUFFER_TRACE(bh, "freed: skip");
3374 goto unlock;
3375 }
3376
3377 if (!buffer_mapped(bh)) {
3378 BUFFER_TRACE(bh, "unmapped");
3379 ext4_get_block(inode, iblock, bh, 0);
3380 /* unmapped? It's a hole - nothing to do */
3381 if (!buffer_mapped(bh)) {
3382 BUFFER_TRACE(bh, "still unmapped");
3383 goto unlock;
3384 }
3385 }
3386
3387 /* Ok, it's mapped. Make sure it's up-to-date */
3388 if (PageUptodate(page))
3389 set_buffer_uptodate(bh);
3390
3391 if (!buffer_uptodate(bh)) {
3392 err = -EIO;
3393 ll_rw_block(READ, 1, &bh);
3394 wait_on_buffer(bh);
3395 /* Uhhuh. Read error. Complain and punt. */
3396 if (!buffer_uptodate(bh))
3397 goto unlock;
3398 }
3399
3400 if (ext4_should_journal_data(inode)) {
3401 BUFFER_TRACE(bh, "get write access");
3402 err = ext4_journal_get_write_access(handle, bh);
3403 if (err)
3404 goto unlock;
3405 }
3406
3407 zero_user(page, offset, length);
3408
3409 BUFFER_TRACE(bh, "zeroed end of block");
3410
3411 err = 0;
3412 if (ext4_should_journal_data(inode)) {
3413 err = ext4_handle_dirty_metadata(handle, inode, bh);
3414 } else
3415 mark_buffer_dirty(bh);
3416
3417unlock:
3418 unlock_page(page);
3419 page_cache_release(page);
3420 return err;
3421}
3422
3423int ext4_can_truncate(struct inode *inode) 3306int ext4_can_truncate(struct inode *inode)
3424{ 3307{
3425 if (S_ISREG(inode->i_mode)) 3308 if (S_ISREG(inode->i_mode))
@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4646 return 0; 4529 return 0;
4647 if (is_journal_aborted(journal)) 4530 if (is_journal_aborted(journal))
4648 return -EROFS; 4531 return -EROFS;
4532 /* We have to allocate physical blocks for delalloc blocks
4533 * before flushing journal. otherwise delalloc blocks can not
4534 * be allocated any more. even more truncate on delalloc blocks
4535 * could trigger BUG by flushing delalloc blocks in journal.
4536 * There is no delalloc block in non-journal data mode.
4537 */
4538 if (val && test_opt(inode->i_sb, DELALLOC)) {
4539 err = ext4_alloc_da_blocks(inode);
4540 if (err < 0)
4541 return err;
4542 }
4649 4543
4650 jbd2_journal_lock_updates(journal); 4544 jbd2_journal_lock_updates(journal);
4651 jbd2_journal_flush(journal);
4652 4545
4653 /* 4546 /*
4654 * OK, there are no updates running now, and all cached data is 4547 * OK, there are no updates running now, and all cached data is
@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4660 4553
4661 if (val) 4554 if (val)
4662 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4555 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4663 else 4556 else {
4557 jbd2_journal_flush(journal);
4664 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4558 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4559 }
4665 ext4_set_aops(inode); 4560 ext4_set_aops(inode);
4666 4561
4667 jbd2_journal_unlock_updates(journal); 4562 jbd2_journal_unlock_updates(journal);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e87a932b073b..6eee25591b81 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -18,6 +18,8 @@
18#include "ext4_jbd2.h" 18#include "ext4_jbd2.h"
19#include "ext4.h" 19#include "ext4.h"
20 20
21#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
22
21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22{ 24{
23 struct inode *inode = filp->f_dentry->d_inode; 25 struct inode *inode = filp->f_dentry->d_inode;
@@ -186,19 +188,22 @@ setversion_out:
186 if (err) 188 if (err)
187 return err; 189 return err;
188 190
189 if (get_user(n_blocks_count, (__u32 __user *)arg)) 191 if (get_user(n_blocks_count, (__u32 __user *)arg)) {
190 return -EFAULT; 192 err = -EFAULT;
193 goto group_extend_out;
194 }
191 195
192 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 196 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
193 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 197 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
194 ext4_msg(sb, KERN_ERR, 198 ext4_msg(sb, KERN_ERR,
195 "Online resizing not supported with bigalloc"); 199 "Online resizing not supported with bigalloc");
196 return -EOPNOTSUPP; 200 err = -EOPNOTSUPP;
201 goto group_extend_out;
197 } 202 }
198 203
199 err = mnt_want_write_file(filp); 204 err = mnt_want_write_file(filp);
200 if (err) 205 if (err)
201 return err; 206 goto group_extend_out;
202 207
203 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 208 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
204 if (EXT4_SB(sb)->s_journal) { 209 if (EXT4_SB(sb)->s_journal) {
@@ -209,8 +214,8 @@ setversion_out:
209 if (err == 0) 214 if (err == 0)
210 err = err2; 215 err = err2;
211 mnt_drop_write_file(filp); 216 mnt_drop_write_file(filp);
217group_extend_out:
212 ext4_resize_end(sb); 218 ext4_resize_end(sb);
213
214 return err; 219 return err;
215 } 220 }
216 221
@@ -251,8 +256,7 @@ setversion_out:
251 err = ext4_move_extents(filp, donor_filp, me.orig_start, 256 err = ext4_move_extents(filp, donor_filp, me.orig_start,
252 me.donor_start, me.len, &me.moved_len); 257 me.donor_start, me.len, &me.moved_len);
253 mnt_drop_write_file(filp); 258 mnt_drop_write_file(filp);
254 if (me.moved_len > 0) 259 mnt_drop_write(filp->f_path.mnt);
255 file_remove_suid(donor_filp);
256 260
257 if (copy_to_user((struct move_extent __user *)arg, 261 if (copy_to_user((struct move_extent __user *)arg,
258 &me, sizeof(me))) 262 &me, sizeof(me)))
@@ -271,19 +275,22 @@ mext_out:
271 return err; 275 return err;
272 276
273 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 277 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
274 sizeof(input))) 278 sizeof(input))) {
275 return -EFAULT; 279 err = -EFAULT;
280 goto group_add_out;
281 }
276 282
277 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 283 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
278 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 284 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
279 ext4_msg(sb, KERN_ERR, 285 ext4_msg(sb, KERN_ERR,
280 "Online resizing not supported with bigalloc"); 286 "Online resizing not supported with bigalloc");
281 return -EOPNOTSUPP; 287 err = -EOPNOTSUPP;
288 goto group_add_out;
282 } 289 }
283 290
284 err = mnt_want_write_file(filp); 291 err = mnt_want_write_file(filp);
285 if (err) 292 if (err)
286 return err; 293 goto group_add_out;
287 294
288 err = ext4_group_add(sb, &input); 295 err = ext4_group_add(sb, &input);
289 if (EXT4_SB(sb)->s_journal) { 296 if (EXT4_SB(sb)->s_journal) {
@@ -294,8 +301,8 @@ mext_out:
294 if (err == 0) 301 if (err == 0)
295 err = err2; 302 err = err2;
296 mnt_drop_write_file(filp); 303 mnt_drop_write_file(filp);
304group_add_out:
297 ext4_resize_end(sb); 305 ext4_resize_end(sb);
298
299 return err; 306 return err;
300 } 307 }
301 308
@@ -335,6 +342,60 @@ mext_out:
335 return err; 342 return err;
336 } 343 }
337 344
345 case EXT4_IOC_RESIZE_FS: {
346 ext4_fsblk_t n_blocks_count;
347 struct super_block *sb = inode->i_sb;
348 int err = 0, err2 = 0;
349
350 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
351 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
352 ext4_msg(sb, KERN_ERR,
353 "Online resizing not (yet) supported with bigalloc");
354 return -EOPNOTSUPP;
355 }
356
357 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
358 EXT4_FEATURE_INCOMPAT_META_BG)) {
359 ext4_msg(sb, KERN_ERR,
360 "Online resizing not (yet) supported with meta_bg");
361 return -EOPNOTSUPP;
362 }
363
364 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
365 sizeof(__u64))) {
366 return -EFAULT;
367 }
368
369 if (n_blocks_count > MAX_32_NUM &&
370 !EXT4_HAS_INCOMPAT_FEATURE(sb,
371 EXT4_FEATURE_INCOMPAT_64BIT)) {
372 ext4_msg(sb, KERN_ERR,
373 "File system only supports 32-bit block numbers");
374 return -EOPNOTSUPP;
375 }
376
377 err = ext4_resize_begin(sb);
378 if (err)
379 return err;
380
381 err = mnt_want_write(filp->f_path.mnt);
382 if (err)
383 goto resizefs_out;
384
385 err = ext4_resize_fs(sb, n_blocks_count);
386 if (EXT4_SB(sb)->s_journal) {
387 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
388 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
389 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
390 }
391 if (err == 0)
392 err = err2;
393 mnt_drop_write(filp->f_path.mnt);
394resizefs_out:
395 ext4_resize_end(sb);
396 return err;
397 }
398
338 case FITRIM: 399 case FITRIM:
339 { 400 {
340 struct request_queue *q = bdev_get_queue(sb->s_bdev); 401 struct request_queue *q = bdev_get_queue(sb->s_bdev);
@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
433 } 494 }
434 case EXT4_IOC_MOVE_EXT: 495 case EXT4_IOC_MOVE_EXT:
435 case FITRIM: 496 case FITRIM:
497 case EXT4_IOC_RESIZE_FS:
436 break; 498 break;
437 default: 499 default:
438 return -ENOIOCTLCMD; 500 return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e2d8be8f28bf..cb990b21c698 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3671 ext4_group_t group; 3671 ext4_group_t group;
3672 ext4_grpblk_t bit; 3672 ext4_grpblk_t bit;
3673 3673
3674 trace_ext4_mb_release_group_pa(pa); 3674 trace_ext4_mb_release_group_pa(sb, pa);
3675 BUG_ON(pa->pa_deleted == 0); 3675 BUG_ON(pa->pa_deleted == 0);
3676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 996780ab4f4e..f9d948f0eb86 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb,
134 return err; 134 return err;
135} 135}
136 136
137/*
138 * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
139 * group each time.
140 */
141struct ext4_new_flex_group_data {
142 struct ext4_new_group_data *groups; /* new_group_data for groups
143 in the flex group */
144 __u16 *bg_flags; /* block group flags of groups
145 in @groups */
146 ext4_group_t count; /* number of groups in @groups
147 */
148};
149
150/*
151 * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
152 * @flexbg_size.
153 *
154 * Returns NULL on failure otherwise address of the allocated structure.
155 */
156static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
157{
158 struct ext4_new_flex_group_data *flex_gd;
159
160 flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
161 if (flex_gd == NULL)
162 goto out3;
163
164 flex_gd->count = flexbg_size;
165
166 flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
167 flexbg_size, GFP_NOFS);
168 if (flex_gd->groups == NULL)
169 goto out2;
170
171 flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
172 if (flex_gd->bg_flags == NULL)
173 goto out1;
174
175 return flex_gd;
176
177out1:
178 kfree(flex_gd->groups);
179out2:
180 kfree(flex_gd);
181out3:
182 return NULL;
183}
184
185static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
186{
187 kfree(flex_gd->bg_flags);
188 kfree(flex_gd->groups);
189 kfree(flex_gd);
190}
191
192/*
193 * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
194 * and inode tables for a flex group.
195 *
196 * This function is used by 64bit-resize. Note that this function allocates
197 * group tables from the 1st group of groups contained by @flexgd, which may
198 * be a partial of a flex group.
199 *
200 * @sb: super block of fs to which the groups belongs
201 */
202static void ext4_alloc_group_tables(struct super_block *sb,
203 struct ext4_new_flex_group_data *flex_gd,
204 int flexbg_size)
205{
206 struct ext4_new_group_data *group_data = flex_gd->groups;
207 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
208 ext4_fsblk_t start_blk;
209 ext4_fsblk_t last_blk;
210 ext4_group_t src_group;
211 ext4_group_t bb_index = 0;
212 ext4_group_t ib_index = 0;
213 ext4_group_t it_index = 0;
214 ext4_group_t group;
215 ext4_group_t last_group;
216 unsigned overhead;
217
218 BUG_ON(flex_gd->count == 0 || group_data == NULL);
219
220 src_group = group_data[0].group;
221 last_group = src_group + flex_gd->count - 1;
222
223 BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
224 (last_group & ~(flexbg_size - 1))));
225next_group:
226 group = group_data[0].group;
227 start_blk = ext4_group_first_block_no(sb, src_group);
228 last_blk = start_blk + group_data[src_group - group].blocks_count;
229
230 overhead = ext4_bg_has_super(sb, src_group) ?
231 (1 + ext4_bg_num_gdb(sb, src_group) +
232 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
233
234 start_blk += overhead;
235
236 BUG_ON(src_group >= group_data[0].group + flex_gd->count);
237 /* We collect contiguous blocks as much as possible. */
238 src_group++;
239 for (; src_group <= last_group; src_group++)
240 if (!ext4_bg_has_super(sb, src_group))
241 last_blk += group_data[src_group - group].blocks_count;
242 else
243 break;
244
245 /* Allocate block bitmaps */
246 for (; bb_index < flex_gd->count; bb_index++) {
247 if (start_blk >= last_blk)
248 goto next_group;
249 group_data[bb_index].block_bitmap = start_blk++;
250 ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
251 group -= group_data[0].group;
252 group_data[group].free_blocks_count--;
253 if (flexbg_size > 1)
254 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
255 }
256
257 /* Allocate inode bitmaps */
258 for (; ib_index < flex_gd->count; ib_index++) {
259 if (start_blk >= last_blk)
260 goto next_group;
261 group_data[ib_index].inode_bitmap = start_blk++;
262 ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
263 group -= group_data[0].group;
264 group_data[group].free_blocks_count--;
265 if (flexbg_size > 1)
266 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
267 }
268
269 /* Allocate inode tables */
270 for (; it_index < flex_gd->count; it_index++) {
271 if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
272 goto next_group;
273 group_data[it_index].inode_table = start_blk;
274 ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
275 group -= group_data[0].group;
276 group_data[group].free_blocks_count -=
277 EXT4_SB(sb)->s_itb_per_group;
278 if (flexbg_size > 1)
279 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
280
281 start_blk += EXT4_SB(sb)->s_itb_per_group;
282 }
283
284 if (test_opt(sb, DEBUG)) {
285 int i;
286 group = group_data[0].group;
287
288 printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
289 "%d groups, flexbg size is %d:\n", flex_gd->count,
290 flexbg_size);
291
292 for (i = 0; i < flex_gd->count; i++) {
293 printk(KERN_DEBUG "adding %s group %u: %u "
294 "blocks (%d free)\n",
295 ext4_bg_has_super(sb, group + i) ? "normal" :
296 "no-super", group + i,
297 group_data[i].blocks_count,
298 group_data[i].free_blocks_count);
299 }
300 }
301}
302
137static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 303static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
138 ext4_fsblk_t blk) 304 ext4_fsblk_t blk)
139{ 305{
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
179} 345}
180 346
181/* 347/*
182 * Set up the block and inode bitmaps, and the inode table for the new group. 348 * set_flexbg_block_bitmap() mark @count blocks starting from @block used.
349 *
350 * Helper function for ext4_setup_new_group_blocks() which set .
351 *
352 * @sb: super block
353 * @handle: journal handle
354 * @flex_gd: flex group data
355 */
356static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
357 struct ext4_new_flex_group_data *flex_gd,
358 ext4_fsblk_t block, ext4_group_t count)
359{
360 ext4_group_t count2;
361
362 ext4_debug("mark blocks [%llu/%u] used\n", block, count);
363 for (count2 = count; count > 0; count -= count2, block += count2) {
364 ext4_fsblk_t start;
365 struct buffer_head *bh;
366 ext4_group_t group;
367 int err;
368
369 ext4_get_group_no_and_offset(sb, block, &group, NULL);
370 start = ext4_group_first_block_no(sb, group);
371 group -= flex_gd->groups[0].group;
372
373 count2 = sb->s_blocksize * 8 - (block - start);
374 if (count2 > count)
375 count2 = count;
376
377 if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
378 BUG_ON(flex_gd->count > 1);
379 continue;
380 }
381
382 err = extend_or_restart_transaction(handle, 1);
383 if (err)
384 return err;
385
386 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
387 if (!bh)
388 return -EIO;
389
390 err = ext4_journal_get_write_access(handle, bh);
391 if (err)
392 return err;
393 ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
394 block - start, count2);
395 ext4_set_bits(bh->b_data, block - start, count2);
396
397 err = ext4_handle_dirty_metadata(handle, NULL, bh);
398 if (unlikely(err))
399 return err;
400 brelse(bh);
401 }
402
403 return 0;
404}
405
406/*
407 * Set up the block and inode bitmaps, and the inode table for the new groups.
183 * This doesn't need to be part of the main transaction, since we are only 408 * This doesn't need to be part of the main transaction, since we are only
184 * changing blocks outside the actual filesystem. We still do journaling to 409 * changing blocks outside the actual filesystem. We still do journaling to
185 * ensure the recovery is correct in case of a failure just after resize. 410 * ensure the recovery is correct in case of a failure just after resize.
186 * If any part of this fails, we simply abort the resize. 411 * If any part of this fails, we simply abort the resize.
412 *
413 * setup_new_flex_group_blocks handles a flex group as follow:
414 * 1. copy super block and GDT, and initialize group tables if necessary.
415 * In this step, we only set bits in blocks bitmaps for blocks taken by
416 * super block and GDT.
417 * 2. allocate group tables in block bitmaps, that is, set bits in block
418 * bitmap for blocks taken by group tables.
187 */ 419 */
188static int setup_new_group_blocks(struct super_block *sb, 420static int setup_new_flex_group_blocks(struct super_block *sb,
189 struct ext4_new_group_data *input) 421 struct ext4_new_flex_group_data *flex_gd)
190{ 422{
423 int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
424 ext4_fsblk_t start;
425 ext4_fsblk_t block;
191 struct ext4_sb_info *sbi = EXT4_SB(sb); 426 struct ext4_sb_info *sbi = EXT4_SB(sb);
192 ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); 427 struct ext4_super_block *es = sbi->s_es;
193 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 428 struct ext4_new_group_data *group_data = flex_gd->groups;
194 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 429 __u16 *bg_flags = flex_gd->bg_flags;
195 unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
196 struct buffer_head *bh;
197 handle_t *handle; 430 handle_t *handle;
198 ext4_fsblk_t block; 431 ext4_group_t group, count;
199 ext4_grpblk_t bit; 432 struct buffer_head *bh = NULL;
200 int i; 433 int reserved_gdb, i, j, err = 0, err2;
201 int err = 0, err2; 434
435 BUG_ON(!flex_gd->count || !group_data ||
436 group_data[0].group != sbi->s_groups_count);
437
438 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
202 439
203 /* This transaction may be extended/restarted along the way */ 440 /* This transaction may be extended/restarted along the way */
204 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 441 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
205
206 if (IS_ERR(handle)) 442 if (IS_ERR(handle))
207 return PTR_ERR(handle); 443 return PTR_ERR(handle);
208 444
209 BUG_ON(input->group != sbi->s_groups_count); 445 group = group_data[0].group;
446 for (i = 0; i < flex_gd->count; i++, group++) {
447 unsigned long gdblocks;
210 448
211 /* Copy all of the GDT blocks into the backup in this group */ 449 gdblocks = ext4_bg_num_gdb(sb, group);
212 for (i = 0, bit = 1, block = start + 1; 450 start = ext4_group_first_block_no(sb, group);
213 i < gdblocks; i++, block++, bit++) {
214 struct buffer_head *gdb;
215 451
216 ext4_debug("update backup group %#04llx (+%d)\n", block, bit); 452 /* Copy all of the GDT blocks into the backup in this group */
217 err = extend_or_restart_transaction(handle, 1); 453 for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
218 if (err) 454 struct buffer_head *gdb;
219 goto exit_journal;
220 455
221 gdb = sb_getblk(sb, block); 456 ext4_debug("update backup group %#04llx\n", block);
222 if (!gdb) { 457 err = extend_or_restart_transaction(handle, 1);
223 err = -EIO; 458 if (err)
224 goto exit_journal; 459 goto out;
225 } 460
226 if ((err = ext4_journal_get_write_access(handle, gdb))) { 461 gdb = sb_getblk(sb, block);
462 if (!gdb) {
463 err = -EIO;
464 goto out;
465 }
466
467 err = ext4_journal_get_write_access(handle, gdb);
468 if (err) {
469 brelse(gdb);
470 goto out;
471 }
472 memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
473 gdb->b_size);
474 set_buffer_uptodate(gdb);
475
476 err = ext4_handle_dirty_metadata(handle, NULL, gdb);
477 if (unlikely(err)) {
478 brelse(gdb);
479 goto out;
480 }
227 brelse(gdb); 481 brelse(gdb);
228 goto exit_journal;
229 } 482 }
230 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 483
231 set_buffer_uptodate(gdb); 484 /* Zero out all of the reserved backup group descriptor
232 err = ext4_handle_dirty_metadata(handle, NULL, gdb); 485 * table blocks
233 if (unlikely(err)) { 486 */
234 brelse(gdb); 487 if (ext4_bg_has_super(sb, group)) {
235 goto exit_journal; 488 err = sb_issue_zeroout(sb, gdblocks + start + 1,
489 reserved_gdb, GFP_NOFS);
490 if (err)
491 goto out;
236 } 492 }
237 brelse(gdb);
238 }
239 493
240 /* Zero out all of the reserved backup group descriptor table blocks */ 494 /* Initialize group tables of the grop @group */
241 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 495 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
242 block, sbi->s_itb_per_group); 496 goto handle_bb;
243 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
244 GFP_NOFS);
245 if (err)
246 goto exit_journal;
247 497
248 err = extend_or_restart_transaction(handle, 2); 498 /* Zero out all of the inode table blocks */
249 if (err) 499 block = group_data[i].inode_table;
250 goto exit_journal; 500 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
501 block, sbi->s_itb_per_group);
502 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
503 GFP_NOFS);
504 if (err)
505 goto out;
251 506
252 bh = bclean(handle, sb, input->block_bitmap); 507handle_bb:
253 if (IS_ERR(bh)) { 508 if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
254 err = PTR_ERR(bh); 509 goto handle_ib;
255 goto exit_journal;
256 }
257 510
258 if (ext4_bg_has_super(sb, input->group)) { 511 /* Initialize block bitmap of the @group */
259 ext4_debug("mark backup group tables %#04llx (+0)\n", start); 512 block = group_data[i].block_bitmap;
260 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); 513 err = extend_or_restart_transaction(handle, 1);
261 } 514 if (err)
515 goto out;
262 516
263 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 517 bh = bclean(handle, sb, block);
264 input->block_bitmap - start); 518 if (IS_ERR(bh)) {
265 ext4_set_bit(input->block_bitmap - start, bh->b_data); 519 err = PTR_ERR(bh);
266 ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, 520 goto out;
267 input->inode_bitmap - start); 521 }
268 ext4_set_bit(input->inode_bitmap - start, bh->b_data); 522 if (ext4_bg_has_super(sb, group)) {
269 523 ext4_debug("mark backup superblock %#04llx (+0)\n",
270 /* Zero out all of the inode table blocks */ 524 start);
271 block = input->inode_table; 525 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
272 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 526 1);
273 block, sbi->s_itb_per_group); 527 }
274 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 528 ext4_mark_bitmap_end(group_data[i].blocks_count,
275 if (err) 529 sb->s_blocksize * 8, bh->b_data);
276 goto exit_bh; 530 err = ext4_handle_dirty_metadata(handle, NULL, bh);
277 ext4_set_bits(bh->b_data, input->inode_table - start, 531 if (err)
278 sbi->s_itb_per_group); 532 goto out;
533 brelse(bh);
279 534
535handle_ib:
536 if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
537 continue;
280 538
281 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 539 /* Initialize inode bitmap of the @group */
282 bh->b_data); 540 block = group_data[i].inode_bitmap;
283 err = ext4_handle_dirty_metadata(handle, NULL, bh); 541 err = extend_or_restart_transaction(handle, 1);
284 if (unlikely(err)) { 542 if (err)
285 ext4_std_error(sb, err); 543 goto out;
286 goto exit_bh; 544 /* Mark unused entries in inode bitmap used */
545 bh = bclean(handle, sb, block);
546 if (IS_ERR(bh)) {
547 err = PTR_ERR(bh);
548 goto out;
549 }
550
551 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
552 sb->s_blocksize * 8, bh->b_data);
553 err = ext4_handle_dirty_metadata(handle, NULL, bh);
554 if (err)
555 goto out;
556 brelse(bh);
287 } 557 }
288 brelse(bh); 558 bh = NULL;
289 /* Mark unused entries in inode bitmap used */ 559
290 ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 560 /* Mark group tables in block bitmap */
291 input->inode_bitmap, input->inode_bitmap - start); 561 for (j = 0; j < GROUP_TABLE_COUNT; j++) {
292 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 562 count = group_table_count[j];
293 err = PTR_ERR(bh); 563 start = (&group_data[0].block_bitmap)[j];
294 goto exit_journal; 564 block = start;
565 for (i = 1; i < flex_gd->count; i++) {
566 block += group_table_count[j];
567 if (block == (&group_data[i].block_bitmap)[j]) {
568 count += group_table_count[j];
569 continue;
570 }
571 err = set_flexbg_block_bitmap(sb, handle,
572 flex_gd, start, count);
573 if (err)
574 goto out;
575 count = group_table_count[j];
576 start = group_data[i].block_bitmap;
577 block = start;
578 }
579
580 if (count) {
581 err = set_flexbg_block_bitmap(sb, handle,
582 flex_gd, start, count);
583 if (err)
584 goto out;
585 }
295 } 586 }
296 587
297 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 588out:
298 bh->b_data);
299 err = ext4_handle_dirty_metadata(handle, NULL, bh);
300 if (unlikely(err))
301 ext4_std_error(sb, err);
302exit_bh:
303 brelse(bh); 589 brelse(bh);
304 590 err2 = ext4_journal_stop(handle);
305exit_journal: 591 if (err2 && !err)
306 if ((err2 = ext4_journal_stop(handle)) && !err)
307 err = err2; 592 err = err2;
308 593
309 return err; 594 return err;
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
351 * groups in current filesystem that have BACKUPS, or -ve error code. 636 * groups in current filesystem that have BACKUPS, or -ve error code.
352 */ 637 */
353static int verify_reserved_gdb(struct super_block *sb, 638static int verify_reserved_gdb(struct super_block *sb,
639 ext4_group_t end,
354 struct buffer_head *primary) 640 struct buffer_head *primary)
355{ 641{
356 const ext4_fsblk_t blk = primary->b_blocknr; 642 const ext4_fsblk_t blk = primary->b_blocknr;
357 const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
358 unsigned three = 1; 643 unsigned three = 1;
359 unsigned five = 5; 644 unsigned five = 5;
360 unsigned seven = 7; 645 unsigned seven = 7;
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
429 if (!gdb_bh) 714 if (!gdb_bh)
430 return -EIO; 715 return -EIO;
431 716
432 gdbackups = verify_reserved_gdb(sb, gdb_bh); 717 gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
433 if (gdbackups < 0) { 718 if (gdbackups < 0) {
434 err = gdbackups; 719 err = gdbackups;
435 goto exit_bh; 720 goto exit_bh;
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
592 err = -EIO; 877 err = -EIO;
593 goto exit_bh; 878 goto exit_bh;
594 } 879 }
595 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { 880 gdbackups = verify_reserved_gdb(sb, group, primary[res]);
881 if (gdbackups < 0) {
596 brelse(primary[res]); 882 brelse(primary[res]);
597 err = gdbackups; 883 err = gdbackups;
598 goto exit_bh; 884 goto exit_bh;
@@ -735,6 +1021,348 @@ exit_err:
735 } 1021 }
736} 1022}
737 1023
1024/*
1025 * ext4_add_new_descs() adds @count group descriptor of groups
1026 * starting at @group
1027 *
1028 * @handle: journal handle
1029 * @sb: super block
1030 * @group: the group no. of the first group desc to be added
1031 * @resize_inode: the resize inode
1032 * @count: number of group descriptors to be added
1033 */
1034static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1035 ext4_group_t group, struct inode *resize_inode,
1036 ext4_group_t count)
1037{
1038 struct ext4_sb_info *sbi = EXT4_SB(sb);
1039 struct ext4_super_block *es = sbi->s_es;
1040 struct buffer_head *gdb_bh;
1041 int i, gdb_off, gdb_num, err = 0;
1042
1043 for (i = 0; i < count; i++, group++) {
1044 int reserved_gdb = ext4_bg_has_super(sb, group) ?
1045 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
1046
1047 gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1048 gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1049
1050 /*
1051 * We will only either add reserved group blocks to a backup group
1052 * or remove reserved blocks for the first group in a new group block.
1053 * Doing both would be mean more complex code, and sane people don't
1054 * use non-sparse filesystems anymore. This is already checked above.
1055 */
1056 if (gdb_off) {
1057 gdb_bh = sbi->s_group_desc[gdb_num];
1058 err = ext4_journal_get_write_access(handle, gdb_bh);
1059
1060 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
1061 err = reserve_backup_gdb(handle, resize_inode, group);
1062 } else
1063 err = add_new_gdb(handle, resize_inode, group);
1064 if (err)
1065 break;
1066 }
1067 return err;
1068}
1069
1070/*
1071 * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
1072 */
1073static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
1074 struct ext4_new_flex_group_data *flex_gd)
1075{
1076 struct ext4_new_group_data *group_data = flex_gd->groups;
1077 struct ext4_group_desc *gdp;
1078 struct ext4_sb_info *sbi = EXT4_SB(sb);
1079 struct buffer_head *gdb_bh;
1080 ext4_group_t group;
1081 __u16 *bg_flags = flex_gd->bg_flags;
1082 int i, gdb_off, gdb_num, err = 0;
1083
1084
1085 for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
1086 group = group_data->group;
1087
1088 gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1089 gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1090
1091 /*
1092 * get_write_access() has been called on gdb_bh by ext4_add_new_desc().
1093 */
1094 gdb_bh = sbi->s_group_desc[gdb_num];
1095 /* Update group descriptor block for new group */
1096 gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
1097 gdb_off * EXT4_DESC_SIZE(sb));
1098
1099 memset(gdp, 0, EXT4_DESC_SIZE(sb));
1100 ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
1101 ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
1102 ext4_inode_table_set(sb, gdp, group_data->inode_table);
1103 ext4_free_group_clusters_set(sb, gdp,
1104 EXT4_B2C(sbi, group_data->free_blocks_count));
1105 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
1106 gdp->bg_flags = cpu_to_le16(*bg_flags);
1107 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
1108
1109 err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
1110 if (unlikely(err)) {
1111 ext4_std_error(sb, err);
1112 break;
1113 }
1114
1115 /*
1116 * We can allocate memory for mb_alloc based on the new group
1117 * descriptor
1118 */
1119 err = ext4_mb_add_groupinfo(sb, group, gdp);
1120 if (err)
1121 break;
1122 }
1123 return err;
1124}
1125
1126/*
1127 * ext4_update_super() updates the super block so that the newly added
1128 * groups can be seen by the filesystem.
1129 *
1130 * @sb: super block
1131 * @flex_gd: new added groups
1132 */
1133static void ext4_update_super(struct super_block *sb,
1134 struct ext4_new_flex_group_data *flex_gd)
1135{
1136 ext4_fsblk_t blocks_count = 0;
1137 ext4_fsblk_t free_blocks = 0;
1138 ext4_fsblk_t reserved_blocks = 0;
1139 struct ext4_new_group_data *group_data = flex_gd->groups;
1140 struct ext4_sb_info *sbi = EXT4_SB(sb);
1141 struct ext4_super_block *es = sbi->s_es;
1142 int i;
1143
1144 BUG_ON(flex_gd->count == 0 || group_data == NULL);
1145 /*
1146 * Make the new blocks and inodes valid next. We do this before
1147 * increasing the group count so that once the group is enabled,
1148 * all of its blocks and inodes are already valid.
1149 *
1150 * We always allocate group-by-group, then block-by-block or
1151 * inode-by-inode within a group, so enabling these
1152 * blocks/inodes before the group is live won't actually let us
1153 * allocate the new space yet.
1154 */
1155 for (i = 0; i < flex_gd->count; i++) {
1156 blocks_count += group_data[i].blocks_count;
1157 free_blocks += group_data[i].free_blocks_count;
1158 }
1159
1160 reserved_blocks = ext4_r_blocks_count(es) * 100;
1161 do_div(reserved_blocks, ext4_blocks_count(es));
1162 reserved_blocks *= blocks_count;
1163 do_div(reserved_blocks, 100);
1164
1165 ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
1166 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1167 flex_gd->count);
1168
1169 /*
1170 * We need to protect s_groups_count against other CPUs seeing
1171 * inconsistent state in the superblock.
1172 *
1173 * The precise rules we use are:
1174 *
1175 * * Writers must perform a smp_wmb() after updating all
1176 * dependent data and before modifying the groups count
1177 *
1178 * * Readers must perform an smp_rmb() after reading the groups
1179 * count and before reading any dependent data.
1180 *
1181 * NB. These rules can be relaxed when checking the group count
1182 * while freeing data, as we can only allocate from a block
1183 * group after serialising against the group count, and we can
1184 * only then free after serialising in turn against that
1185 * allocation.
1186 */
1187 smp_wmb();
1188
1189 /* Update the global fs size fields */
1190 sbi->s_groups_count += flex_gd->count;
1191
1192 /* Update the reserved block counts only once the new group is
1193 * active. */
1194 ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
1195 reserved_blocks);
1196
1197 /* Update the free space counts */
1198 percpu_counter_add(&sbi->s_freeclusters_counter,
1199 EXT4_B2C(sbi, free_blocks));
1200 percpu_counter_add(&sbi->s_freeinodes_counter,
1201 EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
1202
1203 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1204 EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1205 sbi->s_log_groups_per_flex) {
1206 ext4_group_t flex_group;
1207 flex_group = ext4_flex_group(sbi, group_data[0].group);
1208 atomic_add(EXT4_B2C(sbi, free_blocks),
1209 &sbi->s_flex_groups[flex_group].free_clusters);
1210 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
1211 &sbi->s_flex_groups[flex_group].free_inodes);
1212 }
1213
1214 if (test_opt(sb, DEBUG))
1215 printk(KERN_DEBUG "EXT4-fs: added group %u:"
1216 "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
1217 blocks_count, free_blocks, reserved_blocks);
1218}
1219
1220/* Add a flex group to an fs. Ensure we handle all possible error conditions
1221 * _before_ we start modifying the filesystem, because we cannot abort the
1222 * transaction and not have it write the data to disk.
1223 */
1224static int ext4_flex_group_add(struct super_block *sb,
1225 struct inode *resize_inode,
1226 struct ext4_new_flex_group_data *flex_gd)
1227{
1228 struct ext4_sb_info *sbi = EXT4_SB(sb);
1229 struct ext4_super_block *es = sbi->s_es;
1230 ext4_fsblk_t o_blocks_count;
1231 ext4_grpblk_t last;
1232 ext4_group_t group;
1233 handle_t *handle;
1234 unsigned reserved_gdb;
1235 int err = 0, err2 = 0, credit;
1236
1237 BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
1238
1239 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
1240 o_blocks_count = ext4_blocks_count(es);
1241 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1242 BUG_ON(last);
1243
1244 err = setup_new_flex_group_blocks(sb, flex_gd);
1245 if (err)
1246 goto exit;
1247 /*
1248 * We will always be modifying at least the superblock and GDT
1249 * block. If we are adding a group past the last current GDT block,
1250 * we will also modify the inode and the dindirect block. If we
1251 * are adding a group with superblock/GDT backups we will also
1252 * modify each of the reserved GDT dindirect blocks.
1253 */
1254 credit = flex_gd->count * 4 + reserved_gdb;
1255 handle = ext4_journal_start_sb(sb, credit);
1256 if (IS_ERR(handle)) {
1257 err = PTR_ERR(handle);
1258 goto exit;
1259 }
1260
1261 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
1262 if (err)
1263 goto exit_journal;
1264
1265 group = flex_gd->groups[0].group;
1266 BUG_ON(group != EXT4_SB(sb)->s_groups_count);
1267 err = ext4_add_new_descs(handle, sb, group,
1268 resize_inode, flex_gd->count);
1269 if (err)
1270 goto exit_journal;
1271
1272 err = ext4_setup_new_descs(handle, sb, flex_gd);
1273 if (err)
1274 goto exit_journal;
1275
1276 ext4_update_super(sb, flex_gd);
1277
1278 err = ext4_handle_dirty_super(handle, sb);
1279
1280exit_journal:
1281 err2 = ext4_journal_stop(handle);
1282 if (!err)
1283 err = err2;
1284
1285 if (!err) {
1286 int i;
1287 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
1288 sizeof(struct ext4_super_block));
1289 for (i = 0; i < flex_gd->count; i++, group++) {
1290 struct buffer_head *gdb_bh;
1291 int gdb_num;
1292 gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
1293 gdb_bh = sbi->s_group_desc[gdb_num];
1294 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
1295 gdb_bh->b_size);
1296 }
1297 }
1298exit:
1299 return err;
1300}
1301
1302static int ext4_setup_next_flex_gd(struct super_block *sb,
1303 struct ext4_new_flex_group_data *flex_gd,
1304 ext4_fsblk_t n_blocks_count,
1305 unsigned long flexbg_size)
1306{
1307 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1308 struct ext4_new_group_data *group_data = flex_gd->groups;
1309 ext4_fsblk_t o_blocks_count;
1310 ext4_group_t n_group;
1311 ext4_group_t group;
1312 ext4_group_t last_group;
1313 ext4_grpblk_t last;
1314 ext4_grpblk_t blocks_per_group;
1315 unsigned long i;
1316
1317 blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
1318
1319 o_blocks_count = ext4_blocks_count(es);
1320
1321 if (o_blocks_count == n_blocks_count)
1322 return 0;
1323
1324 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1325 BUG_ON(last);
1326 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
1327
1328 last_group = group | (flexbg_size - 1);
1329 if (last_group > n_group)
1330 last_group = n_group;
1331
1332 flex_gd->count = last_group - group + 1;
1333
1334 for (i = 0; i < flex_gd->count; i++) {
1335 int overhead;
1336
1337 group_data[i].group = group + i;
1338 group_data[i].blocks_count = blocks_per_group;
1339 overhead = ext4_bg_has_super(sb, group + i) ?
1340 (1 + ext4_bg_num_gdb(sb, group + i) +
1341 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
1342 group_data[i].free_blocks_count = blocks_per_group - overhead;
1343 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1344 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
1345 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
1346 EXT4_BG_INODE_UNINIT;
1347 else
1348 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
1349 }
1350
1351 if (last_group == n_group &&
1352 EXT4_HAS_RO_COMPAT_FEATURE(sb,
1353 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
1354 /* We need to initialize block bitmap of last group. */
1355 flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
1356
1357 if ((last_group == n_group) && (last != blocks_per_group - 1)) {
1358 group_data[i - 1].blocks_count = last + 1;
1359 group_data[i - 1].free_blocks_count -= blocks_per_group-
1360 last - 1;
1361 }
1362
1363 return 1;
1364}
1365
738/* Add group descriptor data to an existing or new group descriptor block. 1366/* Add group descriptor data to an existing or new group descriptor block.
739 * Ensure we handle all possible error conditions _before_ we start modifying 1367 * Ensure we handle all possible error conditions _before_ we start modifying
740 * the filesystem, because we cannot abort the transaction and not have it 1368 * the filesystem, because we cannot abort the transaction and not have it
@@ -750,16 +1378,15 @@ exit_err:
750 */ 1378 */
751int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) 1379int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
752{ 1380{
1381 struct ext4_new_flex_group_data flex_gd;
753 struct ext4_sb_info *sbi = EXT4_SB(sb); 1382 struct ext4_sb_info *sbi = EXT4_SB(sb);
754 struct ext4_super_block *es = sbi->s_es; 1383 struct ext4_super_block *es = sbi->s_es;
755 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 1384 int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
756 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1385 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
757 struct buffer_head *primary = NULL;
758 struct ext4_group_desc *gdp;
759 struct inode *inode = NULL; 1386 struct inode *inode = NULL;
760 handle_t *handle;
761 int gdb_off, gdb_num; 1387 int gdb_off, gdb_num;
762 int err, err2; 1388 int err;
1389 __u16 bg_flags = 0;
763 1390
764 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 1391 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
765 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); 1392 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
798 } 1425 }
799 1426
800 1427
801 if ((err = verify_group_input(sb, input))) 1428 err = verify_group_input(sb, input);
802 goto exit_put; 1429 if (err)
1430 goto out;
803 1431
804 if ((err = setup_new_group_blocks(sb, input))) 1432 flex_gd.count = 1;
805 goto exit_put; 1433 flex_gd.groups = input;
1434 flex_gd.bg_flags = &bg_flags;
1435 err = ext4_flex_group_add(sb, inode, &flex_gd);
1436out:
1437 iput(inode);
1438 return err;
1439} /* ext4_group_add */
806 1440
807 /* 1441/*
808 * We will always be modifying at least the superblock and a GDT 1442 * extend a group without checking assuming that checking has been done.
809 * block. If we are adding a group past the last current GDT block, 1443 */
810 * we will also modify the inode and the dindirect block. If we 1444static int ext4_group_extend_no_check(struct super_block *sb,
811 * are adding a group with superblock/GDT backups we will also 1445 ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
812 * modify each of the reserved GDT dindirect blocks. 1446{
1447 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1448 handle_t *handle;
1449 int err = 0, err2;
1450
1451 /* We will update the superblock, one block bitmap, and
1452 * one group descriptor via ext4_group_add_blocks().
813 */ 1453 */
814 handle = ext4_journal_start_sb(sb, 1454 handle = ext4_journal_start_sb(sb, 3);
815 ext4_bg_has_super(sb, input->group) ?
816 3 + reserved_gdb : 4);
817 if (IS_ERR(handle)) { 1455 if (IS_ERR(handle)) {
818 err = PTR_ERR(handle); 1456 err = PTR_ERR(handle);
819 goto exit_put; 1457 ext4_warning(sb, "error %d on journal start", err);
1458 return err;
820 } 1459 }
821 1460
822 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 1461 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
823 goto exit_journal; 1462 if (err) {
824 1463 ext4_warning(sb, "error %d on journal write access", err);
825 /* 1464 goto errout;
826 * We will only either add reserved group blocks to a backup group
827 * or remove reserved blocks for the first group in a new group block.
828 * Doing both would be mean more complex code, and sane people don't
829 * use non-sparse filesystems anymore. This is already checked above.
830 */
831 if (gdb_off) {
832 primary = sbi->s_group_desc[gdb_num];
833 if ((err = ext4_journal_get_write_access(handle, primary)))
834 goto exit_journal;
835
836 if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
837 err = reserve_backup_gdb(handle, inode, input->group);
838 if (err)
839 goto exit_journal;
840 }
841 } else {
842 /*
843 * Note that we can access new group descriptor block safely
844 * only if add_new_gdb() succeeds.
845 */
846 err = add_new_gdb(handle, inode, input->group);
847 if (err)
848 goto exit_journal;
849 primary = sbi->s_group_desc[gdb_num];
850 } 1465 }
851 1466
852 /* 1467 ext4_blocks_count_set(es, o_blocks_count + add);
853 * OK, now we've set up the new group. Time to make it active. 1468 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
854 * 1469 o_blocks_count + add);
855 * so we have to be safe wrt. concurrent accesses the group 1470 /* We add the blocks to the bitmap and set the group need init bit */
856 * data. So we need to be careful to set all of the relevant 1471 err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
857 * group descriptor data etc. *before* we enable the group.
858 *
859 * The key field here is sbi->s_groups_count: as long as
860 * that retains its old value, nobody is going to access the new
861 * group.
862 *
863 * So first we update all the descriptor metadata for the new
864 * group; then we update the total disk blocks count; then we
865 * update the groups count to enable the group; then finally we
866 * update the free space counts so that the system can start
867 * using the new disk blocks.
868 */
869
870 /* Update group descriptor block for new group */
871 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
872 gdb_off * EXT4_DESC_SIZE(sb));
873
874 memset(gdp, 0, EXT4_DESC_SIZE(sb));
875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
878 ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
882
883 /*
884 * We can allocate memory for mb_alloc based on the new group
885 * descriptor
886 */
887 err = ext4_mb_add_groupinfo(sb, input->group, gdp);
888 if (err) 1472 if (err)
889 goto exit_journal; 1473 goto errout;
890
891 /*
892 * Make the new blocks and inodes valid next. We do this before
893 * increasing the group count so that once the group is enabled,
894 * all of its blocks and inodes are already valid.
895 *
896 * We always allocate group-by-group, then block-by-block or
897 * inode-by-inode within a group, so enabling these
898 * blocks/inodes before the group is live won't actually let us
899 * allocate the new space yet.
900 */
901 ext4_blocks_count_set(es, ext4_blocks_count(es) +
902 input->blocks_count);
903 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
904
905 /*
906 * We need to protect s_groups_count against other CPUs seeing
907 * inconsistent state in the superblock.
908 *
909 * The precise rules we use are:
910 *
911 * * Writers must perform a smp_wmb() after updating all dependent
912 * data and before modifying the groups count
913 *
914 * * Readers must perform an smp_rmb() after reading the groups count
915 * and before reading any dependent data.
916 *
917 * NB. These rules can be relaxed when checking the group count
918 * while freeing data, as we can only allocate from a block
919 * group after serialising against the group count, and we can
920 * only then free after serialising in turn against that
921 * allocation.
922 */
923 smp_wmb();
924
925 /* Update the global fs size fields */
926 sbi->s_groups_count++;
927
928 err = ext4_handle_dirty_metadata(handle, NULL, primary);
929 if (unlikely(err)) {
930 ext4_std_error(sb, err);
931 goto exit_journal;
932 }
933
934 /* Update the reserved block counts only once the new group is
935 * active. */
936 ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
937 input->reserved_blocks);
938
939 /* Update the free space counts */
940 percpu_counter_add(&sbi->s_freeclusters_counter,
941 EXT4_B2C(sbi, input->free_blocks_count));
942 percpu_counter_add(&sbi->s_freeinodes_counter,
943 EXT4_INODES_PER_GROUP(sb));
944
945 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
946 sbi->s_log_groups_per_flex) {
947 ext4_group_t flex_group;
948 flex_group = ext4_flex_group(sbi, input->group);
949 atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
950 &sbi->s_flex_groups[flex_group].free_clusters);
951 atomic_add(EXT4_INODES_PER_GROUP(sb),
952 &sbi->s_flex_groups[flex_group].free_inodes);
953 }
954
955 ext4_handle_dirty_super(handle, sb); 1474 ext4_handle_dirty_super(handle, sb);
956 1475 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
957exit_journal: 1476 o_blocks_count + add);
958 if ((err2 = ext4_journal_stop(handle)) && !err) 1477errout:
1478 err2 = ext4_journal_stop(handle);
1479 if (err2 && !err)
959 err = err2; 1480 err = err2;
960 if (!err && primary) { 1481
961 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1482 if (!err) {
1483 if (test_opt(sb, DEBUG))
1484 printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
1485 "blocks\n", ext4_blocks_count(es));
1486 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
962 sizeof(struct ext4_super_block)); 1487 sizeof(struct ext4_super_block));
963 update_backups(sb, primary->b_blocknr, primary->b_data,
964 primary->b_size);
965 } 1488 }
966exit_put:
967 iput(inode);
968 return err; 1489 return err;
969} /* ext4_group_add */ 1490}
970 1491
971/* 1492/*
972 * Extend the filesystem to the new number of blocks specified. This entry 1493 * Extend the filesystem to the new number of blocks specified. This entry
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
985 ext4_grpblk_t last; 1506 ext4_grpblk_t last;
986 ext4_grpblk_t add; 1507 ext4_grpblk_t add;
987 struct buffer_head *bh; 1508 struct buffer_head *bh;
988 handle_t *handle; 1509 int err;
989 int err, err2;
990 ext4_group_t group; 1510 ext4_group_t group;
991 1511
992 o_blocks_count = ext4_blocks_count(es); 1512 o_blocks_count = ext4_blocks_count(es);
@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1042 } 1562 }
1043 brelse(bh); 1563 brelse(bh);
1044 1564
1045 /* We will update the superblock, one block bitmap, and 1565 err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1046 * one group descriptor via ext4_free_blocks(). 1566 return err;
1047 */ 1567} /* ext4_group_extend */
1048 handle = ext4_journal_start_sb(sb, 3); 1568
1049 if (IS_ERR(handle)) { 1569/*
1050 err = PTR_ERR(handle); 1570 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
1051 ext4_warning(sb, "error %d on journal start", err); 1571 *
1052 goto exit_put; 1572 * @sb: super block of the fs to be resized
1573 * @n_blocks_count: the number of blocks resides in the resized fs
1574 */
1575int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1576{
1577 struct ext4_new_flex_group_data *flex_gd = NULL;
1578 struct ext4_sb_info *sbi = EXT4_SB(sb);
1579 struct ext4_super_block *es = sbi->s_es;
1580 struct buffer_head *bh;
1581 struct inode *resize_inode;
1582 ext4_fsblk_t o_blocks_count;
1583 ext4_group_t o_group;
1584 ext4_group_t n_group;
1585 ext4_grpblk_t offset;
1586 unsigned long n_desc_blocks;
1587 unsigned long o_desc_blocks;
1588 unsigned long desc_blocks;
1589 int err = 0, flexbg_size = 1;
1590
1591 o_blocks_count = ext4_blocks_count(es);
1592
1593 if (test_opt(sb, DEBUG))
1594 printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
1595 "upto %llu blocks\n", o_blocks_count, n_blocks_count);
1596
1597 if (n_blocks_count < o_blocks_count) {
1598 /* On-line shrinking not supported */
1599 ext4_warning(sb, "can't shrink FS - resize aborted");
1600 return -EINVAL;
1053 } 1601 }
1054 1602
1055 if ((err = ext4_journal_get_write_access(handle, 1603 if (n_blocks_count == o_blocks_count)
1056 EXT4_SB(sb)->s_sbh))) { 1604 /* Nothing need to do */
1057 ext4_warning(sb, "error %d on journal write access", err); 1605 return 0;
1058 ext4_journal_stop(handle); 1606
1059 goto exit_put; 1607 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
1608 ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
1609
1610 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
1611 EXT4_DESC_PER_BLOCK(sb);
1612 o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
1613 EXT4_DESC_PER_BLOCK(sb);
1614 desc_blocks = n_desc_blocks - o_desc_blocks;
1615
1616 if (desc_blocks &&
1617 (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
1618 le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
1619 ext4_warning(sb, "No reserved GDT blocks, can't resize");
1620 return -EPERM;
1060 } 1621 }
1061 ext4_blocks_count_set(es, o_blocks_count + add);
1062 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1063 o_blocks_count + add);
1064 /* We add the blocks to the bitmap and set the group need init bit */
1065 err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
1066 ext4_handle_dirty_super(handle, sb);
1067 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1068 o_blocks_count + add);
1069 err2 = ext4_journal_stop(handle);
1070 if (!err && err2)
1071 err = err2;
1072 1622
1073 if (err) 1623 resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
1074 goto exit_put; 1624 if (IS_ERR(resize_inode)) {
1625 ext4_warning(sb, "Error opening resize inode");
1626 return PTR_ERR(resize_inode);
1627 }
1075 1628
1629 /* See if the device is actually as big as what was requested */
1630 bh = sb_bread(sb, n_blocks_count - 1);
1631 if (!bh) {
1632 ext4_warning(sb, "can't read last block, resize aborted");
1633 return -ENOSPC;
1634 }
1635 brelse(bh);
1636
1637 if (offset != 0) {
1638 /* extend the last group */
1639 ext4_grpblk_t add;
1640 add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
1641 err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1642 if (err)
1643 goto out;
1644 }
1645
1646 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1647 es->s_log_groups_per_flex)
1648 flexbg_size = 1 << es->s_log_groups_per_flex;
1649
1650 o_blocks_count = ext4_blocks_count(es);
1651 if (o_blocks_count == n_blocks_count)
1652 goto out;
1653
1654 flex_gd = alloc_flex_gd(flexbg_size);
1655 if (flex_gd == NULL) {
1656 err = -ENOMEM;
1657 goto out;
1658 }
1659
1660 /* Add flex groups. Note that a regular group is a
1661 * flex group with 1 group.
1662 */
1663 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
1664 flexbg_size)) {
1665 ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
1666 err = ext4_flex_group_add(sb, resize_inode, flex_gd);
1667 if (unlikely(err))
1668 break;
1669 }
1670
1671out:
1672 if (flex_gd)
1673 free_flex_gd(flex_gd);
1674
1675 iput(resize_inode);
1076 if (test_opt(sb, DEBUG)) 1676 if (test_opt(sb, DEBUG))
1077 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1677 printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
1078 ext4_blocks_count(es)); 1678 "upto %llu blocks\n", o_blocks_count, n_blocks_count);
1079 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
1080 sizeof(struct ext4_super_block));
1081exit_put:
1082 return err; 1679 return err;
1083} /* ext4_group_extend */ 1680}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ed3ce82e2de4..502c61fd7392 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1095 } 1095 }
1096 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1096 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
1097 seq_printf(seq, ",max_batch_time=%u", 1097 seq_printf(seq, ",max_batch_time=%u",
1098 (unsigned) sbi->s_min_batch_time); 1098 (unsigned) sbi->s_max_batch_time);
1099 } 1099 }
1100 1100
1101 /* 1101 /*
@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
2005 struct ext4_group_desc *gdp = NULL; 2005 struct ext4_group_desc *gdp = NULL;
2006 ext4_group_t flex_group_count; 2006 ext4_group_t flex_group_count;
2007 ext4_group_t flex_group; 2007 ext4_group_t flex_group;
2008 int groups_per_flex = 0; 2008 unsigned int groups_per_flex = 0;
2009 size_t size; 2009 size_t size;
2010 int i; 2010 int i;
2011 2011
2012 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 2012 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2013 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 2013 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2014
2015 if (groups_per_flex < 2) {
2016 sbi->s_log_groups_per_flex = 0; 2014 sbi->s_log_groups_per_flex = 0;
2017 return 1; 2015 return 1;
2018 } 2016 }
2017 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
2019 2018
2020 /* We allocate both existing and potentially added groups */ 2019 /* We allocate both existing and potentially added groups */
2021 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 2020 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3506 * of the filesystem. 3505 * of the filesystem.
3507 */ 3506 */
3508 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3507 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3509 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 3508 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3510 "block %u is beyond end of filesystem (%llu)", 3509 "block %u is beyond end of filesystem (%llu)",
3511 le32_to_cpu(es->s_first_data_block), 3510 le32_to_cpu(es->s_first_data_block),
3512 ext4_blocks_count(es)); 3511 ext4_blocks_count(es));
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index b60f9f81e33c..d2a200624af5 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
47 name, value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, 50static int
51 void *fs_info) 51ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
52 void *fs_info)
52{ 53{
53 const struct xattr *xattr; 54 const struct xattr *xattr;
54 handle_t *handle = fs_info; 55 handle_t *handle = fs_info;