aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/ext4.txt7
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/ext4.h29
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/ialloc.c18
-rw-r--r--fs/ext4/inode.c143
-rw-r--r--fs/ext4/ioctl.c86
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/resize.c1175
-rw-r--r--fs/ext4/super.c11
-rw-r--r--fs/ext4/xattr_security.c5
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/revoke.c34
-rw-r--r--fs/jbd2/transaction.c5
-rw-r--r--include/linux/jbd2.h1
-rw-r--r--include/trace/events/ext4.h6
16 files changed, 1079 insertions, 463 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 4917cf24a5e0..10ec4639f152 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -581,6 +581,13 @@ Table of Ext4 specific ioctls
581 behaviour may change in the future as it is 581 behaviour may change in the future as it is
582 not necessary and has been done this way only 582 not necessary and has been done this way only
583 for sake of simplicity. 583 for sake of simplicity.
584
585 EXT4_IOC_RESIZE_FS Resize the filesystem to a new size. The number
586 of blocks of resized filesystem is passed in via
587 64 bit integer argument. The kernel allocates
588 bitmaps and inode table, the userspace tool thus
589 just passes the new number of blocks.
590
584.............................................................................. 591..............................................................................
585 592
586References 593References
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 12ccacda44e0..f9e2cd8cf711 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -23,6 +23,8 @@
23 23
24#include <trace/events/ext4.h> 24#include <trace/events/ext4.h>
25 25
26static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
27 ext4_group_t block_group);
26/* 28/*
27 * balloc.c contains the blocks allocation and deallocation routines 29 * balloc.c contains the blocks allocation and deallocation routines
28 */ 30 */
@@ -668,7 +670,7 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group)
668 * This function returns the number of file system metadata clusters at 670 * This function returns the number of file system metadata clusters at
669 * the beginning of a block group, including the reserved gdt blocks. 671 * the beginning of a block group, including the reserved gdt blocks.
670 */ 672 */
671unsigned ext4_num_base_meta_clusters(struct super_block *sb, 673static unsigned ext4_num_base_meta_clusters(struct super_block *sb,
672 ext4_group_t block_group) 674 ext4_group_t block_group)
673{ 675{
674 struct ext4_sb_info *sbi = EXT4_SB(sb); 676 struct ext4_sb_info *sbi = EXT4_SB(sb);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 1554b15f91bc..513004fc3d84 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -511,6 +511,14 @@ struct ext4_new_group_data {
511 __u32 free_blocks_count; 511 __u32 free_blocks_count;
512}; 512};
513 513
514/* Indexes used to index group tables in ext4_new_group_data */
515enum {
516 BLOCK_BITMAP = 0, /* block bitmap */
517 INODE_BITMAP, /* inode bitmap */
518 INODE_TABLE, /* inode tables */
519 GROUP_TABLE_COUNT,
520};
521
514/* 522/*
515 * Flags used by ext4_map_blocks() 523 * Flags used by ext4_map_blocks()
516 */ 524 */
@@ -575,6 +583,7 @@ struct ext4_new_group_data {
575 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ 583 /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */
576#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12) 584#define EXT4_IOC_ALLOC_DA_BLKS _IO('f', 12)
577#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent) 585#define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)
586#define EXT4_IOC_RESIZE_FS _IOW('f', 16, __u64)
578 587
579#if defined(__KERNEL__) && defined(CONFIG_COMPAT) 588#if defined(__KERNEL__) && defined(CONFIG_COMPAT)
580/* 589/*
@@ -957,12 +966,13 @@ struct ext4_inode_info {
957#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \ 966#define test_opt2(sb, opt) (EXT4_SB(sb)->s_mount_opt2 & \
958 EXT4_MOUNT2_##opt) 967 EXT4_MOUNT2_##opt)
959 968
960#define ext4_set_bit __test_and_set_bit_le 969#define ext4_test_and_set_bit __test_and_set_bit_le
970#define ext4_set_bit __set_bit_le
961#define ext4_set_bit_atomic ext2_set_bit_atomic 971#define ext4_set_bit_atomic ext2_set_bit_atomic
962#define ext4_clear_bit __test_and_clear_bit_le 972#define ext4_test_and_clear_bit __test_and_clear_bit_le
973#define ext4_clear_bit __clear_bit_le
963#define ext4_clear_bit_atomic ext2_clear_bit_atomic 974#define ext4_clear_bit_atomic ext2_clear_bit_atomic
964#define ext4_test_bit test_bit_le 975#define ext4_test_bit test_bit_le
965#define ext4_find_first_zero_bit find_first_zero_bit_le
966#define ext4_find_next_zero_bit find_next_zero_bit_le 976#define ext4_find_next_zero_bit find_next_zero_bit_le
967#define ext4_find_next_bit find_next_bit_le 977#define ext4_find_next_bit find_next_bit_le
968 978
@@ -1397,6 +1407,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1397#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040 1407#define EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE 0x0040
1398#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100 1408#define EXT4_FEATURE_RO_COMPAT_QUOTA 0x0100
1399#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200 1409#define EXT4_FEATURE_RO_COMPAT_BIGALLOC 0x0200
1410#define EXT4_FEATURE_RO_COMPAT_METADATA_CSUM 0x0400
1400 1411
1401#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 1412#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001
1402#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 1413#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002
@@ -1409,6 +1420,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1409#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200 1420#define EXT4_FEATURE_INCOMPAT_FLEX_BG 0x0200
1410#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */ 1421#define EXT4_FEATURE_INCOMPAT_EA_INODE 0x0400 /* EA in inode */
1411#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */ 1422#define EXT4_FEATURE_INCOMPAT_DIRDATA 0x1000 /* data in dirent */
1423#define EXT4_FEATURE_INCOMPAT_INLINEDATA 0x2000 /* data in inode */
1424#define EXT4_FEATURE_INCOMPAT_LARGEDIR 0x4000 /* >2GB or 3-lvl htree */
1412 1425
1413#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR 1426#define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
1414#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ 1427#define EXT2_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \
@@ -1790,8 +1803,6 @@ extern void ext4_init_block_bitmap(struct super_block *sb,
1790extern unsigned ext4_free_clusters_after_init(struct super_block *sb, 1803extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
1791 ext4_group_t block_group, 1804 ext4_group_t block_group,
1792 struct ext4_group_desc *gdp); 1805 struct ext4_group_desc *gdp);
1793extern unsigned ext4_num_base_meta_clusters(struct super_block *sb,
1794 ext4_group_t block_group);
1795extern unsigned ext4_num_overhead_clusters(struct super_block *sb, 1806extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
1796 ext4_group_t block_group, 1807 ext4_group_t block_group,
1797 struct ext4_group_desc *gdp); 1808 struct ext4_group_desc *gdp);
@@ -1880,16 +1891,9 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
1880extern void ext4_set_aops(struct inode *inode); 1891extern void ext4_set_aops(struct inode *inode);
1881extern int ext4_writepage_trans_blocks(struct inode *); 1892extern int ext4_writepage_trans_blocks(struct inode *);
1882extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); 1893extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
1883extern int ext4_block_truncate_page(handle_t *handle,
1884 struct address_space *mapping, loff_t from);
1885extern int ext4_block_zero_page_range(handle_t *handle,
1886 struct address_space *mapping, loff_t from, loff_t length);
1887extern int ext4_discard_partial_page_buffers(handle_t *handle, 1894extern int ext4_discard_partial_page_buffers(handle_t *handle,
1888 struct address_space *mapping, loff_t from, 1895 struct address_space *mapping, loff_t from,
1889 loff_t length, int flags); 1896 loff_t length, int flags);
1890extern int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
1891 struct inode *inode, struct page *page, loff_t from,
1892 loff_t length, int flags);
1893extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); 1897extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
1894extern qsize_t *ext4_get_reserved_space(struct inode *inode); 1898extern qsize_t *ext4_get_reserved_space(struct inode *inode);
1895extern void ext4_da_update_reserve_space(struct inode *inode, 1899extern void ext4_da_update_reserve_space(struct inode *inode,
@@ -1924,6 +1928,7 @@ extern int ext4_group_add(struct super_block *sb,
1924extern int ext4_group_extend(struct super_block *sb, 1928extern int ext4_group_extend(struct super_block *sb,
1925 struct ext4_super_block *es, 1929 struct ext4_super_block *es,
1926 ext4_fsblk_t n_blocks_count); 1930 ext4_fsblk_t n_blocks_count);
1931extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
1927 1932
1928/* super.c */ 1933/* super.c */
1929extern void *ext4_kvmalloc(size_t size, gfp_t flags); 1934extern void *ext4_kvmalloc(size_t size, gfp_t flags);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 841faf5fb785..74f23c292e1b 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3280,6 +3280,9 @@ static int ext4_find_delalloc_range(struct inode *inode,
3280 ext4_lblk_t i, pg_lblk; 3280 ext4_lblk_t i, pg_lblk;
3281 pgoff_t index; 3281 pgoff_t index;
3282 3282
3283 if (!test_opt(inode->i_sb, DELALLOC))
3284 return 0;
3285
3283 /* reverse search wont work if fs block size is less than page size */ 3286 /* reverse search wont work if fs block size is less than page size */
3284 if (inode->i_blkbits < PAGE_CACHE_SHIFT) 3287 if (inode->i_blkbits < PAGE_CACHE_SHIFT)
3285 search_hint_reverse = 0; 3288 search_hint_reverse = 0;
@@ -3452,8 +3455,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3452 int err = 0; 3455 int err = 0;
3453 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio; 3456 ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
3454 3457
3455 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical" 3458 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
3456 "block %llu, max_blocks %u, flags %d, allocated %u", 3459 "block %llu, max_blocks %u, flags %x, allocated %u\n",
3457 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 3460 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
3458 flags, allocated); 3461 flags, allocated);
3459 ext4_ext_show_leaf(inode, path); 3462 ext4_ext_show_leaf(inode, path);
@@ -3624,7 +3627,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
3624 struct ext4_sb_info *sbi = EXT4_SB(sb); 3627 struct ext4_sb_info *sbi = EXT4_SB(sb);
3625 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); 3628 ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
3626 ext4_lblk_t ex_cluster_start, ex_cluster_end; 3629 ext4_lblk_t ex_cluster_start, ex_cluster_end;
3627 ext4_lblk_t rr_cluster_start, rr_cluster_end; 3630 ext4_lblk_t rr_cluster_start;
3628 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); 3631 ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
3629 ext4_fsblk_t ee_start = ext4_ext_pblock(ex); 3632 ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
3630 unsigned short ee_len = ext4_ext_get_actual_len(ex); 3633 unsigned short ee_len = ext4_ext_get_actual_len(ex);
@@ -3635,7 +3638,6 @@ static int get_implied_cluster_alloc(struct super_block *sb,
3635 3638
3636 /* The requested region passed into ext4_map_blocks() */ 3639 /* The requested region passed into ext4_map_blocks() */
3637 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk); 3640 rr_cluster_start = EXT4_B2C(sbi, map->m_lblk);
3638 rr_cluster_end = EXT4_B2C(sbi, map->m_lblk + map->m_len - 1);
3639 3641
3640 if ((rr_cluster_start == ex_cluster_end) || 3642 if ((rr_cluster_start == ex_cluster_end) ||
3641 (rr_cluster_start == ex_cluster_start)) { 3643 (rr_cluster_start == ex_cluster_start)) {
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 4637af036d9c..25d8c9781ad9 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -252,7 +252,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
252 fatal = ext4_journal_get_write_access(handle, bh2); 252 fatal = ext4_journal_get_write_access(handle, bh2);
253 } 253 }
254 ext4_lock_group(sb, block_group); 254 ext4_lock_group(sb, block_group);
255 cleared = ext4_clear_bit(bit, bitmap_bh->b_data); 255 cleared = ext4_test_and_clear_bit(bit, bitmap_bh->b_data);
256 if (fatal || !cleared) { 256 if (fatal || !cleared) {
257 ext4_unlock_group(sb, block_group); 257 ext4_unlock_group(sb, block_group);
258 goto out; 258 goto out;
@@ -358,7 +358,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent,
358 struct ext4_sb_info *sbi = EXT4_SB(sb); 358 struct ext4_sb_info *sbi = EXT4_SB(sb);
359 ext4_group_t real_ngroups = ext4_get_groups_count(sb); 359 ext4_group_t real_ngroups = ext4_get_groups_count(sb);
360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb); 360 int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
361 unsigned int freei, avefreei; 361 unsigned int freei, avefreei, grp_free;
362 ext4_fsblk_t freeb, avefreec; 362 ext4_fsblk_t freeb, avefreec;
363 unsigned int ndirs; 363 unsigned int ndirs;
364 int max_dirs, min_inodes; 364 int max_dirs, min_inodes;
@@ -477,8 +477,8 @@ fallback_retry:
477 for (i = 0; i < ngroups; i++) { 477 for (i = 0; i < ngroups; i++) {
478 grp = (parent_group + i) % ngroups; 478 grp = (parent_group + i) % ngroups;
479 desc = ext4_get_group_desc(sb, grp, NULL); 479 desc = ext4_get_group_desc(sb, grp, NULL);
480 if (desc && ext4_free_inodes_count(sb, desc) && 480 grp_free = ext4_free_inodes_count(sb, desc);
481 ext4_free_inodes_count(sb, desc) >= avefreei) { 481 if (desc && grp_free && grp_free >= avefreei) {
482 *group = grp; 482 *group = grp;
483 return 0; 483 return 0;
484 } 484 }
@@ -618,7 +618,7 @@ static int ext4_claim_inode(struct super_block *sb,
618 */ 618 */
619 down_read(&grp->alloc_sem); 619 down_read(&grp->alloc_sem);
620 ext4_lock_group(sb, group); 620 ext4_lock_group(sb, group);
621 if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) { 621 if (ext4_test_and_set_bit(ino, inode_bitmap_bh->b_data)) {
622 /* not a free inode */ 622 /* not a free inode */
623 retval = 1; 623 retval = 1;
624 goto err_ret; 624 goto err_ret;
@@ -885,8 +885,12 @@ got:
885 if (IS_DIRSYNC(inode)) 885 if (IS_DIRSYNC(inode))
886 ext4_handle_sync(handle); 886 ext4_handle_sync(handle);
887 if (insert_inode_locked(inode) < 0) { 887 if (insert_inode_locked(inode) < 0) {
888 err = -EINVAL; 888 /*
889 goto fail_drop; 889 * Likely a bitmap corruption causing inode to be allocated
890 * twice.
891 */
892 err = -EIO;
893 goto fail;
890 } 894 }
891 spin_lock(&sbi->s_next_gen_lock); 895 spin_lock(&sbi->s_next_gen_lock);
892 inode->i_generation = sbi->s_next_generation++; 896 inode->i_generation = sbi->s_next_generation++;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aa8efa6572d6..feaa82fe629d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,6 +71,9 @@ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
71static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate); 71static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
72static int __ext4_journalled_writepage(struct page *page, unsigned int len); 72static int __ext4_journalled_writepage(struct page *page, unsigned int len);
73static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); 73static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
74static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
75 struct inode *inode, struct page *page, loff_t from,
76 loff_t length, int flags);
74 77
75/* 78/*
76 * Test whether an inode is a fast symlink. 79 * Test whether an inode is a fast symlink.
@@ -2759,7 +2762,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
2759 if (!io_end || !size) 2762 if (!io_end || !size)
2760 goto out; 2763 goto out;
2761 2764
2762 ext_debug("ext4_end_io_dio(): io_end 0x%p" 2765 ext_debug("ext4_end_io_dio(): io_end 0x%p "
2763 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 2766 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
2764 iocb->private, io_end->inode->i_ino, iocb, offset, 2767 iocb->private, io_end->inode->i_ino, iocb, offset,
2765 size); 2768 size);
@@ -3160,7 +3163,7 @@ int ext4_discard_partial_page_buffers(handle_t *handle,
3160 * 3163 *
3161 * Returns zero on sucess or negative on failure. 3164 * Returns zero on sucess or negative on failure.
3162 */ 3165 */
3163int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, 3166static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle,
3164 struct inode *inode, struct page *page, loff_t from, 3167 struct inode *inode, struct page *page, loff_t from,
3165 loff_t length, int flags) 3168 loff_t length, int flags)
3166{ 3169{
@@ -3300,126 +3303,6 @@ next:
3300 return err; 3303 return err;
3301} 3304}
3302 3305
3303/*
3304 * ext4_block_truncate_page() zeroes out a mapping from file offset `from'
3305 * up to the end of the block which corresponds to `from'.
3306 * This required during truncate. We need to physically zero the tail end
3307 * of that block so it doesn't yield old data if the file is later grown.
3308 */
3309int ext4_block_truncate_page(handle_t *handle,
3310 struct address_space *mapping, loff_t from)
3311{
3312 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3313 unsigned length;
3314 unsigned blocksize;
3315 struct inode *inode = mapping->host;
3316
3317 blocksize = inode->i_sb->s_blocksize;
3318 length = blocksize - (offset & (blocksize - 1));
3319
3320 return ext4_block_zero_page_range(handle, mapping, from, length);
3321}
3322
3323/*
3324 * ext4_block_zero_page_range() zeros out a mapping of length 'length'
3325 * starting from file offset 'from'. The range to be zero'd must
3326 * be contained with in one block. If the specified range exceeds
3327 * the end of the block it will be shortened to end of the block
3328 * that cooresponds to 'from'
3329 */
3330int ext4_block_zero_page_range(handle_t *handle,
3331 struct address_space *mapping, loff_t from, loff_t length)
3332{
3333 ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
3334 unsigned offset = from & (PAGE_CACHE_SIZE-1);
3335 unsigned blocksize, max, pos;
3336 ext4_lblk_t iblock;
3337 struct inode *inode = mapping->host;
3338 struct buffer_head *bh;
3339 struct page *page;
3340 int err = 0;
3341
3342 page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
3343 mapping_gfp_mask(mapping) & ~__GFP_FS);
3344 if (!page)
3345 return -ENOMEM;
3346
3347 blocksize = inode->i_sb->s_blocksize;
3348 max = blocksize - (offset & (blocksize - 1));
3349
3350 /*
3351 * correct length if it does not fall between
3352 * 'from' and the end of the block
3353 */
3354 if (length > max || length < 0)
3355 length = max;
3356
3357 iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
3358
3359 if (!page_has_buffers(page))
3360 create_empty_buffers(page, blocksize, 0);
3361
3362 /* Find the buffer that contains "offset" */
3363 bh = page_buffers(page);
3364 pos = blocksize;
3365 while (offset >= pos) {
3366 bh = bh->b_this_page;
3367 iblock++;
3368 pos += blocksize;
3369 }
3370
3371 err = 0;
3372 if (buffer_freed(bh)) {
3373 BUFFER_TRACE(bh, "freed: skip");
3374 goto unlock;
3375 }
3376
3377 if (!buffer_mapped(bh)) {
3378 BUFFER_TRACE(bh, "unmapped");
3379 ext4_get_block(inode, iblock, bh, 0);
3380 /* unmapped? It's a hole - nothing to do */
3381 if (!buffer_mapped(bh)) {
3382 BUFFER_TRACE(bh, "still unmapped");
3383 goto unlock;
3384 }
3385 }
3386
3387 /* Ok, it's mapped. Make sure it's up-to-date */
3388 if (PageUptodate(page))
3389 set_buffer_uptodate(bh);
3390
3391 if (!buffer_uptodate(bh)) {
3392 err = -EIO;
3393 ll_rw_block(READ, 1, &bh);
3394 wait_on_buffer(bh);
3395 /* Uhhuh. Read error. Complain and punt. */
3396 if (!buffer_uptodate(bh))
3397 goto unlock;
3398 }
3399
3400 if (ext4_should_journal_data(inode)) {
3401 BUFFER_TRACE(bh, "get write access");
3402 err = ext4_journal_get_write_access(handle, bh);
3403 if (err)
3404 goto unlock;
3405 }
3406
3407 zero_user(page, offset, length);
3408
3409 BUFFER_TRACE(bh, "zeroed end of block");
3410
3411 err = 0;
3412 if (ext4_should_journal_data(inode)) {
3413 err = ext4_handle_dirty_metadata(handle, inode, bh);
3414 } else
3415 mark_buffer_dirty(bh);
3416
3417unlock:
3418 unlock_page(page);
3419 page_cache_release(page);
3420 return err;
3421}
3422
3423int ext4_can_truncate(struct inode *inode) 3306int ext4_can_truncate(struct inode *inode)
3424{ 3307{
3425 if (S_ISREG(inode->i_mode)) 3308 if (S_ISREG(inode->i_mode))
@@ -4646,9 +4529,19 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4646 return 0; 4529 return 0;
4647 if (is_journal_aborted(journal)) 4530 if (is_journal_aborted(journal))
4648 return -EROFS; 4531 return -EROFS;
4532 /* We have to allocate physical blocks for delalloc blocks
4533 * before flushing journal. otherwise delalloc blocks can not
4534 * be allocated any more. even more truncate on delalloc blocks
4535 * could trigger BUG by flushing delalloc blocks in journal.
4536 * There is no delalloc block in non-journal data mode.
4537 */
4538 if (val && test_opt(inode->i_sb, DELALLOC)) {
4539 err = ext4_alloc_da_blocks(inode);
4540 if (err < 0)
4541 return err;
4542 }
4649 4543
4650 jbd2_journal_lock_updates(journal); 4544 jbd2_journal_lock_updates(journal);
4651 jbd2_journal_flush(journal);
4652 4545
4653 /* 4546 /*
4654 * OK, there are no updates running now, and all cached data is 4547 * OK, there are no updates running now, and all cached data is
@@ -4660,8 +4553,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4660 4553
4661 if (val) 4554 if (val)
4662 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4555 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4663 else 4556 else {
4557 jbd2_journal_flush(journal);
4664 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4558 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4559 }
4665 ext4_set_aops(inode); 4560 ext4_set_aops(inode);
4666 4561
4667 jbd2_journal_unlock_updates(journal); 4562 jbd2_journal_unlock_updates(journal);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index e87a932b073b..6eee25591b81 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -18,6 +18,8 @@
18#include "ext4_jbd2.h" 18#include "ext4_jbd2.h"
19#include "ext4.h" 19#include "ext4.h"
20 20
21#define MAX_32_NUM ((((unsigned long long) 1) << 32) - 1)
22
21long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) 23long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
22{ 24{
23 struct inode *inode = filp->f_dentry->d_inode; 25 struct inode *inode = filp->f_dentry->d_inode;
@@ -186,19 +188,22 @@ setversion_out:
186 if (err) 188 if (err)
187 return err; 189 return err;
188 190
189 if (get_user(n_blocks_count, (__u32 __user *)arg)) 191 if (get_user(n_blocks_count, (__u32 __user *)arg)) {
190 return -EFAULT; 192 err = -EFAULT;
193 goto group_extend_out;
194 }
191 195
192 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 196 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
193 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 197 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
194 ext4_msg(sb, KERN_ERR, 198 ext4_msg(sb, KERN_ERR,
195 "Online resizing not supported with bigalloc"); 199 "Online resizing not supported with bigalloc");
196 return -EOPNOTSUPP; 200 err = -EOPNOTSUPP;
201 goto group_extend_out;
197 } 202 }
198 203
199 err = mnt_want_write_file(filp); 204 err = mnt_want_write_file(filp);
200 if (err) 205 if (err)
201 return err; 206 goto group_extend_out;
202 207
203 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); 208 err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count);
204 if (EXT4_SB(sb)->s_journal) { 209 if (EXT4_SB(sb)->s_journal) {
@@ -209,8 +214,8 @@ setversion_out:
209 if (err == 0) 214 if (err == 0)
210 err = err2; 215 err = err2;
211 mnt_drop_write_file(filp); 216 mnt_drop_write_file(filp);
217group_extend_out:
212 ext4_resize_end(sb); 218 ext4_resize_end(sb);
213
214 return err; 219 return err;
215 } 220 }
216 221
@@ -251,8 +256,7 @@ setversion_out:
251 err = ext4_move_extents(filp, donor_filp, me.orig_start, 256 err = ext4_move_extents(filp, donor_filp, me.orig_start,
252 me.donor_start, me.len, &me.moved_len); 257 me.donor_start, me.len, &me.moved_len);
253 mnt_drop_write_file(filp); 258 mnt_drop_write_file(filp);
254 if (me.moved_len > 0) 259 mnt_drop_write(filp->f_path.mnt);
255 file_remove_suid(donor_filp);
256 260
257 if (copy_to_user((struct move_extent __user *)arg, 261 if (copy_to_user((struct move_extent __user *)arg,
258 &me, sizeof(me))) 262 &me, sizeof(me)))
@@ -271,19 +275,22 @@ mext_out:
271 return err; 275 return err;
272 276
273 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, 277 if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg,
274 sizeof(input))) 278 sizeof(input))) {
275 return -EFAULT; 279 err = -EFAULT;
280 goto group_add_out;
281 }
276 282
277 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 283 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
278 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) { 284 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
279 ext4_msg(sb, KERN_ERR, 285 ext4_msg(sb, KERN_ERR,
280 "Online resizing not supported with bigalloc"); 286 "Online resizing not supported with bigalloc");
281 return -EOPNOTSUPP; 287 err = -EOPNOTSUPP;
288 goto group_add_out;
282 } 289 }
283 290
284 err = mnt_want_write_file(filp); 291 err = mnt_want_write_file(filp);
285 if (err) 292 if (err)
286 return err; 293 goto group_add_out;
287 294
288 err = ext4_group_add(sb, &input); 295 err = ext4_group_add(sb, &input);
289 if (EXT4_SB(sb)->s_journal) { 296 if (EXT4_SB(sb)->s_journal) {
@@ -294,8 +301,8 @@ mext_out:
294 if (err == 0) 301 if (err == 0)
295 err = err2; 302 err = err2;
296 mnt_drop_write_file(filp); 303 mnt_drop_write_file(filp);
304group_add_out:
297 ext4_resize_end(sb); 305 ext4_resize_end(sb);
298
299 return err; 306 return err;
300 } 307 }
301 308
@@ -335,6 +342,60 @@ mext_out:
335 return err; 342 return err;
336 } 343 }
337 344
345 case EXT4_IOC_RESIZE_FS: {
346 ext4_fsblk_t n_blocks_count;
347 struct super_block *sb = inode->i_sb;
348 int err = 0, err2 = 0;
349
350 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
351 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
352 ext4_msg(sb, KERN_ERR,
353 "Online resizing not (yet) supported with bigalloc");
354 return -EOPNOTSUPP;
355 }
356
357 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
358 EXT4_FEATURE_INCOMPAT_META_BG)) {
359 ext4_msg(sb, KERN_ERR,
360 "Online resizing not (yet) supported with meta_bg");
361 return -EOPNOTSUPP;
362 }
363
364 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
365 sizeof(__u64))) {
366 return -EFAULT;
367 }
368
369 if (n_blocks_count > MAX_32_NUM &&
370 !EXT4_HAS_INCOMPAT_FEATURE(sb,
371 EXT4_FEATURE_INCOMPAT_64BIT)) {
372 ext4_msg(sb, KERN_ERR,
373 "File system only supports 32-bit block numbers");
374 return -EOPNOTSUPP;
375 }
376
377 err = ext4_resize_begin(sb);
378 if (err)
379 return err;
380
381 err = mnt_want_write(filp->f_path.mnt);
382 if (err)
383 goto resizefs_out;
384
385 err = ext4_resize_fs(sb, n_blocks_count);
386 if (EXT4_SB(sb)->s_journal) {
387 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
388 err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
389 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
390 }
391 if (err == 0)
392 err = err2;
393 mnt_drop_write(filp->f_path.mnt);
394resizefs_out:
395 ext4_resize_end(sb);
396 return err;
397 }
398
338 case FITRIM: 399 case FITRIM:
339 { 400 {
340 struct request_queue *q = bdev_get_queue(sb->s_bdev); 401 struct request_queue *q = bdev_get_queue(sb->s_bdev);
@@ -433,6 +494,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
433 } 494 }
434 case EXT4_IOC_MOVE_EXT: 495 case EXT4_IOC_MOVE_EXT:
435 case FITRIM: 496 case FITRIM:
497 case EXT4_IOC_RESIZE_FS:
436 break; 498 break;
437 default: 499 default:
438 return -ENOIOCTLCMD; 500 return -ENOIOCTLCMD;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e2d8be8f28bf..cb990b21c698 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3671,7 +3671,7 @@ ext4_mb_release_group_pa(struct ext4_buddy *e4b,
3671 ext4_group_t group; 3671 ext4_group_t group;
3672 ext4_grpblk_t bit; 3672 ext4_grpblk_t bit;
3673 3673
3674 trace_ext4_mb_release_group_pa(pa); 3674 trace_ext4_mb_release_group_pa(sb, pa);
3675 BUG_ON(pa->pa_deleted == 0); 3675 BUG_ON(pa->pa_deleted == 0);
3676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit); 3676 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
3677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0); 3677 BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 996780ab4f4e..f9d948f0eb86 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -134,6 +134,172 @@ static int verify_group_input(struct super_block *sb,
134 return err; 134 return err;
135} 135}
136 136
137/*
138 * ext4_new_flex_group_data is used by 64bit-resize interface to add a flex
139 * group each time.
140 */
141struct ext4_new_flex_group_data {
142 struct ext4_new_group_data *groups; /* new_group_data for groups
143 in the flex group */
144 __u16 *bg_flags; /* block group flags of groups
145 in @groups */
146 ext4_group_t count; /* number of groups in @groups
147 */
148};
149
150/*
151 * alloc_flex_gd() allocates a ext4_new_flex_group_data with size of
152 * @flexbg_size.
153 *
154 * Returns NULL on failure otherwise address of the allocated structure.
155 */
156static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
157{
158 struct ext4_new_flex_group_data *flex_gd;
159
160 flex_gd = kmalloc(sizeof(*flex_gd), GFP_NOFS);
161 if (flex_gd == NULL)
162 goto out3;
163
164 flex_gd->count = flexbg_size;
165
166 flex_gd->groups = kmalloc(sizeof(struct ext4_new_group_data) *
167 flexbg_size, GFP_NOFS);
168 if (flex_gd->groups == NULL)
169 goto out2;
170
171 flex_gd->bg_flags = kmalloc(flexbg_size * sizeof(__u16), GFP_NOFS);
172 if (flex_gd->bg_flags == NULL)
173 goto out1;
174
175 return flex_gd;
176
177out1:
178 kfree(flex_gd->groups);
179out2:
180 kfree(flex_gd);
181out3:
182 return NULL;
183}
184
185static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
186{
187 kfree(flex_gd->bg_flags);
188 kfree(flex_gd->groups);
189 kfree(flex_gd);
190}
191
192/*
193 * ext4_alloc_group_tables() allocates block bitmaps, inode bitmaps
194 * and inode tables for a flex group.
195 *
196 * This function is used by 64bit-resize. Note that this function allocates
197 * group tables from the 1st group of groups contained by @flexgd, which may
198 * be a partial of a flex group.
199 *
200 * @sb: super block of fs to which the groups belongs
201 */
202static void ext4_alloc_group_tables(struct super_block *sb,
203 struct ext4_new_flex_group_data *flex_gd,
204 int flexbg_size)
205{
206 struct ext4_new_group_data *group_data = flex_gd->groups;
207 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
208 ext4_fsblk_t start_blk;
209 ext4_fsblk_t last_blk;
210 ext4_group_t src_group;
211 ext4_group_t bb_index = 0;
212 ext4_group_t ib_index = 0;
213 ext4_group_t it_index = 0;
214 ext4_group_t group;
215 ext4_group_t last_group;
216 unsigned overhead;
217
218 BUG_ON(flex_gd->count == 0 || group_data == NULL);
219
220 src_group = group_data[0].group;
221 last_group = src_group + flex_gd->count - 1;
222
223 BUG_ON((flexbg_size > 1) && ((src_group & ~(flexbg_size - 1)) !=
224 (last_group & ~(flexbg_size - 1))));
225next_group:
226 group = group_data[0].group;
227 start_blk = ext4_group_first_block_no(sb, src_group);
228 last_blk = start_blk + group_data[src_group - group].blocks_count;
229
230 overhead = ext4_bg_has_super(sb, src_group) ?
231 (1 + ext4_bg_num_gdb(sb, src_group) +
232 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
233
234 start_blk += overhead;
235
236 BUG_ON(src_group >= group_data[0].group + flex_gd->count);
237 /* We collect contiguous blocks as much as possible. */
238 src_group++;
239 for (; src_group <= last_group; src_group++)
240 if (!ext4_bg_has_super(sb, src_group))
241 last_blk += group_data[src_group - group].blocks_count;
242 else
243 break;
244
245 /* Allocate block bitmaps */
246 for (; bb_index < flex_gd->count; bb_index++) {
247 if (start_blk >= last_blk)
248 goto next_group;
249 group_data[bb_index].block_bitmap = start_blk++;
250 ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
251 group -= group_data[0].group;
252 group_data[group].free_blocks_count--;
253 if (flexbg_size > 1)
254 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
255 }
256
257 /* Allocate inode bitmaps */
258 for (; ib_index < flex_gd->count; ib_index++) {
259 if (start_blk >= last_blk)
260 goto next_group;
261 group_data[ib_index].inode_bitmap = start_blk++;
262 ext4_get_group_no_and_offset(sb, start_blk - 1, &group, NULL);
263 group -= group_data[0].group;
264 group_data[group].free_blocks_count--;
265 if (flexbg_size > 1)
266 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
267 }
268
269 /* Allocate inode tables */
270 for (; it_index < flex_gd->count; it_index++) {
271 if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
272 goto next_group;
273 group_data[it_index].inode_table = start_blk;
274 ext4_get_group_no_and_offset(sb, start_blk, &group, NULL);
275 group -= group_data[0].group;
276 group_data[group].free_blocks_count -=
277 EXT4_SB(sb)->s_itb_per_group;
278 if (flexbg_size > 1)
279 flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
280
281 start_blk += EXT4_SB(sb)->s_itb_per_group;
282 }
283
284 if (test_opt(sb, DEBUG)) {
285 int i;
286 group = group_data[0].group;
287
288 printk(KERN_DEBUG "EXT4-fs: adding a flex group with "
289 "%d groups, flexbg size is %d:\n", flex_gd->count,
290 flexbg_size);
291
292 for (i = 0; i < flex_gd->count; i++) {
293 printk(KERN_DEBUG "adding %s group %u: %u "
294 "blocks (%d free)\n",
295 ext4_bg_has_super(sb, group + i) ? "normal" :
296 "no-super", group + i,
297 group_data[i].blocks_count,
298 group_data[i].free_blocks_count);
299 }
300 }
301}
302
137static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 303static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
138 ext4_fsblk_t blk) 304 ext4_fsblk_t blk)
139{ 305{
@@ -179,131 +345,250 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
179} 345}
180 346
181/* 347/*
182 * Set up the block and inode bitmaps, and the inode table for the new group. 348 * set_flexbg_block_bitmap() mark @count blocks starting from @block used.
349 *
350 * Helper function for ext4_setup_new_group_blocks() which set .
351 *
352 * @sb: super block
353 * @handle: journal handle
354 * @flex_gd: flex group data
355 */
356static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
357 struct ext4_new_flex_group_data *flex_gd,
358 ext4_fsblk_t block, ext4_group_t count)
359{
360 ext4_group_t count2;
361
362 ext4_debug("mark blocks [%llu/%u] used\n", block, count);
363 for (count2 = count; count > 0; count -= count2, block += count2) {
364 ext4_fsblk_t start;
365 struct buffer_head *bh;
366 ext4_group_t group;
367 int err;
368
369 ext4_get_group_no_and_offset(sb, block, &group, NULL);
370 start = ext4_group_first_block_no(sb, group);
371 group -= flex_gd->groups[0].group;
372
373 count2 = sb->s_blocksize * 8 - (block - start);
374 if (count2 > count)
375 count2 = count;
376
377 if (flex_gd->bg_flags[group] & EXT4_BG_BLOCK_UNINIT) {
378 BUG_ON(flex_gd->count > 1);
379 continue;
380 }
381
382 err = extend_or_restart_transaction(handle, 1);
383 if (err)
384 return err;
385
386 bh = sb_getblk(sb, flex_gd->groups[group].block_bitmap);
387 if (!bh)
388 return -EIO;
389
390 err = ext4_journal_get_write_access(handle, bh);
391 if (err)
392 return err;
393 ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
394 block - start, count2);
395 ext4_set_bits(bh->b_data, block - start, count2);
396
397 err = ext4_handle_dirty_metadata(handle, NULL, bh);
398 if (unlikely(err))
399 return err;
400 brelse(bh);
401 }
402
403 return 0;
404}
405
406/*
407 * Set up the block and inode bitmaps, and the inode table for the new groups.
183 * This doesn't need to be part of the main transaction, since we are only 408 * This doesn't need to be part of the main transaction, since we are only
184 * changing blocks outside the actual filesystem. We still do journaling to 409 * changing blocks outside the actual filesystem. We still do journaling to
185 * ensure the recovery is correct in case of a failure just after resize. 410 * ensure the recovery is correct in case of a failure just after resize.
186 * If any part of this fails, we simply abort the resize. 411 * If any part of this fails, we simply abort the resize.
412 *
413 * setup_new_flex_group_blocks handles a flex group as follow:
414 * 1. copy super block and GDT, and initialize group tables if necessary.
415 * In this step, we only set bits in blocks bitmaps for blocks taken by
416 * super block and GDT.
417 * 2. allocate group tables in block bitmaps, that is, set bits in block
418 * bitmap for blocks taken by group tables.
187 */ 419 */
188static int setup_new_group_blocks(struct super_block *sb, 420static int setup_new_flex_group_blocks(struct super_block *sb,
189 struct ext4_new_group_data *input) 421 struct ext4_new_flex_group_data *flex_gd)
190{ 422{
423 int group_table_count[] = {1, 1, EXT4_SB(sb)->s_itb_per_group};
424 ext4_fsblk_t start;
425 ext4_fsblk_t block;
191 struct ext4_sb_info *sbi = EXT4_SB(sb); 426 struct ext4_sb_info *sbi = EXT4_SB(sb);
192 ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); 427 struct ext4_super_block *es = sbi->s_es;
193 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 428 struct ext4_new_group_data *group_data = flex_gd->groups;
194 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; 429 __u16 *bg_flags = flex_gd->bg_flags;
195 unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group);
196 struct buffer_head *bh;
197 handle_t *handle; 430 handle_t *handle;
198 ext4_fsblk_t block; 431 ext4_group_t group, count;
199 ext4_grpblk_t bit; 432 struct buffer_head *bh = NULL;
200 int i; 433 int reserved_gdb, i, j, err = 0, err2;
201 int err = 0, err2; 434
435 BUG_ON(!flex_gd->count || !group_data ||
436 group_data[0].group != sbi->s_groups_count);
437
438 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
202 439
203 /* This transaction may be extended/restarted along the way */ 440 /* This transaction may be extended/restarted along the way */
204 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 441 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
205
206 if (IS_ERR(handle)) 442 if (IS_ERR(handle))
207 return PTR_ERR(handle); 443 return PTR_ERR(handle);
208 444
209 BUG_ON(input->group != sbi->s_groups_count); 445 group = group_data[0].group;
446 for (i = 0; i < flex_gd->count; i++, group++) {
447 unsigned long gdblocks;
210 448
211 /* Copy all of the GDT blocks into the backup in this group */ 449 gdblocks = ext4_bg_num_gdb(sb, group);
212 for (i = 0, bit = 1, block = start + 1; 450 start = ext4_group_first_block_no(sb, group);
213 i < gdblocks; i++, block++, bit++) {
214 struct buffer_head *gdb;
215 451
216 ext4_debug("update backup group %#04llx (+%d)\n", block, bit); 452 /* Copy all of the GDT blocks into the backup in this group */
217 err = extend_or_restart_transaction(handle, 1); 453 for (j = 0, block = start + 1; j < gdblocks; j++, block++) {
218 if (err) 454 struct buffer_head *gdb;
219 goto exit_journal;
220 455
221 gdb = sb_getblk(sb, block); 456 ext4_debug("update backup group %#04llx\n", block);
222 if (!gdb) { 457 err = extend_or_restart_transaction(handle, 1);
223 err = -EIO; 458 if (err)
224 goto exit_journal; 459 goto out;
225 } 460
226 if ((err = ext4_journal_get_write_access(handle, gdb))) { 461 gdb = sb_getblk(sb, block);
462 if (!gdb) {
463 err = -EIO;
464 goto out;
465 }
466
467 err = ext4_journal_get_write_access(handle, gdb);
468 if (err) {
469 brelse(gdb);
470 goto out;
471 }
472 memcpy(gdb->b_data, sbi->s_group_desc[j]->b_data,
473 gdb->b_size);
474 set_buffer_uptodate(gdb);
475
476 err = ext4_handle_dirty_metadata(handle, NULL, gdb);
477 if (unlikely(err)) {
478 brelse(gdb);
479 goto out;
480 }
227 brelse(gdb); 481 brelse(gdb);
228 goto exit_journal;
229 } 482 }
230 memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, gdb->b_size); 483
231 set_buffer_uptodate(gdb); 484 /* Zero out all of the reserved backup group descriptor
232 err = ext4_handle_dirty_metadata(handle, NULL, gdb); 485 * table blocks
233 if (unlikely(err)) { 486 */
234 brelse(gdb); 487 if (ext4_bg_has_super(sb, group)) {
235 goto exit_journal; 488 err = sb_issue_zeroout(sb, gdblocks + start + 1,
489 reserved_gdb, GFP_NOFS);
490 if (err)
491 goto out;
236 } 492 }
237 brelse(gdb);
238 }
239 493
240 /* Zero out all of the reserved backup group descriptor table blocks */ 494 /* Initialize group tables of the grop @group */
241 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 495 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
242 block, sbi->s_itb_per_group); 496 goto handle_bb;
243 err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
244 GFP_NOFS);
245 if (err)
246 goto exit_journal;
247 497
248 err = extend_or_restart_transaction(handle, 2); 498 /* Zero out all of the inode table blocks */
249 if (err) 499 block = group_data[i].inode_table;
250 goto exit_journal; 500 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n",
501 block, sbi->s_itb_per_group);
502 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
503 GFP_NOFS);
504 if (err)
505 goto out;
251 506
252 bh = bclean(handle, sb, input->block_bitmap); 507handle_bb:
253 if (IS_ERR(bh)) { 508 if (bg_flags[i] & EXT4_BG_BLOCK_UNINIT)
254 err = PTR_ERR(bh); 509 goto handle_ib;
255 goto exit_journal;
256 }
257 510
258 if (ext4_bg_has_super(sb, input->group)) { 511 /* Initialize block bitmap of the @group */
259 ext4_debug("mark backup group tables %#04llx (+0)\n", start); 512 block = group_data[i].block_bitmap;
260 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 1); 513 err = extend_or_restart_transaction(handle, 1);
261 } 514 if (err)
515 goto out;
262 516
263 ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap, 517 bh = bclean(handle, sb, block);
264 input->block_bitmap - start); 518 if (IS_ERR(bh)) {
265 ext4_set_bit(input->block_bitmap - start, bh->b_data); 519 err = PTR_ERR(bh);
266 ext4_debug("mark inode bitmap %#04llx (+%llu)\n", input->inode_bitmap, 520 goto out;
267 input->inode_bitmap - start); 521 }
268 ext4_set_bit(input->inode_bitmap - start, bh->b_data); 522 if (ext4_bg_has_super(sb, group)) {
269 523 ext4_debug("mark backup superblock %#04llx (+0)\n",
270 /* Zero out all of the inode table blocks */ 524 start);
271 block = input->inode_table; 525 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb +
272 ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", 526 1);
273 block, sbi->s_itb_per_group); 527 }
274 err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); 528 ext4_mark_bitmap_end(group_data[i].blocks_count,
275 if (err) 529 sb->s_blocksize * 8, bh->b_data);
276 goto exit_bh; 530 err = ext4_handle_dirty_metadata(handle, NULL, bh);
277 ext4_set_bits(bh->b_data, input->inode_table - start, 531 if (err)
278 sbi->s_itb_per_group); 532 goto out;
533 brelse(bh);
279 534
535handle_ib:
536 if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
537 continue;
280 538
281 ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, 539 /* Initialize inode bitmap of the @group */
282 bh->b_data); 540 block = group_data[i].inode_bitmap;
283 err = ext4_handle_dirty_metadata(handle, NULL, bh); 541 err = extend_or_restart_transaction(handle, 1);
284 if (unlikely(err)) { 542 if (err)
285 ext4_std_error(sb, err); 543 goto out;
286 goto exit_bh; 544 /* Mark unused entries in inode bitmap used */
545 bh = bclean(handle, sb, block);
546 if (IS_ERR(bh)) {
547 err = PTR_ERR(bh);
548 goto out;
549 }
550
551 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
552 sb->s_blocksize * 8, bh->b_data);
553 err = ext4_handle_dirty_metadata(handle, NULL, bh);
554 if (err)
555 goto out;
556 brelse(bh);
287 } 557 }
288 brelse(bh); 558 bh = NULL;
289 /* Mark unused entries in inode bitmap used */ 559
290 ext4_debug("clear inode bitmap %#04llx (+%llu)\n", 560 /* Mark group tables in block bitmap */
291 input->inode_bitmap, input->inode_bitmap - start); 561 for (j = 0; j < GROUP_TABLE_COUNT; j++) {
292 if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { 562 count = group_table_count[j];
293 err = PTR_ERR(bh); 563 start = (&group_data[0].block_bitmap)[j];
294 goto exit_journal; 564 block = start;
565 for (i = 1; i < flex_gd->count; i++) {
566 block += group_table_count[j];
567 if (block == (&group_data[i].block_bitmap)[j]) {
568 count += group_table_count[j];
569 continue;
570 }
571 err = set_flexbg_block_bitmap(sb, handle,
572 flex_gd, start, count);
573 if (err)
574 goto out;
575 count = group_table_count[j];
576 start = group_data[i].block_bitmap;
577 block = start;
578 }
579
580 if (count) {
581 err = set_flexbg_block_bitmap(sb, handle,
582 flex_gd, start, count);
583 if (err)
584 goto out;
585 }
295 } 586 }
296 587
297 ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, 588out:
298 bh->b_data);
299 err = ext4_handle_dirty_metadata(handle, NULL, bh);
300 if (unlikely(err))
301 ext4_std_error(sb, err);
302exit_bh:
303 brelse(bh); 589 brelse(bh);
304 590 err2 = ext4_journal_stop(handle);
305exit_journal: 591 if (err2 && !err)
306 if ((err2 = ext4_journal_stop(handle)) && !err)
307 err = err2; 592 err = err2;
308 593
309 return err; 594 return err;
@@ -351,10 +636,10 @@ static unsigned ext4_list_backups(struct super_block *sb, unsigned *three,
351 * groups in current filesystem that have BACKUPS, or -ve error code. 636 * groups in current filesystem that have BACKUPS, or -ve error code.
352 */ 637 */
353static int verify_reserved_gdb(struct super_block *sb, 638static int verify_reserved_gdb(struct super_block *sb,
639 ext4_group_t end,
354 struct buffer_head *primary) 640 struct buffer_head *primary)
355{ 641{
356 const ext4_fsblk_t blk = primary->b_blocknr; 642 const ext4_fsblk_t blk = primary->b_blocknr;
357 const ext4_group_t end = EXT4_SB(sb)->s_groups_count;
358 unsigned three = 1; 643 unsigned three = 1;
359 unsigned five = 5; 644 unsigned five = 5;
360 unsigned seven = 7; 645 unsigned seven = 7;
@@ -429,7 +714,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
429 if (!gdb_bh) 714 if (!gdb_bh)
430 return -EIO; 715 return -EIO;
431 716
432 gdbackups = verify_reserved_gdb(sb, gdb_bh); 717 gdbackups = verify_reserved_gdb(sb, group, gdb_bh);
433 if (gdbackups < 0) { 718 if (gdbackups < 0) {
434 err = gdbackups; 719 err = gdbackups;
435 goto exit_bh; 720 goto exit_bh;
@@ -592,7 +877,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
592 err = -EIO; 877 err = -EIO;
593 goto exit_bh; 878 goto exit_bh;
594 } 879 }
595 if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { 880 gdbackups = verify_reserved_gdb(sb, group, primary[res]);
881 if (gdbackups < 0) {
596 brelse(primary[res]); 882 brelse(primary[res]);
597 err = gdbackups; 883 err = gdbackups;
598 goto exit_bh; 884 goto exit_bh;
@@ -735,6 +1021,348 @@ exit_err:
735 } 1021 }
736} 1022}
737 1023
1024/*
1025 * ext4_add_new_descs() adds @count group descriptor of groups
1026 * starting at @group
1027 *
1028 * @handle: journal handle
1029 * @sb: super block
1030 * @group: the group no. of the first group desc to be added
1031 * @resize_inode: the resize inode
1032 * @count: number of group descriptors to be added
1033 */
1034static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1035 ext4_group_t group, struct inode *resize_inode,
1036 ext4_group_t count)
1037{
1038 struct ext4_sb_info *sbi = EXT4_SB(sb);
1039 struct ext4_super_block *es = sbi->s_es;
1040 struct buffer_head *gdb_bh;
1041 int i, gdb_off, gdb_num, err = 0;
1042
1043 for (i = 0; i < count; i++, group++) {
1044 int reserved_gdb = ext4_bg_has_super(sb, group) ?
1045 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
1046
1047 gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1048 gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1049
1050 /*
1051 * We will only either add reserved group blocks to a backup group
1052 * or remove reserved blocks for the first group in a new group block.
1053 * Doing both would be mean more complex code, and sane people don't
1054 * use non-sparse filesystems anymore. This is already checked above.
1055 */
1056 if (gdb_off) {
1057 gdb_bh = sbi->s_group_desc[gdb_num];
1058 err = ext4_journal_get_write_access(handle, gdb_bh);
1059
1060 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
1061 err = reserve_backup_gdb(handle, resize_inode, group);
1062 } else
1063 err = add_new_gdb(handle, resize_inode, group);
1064 if (err)
1065 break;
1066 }
1067 return err;
1068}
1069
1070/*
1071 * ext4_setup_new_descs() will set up the group descriptor descriptors of a flex bg
1072 */
1073static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
1074 struct ext4_new_flex_group_data *flex_gd)
1075{
1076 struct ext4_new_group_data *group_data = flex_gd->groups;
1077 struct ext4_group_desc *gdp;
1078 struct ext4_sb_info *sbi = EXT4_SB(sb);
1079 struct buffer_head *gdb_bh;
1080 ext4_group_t group;
1081 __u16 *bg_flags = flex_gd->bg_flags;
1082 int i, gdb_off, gdb_num, err = 0;
1083
1084
1085 for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
1086 group = group_data->group;
1087
1088 gdb_off = group % EXT4_DESC_PER_BLOCK(sb);
1089 gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1090
1091 /*
1092 * get_write_access() has been called on gdb_bh by ext4_add_new_desc().
1093 */
1094 gdb_bh = sbi->s_group_desc[gdb_num];
1095 /* Update group descriptor block for new group */
1096 gdp = (struct ext4_group_desc *)((char *)gdb_bh->b_data +
1097 gdb_off * EXT4_DESC_SIZE(sb));
1098
1099 memset(gdp, 0, EXT4_DESC_SIZE(sb));
1100 ext4_block_bitmap_set(sb, gdp, group_data->block_bitmap);
1101 ext4_inode_bitmap_set(sb, gdp, group_data->inode_bitmap);
1102 ext4_inode_table_set(sb, gdp, group_data->inode_table);
1103 ext4_free_group_clusters_set(sb, gdp,
1104 EXT4_B2C(sbi, group_data->free_blocks_count));
1105 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
1106 gdp->bg_flags = cpu_to_le16(*bg_flags);
1107 gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
1108
1109 err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
1110 if (unlikely(err)) {
1111 ext4_std_error(sb, err);
1112 break;
1113 }
1114
1115 /*
1116 * We can allocate memory for mb_alloc based on the new group
1117 * descriptor
1118 */
1119 err = ext4_mb_add_groupinfo(sb, group, gdp);
1120 if (err)
1121 break;
1122 }
1123 return err;
1124}
1125
1126/*
1127 * ext4_update_super() updates the super block so that the newly added
1128 * groups can be seen by the filesystem.
1129 *
1130 * @sb: super block
1131 * @flex_gd: new added groups
1132 */
1133static void ext4_update_super(struct super_block *sb,
1134 struct ext4_new_flex_group_data *flex_gd)
1135{
1136 ext4_fsblk_t blocks_count = 0;
1137 ext4_fsblk_t free_blocks = 0;
1138 ext4_fsblk_t reserved_blocks = 0;
1139 struct ext4_new_group_data *group_data = flex_gd->groups;
1140 struct ext4_sb_info *sbi = EXT4_SB(sb);
1141 struct ext4_super_block *es = sbi->s_es;
1142 int i;
1143
1144 BUG_ON(flex_gd->count == 0 || group_data == NULL);
1145 /*
1146 * Make the new blocks and inodes valid next. We do this before
1147 * increasing the group count so that once the group is enabled,
1148 * all of its blocks and inodes are already valid.
1149 *
1150 * We always allocate group-by-group, then block-by-block or
1151 * inode-by-inode within a group, so enabling these
1152 * blocks/inodes before the group is live won't actually let us
1153 * allocate the new space yet.
1154 */
1155 for (i = 0; i < flex_gd->count; i++) {
1156 blocks_count += group_data[i].blocks_count;
1157 free_blocks += group_data[i].free_blocks_count;
1158 }
1159
1160 reserved_blocks = ext4_r_blocks_count(es) * 100;
1161 do_div(reserved_blocks, ext4_blocks_count(es));
1162 reserved_blocks *= blocks_count;
1163 do_div(reserved_blocks, 100);
1164
1165 ext4_blocks_count_set(es, ext4_blocks_count(es) + blocks_count);
1166 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1167 flex_gd->count);
1168
1169 /*
1170 * We need to protect s_groups_count against other CPUs seeing
1171 * inconsistent state in the superblock.
1172 *
1173 * The precise rules we use are:
1174 *
1175 * * Writers must perform a smp_wmb() after updating all
1176 * dependent data and before modifying the groups count
1177 *
1178 * * Readers must perform an smp_rmb() after reading the groups
1179 * count and before reading any dependent data.
1180 *
1181 * NB. These rules can be relaxed when checking the group count
1182 * while freeing data, as we can only allocate from a block
1183 * group after serialising against the group count, and we can
1184 * only then free after serialising in turn against that
1185 * allocation.
1186 */
1187 smp_wmb();
1188
1189 /* Update the global fs size fields */
1190 sbi->s_groups_count += flex_gd->count;
1191
1192 /* Update the reserved block counts only once the new group is
1193 * active. */
1194 ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
1195 reserved_blocks);
1196
1197 /* Update the free space counts */
1198 percpu_counter_add(&sbi->s_freeclusters_counter,
1199 EXT4_B2C(sbi, free_blocks));
1200 percpu_counter_add(&sbi->s_freeinodes_counter,
1201 EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
1202
1203 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1204 EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1205 sbi->s_log_groups_per_flex) {
1206 ext4_group_t flex_group;
1207 flex_group = ext4_flex_group(sbi, group_data[0].group);
1208 atomic_add(EXT4_B2C(sbi, free_blocks),
1209 &sbi->s_flex_groups[flex_group].free_clusters);
1210 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
1211 &sbi->s_flex_groups[flex_group].free_inodes);
1212 }
1213
1214 if (test_opt(sb, DEBUG))
1215 printk(KERN_DEBUG "EXT4-fs: added group %u:"
1216 "%llu blocks(%llu free %llu reserved)\n", flex_gd->count,
1217 blocks_count, free_blocks, reserved_blocks);
1218}
1219
1220/* Add a flex group to an fs. Ensure we handle all possible error conditions
1221 * _before_ we start modifying the filesystem, because we cannot abort the
1222 * transaction and not have it write the data to disk.
1223 */
1224static int ext4_flex_group_add(struct super_block *sb,
1225 struct inode *resize_inode,
1226 struct ext4_new_flex_group_data *flex_gd)
1227{
1228 struct ext4_sb_info *sbi = EXT4_SB(sb);
1229 struct ext4_super_block *es = sbi->s_es;
1230 ext4_fsblk_t o_blocks_count;
1231 ext4_grpblk_t last;
1232 ext4_group_t group;
1233 handle_t *handle;
1234 unsigned reserved_gdb;
1235 int err = 0, err2 = 0, credit;
1236
1237 BUG_ON(!flex_gd->count || !flex_gd->groups || !flex_gd->bg_flags);
1238
1239 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
1240 o_blocks_count = ext4_blocks_count(es);
1241 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1242 BUG_ON(last);
1243
1244 err = setup_new_flex_group_blocks(sb, flex_gd);
1245 if (err)
1246 goto exit;
1247 /*
1248 * We will always be modifying at least the superblock and GDT
1249 * block. If we are adding a group past the last current GDT block,
1250 * we will also modify the inode and the dindirect block. If we
1251 * are adding a group with superblock/GDT backups we will also
1252 * modify each of the reserved GDT dindirect blocks.
1253 */
1254 credit = flex_gd->count * 4 + reserved_gdb;
1255 handle = ext4_journal_start_sb(sb, credit);
1256 if (IS_ERR(handle)) {
1257 err = PTR_ERR(handle);
1258 goto exit;
1259 }
1260
1261 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
1262 if (err)
1263 goto exit_journal;
1264
1265 group = flex_gd->groups[0].group;
1266 BUG_ON(group != EXT4_SB(sb)->s_groups_count);
1267 err = ext4_add_new_descs(handle, sb, group,
1268 resize_inode, flex_gd->count);
1269 if (err)
1270 goto exit_journal;
1271
1272 err = ext4_setup_new_descs(handle, sb, flex_gd);
1273 if (err)
1274 goto exit_journal;
1275
1276 ext4_update_super(sb, flex_gd);
1277
1278 err = ext4_handle_dirty_super(handle, sb);
1279
1280exit_journal:
1281 err2 = ext4_journal_stop(handle);
1282 if (!err)
1283 err = err2;
1284
1285 if (!err) {
1286 int i;
1287 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
1288 sizeof(struct ext4_super_block));
1289 for (i = 0; i < flex_gd->count; i++, group++) {
1290 struct buffer_head *gdb_bh;
1291 int gdb_num;
1292 gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
1293 gdb_bh = sbi->s_group_desc[gdb_num];
1294 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
1295 gdb_bh->b_size);
1296 }
1297 }
1298exit:
1299 return err;
1300}
1301
1302static int ext4_setup_next_flex_gd(struct super_block *sb,
1303 struct ext4_new_flex_group_data *flex_gd,
1304 ext4_fsblk_t n_blocks_count,
1305 unsigned long flexbg_size)
1306{
1307 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1308 struct ext4_new_group_data *group_data = flex_gd->groups;
1309 ext4_fsblk_t o_blocks_count;
1310 ext4_group_t n_group;
1311 ext4_group_t group;
1312 ext4_group_t last_group;
1313 ext4_grpblk_t last;
1314 ext4_grpblk_t blocks_per_group;
1315 unsigned long i;
1316
1317 blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb);
1318
1319 o_blocks_count = ext4_blocks_count(es);
1320
1321 if (o_blocks_count == n_blocks_count)
1322 return 0;
1323
1324 ext4_get_group_no_and_offset(sb, o_blocks_count, &group, &last);
1325 BUG_ON(last);
1326 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &last);
1327
1328 last_group = group | (flexbg_size - 1);
1329 if (last_group > n_group)
1330 last_group = n_group;
1331
1332 flex_gd->count = last_group - group + 1;
1333
1334 for (i = 0; i < flex_gd->count; i++) {
1335 int overhead;
1336
1337 group_data[i].group = group + i;
1338 group_data[i].blocks_count = blocks_per_group;
1339 overhead = ext4_bg_has_super(sb, group + i) ?
1340 (1 + ext4_bg_num_gdb(sb, group + i) +
1341 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
1342 group_data[i].free_blocks_count = blocks_per_group - overhead;
1343 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
1344 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
1345 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
1346 EXT4_BG_INODE_UNINIT;
1347 else
1348 flex_gd->bg_flags[i] = EXT4_BG_INODE_ZEROED;
1349 }
1350
1351 if (last_group == n_group &&
1352 EXT4_HAS_RO_COMPAT_FEATURE(sb,
1353 EXT4_FEATURE_RO_COMPAT_GDT_CSUM))
1354 /* We need to initialize block bitmap of last group. */
1355 flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
1356
1357 if ((last_group == n_group) && (last != blocks_per_group - 1)) {
1358 group_data[i - 1].blocks_count = last + 1;
1359 group_data[i - 1].free_blocks_count -= blocks_per_group-
1360 last - 1;
1361 }
1362
1363 return 1;
1364}
1365
738/* Add group descriptor data to an existing or new group descriptor block. 1366/* Add group descriptor data to an existing or new group descriptor block.
739 * Ensure we handle all possible error conditions _before_ we start modifying 1367 * Ensure we handle all possible error conditions _before_ we start modifying
740 * the filesystem, because we cannot abort the transaction and not have it 1368 * the filesystem, because we cannot abort the transaction and not have it
@@ -750,16 +1378,15 @@ exit_err:
750 */ 1378 */
751int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) 1379int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
752{ 1380{
1381 struct ext4_new_flex_group_data flex_gd;
753 struct ext4_sb_info *sbi = EXT4_SB(sb); 1382 struct ext4_sb_info *sbi = EXT4_SB(sb);
754 struct ext4_super_block *es = sbi->s_es; 1383 struct ext4_super_block *es = sbi->s_es;
755 int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 1384 int reserved_gdb = ext4_bg_has_super(sb, input->group) ?
756 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1385 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
757 struct buffer_head *primary = NULL;
758 struct ext4_group_desc *gdp;
759 struct inode *inode = NULL; 1386 struct inode *inode = NULL;
760 handle_t *handle;
761 int gdb_off, gdb_num; 1387 int gdb_off, gdb_num;
762 int err, err2; 1388 int err;
1389 __u16 bg_flags = 0;
763 1390
764 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); 1391 gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
765 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); 1392 gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb);
@@ -798,175 +1425,69 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
798 } 1425 }
799 1426
800 1427
801 if ((err = verify_group_input(sb, input))) 1428 err = verify_group_input(sb, input);
802 goto exit_put; 1429 if (err)
1430 goto out;
803 1431
804 if ((err = setup_new_group_blocks(sb, input))) 1432 flex_gd.count = 1;
805 goto exit_put; 1433 flex_gd.groups = input;
1434 flex_gd.bg_flags = &bg_flags;
1435 err = ext4_flex_group_add(sb, inode, &flex_gd);
1436out:
1437 iput(inode);
1438 return err;
1439} /* ext4_group_add */
806 1440
807 /* 1441/*
808 * We will always be modifying at least the superblock and a GDT 1442 * extend a group without checking assuming that checking has been done.
809 * block. If we are adding a group past the last current GDT block, 1443 */
810 * we will also modify the inode and the dindirect block. If we 1444static int ext4_group_extend_no_check(struct super_block *sb,
811 * are adding a group with superblock/GDT backups we will also 1445 ext4_fsblk_t o_blocks_count, ext4_grpblk_t add)
812 * modify each of the reserved GDT dindirect blocks. 1446{
1447 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
1448 handle_t *handle;
1449 int err = 0, err2;
1450
1451 /* We will update the superblock, one block bitmap, and
1452 * one group descriptor via ext4_group_add_blocks().
813 */ 1453 */
814 handle = ext4_journal_start_sb(sb, 1454 handle = ext4_journal_start_sb(sb, 3);
815 ext4_bg_has_super(sb, input->group) ?
816 3 + reserved_gdb : 4);
817 if (IS_ERR(handle)) { 1455 if (IS_ERR(handle)) {
818 err = PTR_ERR(handle); 1456 err = PTR_ERR(handle);
819 goto exit_put; 1457 ext4_warning(sb, "error %d on journal start", err);
1458 return err;
820 } 1459 }
821 1460
822 if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) 1461 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
823 goto exit_journal; 1462 if (err) {
824 1463 ext4_warning(sb, "error %d on journal write access", err);
825 /* 1464 goto errout;
826 * We will only either add reserved group blocks to a backup group
827 * or remove reserved blocks for the first group in a new group block.
828 * Doing both would be mean more complex code, and sane people don't
829 * use non-sparse filesystems anymore. This is already checked above.
830 */
831 if (gdb_off) {
832 primary = sbi->s_group_desc[gdb_num];
833 if ((err = ext4_journal_get_write_access(handle, primary)))
834 goto exit_journal;
835
836 if (reserved_gdb && ext4_bg_num_gdb(sb, input->group)) {
837 err = reserve_backup_gdb(handle, inode, input->group);
838 if (err)
839 goto exit_journal;
840 }
841 } else {
842 /*
843 * Note that we can access new group descriptor block safely
844 * only if add_new_gdb() succeeds.
845 */
846 err = add_new_gdb(handle, inode, input->group);
847 if (err)
848 goto exit_journal;
849 primary = sbi->s_group_desc[gdb_num];
850 } 1465 }
851 1466
852 /* 1467 ext4_blocks_count_set(es, o_blocks_count + add);
853 * OK, now we've set up the new group. Time to make it active. 1468 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
854 * 1469 o_blocks_count + add);
855 * so we have to be safe wrt. concurrent accesses the group 1470 /* We add the blocks to the bitmap and set the group need init bit */
856 * data. So we need to be careful to set all of the relevant 1471 err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
857 * group descriptor data etc. *before* we enable the group.
858 *
859 * The key field here is sbi->s_groups_count: as long as
860 * that retains its old value, nobody is going to access the new
861 * group.
862 *
863 * So first we update all the descriptor metadata for the new
864 * group; then we update the total disk blocks count; then we
865 * update the groups count to enable the group; then finally we
866 * update the free space counts so that the system can start
867 * using the new disk blocks.
868 */
869
870 /* Update group descriptor block for new group */
871 gdp = (struct ext4_group_desc *)((char *)primary->b_data +
872 gdb_off * EXT4_DESC_SIZE(sb));
873
874 memset(gdp, 0, EXT4_DESC_SIZE(sb));
875 ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */
876 ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */
877 ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */
878 ext4_free_group_clusters_set(sb, gdp, input->free_blocks_count);
879 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
880 gdp->bg_flags = cpu_to_le16(EXT4_BG_INODE_ZEROED);
881 gdp->bg_checksum = ext4_group_desc_csum(sbi, input->group, gdp);
882
883 /*
884 * We can allocate memory for mb_alloc based on the new group
885 * descriptor
886 */
887 err = ext4_mb_add_groupinfo(sb, input->group, gdp);
888 if (err) 1472 if (err)
889 goto exit_journal; 1473 goto errout;
890
891 /*
892 * Make the new blocks and inodes valid next. We do this before
893 * increasing the group count so that once the group is enabled,
894 * all of its blocks and inodes are already valid.
895 *
896 * We always allocate group-by-group, then block-by-block or
897 * inode-by-inode within a group, so enabling these
898 * blocks/inodes before the group is live won't actually let us
899 * allocate the new space yet.
900 */
901 ext4_blocks_count_set(es, ext4_blocks_count(es) +
902 input->blocks_count);
903 le32_add_cpu(&es->s_inodes_count, EXT4_INODES_PER_GROUP(sb));
904
905 /*
906 * We need to protect s_groups_count against other CPUs seeing
907 * inconsistent state in the superblock.
908 *
909 * The precise rules we use are:
910 *
911 * * Writers must perform a smp_wmb() after updating all dependent
912 * data and before modifying the groups count
913 *
914 * * Readers must perform an smp_rmb() after reading the groups count
915 * and before reading any dependent data.
916 *
917 * NB. These rules can be relaxed when checking the group count
918 * while freeing data, as we can only allocate from a block
919 * group after serialising against the group count, and we can
920 * only then free after serialising in turn against that
921 * allocation.
922 */
923 smp_wmb();
924
925 /* Update the global fs size fields */
926 sbi->s_groups_count++;
927
928 err = ext4_handle_dirty_metadata(handle, NULL, primary);
929 if (unlikely(err)) {
930 ext4_std_error(sb, err);
931 goto exit_journal;
932 }
933
934 /* Update the reserved block counts only once the new group is
935 * active. */
936 ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) +
937 input->reserved_blocks);
938
939 /* Update the free space counts */
940 percpu_counter_add(&sbi->s_freeclusters_counter,
941 EXT4_B2C(sbi, input->free_blocks_count));
942 percpu_counter_add(&sbi->s_freeinodes_counter,
943 EXT4_INODES_PER_GROUP(sb));
944
945 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
946 sbi->s_log_groups_per_flex) {
947 ext4_group_t flex_group;
948 flex_group = ext4_flex_group(sbi, input->group);
949 atomic_add(EXT4_B2C(sbi, input->free_blocks_count),
950 &sbi->s_flex_groups[flex_group].free_clusters);
951 atomic_add(EXT4_INODES_PER_GROUP(sb),
952 &sbi->s_flex_groups[flex_group].free_inodes);
953 }
954
955 ext4_handle_dirty_super(handle, sb); 1474 ext4_handle_dirty_super(handle, sb);
956 1475 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
957exit_journal: 1476 o_blocks_count + add);
958 if ((err2 = ext4_journal_stop(handle)) && !err) 1477errout:
1478 err2 = ext4_journal_stop(handle);
1479 if (err2 && !err)
959 err = err2; 1480 err = err2;
960 if (!err && primary) { 1481
961 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1482 if (!err) {
1483 if (test_opt(sb, DEBUG))
1484 printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
1485 "blocks\n", ext4_blocks_count(es));
1486 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
962 sizeof(struct ext4_super_block)); 1487 sizeof(struct ext4_super_block));
963 update_backups(sb, primary->b_blocknr, primary->b_data,
964 primary->b_size);
965 } 1488 }
966exit_put:
967 iput(inode);
968 return err; 1489 return err;
969} /* ext4_group_add */ 1490}
970 1491
971/* 1492/*
972 * Extend the filesystem to the new number of blocks specified. This entry 1493 * Extend the filesystem to the new number of blocks specified. This entry
@@ -985,8 +1506,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
985 ext4_grpblk_t last; 1506 ext4_grpblk_t last;
986 ext4_grpblk_t add; 1507 ext4_grpblk_t add;
987 struct buffer_head *bh; 1508 struct buffer_head *bh;
988 handle_t *handle; 1509 int err;
989 int err, err2;
990 ext4_group_t group; 1510 ext4_group_t group;
991 1511
992 o_blocks_count = ext4_blocks_count(es); 1512 o_blocks_count = ext4_blocks_count(es);
@@ -1042,42 +1562,119 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1042 } 1562 }
1043 brelse(bh); 1563 brelse(bh);
1044 1564
1045 /* We will update the superblock, one block bitmap, and 1565 err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1046 * one group descriptor via ext4_free_blocks(). 1566 return err;
1047 */ 1567} /* ext4_group_extend */
1048 handle = ext4_journal_start_sb(sb, 3); 1568
1049 if (IS_ERR(handle)) { 1569/*
1050 err = PTR_ERR(handle); 1570 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
1051 ext4_warning(sb, "error %d on journal start", err); 1571 *
1052 goto exit_put; 1572 * @sb: super block of the fs to be resized
1573 * @n_blocks_count: the number of blocks resides in the resized fs
1574 */
1575int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1576{
1577 struct ext4_new_flex_group_data *flex_gd = NULL;
1578 struct ext4_sb_info *sbi = EXT4_SB(sb);
1579 struct ext4_super_block *es = sbi->s_es;
1580 struct buffer_head *bh;
1581 struct inode *resize_inode;
1582 ext4_fsblk_t o_blocks_count;
1583 ext4_group_t o_group;
1584 ext4_group_t n_group;
1585 ext4_grpblk_t offset;
1586 unsigned long n_desc_blocks;
1587 unsigned long o_desc_blocks;
1588 unsigned long desc_blocks;
1589 int err = 0, flexbg_size = 1;
1590
1591 o_blocks_count = ext4_blocks_count(es);
1592
1593 if (test_opt(sb, DEBUG))
1594 printk(KERN_DEBUG "EXT4-fs: resizing filesystem from %llu "
1595 "upto %llu blocks\n", o_blocks_count, n_blocks_count);
1596
1597 if (n_blocks_count < o_blocks_count) {
1598 /* On-line shrinking not supported */
1599 ext4_warning(sb, "can't shrink FS - resize aborted");
1600 return -EINVAL;
1053 } 1601 }
1054 1602
1055 if ((err = ext4_journal_get_write_access(handle, 1603 if (n_blocks_count == o_blocks_count)
1056 EXT4_SB(sb)->s_sbh))) { 1604 /* Nothing need to do */
1057 ext4_warning(sb, "error %d on journal write access", err); 1605 return 0;
1058 ext4_journal_stop(handle); 1606
1059 goto exit_put; 1607 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
1608 ext4_get_group_no_and_offset(sb, o_blocks_count, &o_group, &offset);
1609
1610 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) /
1611 EXT4_DESC_PER_BLOCK(sb);
1612 o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
1613 EXT4_DESC_PER_BLOCK(sb);
1614 desc_blocks = n_desc_blocks - o_desc_blocks;
1615
1616 if (desc_blocks &&
1617 (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
1618 le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
1619 ext4_warning(sb, "No reserved GDT blocks, can't resize");
1620 return -EPERM;
1060 } 1621 }
1061 ext4_blocks_count_set(es, o_blocks_count + add);
1062 ext4_debug("freeing blocks %llu through %llu\n", o_blocks_count,
1063 o_blocks_count + add);
1064 /* We add the blocks to the bitmap and set the group need init bit */
1065 err = ext4_group_add_blocks(handle, sb, o_blocks_count, add);
1066 ext4_handle_dirty_super(handle, sb);
1067 ext4_debug("freed blocks %llu through %llu\n", o_blocks_count,
1068 o_blocks_count + add);
1069 err2 = ext4_journal_stop(handle);
1070 if (!err && err2)
1071 err = err2;
1072 1622
1073 if (err) 1623 resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
1074 goto exit_put; 1624 if (IS_ERR(resize_inode)) {
1625 ext4_warning(sb, "Error opening resize inode");
1626 return PTR_ERR(resize_inode);
1627 }
1075 1628
1629 /* See if the device is actually as big as what was requested */
1630 bh = sb_bread(sb, n_blocks_count - 1);
1631 if (!bh) {
1632 ext4_warning(sb, "can't read last block, resize aborted");
1633 return -ENOSPC;
1634 }
1635 brelse(bh);
1636
1637 if (offset != 0) {
1638 /* extend the last group */
1639 ext4_grpblk_t add;
1640 add = EXT4_BLOCKS_PER_GROUP(sb) - offset;
1641 err = ext4_group_extend_no_check(sb, o_blocks_count, add);
1642 if (err)
1643 goto out;
1644 }
1645
1646 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1647 es->s_log_groups_per_flex)
1648 flexbg_size = 1 << es->s_log_groups_per_flex;
1649
1650 o_blocks_count = ext4_blocks_count(es);
1651 if (o_blocks_count == n_blocks_count)
1652 goto out;
1653
1654 flex_gd = alloc_flex_gd(flexbg_size);
1655 if (flex_gd == NULL) {
1656 err = -ENOMEM;
1657 goto out;
1658 }
1659
1660 /* Add flex groups. Note that a regular group is a
1661 * flex group with 1 group.
1662 */
1663 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
1664 flexbg_size)) {
1665 ext4_alloc_group_tables(sb, flex_gd, flexbg_size);
1666 err = ext4_flex_group_add(sb, resize_inode, flex_gd);
1667 if (unlikely(err))
1668 break;
1669 }
1670
1671out:
1672 if (flex_gd)
1673 free_flex_gd(flex_gd);
1674
1675 iput(resize_inode);
1076 if (test_opt(sb, DEBUG)) 1676 if (test_opt(sb, DEBUG))
1077 printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", 1677 printk(KERN_DEBUG "EXT4-fs: resized filesystem from %llu "
1078 ext4_blocks_count(es)); 1678 "upto %llu blocks\n", o_blocks_count, n_blocks_count);
1079 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es,
1080 sizeof(struct ext4_super_block));
1081exit_put:
1082 return err; 1679 return err;
1083} /* ext4_group_extend */ 1680}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ed3ce82e2de4..502c61fd7392 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1095,7 +1095,7 @@ static int ext4_show_options(struct seq_file *seq, struct dentry *root)
1095 } 1095 }
1096 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) { 1096 if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
1097 seq_printf(seq, ",max_batch_time=%u", 1097 seq_printf(seq, ",max_batch_time=%u",
1098 (unsigned) sbi->s_min_batch_time); 1098 (unsigned) sbi->s_max_batch_time);
1099 } 1099 }
1100 1100
1101 /* 1101 /*
@@ -2005,17 +2005,16 @@ static int ext4_fill_flex_info(struct super_block *sb)
2005 struct ext4_group_desc *gdp = NULL; 2005 struct ext4_group_desc *gdp = NULL;
2006 ext4_group_t flex_group_count; 2006 ext4_group_t flex_group_count;
2007 ext4_group_t flex_group; 2007 ext4_group_t flex_group;
2008 int groups_per_flex = 0; 2008 unsigned int groups_per_flex = 0;
2009 size_t size; 2009 size_t size;
2010 int i; 2010 int i;
2011 2011
2012 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 2012 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
2013 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 2013 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
2014
2015 if (groups_per_flex < 2) {
2016 sbi->s_log_groups_per_flex = 0; 2014 sbi->s_log_groups_per_flex = 0;
2017 return 1; 2015 return 1;
2018 } 2016 }
2017 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
2019 2018
2020 /* We allocate both existing and potentially added groups */ 2019 /* We allocate both existing and potentially added groups */
2021 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 2020 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
@@ -3506,7 +3505,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3506 * of the filesystem. 3505 * of the filesystem.
3507 */ 3506 */
3508 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { 3507 if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
3509 ext4_msg(sb, KERN_WARNING, "bad geometry: first data" 3508 ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
3510 "block %u is beyond end of filesystem (%llu)", 3509 "block %u is beyond end of filesystem (%llu)",
3511 le32_to_cpu(es->s_first_data_block), 3510 le32_to_cpu(es->s_first_data_block),
3512 ext4_blocks_count(es)); 3511 ext4_blocks_count(es));
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index b60f9f81e33c..d2a200624af5 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -47,8 +47,9 @@ ext4_xattr_security_set(struct dentry *dentry, const char *name,
47 name, value, size, flags); 47 name, value, size, flags);
48} 48}
49 49
50int ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array, 50static int
51 void *fs_info) 51ext4_initxattrs(struct inode *inode, const struct xattr *xattr_array,
52 void *fs_info)
52{ 53{
53 const struct xattr *xattr; 54 const struct xattr *xattr;
54 handle_t *handle = fs_info; 55 handle_t *handle = fs_info;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 68d704db787f..5069b8475150 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -430,6 +430,12 @@ void jbd2_journal_commit_transaction(journal_t *journal)
430 jbd_debug(3, "JBD2: commit phase 1\n"); 430 jbd_debug(3, "JBD2: commit phase 1\n");
431 431
432 /* 432 /*
433 * Clear revoked flag to reflect there is no revoked buffers
434 * in the next transaction which is going to be started.
435 */
436 jbd2_clear_buffer_revoked_flags(journal);
437
438 /*
433 * Switch to a new revoke table. 439 * Switch to a new revoke table.
434 */ 440 */
435 jbd2_journal_switch_revoke_table(journal); 441 jbd2_journal_switch_revoke_table(journal);
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 69fd93588118..30b2867d6cc9 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -47,6 +47,10 @@
47 * overwriting the new data. We don't even need to clear the revoke 47 * overwriting the new data. We don't even need to clear the revoke
48 * bit here. 48 * bit here.
49 * 49 *
50 * We cache revoke status of a buffer in the current transaction in b_states
51 * bits. As the name says, revokevalid flag indicates that the cached revoke
52 * status of a buffer is valid and we can rely on the cached status.
53 *
50 * Revoke information on buffers is a tri-state value: 54 * Revoke information on buffers is a tri-state value:
51 * 55 *
52 * RevokeValid clear: no cached revoke status, need to look it up 56 * RevokeValid clear: no cached revoke status, need to look it up
@@ -478,6 +482,36 @@ int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh)
478 return did_revoke; 482 return did_revoke;
479} 483}
480 484
485/*
486 * journal_clear_revoked_flag clears revoked flag of buffers in
487 * revoke table to reflect there is no revoked buffers in the next
488 * transaction which is going to be started.
489 */
490void jbd2_clear_buffer_revoked_flags(journal_t *journal)
491{
492 struct jbd2_revoke_table_s *revoke = journal->j_revoke;
493 int i = 0;
494
495 for (i = 0; i < revoke->hash_size; i++) {
496 struct list_head *hash_list;
497 struct list_head *list_entry;
498 hash_list = &revoke->hash_table[i];
499
500 list_for_each(list_entry, hash_list) {
501 struct jbd2_revoke_record_s *record;
502 struct buffer_head *bh;
503 record = (struct jbd2_revoke_record_s *)list_entry;
504 bh = __find_get_block(journal->j_fs_dev,
505 record->blocknr,
506 journal->j_blocksize);
507 if (bh) {
508 clear_buffer_revoked(bh);
509 __brelse(bh);
510 }
511 }
512 }
513}
514
481/* journal_switch_revoke table select j_revoke for next transaction 515/* journal_switch_revoke table select j_revoke for next transaction
482 * we do not want to suspend any processing until all revokes are 516 * we do not want to suspend any processing until all revokes are
483 * written -bzzz 517 * written -bzzz
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a0e41a4c080e..35ae096bed5d 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -517,12 +517,13 @@ void jbd2_journal_lock_updates(journal_t *journal)
517 break; 517 break;
518 518
519 spin_lock(&transaction->t_handle_lock); 519 spin_lock(&transaction->t_handle_lock);
520 prepare_to_wait(&journal->j_wait_updates, &wait,
521 TASK_UNINTERRUPTIBLE);
520 if (!atomic_read(&transaction->t_updates)) { 522 if (!atomic_read(&transaction->t_updates)) {
521 spin_unlock(&transaction->t_handle_lock); 523 spin_unlock(&transaction->t_handle_lock);
524 finish_wait(&journal->j_wait_updates, &wait);
522 break; 525 break;
523 } 526 }
524 prepare_to_wait(&journal->j_wait_updates, &wait,
525 TASK_UNINTERRUPTIBLE);
526 spin_unlock(&transaction->t_handle_lock); 527 spin_unlock(&transaction->t_handle_lock);
527 write_unlock(&journal->j_state_lock); 528 write_unlock(&journal->j_state_lock);
528 schedule(); 529 schedule();
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 2092ea21e469..5557baefed60 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1151,6 +1151,7 @@ extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t);
1151extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); 1151extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t);
1152extern void jbd2_journal_clear_revoke(journal_t *); 1152extern void jbd2_journal_clear_revoke(journal_t *);
1153extern void jbd2_journal_switch_revoke_table(journal_t *journal); 1153extern void jbd2_journal_switch_revoke_table(journal_t *journal);
1154extern void jbd2_clear_buffer_revoked_flags(journal_t *journal);
1154 1155
1155/* 1156/*
1156 * The log thread user interface: 1157 * The log thread user interface:
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 748ff7cbe555..319538bf17d2 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -573,9 +573,9 @@ TRACE_EVENT(ext4_mb_release_inode_pa,
573); 573);
574 574
575TRACE_EVENT(ext4_mb_release_group_pa, 575TRACE_EVENT(ext4_mb_release_group_pa,
576 TP_PROTO(struct ext4_prealloc_space *pa), 576 TP_PROTO(struct super_block *sb, struct ext4_prealloc_space *pa),
577 577
578 TP_ARGS(pa), 578 TP_ARGS(sb, pa),
579 579
580 TP_STRUCT__entry( 580 TP_STRUCT__entry(
581 __field( dev_t, dev ) 581 __field( dev_t, dev )
@@ -585,7 +585,7 @@ TRACE_EVENT(ext4_mb_release_group_pa,
585 ), 585 ),
586 586
587 TP_fast_assign( 587 TP_fast_assign(
588 __entry->dev = pa->pa_inode->i_sb->s_dev; 588 __entry->dev = sb->s_dev;
589 __entry->pa_pstart = pa->pa_pstart; 589 __entry->pa_pstart = pa->pa_pstart;
590 __entry->pa_len = pa->pa_len; 590 __entry->pa_len = pa->pa_len;
591 ), 591 ),