 Documentation/ABI/testing/sysfs-fs-ext4 |  13
 Documentation/filesystems/ext4.txt      |  10
 fs/buffer.c                             |  13
 fs/ext4/ext4.h                          |  49
 fs/ext4/extents.c                       | 258
 fs/ext4/file.c                          |   6
 fs/ext4/fsync.c                         |  92
 fs/ext4/ialloc.c                        |   9
 fs/ext4/indirect.c                      |  18
 fs/ext4/inode.c                         |  83
 fs/ext4/ioctl.c                         |  22
 fs/ext4/mballoc.c                       | 129
 fs/ext4/mballoc.h                       |   5
 fs/ext4/move_extent.c                   | 520
 fs/ext4/namei.c                         | 105
 fs/ext4/page-io.c                       | 176
 fs/ext4/resize.c                        | 432
 fs/ext4/super.c                         |  92
 fs/fs-writeback.c                       |   1
 fs/jbd2/commit.c                        |  40
 fs/jbd2/journal.c                       |   5
 fs/jbd2/recovery.c                      |   7
 fs/jbd2/transaction.c                   |  65
 fs/nilfs2/file.c                        |   1
 include/linux/falloc.h                  |   1
 include/trace/events/ext4.h             | 242
 26 files changed, 1498 insertions(+), 896 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index f22ac0872ae8..c631253cf85c 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -96,3 +96,16 @@ Contact: "Theodore Ts'o" <tytso@mit.edu>
 Description:
 		The maximum number of megabytes the writeback code will
 		try to write out before move on to another inode.
+
+What:		/sys/fs/ext4/<disk>/extent_max_zeroout_kb
+Date:		August 2012
+Contact:	"Theodore Ts'o" <tytso@mit.edu>
+Description:
+		The maximum number of kilobytes which will be zeroed
+		out in preference to creating a new uninitialized
+		extent when manipulating an inode's extent tree.  Note
+		that using a larger value will increase the
+		variability of time necessary to complete a random
+		write operation (since a 4k random write might turn
+		into a much larger write due to the zeroout
+		operation).
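
The tunable above is a plain sysfs file, so it can be inspected (or tuned, by writing to it) from userspace. A minimal sketch, assuming the filesystem lives on a device named sda1 (a placeholder):

#include <stdio.h>

/* Read the current zeroout limit for one ext4 volume; "sda1" stands
 * in for whatever disk actually backs the filesystem. */
int main(void)
{
	const char *path = "/sys/fs/ext4/sda1/extent_max_zeroout_kb";
	unsigned int kb;
	FILE *f = fopen(path, "r");

	if (!f || fscanf(f, "%u", &kb) != 1) {
		perror(path);
		return 1;
	}
	printf("extent_max_zeroout_kb = %u\n", kb);
	fclose(f);
	return 0;
}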
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt
index 1b7f9acbcbbe..104322bf378c 100644
--- a/Documentation/filesystems/ext4.txt
+++ b/Documentation/filesystems/ext4.txt
@@ -375,6 +375,16 @@ dioread_nolock locking. If the dioread_nolock option is specified
 			Because of the restrictions this options comprises
 			it is off by default (e.g. dioread_lock).
 
+max_dir_size_kb=n	This limits the size of directories so that any
+			attempt to expand them beyond the specified
+			limit in kilobytes will cause an ENOSPC error.
+			This is useful in memory constrained
+			environments, where a very large directory can
+			cause severe performance problems or even
+			provoke the Out Of Memory killer.  (For example,
+			if there is only 512mb memory available, a 176mb
+			directory may seriously cramp the system's style.)
+
 i_version		Enable 64-bit inode version support. This option is
 			off by default.
 
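
A sketch of enabling the new option at remount time via mount(2); the mount point and the 512 KiB cap are illustrative values, not defaults:

#include <stdio.h>
#include <sys/mount.h>

/* Remount an already-mounted ext4 filesystem with a directory
 * size cap of 512 KiB. */
int main(void)
{
	if (mount(NULL, "/mnt/scratch", NULL, MS_REMOUNT,
		  "max_dir_size_kb=512") != 0) {
		perror("mount");
		return 1;
	}
	return 0;
}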
diff --git a/fs/buffer.c b/fs/buffer.c
index 58e2e7b77372..b5f044283edb 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2312,12 +2312,6 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	loff_t size;
 	int ret;
 
-	/*
-	 * Update file times before taking page lock. We may end up failing the
-	 * fault so this update may be superfluous but who really cares...
-	 */
-	file_update_time(vma->vm_file);
-
 	lock_page(page);
 	size = i_size_read(inode);
 	if ((page->mapping != inode->i_mapping) ||
@@ -2355,6 +2349,13 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
 
 	sb_start_pagefault(sb);
+
+	/*
+	 * Update file times before taking page lock. We may end up failing the
+	 * fault so this update may be superfluous but who really cares...
+	 */
+	file_update_time(vma->vm_file);
+
 	ret = __block_page_mkwrite(vma, vmf, get_block);
 	sb_end_pagefault(sb);
 	return block_page_mkwrite_return(ret);
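
The net effect of this hunk pair is that file times are still updated exactly once per write fault, only now inside the freeze-protected region. A userspace sketch that exercises this path through a shared mapping (the file name is arbitrary):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/* Dirty a file through a MAP_SHARED mapping and watch st_mtime move;
 * the ->page_mkwrite path above performs the time update. */
int main(void)
{
	int fd = open("testfile", O_RDWR | O_CREAT, 0644);
	struct stat st;
	char *p;

	if (fd < 0 || ftruncate(fd, 4096) != 0)
		return 1;
	p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED)
		return 1;
	sleep(1);		/* make the timestamp delta visible */
	p[0] = 'x';		/* triggers the write fault */
	msync(p, 4096, MS_SYNC);
	fstat(fd, &st);
	printf("mtime: %ld\n", (long)st.st_mtime);
	munmap(p, 4096);
	close(fd);
	return 0;
}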
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index c3411d4ce2da..3ab2539b7b2e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -186,7 +186,6 @@ struct mpage_da_data {
 #define EXT4_IO_END_ERROR	0x0002
 #define EXT4_IO_END_QUEUED	0x0004
 #define EXT4_IO_END_DIRECT	0x0008
-#define EXT4_IO_END_IN_FSYNC	0x0010
 
 struct ext4_io_page {
 	struct page	*p_page;
@@ -912,9 +911,7 @@ struct ext4_inode_info {
 	struct list_head i_completed_io_list;
 	spinlock_t i_completed_io_lock;
 	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
-	/* current io_end structure for async DIO write*/
-	ext4_io_end_t *cur_aio_dio;
-	atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
+	atomic_t i_unwritten; /* Nr. of inflight conversions pending */
 
 	spinlock_t i_block_reservation_lock;
 
@@ -1233,6 +1230,7 @@ struct ext4_sb_info {
 	spinlock_t s_md_lock;
 	unsigned short *s_mb_offsets;
 	unsigned int *s_mb_maxs;
+	unsigned int s_group_info_size;
 
 	/* tunables */
 	unsigned long s_stripe;
@@ -1243,6 +1241,7 @@ struct ext4_sb_info {
 	unsigned int s_mb_order2_reqs;
 	unsigned int s_mb_group_prealloc;
 	unsigned int s_max_writeback_mb_bump;
+	unsigned int s_max_dir_size_kb;
 	/* where last allocation was done - for stream allocation */
 	unsigned long s_mb_last_group;
 	unsigned long s_mb_last_start;
@@ -1270,8 +1269,12 @@ struct ext4_sb_info {
 	unsigned long s_sectors_written_start;
 	u64 s_kbytes_written;
 
+	/* the size of zero-out chunk */
+	unsigned int s_extent_max_zeroout_kb;
+
 	unsigned int s_log_groups_per_flex;
 	struct flex_groups *s_flex_groups;
+	ext4_group_t s_flex_groups_allocated;
 
 	/* workqueue for dio unwritten */
 	struct workqueue_struct *dio_unwritten_wq;
@@ -1328,10 +1331,20 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
 {
 	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
 		io_end->flag |= EXT4_IO_END_UNWRITTEN;
-		atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+		atomic_inc(&EXT4_I(inode)->i_unwritten);
 	}
 }
 
+static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
+{
+	return inode->i_private;
+}
+
+static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
+{
+	inode->i_private = io;
+}
+
 /*
  * Inode dynamic state flags
  */
@@ -1345,6 +1358,8 @@ enum {
 	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
 	EXT4_STATE_NEWENTRY,		/* File just added to dir */
 	EXT4_STATE_DELALLOC_RESERVED,	/* blks already reserved for delalloc */
+	EXT4_STATE_DIOREAD_LOCK,	/* Disable support for dio read
+					   nolocking */
 };
 
 #define EXT4_INODE_BIT_FNS(name, field, offset)
@@ -1932,7 +1947,7 @@ extern void ext4_htree_free_dir_info(struct dir_private_info *p);
 
 /* fsync.c */
 extern int ext4_sync_file(struct file *, loff_t, loff_t, int);
-extern int ext4_flush_completed_IO(struct inode *);
+extern int ext4_flush_unwritten_io(struct inode *);
 
 /* hash.c */
 extern int ext4fs_dirhash(const char *name, int len, struct
@@ -1966,6 +1981,8 @@ extern void ext4_exit_mballoc(void);
 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			     struct buffer_head *bh, ext4_fsblk_t block,
 			     unsigned long count, int flags);
+extern int ext4_mb_alloc_groupinfo(struct super_block *sb,
+				   ext4_group_t ngroups);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 				 ext4_group_t i, struct ext4_group_desc *desc);
 extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
@@ -2051,6 +2068,8 @@ extern void ext4_superblock_csum_set(struct super_block *sb,
 extern void *ext4_kvmalloc(size_t size, gfp_t flags);
 extern void *ext4_kvzalloc(size_t size, gfp_t flags);
 extern void ext4_kvfree(void *ptr);
+extern int ext4_alloc_flex_bg_array(struct super_block *sb,
+				    ext4_group_t ngroup);
 extern __printf(4, 5)
 void __ext4_error(struct super_block *, const char *, unsigned int,
 		  const char *, ...);
@@ -2352,6 +2371,7 @@ extern const struct file_operations ext4_dir_operations;
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
 extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
+extern void ext4_unwritten_wait(struct inode *inode);
 
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
@@ -2400,11 +2420,11 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
+extern void ext4_add_complete_io(ext4_io_end_t *io_end);
 extern void ext4_exit_pageio(void);
 extern void ext4_ioend_wait(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
-extern int ext4_end_io_nolock(ext4_io_end_t *io);
 extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
 			       struct page *page,
@@ -2452,6 +2472,21 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
 	set_bit(BH_BITMAP_UPTODATE, &(bh)->b_state);
 }
 
+/*
+ * Disable DIO read nolock optimization, so new dioreaders will be forced
+ * to grab i_mutex
+ */
+static inline void ext4_inode_block_unlocked_dio(struct inode *inode)
+{
+	ext4_set_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
+	smp_mb();
+}
+static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
+{
+	smp_mb();
+	ext4_clear_inode_state(inode, EXT4_STATE_DIOREAD_LOCK);
+}
+
 #define in_range(b, first, len)	((b) >= (first) && (b) <= (first) + (len) - 1)
 
 /* For ioend & aio unwritten conversion wait queues */
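
The two helpers added above pair an inode state flag with full memory barriers: the blocker sets the flag and fences, while readers bump an in-flight count, fence, and then re-check the flag (see the indirect.c hunk later in this diff). A userspace analog of that handshake, as an illustrative C11-atomics sketch with invented names:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool dioread_locked;
static atomic_int  dio_in_flight;

/* Reader side: announce ourselves, fence, then re-check the flag. */
static bool try_enter_unlocked_read(void)
{
	atomic_fetch_add(&dio_in_flight, 1);
	atomic_thread_fence(memory_order_seq_cst);
	if (atomic_load(&dioread_locked)) {
		atomic_fetch_sub(&dio_in_flight, 1);
		return false;	/* fall back to the locked path */
	}
	return true;
}

/* Blocker side: set the flag, fence, then wait for readers to drain. */
static void block_unlocked_reads(void)
{
	atomic_store(&dioread_locked, true);
	atomic_thread_fence(memory_order_seq_cst);
	while (atomic_load(&dio_in_flight) > 0)
		;	/* the kernel waits on i_dio_count instead */
}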
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index aabbb3f53683..1c94cca35ed1 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1177,7 +1177,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
 		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
-	neh->eh_depth = cpu_to_le16(le16_to_cpu(neh->eh_depth) + 1);
+	le16_add_cpu(&neh->eh_depth, 1);
 	ext4_mark_inode_dirty(handle, inode);
 out:
 	brelse(bh);
@@ -1656,16 +1656,60 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 }
 
 /*
+ * This function does a very simple check to see if we can collapse
+ * an extent tree with a single extent tree leaf block into the inode.
+ */
+static void ext4_ext_try_to_merge_up(handle_t *handle,
+				     struct inode *inode,
+				     struct ext4_ext_path *path)
+{
+	size_t s;
+	unsigned max_root = ext4_ext_space_root(inode, 0);
+	ext4_fsblk_t blk;
+
+	if ((path[0].p_depth != 1) ||
+	    (le16_to_cpu(path[0].p_hdr->eh_entries) != 1) ||
+	    (le16_to_cpu(path[1].p_hdr->eh_entries) > max_root))
+		return;
+
+	/*
+	 * We need to modify the block allocation bitmap and the block
+	 * group descriptor to release the extent tree block.  If we
+	 * can't get the journal credits, give up.
+	 */
+	if (ext4_journal_extend(handle, 2))
+		return;
+
+	/*
+	 * Copy the extent data up to the inode
+	 */
+	blk = ext4_idx_pblock(path[0].p_idx);
+	s = le16_to_cpu(path[1].p_hdr->eh_entries) *
+		sizeof(struct ext4_extent_idx);
+	s += sizeof(struct ext4_extent_header);
+
+	memcpy(path[0].p_hdr, path[1].p_hdr, s);
+	path[0].p_depth = 0;
+	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
+		(path[1].p_ext - EXT_FIRST_EXTENT(path[1].p_hdr));
+	path[0].p_hdr->eh_max = cpu_to_le16(max_root);
+
+	brelse(path[1].p_bh);
+	ext4_free_blocks(handle, inode, NULL, blk, 1,
+			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+}
+
+/*
  * This function tries to merge the @ex extent to neighbours in the tree.
  * return 1 if merge left else 0.
  */
-static int ext4_ext_try_to_merge(struct inode *inode,
+static void ext4_ext_try_to_merge(handle_t *handle,
+				  struct inode *inode,
 				  struct ext4_ext_path *path,
 				  struct ext4_extent *ex) {
 	struct ext4_extent_header *eh;
 	unsigned int depth;
 	int merge_done = 0;
-	int ret = 0;
 
 	depth = ext_depth(inode);
 	BUG_ON(path[depth].p_hdr == NULL);
@@ -1675,9 +1719,9 @@ static int ext4_ext_try_to_merge(struct inode *inode,
 		merge_done = ext4_ext_try_to_merge_right(inode, path, ex - 1);
 
 	if (!merge_done)
-		ret = ext4_ext_try_to_merge_right(inode, path, ex);
+		(void) ext4_ext_try_to_merge_right(inode, path, ex);
 
-	return ret;
+	ext4_ext_try_to_merge_up(handle, inode, path);
 }
 
 /*
@@ -1893,7 +1937,7 @@ has_space:
 merge:
 	/* try to merge extents */
 	if (!(flag & EXT4_GET_BLOCKS_PRE_IO))
-		ext4_ext_try_to_merge(inode, path, nearex);
+		ext4_ext_try_to_merge(handle, inode, path, nearex);
 
 
 	/* time to correct all indexes above */
@@ -1901,7 +1945,7 @@ merge:
 	if (err)
 		goto cleanup;
 
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 
 cleanup:
 	if (npath) {
@@ -2092,13 +2136,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /*
- * ext4_ext_check_cache()
+ * ext4_ext_in_cache()
  * Checks to see if the given block is in the cache.
  * If it is, the cached extent is stored in the given
- * cache extent pointer.  If the cached extent is a hole,
- * this routine should be used instead of
- * ext4_ext_in_cache if the calling function needs to
- * know the size of the hole.
+ * cache extent pointer.
  *
  * @inode: The files inode
  * @block: The block to look for in the cache
@@ -2107,8 +2148,10 @@ ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
  *
  * Return 0 if cache is invalid; 1 if the cache is valid
  */
-static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
-			struct ext4_ext_cache *ex){
+static int
+ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
+		  struct ext4_extent *ex)
+{
 	struct ext4_ext_cache *cex;
 	struct ext4_sb_info *sbi;
 	int ret = 0;
@@ -2125,7 +2168,9 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block,
 		goto errout;
 
 	if (in_range(block, cex->ec_block, cex->ec_len)) {
-		memcpy(ex, cex, sizeof(struct ext4_ext_cache));
+		ex->ee_block = cpu_to_le32(cex->ec_block);
+		ext4_ext_store_pblock(ex, cex->ec_start);
+		ex->ee_len = cpu_to_le16(cex->ec_len);
 		ext_debug("%u cached by %u:%u:%llu\n",
 				block,
 				cex->ec_block, cex->ec_len, cex->ec_start);
@@ -2138,37 +2183,6 @@ errout:
 }
 
 /*
- * ext4_ext_in_cache()
- * Checks to see if the given block is in the cache.
- * If it is, the cached extent is stored in the given
- * extent pointer.
- *
- * @inode: The files inode
- * @block: The block to look for in the cache
- * @ex:    Pointer where the cached extent will be stored
- *         if it contains block
- *
- * Return 0 if cache is invalid; 1 if the cache is valid
- */
-static int
-ext4_ext_in_cache(struct inode *inode, ext4_lblk_t block,
-			struct ext4_extent *ex)
-{
-	struct ext4_ext_cache cex;
-	int ret = 0;
-
-	if (ext4_ext_check_cache(inode, block, &cex)) {
-		ex->ee_block = cpu_to_le32(cex.ec_block);
-		ext4_ext_store_pblock(ex, cex.ec_start);
-		ex->ee_len = cpu_to_le16(cex.ec_len);
-		ret = 1;
-	}
-
-	return ret;
-}
-
-
-/*
  * ext4_ext_rm_idx:
  * removes index from the index block.
  */
@@ -2274,10 +2288,13 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
 	unsigned short ee_len =  ext4_ext_get_actual_len(ex);
 	ext4_fsblk_t pblk;
-	int flags = EXT4_FREE_BLOCKS_FORGET;
+	int flags = 0;
 
 	if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
-		flags |= EXT4_FREE_BLOCKS_METADATA;
+		flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET;
+	else if (ext4_should_journal_data(inode))
+		flags |= EXT4_FREE_BLOCKS_FORGET;
+
 	/*
 	 * For bigalloc file systems, we never free a partial cluster
 	 * at the beginning of the extent.  Instead, we make a note
@@ -2572,7 +2589,7 @@ static int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,
 	struct ext4_ext_path *path = NULL;
 	ext4_fsblk_t partial_cluster = 0;
 	handle_t *handle;
-	int i = 0, err;
+	int i = 0, err = 0;
 
 	ext_debug("truncate since %u to %u\n", start, end);
 
@@ -2604,12 +2621,16 @@ again:
 		return PTR_ERR(path);
 	}
 	depth = ext_depth(inode);
+	/* Leaf not may not exist only if inode has no blocks at all */
 	ex = path[depth].p_ext;
 	if (!ex) {
-		ext4_ext_drop_refs(path);
-		kfree(path);
-		path = NULL;
-		goto cont;
+		if (depth) {
+			EXT4_ERROR_INODE(inode,
+					 "path[%d].p_hdr == NULL",
+					 depth);
+			err = -EIO;
+		}
+		goto out;
 	}
 
 	ee_block = le32_to_cpu(ex->ee_block);
@@ -2641,8 +2662,6 @@ again:
 			goto out;
 		}
 	}
-cont:
-
 	/*
 	 * We start scanning from right side, freeing all the blocks
 	 * after i_size and walking into the tree depth-wise.
@@ -2924,9 +2943,9 @@ static int ext4_split_extent_at(handle_t *handle,
 		ext4_ext_mark_initialized(ex);
 
 		if (!(flags & EXT4_GET_BLOCKS_PRE_IO))
-			ext4_ext_try_to_merge(inode, path, ex);
+			ext4_ext_try_to_merge(handle, inode, path, ex);
 
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	}
 
@@ -2958,8 +2977,8 @@ static int ext4_split_extent_at(handle_t *handle,
 			goto fix_extent_len;
 		/* update the extent length and mark as initialized */
 		ex->ee_len = cpu_to_le16(ee_len);
-		ext4_ext_try_to_merge(inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_try_to_merge(handle, inode, path, ex);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	} else if (err)
 		goto fix_extent_len;
@@ -3041,7 +3060,6 @@ out:
 	return err ? err : map->m_len;
 }
 
-#define EXT4_EXT_ZERO_LEN 7
 /*
  * This function is called by ext4_ext_map_blocks() if someone tries to write
  * to an uninitialized extent. It may result in splitting the uninitialized
@@ -3067,13 +3085,14 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 					   struct ext4_map_blocks *map,
 					   struct ext4_ext_path *path)
 {
+	struct ext4_sb_info *sbi;
 	struct ext4_extent_header *eh;
 	struct ext4_map_blocks split_map;
 	struct ext4_extent zero_ex;
 	struct ext4_extent *ex;
 	ext4_lblk_t ee_block, eof_block;
 	unsigned int ee_len, depth;
-	int allocated;
+	int allocated, max_zeroout = 0;
 	int err = 0;
 	int split_flag = 0;
 
@@ -3081,6 +3100,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3081 "block %llu, max_blocks %u\n", inode->i_ino, 3100 "block %llu, max_blocks %u\n", inode->i_ino,
3082 (unsigned long long)map->m_lblk, map->m_len); 3101 (unsigned long long)map->m_lblk, map->m_len);
3083 3102
3103 sbi = EXT4_SB(inode->i_sb);
3084 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> 3104 eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
3085 inode->i_sb->s_blocksize_bits; 3105 inode->i_sb->s_blocksize_bits;
3086 if (eof_block < map->m_lblk + map->m_len) 3106 if (eof_block < map->m_lblk + map->m_len)
@@ -3180,9 +3200,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 */
 	split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0;
 
-	/* If extent has less than 2*EXT4_EXT_ZERO_LEN zerout directly */
-	if (ee_len <= 2*EXT4_EXT_ZERO_LEN &&
-	    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+	if (EXT4_EXT_MAY_ZEROOUT & split_flag)
+		max_zeroout = sbi->s_extent_max_zeroout_kb >>
+			inode->i_sb->s_blocksize_bits;
+
+	/* If extent is less than s_max_zeroout_kb, zeroout directly */
+	if (max_zeroout && (ee_len <= max_zeroout)) {
 		err = ext4_ext_zeroout(inode, ex);
 		if (err)
 			goto out;
@@ -3191,8 +3214,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		if (err)
 			goto out;
 		ext4_ext_mark_initialized(ex);
-		ext4_ext_try_to_merge(inode, path, ex);
-		err = ext4_ext_dirty(handle, inode, path + depth);
+		ext4_ext_try_to_merge(handle, inode, path, ex);
+		err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 		goto out;
 	}
 
@@ -3206,9 +3229,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	split_map.m_lblk = map->m_lblk;
 	split_map.m_len = map->m_len;
 
-	if (allocated > map->m_len) {
-		if (allocated <= EXT4_EXT_ZERO_LEN &&
-		    (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+	if (max_zeroout && (allocated > map->m_len)) {
+		if (allocated <= max_zeroout) {
 			/* case 3 */
 			zero_ex.ee_block =
 					 cpu_to_le32(map->m_lblk);
@@ -3220,9 +3242,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 				goto out;
 			split_map.m_lblk = map->m_lblk;
 			split_map.m_len = allocated;
-		} else if ((map->m_lblk - ee_block + map->m_len <
-			   EXT4_EXT_ZERO_LEN) &&
-			   (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
+		} else if (map->m_lblk - ee_block + map->m_len < max_zeroout) {
 			/* case 2 */
 			if (map->m_lblk != ee_block) {
 				zero_ex.ee_block = ex->ee_block;
@@ -3242,7 +3262,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	}
 
 	allocated = ext4_split_extent(handle, inode, path,
-				       &split_map, split_flag, 0);
+				      &split_map, split_flag, 0);
 	if (allocated < 0)
 		err = allocated;
 
@@ -3256,7 +3276,7 @@ out:
  * to an uninitialized extent.
  *
  * Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple /initialized uninitialized extents (up to three)
+ * extent into multiple initialized/uninitialized extents (up to three)
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be uninitialized
  *   b> Splits in two extents: Write is happening at either end of the extent
@@ -3333,10 +3353,10 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
 	/* note: ext4_ext_correct_indexes() isn't needed here because
 	 * borders are not changed
 	 */
-	ext4_ext_try_to_merge(inode, path, ex);
+	ext4_ext_try_to_merge(handle, inode, path, ex);
 
 	/* Mark modified extent as dirty */
-	err = ext4_ext_dirty(handle, inode, path + depth);
+	err = ext4_ext_dirty(handle, inode, path + path->p_depth);
 out:
 	ext4_ext_show_leaf(inode, path);
 	return err;
@@ -3600,7 +3620,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 {
 	int ret = 0;
 	int err = 0;
-	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+	ext4_io_end_t *io = ext4_inode_aio(inode);
 
 	ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical "
 		  "block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -3615,6 +3635,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 		ret = ext4_split_unwritten_extents(handle, inode, map,
 						   path, flags);
+		if (ret <= 0)
+			goto out;
 		/*
 		 * Flag the inode(non aio case) or end_io struct (aio case)
 		 * that this IO needs to conversion to written when IO is
@@ -3858,8 +3880,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	unsigned int allocated = 0, offset = 0;
 	unsigned int allocated_clusters = 0;
 	struct ext4_allocation_request ar;
-	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
+	ext4_io_end_t *io = ext4_inode_aio(inode);
 	ext4_lblk_t cluster_offset;
+	int set_unwritten = 0;
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
 		  map->m_lblk, map->m_len, inode->i_ino);
@@ -4082,13 +4105,8 @@ got_allocated_blocks:
 		 * For non asycn direct IO case, flag the inode state
 		 * that we need to perform conversion when IO is done.
 		 */
-		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
-			if (io)
-				ext4_set_io_unwritten_flag(inode, io);
-			else
-				ext4_set_inode_state(inode,
-						     EXT4_STATE_DIO_UNWRITTEN);
-		}
+		if ((flags & EXT4_GET_BLOCKS_PRE_IO))
+			set_unwritten = 1;
 		if (ext4_should_dioread_nolock(inode))
 			map->m_flags |= EXT4_MAP_UNINIT;
 	}
@@ -4100,6 +4118,15 @@ got_allocated_blocks:
 	if (!err)
 		err = ext4_ext_insert_extent(handle, inode, path,
 					     &newex, flags);
+
+	if (!err && set_unwritten) {
+		if (io)
+			ext4_set_io_unwritten_flag(inode, io);
+		else
+			ext4_set_inode_state(inode,
+					     EXT4_STATE_DIO_UNWRITTEN);
+	}
+
 	if (err && free_on_err) {
 		int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
 			EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
@@ -4241,7 +4268,7 @@ void ext4_ext_truncate(struct inode *inode)
 	 * finish any pending end_io work so we won't run the risk of
 	 * converting any truncated blocks to initialized later
 	 */
-	ext4_flush_completed_IO(inode);
+	ext4_flush_unwritten_io(inode);
 
 	/*
 	 * probably first extent we're gonna free will be last in block
@@ -4769,9 +4796,32 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	loff_t first_page_offset, last_page_offset;
 	int credits, err = 0;
 
+	/*
+	 * Write out all dirty pages to avoid race conditions
+	 * Then release them.
+	 */
+	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+		err = filemap_write_and_wait_range(mapping,
+			offset, offset + length - 1);
+
+		if (err)
+			return err;
+	}
+
+	mutex_lock(&inode->i_mutex);
+	/* It's not possible punch hole on append only file */
+	if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
+		err = -EPERM;
+		goto out_mutex;
+	}
+	if (IS_SWAPFILE(inode)) {
+		err = -ETXTBSY;
+		goto out_mutex;
+	}
+
 	/* No need to punch hole beyond i_size */
 	if (offset >= inode->i_size)
-		return 0;
+		goto out_mutex;
 
 	/*
 	 * If the hole extends beyond i_size, set the hole
@@ -4789,35 +4839,26 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	first_page_offset = first_page << PAGE_CACHE_SHIFT;
 	last_page_offset = last_page << PAGE_CACHE_SHIFT;
 
-	/*
-	 * Write out all dirty pages to avoid race conditions
-	 * Then release them.
-	 */
-	if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
-		err = filemap_write_and_wait_range(mapping,
-			offset, offset + length - 1);
-
-		if (err)
-			return err;
-	}
-
 	/* Now release the pages */
 	if (last_page_offset > first_page_offset) {
 		truncate_pagecache_range(inode, first_page_offset,
 					 last_page_offset - 1);
 	}
 
-	/* finish any pending end_io work */
-	ext4_flush_completed_IO(inode);
+	/* Wait all existing dio workers, newcomers will block on i_mutex */
+	ext4_inode_block_unlocked_dio(inode);
+	err = ext4_flush_unwritten_io(inode);
+	if (err)
+		goto out_dio;
+	inode_dio_wait(inode);
 
 	credits = ext4_writepage_trans_blocks(inode);
 	handle = ext4_journal_start(inode, credits);
-	if (IS_ERR(handle))
-		return PTR_ERR(handle);
+	if (IS_ERR(handle)) {
+		err = PTR_ERR(handle);
+		goto out_dio;
+	}
 
-	err = ext4_orphan_add(handle, inode);
-	if (err)
-		goto out;
 
 	/*
 	 * Now we need to zero out the non-page-aligned data in the
@@ -4903,10 +4944,13 @@ int ext4_ext_punch_hole(struct file *file, loff_t offset, loff_t length)
 	up_write(&EXT4_I(inode)->i_data_sem);
 
 out:
-	ext4_orphan_del(handle, inode);
 	inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
 	ext4_mark_inode_dirty(handle, inode);
 	ext4_journal_stop(handle);
+out_dio:
+	ext4_inode_resume_unlocked_dio(inode);
+out_mutex:
+	mutex_unlock(&inode->i_mutex);
 	return err;
 }
 int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
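
The reordered punch-hole path above is the backend for FALLOC_FL_PUNCH_HOLE; with the new checks, an append-only or immutable file now fails with EPERM and a swapfile with ETXTBSY. A minimal userspace sketch of invoking it (the offset and length are arbitrary example values):

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/falloc.h>
#include <stdio.h>
#include <unistd.h>

/* Punch a 1 MiB hole at offset 4096; PUNCH_HOLE must be combined
 * with KEEP_SIZE so i_size is left untouched. */
int main(int argc, char **argv)
{
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_WRONLY);
	if (fd < 0)
		return 1;
	if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
		      4096, 1024 * 1024) != 0) {
		perror("fallocate");
		return 1;
	}
	close(fd);
	return 0;
}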
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3b0e3bdaabfc..ca6f07afe601 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,11 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static void ext4_aiodio_wait(struct inode *inode)
+void ext4_unwritten_wait(struct inode *inode)
 {
 	wait_queue_head_t *wq = ext4_ioend_wq(inode);
 
-	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
+	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
 }
 
 /*
@@ -116,7 +116,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
116 "performance will be poor.", 116 "performance will be poor.",
117 inode->i_ino, current->comm); 117 inode->i_ino, current->comm);
118 mutex_lock(ext4_aio_mutex(inode)); 118 mutex_lock(ext4_aio_mutex(inode));
119 ext4_aiodio_wait(inode); 119 ext4_unwritten_wait(inode);
120 } 120 }
121 121
122 BUG_ON(iocb->ki_pos != pos); 122 BUG_ON(iocb->ki_pos != pos);
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2a1dcea4f12e..be1d89f385b4 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,87 +34,6 @@
 
 #include <trace/events/ext4.h>
 
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef	EXT4FS_DEBUG
-	struct list_head *cur, *before, *after;
-	ext4_io_end_t *io, *io0, *io1;
-	unsigned long flags;
-
-	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
-		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
-		return;
-	}
-
-	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
-	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
-	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
-		cur = &io->list;
-		before = cur->prev;
-		io0 = container_of(before, ext4_io_end_t, list);
-		after = cur->next;
-		io1 = container_of(after, ext4_io_end_t, list);
-
-		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-			    io, inode->i_ino, io0, io1);
-	}
-	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
-int ext4_flush_completed_IO(struct inode *inode)
-{
-	ext4_io_end_t *io;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long flags;
-	int ret = 0;
-	int ret2 = 0;
-
-	dump_completed_IO(inode);
-	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	while (!list_empty(&ei->i_completed_io_list)){
-		io = list_entry(ei->i_completed_io_list.next,
-				ext4_io_end_t, list);
-		list_del_init(&io->list);
-		io->flag |= EXT4_IO_END_IN_FSYNC;
-		/*
-		 * Calling ext4_end_io_nolock() to convert completed
-		 * IO to written.
-		 *
-		 * When ext4_sync_file() is called, run_queue() may already
-		 * about to flush the work corresponding to this io structure.
-		 * It will be upset if it founds the io structure related
-		 * to the work-to-be schedule is freed.
-		 *
-		 * Thus we need to keep the io structure still valid here after
-		 * conversion finished. The io structure has a flag to
-		 * avoid double converting from both fsync and background work
-		 * queue work.
-		 */
-		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-		ret = ext4_end_io_nolock(io);
-		if (ret < 0)
-			ret2 = ret;
-		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-		io->flag &= ~EXT4_IO_END_IN_FSYNC;
-	}
-	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-	return (ret2 < 0) ? ret2 : 0;
-}
-
 /*
  * If we're not journaling and this is a just-created file, we have to
  * sync our parent directory (if it was freshly created) since
@@ -203,7 +122,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	struct inode *inode = file->f_mapping->host;
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
-	int ret;
+	int ret, err;
 	tid_t commit_tid;
 	bool needs_barrier = false;
 
@@ -219,7 +138,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		goto out;
 
-	ret = ext4_flush_completed_IO(inode);
+	ret = ext4_flush_unwritten_io(inode);
 	if (ret < 0)
 		goto out;
 
@@ -255,8 +174,11 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 			needs_barrier = true;
 		jbd2_log_start_commit(journal, commit_tid);
 		ret = jbd2_log_wait_commit(journal, commit_tid);
-		if (needs_barrier)
-			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+		if (needs_barrier) {
+			err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+			if (!ret)
+				ret = err;
+		}
  out:
 	mutex_unlock(&inode->i_mutex);
 	trace_ext4_sync_file_exit(inode, ret);
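
With the flush error now folded into the return value, a failed disk-cache flush surfaces to the caller of fsync() instead of being silently dropped. A minimal sketch of the userspace side, which is only correct if it actually checks that return value:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Data is only durable once fsync() returns 0. */
int main(void)
{
	int fd = open("journal.dat", O_WRONLY | O_CREAT | O_APPEND, 0644);
	const char buf[] = "record\n";

	if (fd < 0)
		return 1;
	if (write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf) ||
	    fsync(fd) != 0) {
		perror("write/fsync");
		return 1;
	}
	close(fd);
	return 0;
}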
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 26154b81b836..fa36372f3fdf 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -697,6 +697,15 @@ got_group:
 		if (!gdp)
 			goto fail;
 
+		/*
+		 * Check free inodes count before loading bitmap.
+		 */
+		if (ext4_free_inodes_count(sb, gdp) == 0) {
+			if (++group == ngroups)
+				group = 0;
+			continue;
+		}
+
 		brelse(inode_bitmap_bh);
 		inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
 		if (!inode_bitmap_bh)
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 830e1b2bf145..792e388e7b44 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -807,16 +807,30 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
 
 retry:
 	if (rw == READ && ext4_should_dioread_nolock(inode)) {
-		if (unlikely(!list_empty(&ei->i_completed_io_list))) {
+		if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) {
 			mutex_lock(&inode->i_mutex);
-			ext4_flush_completed_IO(inode);
+			ext4_flush_unwritten_io(inode);
 			mutex_unlock(&inode->i_mutex);
 		}
+		/*
+		 * Nolock dioread optimization may be dynamically disabled
+		 * via ext4_inode_block_unlocked_dio(). Check inode's state
+		 * while holding extra i_dio_count ref.
+		 */
+		atomic_inc(&inode->i_dio_count);
+		smp_mb();
+		if (unlikely(ext4_test_inode_state(inode,
+						    EXT4_STATE_DIOREAD_LOCK))) {
+			inode_dio_done(inode);
+			goto locked;
+		}
 		ret = __blockdev_direct_IO(rw, iocb, inode,
 				 inode->i_sb->s_bdev, iov,
 				 offset, nr_segs,
 				 ext4_get_block, NULL, NULL, 0);
+		inode_dio_done(inode);
 	} else {
+locked:
 		ret = blockdev_direct_IO(rw, iocb, inode, iov,
 				 offset, nr_segs, ext4_get_block);
 
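
The nolock optimization above only applies to O_DIRECT reads; whether a given read takes the unlocked path or falls back to i_mutex is decided entirely inside the kernel. A minimal userspace sketch of issuing such a read (the 4096-byte alignment is an assumption matching a common block size):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* O_DIRECT requires the buffer, offset, and length to be aligned
 * to the logical block size. */
int main(int argc, char **argv)
{
	void *buf;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0 || posix_memalign(&buf, 4096, 4096) != 0)
		return 1;
	if (pread(fd, buf, 4096, 0) < 0)
		perror("pread");
	free(buf);
	close(fd);
	return 0;
}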
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c862ee5fe79d..b3c243b9afa5 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -732,11 +732,13 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
 	err = ext4_map_blocks(handle, inode, &map,
 			      create ? EXT4_GET_BLOCKS_CREATE : 0);
 
+	/* ensure we send some value back into *errp */
+	*errp = 0;
+
 	if (err < 0)
 		*errp = err;
 	if (err <= 0)
 		return NULL;
-	*errp = 0;
 
 	bh = sb_getblk(inode->i_sb, map.m_pblk);
 	if (!bh) {
@@ -1954,9 +1956,6 @@ out:
 	return ret;
 }
 
-static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
-static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
-
 /*
  * Note that we don't need to start a transaction unless we're journaling data
  * because we should have holes filled from ext4_page_mkwrite(). We even don't
@@ -2463,6 +2462,16 @@ static int ext4_nonda_switch(struct super_block *sb)
 	free_blocks  = EXT4_C2B(sbi,
 		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
 	dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyclusters_counter);
+	/*
+	 * Start pushing delalloc when 1/2 of free blocks are dirty.
+	 */
+	if (dirty_blocks && (free_blocks < 2 * dirty_blocks) &&
+	    !writeback_in_progress(sb->s_bdi) &&
+	    down_read_trylock(&sb->s_umount)) {
+		writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
+		up_read(&sb->s_umount);
+	}
+
 	if (2 * free_blocks < 3 * dirty_blocks ||
 	    free_blocks < (dirty_blocks + EXT4_FREECLUSTERS_WATERMARK)) {
 		/*
@@ -2471,13 +2480,6 @@ static int ext4_nonda_switch(struct super_block *sb)
 		 */
 		return 1;
 	}
-	/*
-	 * Even if we don't switch but are nearing capacity,
-	 * start pushing delalloc when 1/2 of free blocks are dirty.
-	 */
-	if (free_blocks < 2 * dirty_blocks)
-		writeback_inodes_sb_if_idle(sb, WB_REASON_FS_FREE_SPACE);
-
 	return 0;
 }
 
@@ -2879,9 +2881,6 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 {
 	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
 	ext4_io_end_t *io_end = iocb->private;
-	struct workqueue_struct *wq;
-	unsigned long flags;
-	struct ext4_inode_info *ei;
 
 	/* if not async direct IO or dio with 0 bytes write, just return */
 	if (!io_end || !size)
@@ -2910,24 +2909,14 @@ out:
 		io_end->iocb = iocb;
 		io_end->result = ret;
 	}
-	wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
 
-	/* Add the io_end to per-inode completed aio dio list*/
-	ei = EXT4_I(io_end->inode);
-	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	list_add_tail(&io_end->list, &ei->i_completed_io_list);
-	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-
-	/* queue the work to convert unwritten extents to written */
-	queue_work(wq, &io_end->work);
+	ext4_add_complete_io(io_end);
 }
 
 static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
 {
 	ext4_io_end_t *io_end = bh->b_private;
-	struct workqueue_struct *wq;
 	struct inode *inode;
-	unsigned long flags;
 
 	if (!test_clear_buffer_uninit(bh) || !io_end)
 		goto out;
@@ -2946,15 +2935,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
 	 */
 	inode = io_end->inode;
 	ext4_set_io_unwritten_flag(inode, io_end);
-
-	/* Add the io_end to per-inode completed io list*/
-	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
-	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
-	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-
-	wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
-	/* queue the work to convert unwritten extents to written */
-	queue_work(wq, &io_end->work);
+	ext4_add_complete_io(io_end);
 out:
 	bh->b_private = NULL;
 	bh->b_end_io = NULL;
@@ -3029,6 +3010,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		overwrite = *((int *)iocb->private);
 
 		if (overwrite) {
+			atomic_inc(&inode->i_dio_count);
 			down_read(&EXT4_I(inode)->i_data_sem);
 			mutex_unlock(&inode->i_mutex);
 		}
@@ -3054,7 +3036,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 		 * hook to the iocb.
 		 */
 		iocb->private = NULL;
-		EXT4_I(inode)->cur_aio_dio = NULL;
+		ext4_inode_aio_set(inode, NULL);
 		if (!is_sync_kiocb(iocb)) {
 			ext4_io_end_t *io_end =
 				ext4_init_io_end(inode, GFP_NOFS);
@@ -3071,7 +3053,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 			 * is a unwritten extents needs to be converted
 			 * when IO is completed.
 			 */
-			EXT4_I(inode)->cur_aio_dio = iocb->private;
+			ext4_inode_aio_set(inode, io_end);
 		}
 
 		if (overwrite)
@@ -3091,7 +3073,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 						 NULL,
 						 DIO_LOCKING);
 		if (iocb->private)
-			EXT4_I(inode)->cur_aio_dio = NULL;
+			ext4_inode_aio_set(inode, NULL);
 		/*
 		 * The io_end structure takes a reference to the inode,
 		 * that structure needs to be destroyed and the
@@ -3126,6 +3108,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
 	retake_lock:
 		/* take i_mutex locking again if we do a ovewrite dio */
 		if (overwrite) {
+			inode_dio_done(inode);
 			up_read(&EXT4_I(inode)->i_data_sem);
 			mutex_lock(&inode->i_mutex);
 		}
@@ -4052,6 +4035,7 @@ static int ext4_do_update_inode(handle_t *handle,
 	struct ext4_inode_info *ei = EXT4_I(inode);
 	struct buffer_head *bh = iloc->bh;
 	int err = 0, rc, block;
+	int need_datasync = 0;
 	uid_t i_uid;
 	gid_t i_gid;
 
@@ -4102,7 +4086,10 @@ static int ext4_do_update_inode(handle_t *handle,
 		raw_inode->i_file_acl_high =
 			cpu_to_le16(ei->i_file_acl >> 32);
 	raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl);
-	ext4_isize_set(raw_inode, ei->i_disksize);
+	if (ei->i_disksize != ext4_isize(raw_inode)) {
+		ext4_isize_set(raw_inode, ei->i_disksize);
+		need_datasync = 1;
+	}
 	if (ei->i_disksize > 0x7fffffffULL) {
 		struct super_block *sb = inode->i_sb;
 		if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
@@ -4155,7 +4142,7 @@ static int ext4_do_update_inode(handle_t *handle,
 		err = rc;
 	ext4_clear_inode_state(inode, EXT4_STATE_NEW);
 
-	ext4_update_inode_fsync_trans(handle, inode, 0);
+	ext4_update_inode_fsync_trans(handle, inode, need_datasync);
 out_brelse:
 	brelse(bh);
 	ext4_std_error(inode->i_sb, err);
@@ -4298,7 +4285,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4298 } 4285 }
4299 4286
4300 if (attr->ia_valid & ATTR_SIZE) { 4287 if (attr->ia_valid & ATTR_SIZE) {
4301 inode_dio_wait(inode);
4302 4288
4303 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { 4289 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
4304 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 4290 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4347,8 +4333,17 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4347 } 4333 }
4348 4334
4349 if (attr->ia_valid & ATTR_SIZE) { 4335 if (attr->ia_valid & ATTR_SIZE) {
4350 if (attr->ia_size != i_size_read(inode)) 4336 if (attr->ia_size != i_size_read(inode)) {
4351 truncate_setsize(inode, attr->ia_size); 4337 truncate_setsize(inode, attr->ia_size);
4338 /* Inode size will be reduced, wait for dio in flight.
4339 * Temporarily disable dioread_nolock to prevent
4340 * livelock. */
4341 if (orphan) {
4342 ext4_inode_block_unlocked_dio(inode);
4343 inode_dio_wait(inode);
4344 ext4_inode_resume_unlocked_dio(inode);
4345 }
4346 }
4352 ext4_truncate(inode); 4347 ext4_truncate(inode);
4353 } 4348 }
4354 4349
@@ -4727,6 +4722,10 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4727 return err; 4722 return err;
4728 } 4723 }
4729 4724
4725 /* Wait for all existing dio workers */
4726 ext4_inode_block_unlocked_dio(inode);
4727 inode_dio_wait(inode);
4728
4730 jbd2_journal_lock_updates(journal); 4729 jbd2_journal_lock_updates(journal);
4731 4730
4732 /* 4731 /*
@@ -4746,6 +4745,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4746 ext4_set_aops(inode); 4745 ext4_set_aops(inode);
4747 4746
4748 jbd2_journal_unlock_updates(journal); 4747 jbd2_journal_unlock_updates(journal);
4748 ext4_inode_resume_unlocked_dio(inode);
4749 4749
4750 /* Finally we can mark the inode as dirty. */ 4750 /* Finally we can mark the inode as dirty. */
4751 4751
@@ -4780,6 +4780,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
4780 int retries = 0; 4780 int retries = 0;
4781 4781
4782 sb_start_pagefault(inode->i_sb); 4782 sb_start_pagefault(inode->i_sb);
4783 file_update_time(vma->vm_file);
4783 /* Delalloc case is easy... */ 4784 /* Delalloc case is easy... */
4784 if (test_opt(inode->i_sb, DELALLOC) && 4785 if (test_opt(inode->i_sb, DELALLOC) &&
4785 !ext4_should_journal_data(inode) && 4786 !ext4_should_journal_data(inode) &&
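
The inode.c hunks above all use the same bracket: divert new unlocked DIO onto the locked path, drain what is already in flight, perform the layout change, then resume. A rough userspace model of the flag-plus-counter idea; the real code closes the check/increment race with additional locking, so treat this only as a sketch with made-up names:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct model_inode {
        atomic_int  dio_count;            /* unlocked DIOs in flight */
        atomic_bool unlocked_dio_blocked; /* set while layout changes */
    };

    static bool start_unlocked_dio(struct model_inode *i)
    {
        if (atomic_load(&i->unlocked_dio_blocked))
            return false;                 /* caller takes the locked path */
        atomic_fetch_add(&i->dio_count, 1);
        return true;
    }

    static void end_dio(struct model_inode *i)
    {
        atomic_fetch_sub(&i->dio_count, 1);
    }

    static void block_and_drain(struct model_inode *i)
    {
        atomic_store(&i->unlocked_dio_blocked, true);
        while (atomic_load(&i->dio_count) > 0)
            ;                             /* the kernel sleeps here instead */
        /* ... truncate / journal-mode flip / extent swap runs here ... */
    }
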
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5439d6a56e99..5747f52f7c72 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -366,26 +366,11 @@ group_add_out:
366 return -EOPNOTSUPP; 366 return -EOPNOTSUPP;
367 } 367 }
368 368
369 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
370 EXT4_FEATURE_INCOMPAT_META_BG)) {
371 ext4_msg(sb, KERN_ERR,
372 "Online resizing not (yet) supported with meta_bg");
373 return -EOPNOTSUPP;
374 }
375
376 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, 369 if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
377 sizeof(__u64))) { 370 sizeof(__u64))) {
378 return -EFAULT; 371 return -EFAULT;
379 } 372 }
380 373
381 if (n_blocks_count > MAX_32_NUM &&
382 !EXT4_HAS_INCOMPAT_FEATURE(sb,
383 EXT4_FEATURE_INCOMPAT_64BIT)) {
384 ext4_msg(sb, KERN_ERR,
385 "File system only supports 32-bit block numbers");
386 return -EOPNOTSUPP;
387 }
388
389 err = ext4_resize_begin(sb); 374 err = ext4_resize_begin(sb);
390 if (err) 375 if (err)
391 return err; 376 return err;
@@ -420,13 +405,6 @@ resizefs_out:
420 if (!blk_queue_discard(q)) 405 if (!blk_queue_discard(q))
421 return -EOPNOTSUPP; 406 return -EOPNOTSUPP;
422 407
423 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
424 EXT4_FEATURE_RO_COMPAT_BIGALLOC)) {
425 ext4_msg(sb, KERN_ERR,
426 "FITRIM not supported with bigalloc");
427 return -EOPNOTSUPP;
428 }
429
430 if (copy_from_user(&range, (struct fstrim_range __user *)arg, 408 if (copy_from_user(&range, (struct fstrim_range __user *)arg,
431 sizeof(range))) 409 sizeof(range)))
432 return -EFAULT; 410 return -EFAULT;
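
With the bigalloc guard removed, FITRIM is expected to work on cluster-based filesystems too; the mballoc.c hunks later in this patch round minlen up to cluster units. A runnable sketch of how userspace drives this ioctl (needs appropriate privileges; error handling trimmed):

    #include <fcntl.h>
    #include <limits.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>    /* FITRIM, struct fstrim_range */
    #include <unistd.h>

    int main(int argc, char **argv)
    {
        struct fstrim_range range;
        int fd = open(argc > 1 ? argv[1] : "/", O_RDONLY);

        if (fd < 0)
            return 1;
        memset(&range, 0, sizeof(range));
        range.len = ULLONG_MAX;   /* whole filesystem */
        range.minlen = 0;         /* kernel rounds this up to cluster size */
        if (ioctl(fd, FITRIM, &range) == 0)
            printf("trimmed %llu bytes\n", (unsigned long long)range.len);
        close(fd);
        return 0;
    }
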
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 08778f6cdfe9..f8b27bf80aca 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -24,6 +24,7 @@
24#include "ext4_jbd2.h" 24#include "ext4_jbd2.h"
25#include "mballoc.h" 25#include "mballoc.h"
26#include <linux/debugfs.h> 26#include <linux/debugfs.h>
27#include <linux/log2.h>
27#include <linux/slab.h> 28#include <linux/slab.h>
28#include <trace/events/ext4.h> 29#include <trace/events/ext4.h>
29 30
@@ -1338,17 +1339,17 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1338 mb_check_buddy(e4b); 1339 mb_check_buddy(e4b);
1339} 1340}
1340 1341
1341static int mb_find_extent(struct ext4_buddy *e4b, int order, int block, 1342static int mb_find_extent(struct ext4_buddy *e4b, int block,
1342 int needed, struct ext4_free_extent *ex) 1343 int needed, struct ext4_free_extent *ex)
1343{ 1344{
1344 int next = block; 1345 int next = block;
1345 int max; 1346 int max, order;
1346 void *buddy; 1347 void *buddy;
1347 1348
1348 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group)); 1349 assert_spin_locked(ext4_group_lock_ptr(e4b->bd_sb, e4b->bd_group));
1349 BUG_ON(ex == NULL); 1350 BUG_ON(ex == NULL);
1350 1351
1351 buddy = mb_find_buddy(e4b, order, &max); 1352 buddy = mb_find_buddy(e4b, 0, &max);
1352 BUG_ON(buddy == NULL); 1353 BUG_ON(buddy == NULL);
1353 BUG_ON(block >= max); 1354 BUG_ON(block >= max);
1354 if (mb_test_bit(block, buddy)) { 1355 if (mb_test_bit(block, buddy)) {
@@ -1358,12 +1359,9 @@ static int mb_find_extent(struct ext4_buddy *e4b, int order, int block,
1358 return 0; 1359 return 0;
1359 } 1360 }
1360 1361
1361 /* FIXME dorp order completely ? */ 1362 /* find actual order */
1362 if (likely(order == 0)) { 1363 order = mb_find_order_for_block(e4b, block);
1363 /* find actual order */ 1364 block = block >> order;
1364 order = mb_find_order_for_block(e4b, block);
1365 block = block >> order;
1366 }
1367 1365
1368 ex->fe_len = 1 << order; 1366 ex->fe_len = 1 << order;
1369 ex->fe_start = block << order; 1367 ex->fe_start = block << order;
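
mb_find_extent() now always derives the buddy order itself instead of trusting a caller-supplied one. Once the order is known, the free chunk is simply the order-aligned run of 1 << order blocks around the block, which is what the two assignments above compute. The arithmetic in isolation:

    #include <stdio.h>

    int main(void)
    {
        unsigned block = 45, order = 3;   /* order as found by the buddy walk */
        unsigned start = (block >> order) << order;  /* align down */
        unsigned len = 1u << order;

        printf("block %u sits in free chunk [%u, %u) of %u blocks\n",
               block, start, start + len, len);  /* [40, 48) of 8 */
        return 0;
    }
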
@@ -1549,7 +1547,7 @@ static void ext4_mb_check_limits(struct ext4_allocation_context *ac,
1549 /* recheck chunk's availability - we don't know 1547 /* recheck chunk's availability - we don't know
1550 * when it was found (within this lock-unlock 1548 * when it was found (within this lock-unlock
1551 * period or not) */ 1549 * period or not) */
1552 max = mb_find_extent(e4b, 0, bex->fe_start, gex->fe_len, &ex); 1550 max = mb_find_extent(e4b, bex->fe_start, gex->fe_len, &ex);
1553 if (max >= gex->fe_len) { 1551 if (max >= gex->fe_len) {
1554 ext4_mb_use_best_found(ac, e4b); 1552 ext4_mb_use_best_found(ac, e4b);
1555 return; 1553 return;
@@ -1641,7 +1639,7 @@ int ext4_mb_try_best_found(struct ext4_allocation_context *ac,
1641 return err; 1639 return err;
1642 1640
1643 ext4_lock_group(ac->ac_sb, group); 1641 ext4_lock_group(ac->ac_sb, group);
1644 max = mb_find_extent(e4b, 0, ex.fe_start, ex.fe_len, &ex); 1642 max = mb_find_extent(e4b, ex.fe_start, ex.fe_len, &ex);
1645 1643
1646 if (max > 0) { 1644 if (max > 0) {
1647 ac->ac_b_ex = ex; 1645 ac->ac_b_ex = ex;
@@ -1662,17 +1660,20 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
1662 int max; 1660 int max;
1663 int err; 1661 int err;
1664 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb); 1662 struct ext4_sb_info *sbi = EXT4_SB(ac->ac_sb);
1663 struct ext4_group_info *grp = ext4_get_group_info(ac->ac_sb, group);
1665 struct ext4_free_extent ex; 1664 struct ext4_free_extent ex;
1666 1665
1667 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL)) 1666 if (!(ac->ac_flags & EXT4_MB_HINT_TRY_GOAL))
1668 return 0; 1667 return 0;
1668 if (grp->bb_free == 0)
1669 return 0;
1669 1670
1670 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b); 1671 err = ext4_mb_load_buddy(ac->ac_sb, group, e4b);
1671 if (err) 1672 if (err)
1672 return err; 1673 return err;
1673 1674
1674 ext4_lock_group(ac->ac_sb, group); 1675 ext4_lock_group(ac->ac_sb, group);
1675 max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start, 1676 max = mb_find_extent(e4b, ac->ac_g_ex.fe_start,
1676 ac->ac_g_ex.fe_len, &ex); 1677 ac->ac_g_ex.fe_len, &ex);
1677 1678
1678 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { 1679 if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
@@ -1788,7 +1789,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac,
1788 break; 1789 break;
1789 } 1790 }
1790 1791
1791 mb_find_extent(e4b, 0, i, ac->ac_g_ex.fe_len, &ex); 1792 mb_find_extent(e4b, i, ac->ac_g_ex.fe_len, &ex);
1792 BUG_ON(ex.fe_len <= 0); 1793 BUG_ON(ex.fe_len <= 0);
1793 if (free < ex.fe_len) { 1794 if (free < ex.fe_len) {
1794 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0, 1795 ext4_grp_locked_error(sb, e4b->bd_group, 0, 0,
@@ -1840,7 +1841,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac,
1840 1841
1841 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) { 1842 while (i < EXT4_CLUSTERS_PER_GROUP(sb)) {
1842 if (!mb_test_bit(i, bitmap)) { 1843 if (!mb_test_bit(i, bitmap)) {
1843 max = mb_find_extent(e4b, 0, i, sbi->s_stripe, &ex); 1844 max = mb_find_extent(e4b, i, sbi->s_stripe, &ex);
1844 if (max >= sbi->s_stripe) { 1845 if (max >= sbi->s_stripe) {
1845 ac->ac_found++; 1846 ac->ac_found++;
1846 ac->ac_b_ex = ex; 1847 ac->ac_b_ex = ex;
@@ -1862,6 +1863,12 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1862 1863
1863 BUG_ON(cr < 0 || cr >= 4); 1864 BUG_ON(cr < 0 || cr >= 4);
1864 1865
1866 free = grp->bb_free;
1867 if (free == 0)
1868 return 0;
1869 if (cr <= 2 && free < ac->ac_g_ex.fe_len)
1870 return 0;
1871
1865 /* We only do this if the grp has never been initialized */ 1872 /* We only do this if the grp has never been initialized */
1866 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) { 1873 if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
1867 int ret = ext4_mb_init_group(ac->ac_sb, group); 1874 int ret = ext4_mb_init_group(ac->ac_sb, group);
@@ -1869,10 +1876,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
1869 return 0; 1876 return 0;
1870 } 1877 }
1871 1878
1872 free = grp->bb_free;
1873 fragments = grp->bb_fragments; 1879 fragments = grp->bb_fragments;
1874 if (free == 0)
1875 return 0;
1876 if (fragments == 0) 1880 if (fragments == 0)
1877 return 0; 1881 return 0;
1878 1882
@@ -2163,6 +2167,39 @@ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
2163 return cachep; 2167 return cachep;
2164} 2168}
2165 2169
2170/*
2171 * Allocate the top-level s_group_info array for the specified number
2172 * of groups
2173 */
2174int ext4_mb_alloc_groupinfo(struct super_block *sb, ext4_group_t ngroups)
2175{
2176 struct ext4_sb_info *sbi = EXT4_SB(sb);
2177 unsigned size;
2178 struct ext4_group_info ***new_groupinfo;
2179
2180 size = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 1) >>
2181 EXT4_DESC_PER_BLOCK_BITS(sb);
2182 if (size <= sbi->s_group_info_size)
2183 return 0;
2184
2185 size = roundup_pow_of_two(sizeof(*sbi->s_group_info) * size);
2186 new_groupinfo = ext4_kvzalloc(size, GFP_KERNEL);
2187 if (!new_groupinfo) {
2188 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2189 return -ENOMEM;
2190 }
2191 if (sbi->s_group_info) {
2192 memcpy(new_groupinfo, sbi->s_group_info,
2193 sbi->s_group_info_size * sizeof(*sbi->s_group_info));
2194 ext4_kvfree(sbi->s_group_info);
2195 }
2196 sbi->s_group_info = new_groupinfo;
2197 sbi->s_group_info_size = size / sizeof(*sbi->s_group_info);
2198 ext4_debug("allocated s_groupinfo array for %d meta_bg's\n",
2199 sbi->s_group_info_size);
2200 return 0;
2201}
2202
2166/* Create and initialize ext4_group_info data for the given group. */ 2203/* Create and initialize ext4_group_info data for the given group. */
2167int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, 2204int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2168 struct ext4_group_desc *desc) 2205 struct ext4_group_desc *desc)
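
ext4_mb_alloc_groupinfo() above makes the top-level group-info array growable for meta_bg online resize: round the needed byte size up to a power of two, copy the old slots, free the old array. A compile-tested userspace model of that shape, with illustrative names:

    #include <stdlib.h>
    #include <string.h>

    struct groupinfo;                       /* opaque payload */

    struct table {
        struct groupinfo **slots;
        size_t nr_slots;
    };

    static size_t roundup_pow2(size_t x)
    {
        size_t p = 1;
        while (p < x)
            p <<= 1;
        return p;
    }

    static int table_grow(struct table *t, size_t want)
    {
        size_t bytes;
        struct groupinfo **new_slots;

        if (want <= t->nr_slots)
            return 0;                       /* already big enough */
        bytes = roundup_pow2(want * sizeof(*new_slots));
        new_slots = calloc(1, bytes);
        if (!new_slots)
            return -1;
        if (t->slots) {
            memcpy(new_slots, t->slots,
                   t->nr_slots * sizeof(*t->slots));
            free(t->slots);
        }
        t->slots = new_slots;
        t->nr_slots = bytes / sizeof(*t->slots);
        return 0;
    }

Growing in powers of two keeps reallocation rare and lets repeated small resizes reuse the slack, which is the same reasoning the deleted comment block in ext4_mb_init_backend gave for the old kmalloc-rounding trick.
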
@@ -2195,12 +2232,11 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
2195 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)]; 2232 sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
2196 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1); 2233 i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
2197 2234
2198 meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL); 2235 meta_group_info[i] = kmem_cache_zalloc(cachep, GFP_KERNEL);
2199 if (meta_group_info[i] == NULL) { 2236 if (meta_group_info[i] == NULL) {
2200 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem"); 2237 ext4_msg(sb, KERN_ERR, "can't allocate buddy mem");
2201 goto exit_group_info; 2238 goto exit_group_info;
2202 } 2239 }
2203 memset(meta_group_info[i], 0, kmem_cache_size(cachep));
2204 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, 2240 set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
2205 &(meta_group_info[i]->bb_state)); 2241 &(meta_group_info[i]->bb_state));
2206 2242
@@ -2252,49 +2288,14 @@ static int ext4_mb_init_backend(struct super_block *sb)
2252 ext4_group_t ngroups = ext4_get_groups_count(sb); 2288 ext4_group_t ngroups = ext4_get_groups_count(sb);
2253 ext4_group_t i; 2289 ext4_group_t i;
2254 struct ext4_sb_info *sbi = EXT4_SB(sb); 2290 struct ext4_sb_info *sbi = EXT4_SB(sb);
2255 struct ext4_super_block *es = sbi->s_es; 2291 int err;
2256 int num_meta_group_infos;
2257 int num_meta_group_infos_max;
2258 int array_size;
2259 struct ext4_group_desc *desc; 2292 struct ext4_group_desc *desc;
2260 struct kmem_cache *cachep; 2293 struct kmem_cache *cachep;
2261 2294
2262 /* This is the number of blocks used by GDT */ 2295 err = ext4_mb_alloc_groupinfo(sb, ngroups);
2263 num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) - 2296 if (err)
2264 1) >> EXT4_DESC_PER_BLOCK_BITS(sb); 2297 return err;
2265
2266 /*
2267 * This is the total number of blocks used by GDT including
2268 * the number of reserved blocks for GDT.
2269 * The s_group_info array is allocated with this value
2270 * to allow a clean online resize without a complex
2271 * manipulation of pointer.
2272 * The drawback is the unused memory when no resize
2273 * occurs but it's very low in terms of pages
2274 * (see comments below)
2275 * Need to handle this properly when META_BG resizing is allowed
2276 */
2277 num_meta_group_infos_max = num_meta_group_infos +
2278 le16_to_cpu(es->s_reserved_gdt_blocks);
2279 2298
2280 /*
2281 * array_size is the size of s_group_info array. We round it
2282 * to the next power of two because this approximation is done
2283 * internally by kmalloc so we can have some more memory
2284 * for free here (e.g. may be used for META_BG resize).
2285 */
2286 array_size = 1;
2287 while (array_size < sizeof(*sbi->s_group_info) *
2288 num_meta_group_infos_max)
2289 array_size = array_size << 1;
2290 /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
2291 * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
2292 * So a two level scheme suffices for now. */
2293 sbi->s_group_info = ext4_kvzalloc(array_size, GFP_KERNEL);
2294 if (sbi->s_group_info == NULL) {
2295 ext4_msg(sb, KERN_ERR, "can't allocate buddy meta group");
2296 return -ENOMEM;
2297 }
2298 sbi->s_buddy_cache = new_inode(sb); 2299 sbi->s_buddy_cache = new_inode(sb);
2299 if (sbi->s_buddy_cache == NULL) { 2300 if (sbi->s_buddy_cache == NULL) {
2300 ext4_msg(sb, KERN_ERR, "can't get new inode"); 2301 ext4_msg(sb, KERN_ERR, "can't get new inode");
@@ -2322,7 +2323,7 @@ err_freebuddy:
2322 cachep = get_groupinfo_cache(sb->s_blocksize_bits); 2323 cachep = get_groupinfo_cache(sb->s_blocksize_bits);
2323 while (i-- > 0) 2324 while (i-- > 0)
2324 kmem_cache_free(cachep, ext4_get_group_info(sb, i)); 2325 kmem_cache_free(cachep, ext4_get_group_info(sb, i));
2325 i = num_meta_group_infos; 2326 i = sbi->s_group_info_size;
2326 while (i-- > 0) 2327 while (i-- > 0)
2327 kfree(sbi->s_group_info[i]); 2328 kfree(sbi->s_group_info[i]);
2328 iput(sbi->s_buddy_cache); 2329 iput(sbi->s_buddy_cache);
@@ -4008,7 +4009,6 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4008 ext4_get_group_no_and_offset(sb, goal, &group, &block); 4009 ext4_get_group_no_and_offset(sb, goal, &group, &block);
4009 4010
4010 /* set up allocation goals */ 4011 /* set up allocation goals */
4011 memset(ac, 0, sizeof(struct ext4_allocation_context));
4012 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); 4012 ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
4013 ac->ac_status = AC_STATUS_CONTINUE; 4013 ac->ac_status = AC_STATUS_CONTINUE;
4014 ac->ac_sb = sb; 4014 ac->ac_sb = sb;
@@ -4291,7 +4291,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4291 } 4291 }
4292 } 4292 }
4293 4293
4294 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4294 ac = kmem_cache_zalloc(ext4_ac_cachep, GFP_NOFS);
4295 if (!ac) { 4295 if (!ac) {
4296 ar->len = 0; 4296 ar->len = 0;
4297 *errp = -ENOMEM; 4297 *errp = -ENOMEM;
@@ -4657,6 +4657,8 @@ do_more:
4657 * with group lock held. generate_buddy look at 4657 * with group lock held. generate_buddy look at
4658 * them with group lock_held 4658 * them with group lock_held
4659 */ 4659 */
4660 if (test_opt(sb, DISCARD))
4661 ext4_issue_discard(sb, block_group, bit, count);
4660 ext4_lock_group(sb, block_group); 4662 ext4_lock_group(sb, block_group);
4661 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); 4663 mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
4662 mb_free_blocks(inode, &e4b, bit, count_clusters); 4664 mb_free_blocks(inode, &e4b, bit, count_clusters);
@@ -4988,7 +4990,8 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
4988 4990
4989 start = range->start >> sb->s_blocksize_bits; 4991 start = range->start >> sb->s_blocksize_bits;
4990 end = start + (range->len >> sb->s_blocksize_bits) - 1; 4992 end = start + (range->len >> sb->s_blocksize_bits) - 1;
4991 minlen = range->minlen >> sb->s_blocksize_bits; 4993 minlen = EXT4_NUM_B2C(EXT4_SB(sb),
4994 range->minlen >> sb->s_blocksize_bits);
4992 4995
4993 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) || 4996 if (unlikely(minlen > EXT4_CLUSTERS_PER_GROUP(sb)) ||
4994 unlikely(start >= max_blks)) 4997 unlikely(start >= max_blks))
@@ -5048,6 +5051,6 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
5048 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen); 5051 atomic_set(&EXT4_SB(sb)->s_last_trim_minblks, minlen);
5049 5052
5050out: 5053out:
5051 range->len = trimmed * sb->s_blocksize; 5054 range->len = EXT4_C2B(EXT4_SB(sb), trimmed) << sb->s_blocksize_bits;
5052 return ret; 5055 return ret;
5053} 5056}
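
The two fstrim fixes above are unit conversions: minlen is rounded up from blocks to whole clusters (EXT4_NUM_B2C), and the trimmed total is converted from clusters back to blocks before being reported in bytes (EXT4_C2B). Modeled in plain C for a hypothetical bigalloc layout with 16 blocks per cluster:

    #include <stdio.h>

    #define CLUSTER_BITS 4   /* illustrative: 16 blocks per cluster */

    static unsigned long b2c(unsigned long blocks)   /* round up */
    {
        return (blocks + (1UL << CLUSTER_BITS) - 1) >> CLUSTER_BITS;
    }

    static unsigned long c2b(unsigned long clusters)
    {
        return clusters << CLUSTER_BITS;
    }

    int main(void)
    {
        unsigned long minlen_blocks = 1;

        /* 1 block rounds up to 1 cluster, i.e. 16 blocks */
        printf("minlen: %lu block(s) -> %lu cluster(s) -> %lu blocks\n",
               minlen_blocks, b2c(minlen_blocks), c2b(b2c(minlen_blocks)));
        return 0;
    }
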
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index c070618c21ce..3ccd889ba953 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -65,11 +65,6 @@ extern u8 mb_enable_debug;
65#define MB_DEFAULT_MIN_TO_SCAN 10 65#define MB_DEFAULT_MIN_TO_SCAN 10
66 66
67/* 67/*
68 * How many groups mballoc will scan looking for the best chunk
69 */
70#define MB_DEFAULT_MAX_GROUPS_TO_SCAN 5
71
72/*
73 * with 'ext4_mb_stats' allocator will collect stats that will be 68 * with 'ext4_mb_stats' allocator will collect stats that will be
74 * shown at umount. The collecting costs though! 69 * shown at umount. The collecting costs though!
75 */ 70 */
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index c5826c623e7a..292daeeed455 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
141} 141}
142 142
143/** 143/**
144 * mext_check_null_inode - NULL check for two inodes
145 *
146 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
147 */
148static int
149mext_check_null_inode(struct inode *inode1, struct inode *inode2,
150 const char *function, unsigned int line)
151{
152 int ret = 0;
153
154 if (inode1 == NULL) {
155 __ext4_error(inode2->i_sb, function, line,
156 "Both inodes should not be NULL: "
157 "inode1 NULL inode2 %lu", inode2->i_ino);
158 ret = -EIO;
159 } else if (inode2 == NULL) {
160 __ext4_error(inode1->i_sb, function, line,
161 "Both inodes should not be NULL: "
162 "inode1 %lu inode2 NULL", inode1->i_ino);
163 ret = -EIO;
164 }
165 return ret;
166}
167
168/**
169 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem 144 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
170 * 145 *
171 * @orig_inode: original inode structure 146 * Acquire write lock of i_data_sem of the two inodes
172 * @donor_inode: donor inode structure
173 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
174 * i_ino order.
175 */ 147 */
176static void 148static void
177double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) 149double_down_write_data_sem(struct inode *first, struct inode *second)
178{ 150{
179 struct inode *first = orig_inode, *second = donor_inode; 151 if (first < second) {
152 down_write(&EXT4_I(first)->i_data_sem);
153 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
154 } else {
155 down_write(&EXT4_I(second)->i_data_sem);
156 down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
180 157
181 /*
182 * Use the inode number to provide the stable locking order instead
183 * of its address, because the C language doesn't guarantee you can
184 * compare pointers that don't come from the same array.
185 */
186 if (donor_inode->i_ino < orig_inode->i_ino) {
187 first = donor_inode;
188 second = orig_inode;
189 } 158 }
190
191 down_write(&EXT4_I(first)->i_data_sem);
192 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
193} 159}
194 160
195/** 161/**
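
The rewritten double_down_write_data_sem() above orders the two rwsems by inode address rather than inode number; the removed comment's portability worry about comparing unrelated pointers does not apply inside the kernel's flat address space. The classic deadlock-avoidance shape, modeled with pthreads as a stand-in for the rwsems:

    #include <pthread.h>

    struct minode {
        pthread_mutex_t data_sem;
    };

    /* Always take the lower-addressed lock first, so two threads
     * locking the same pair can never hold one lock each while
     * waiting for the other. */
    static void double_lock(struct minode *a, struct minode *b)
    {
        struct minode *first = a < b ? a : b;
        struct minode *second = a < b ? b : a;

        pthread_mutex_lock(&first->data_sem);
        pthread_mutex_lock(&second->data_sem);
    }

    static void double_unlock(struct minode *a, struct minode *b)
    {
        pthread_mutex_unlock(&a->data_sem);
        pthread_mutex_unlock(&b->data_sem);
    }
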
@@ -604,9 +570,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
604 diff = donor_off - le32_to_cpu(tmp_dext->ee_block); 570 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
605 571
606 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); 572 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
607 tmp_dext->ee_block = 573 le32_add_cpu(&tmp_dext->ee_block, diff);
608 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); 574 le16_add_cpu(&tmp_dext->ee_len, -diff);
609 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
610 575
611 if (max_count < ext4_ext_get_actual_len(tmp_dext)) 576 if (max_count < ext4_ext_get_actual_len(tmp_dext))
612 tmp_dext->ee_len = cpu_to_le16(max_count); 577 tmp_dext->ee_len = cpu_to_le16(max_count);
@@ -629,6 +594,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
629} 594}
630 595
631/** 596/**
 597 * mext_check_coverage - Check that all extents in range have the same type
598 *
599 * @inode: inode in question
600 * @from: block offset of inode
601 * @count: block count to be checked
602 * @uninit: extents expected to be uninitialized
603 * @err: pointer to save error value
604 *
 605 * Return 1 if all extents in range have the expected type, and zero otherwise.
606 */
607static int
608mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
609 int uninit, int *err)
610{
611 struct ext4_ext_path *path = NULL;
612 struct ext4_extent *ext;
613 ext4_lblk_t last = from + count;
614 while (from < last) {
615 *err = get_ext_path(inode, from, &path);
616 if (*err)
617 return 0;
618 ext = path[ext_depth(inode)].p_ext;
619 if (!ext) {
620 ext4_ext_drop_refs(path);
621 return 0;
622 }
623 if (uninit != ext4_ext_is_uninitialized(ext)) {
624 ext4_ext_drop_refs(path);
625 return 0;
626 }
627 from += ext4_ext_get_actual_len(ext);
628 ext4_ext_drop_refs(path);
629 }
630 return 1;
631}
632
633/**
 632 * mext_replace_branches - Replace original extents with new extents 634 * mext_replace_branches - Replace original extents with new extents
633 * 635 *
634 * @handle: journal handle 636 * @handle: journal handle
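
mext_check_coverage() above walks the extents tiling a logical range and fails if any extent has the wrong initialization state or the range hits a hole (the real function additionally reports tree-lookup errors through *err). A toy model with a sorted array standing in for the extent tree:

    #include <stdio.h>

    struct ext { unsigned start, len; int uninit; };

    static int check_coverage(const struct ext *tree, int n,
                              unsigned from, unsigned count, int uninit)
    {
        unsigned last = from + count;
        int i;

        while (from < last) {
            for (i = 0; i < n; i++)
                if (from >= tree[i].start &&
                    from < tree[i].start + tree[i].len)
                    break;
            if (i == n)
                return 0;               /* hole: no covering extent */
            if (tree[i].uninit != uninit)
                return 0;               /* mixed types in range */
            from = tree[i].start + tree[i].len;
        }
        return 1;
    }

    int main(void)
    {
        const struct ext tree[] = { {0, 8, 1}, {8, 8, 1}, {16, 4, 0} };

        printf("[0,16) all uninit: %d\n", check_coverage(tree, 3, 0, 16, 1));
        printf("[0,20) all uninit: %d\n", check_coverage(tree, 3, 0, 20, 1));
        return 0;
    }
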
@@ -663,9 +665,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
663 int replaced_count = 0; 665 int replaced_count = 0;
664 int dext_alen; 666 int dext_alen;
665 667
666 /* Protect extent trees against block allocations via delalloc */
667 double_down_write_data_sem(orig_inode, donor_inode);
668
669 /* Get the original extent for the block "orig_off" */ 668 /* Get the original extent for the block "orig_off" */
670 *err = get_ext_path(orig_inode, orig_off, &orig_path); 669 *err = get_ext_path(orig_inode, orig_off, &orig_path);
671 if (*err) 670 if (*err)
@@ -764,12 +763,122 @@ out:
764 ext4_ext_invalidate_cache(orig_inode); 763 ext4_ext_invalidate_cache(orig_inode);
765 ext4_ext_invalidate_cache(donor_inode); 764 ext4_ext_invalidate_cache(donor_inode);
766 765
767 double_up_write_data_sem(orig_inode, donor_inode);
768
769 return replaced_count; 766 return replaced_count;
770} 767}
771 768
772/** 769/**
770 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
771 *
772 * @inode1: the inode structure
773 * @inode2: the inode structure
774 * @index: page index
775 * @page: result page vector
776 *
 777 * Grab two locked pages for the inodes, in inode address order
778 */
779static int
780mext_page_double_lock(struct inode *inode1, struct inode *inode2,
781 pgoff_t index, struct page *page[2])
782{
783 struct address_space *mapping[2];
784 unsigned fl = AOP_FLAG_NOFS;
785
786 BUG_ON(!inode1 || !inode2);
787 if (inode1 < inode2) {
788 mapping[0] = inode1->i_mapping;
789 mapping[1] = inode2->i_mapping;
790 } else {
791 mapping[0] = inode2->i_mapping;
792 mapping[1] = inode1->i_mapping;
793 }
794
795 page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
796 if (!page[0])
797 return -ENOMEM;
798
799 page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
800 if (!page[1]) {
801 unlock_page(page[0]);
802 page_cache_release(page[0]);
803 return -ENOMEM;
804 }
805
806 if (inode1 > inode2) {
807 struct page *tmp;
808 tmp = page[0];
809 page[0] = page[1];
810 page[1] = tmp;
811 }
812 return 0;
813}
814
815/* Force page buffers uptodate w/o dropping page's lock */
816static int
817mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
818{
819 struct inode *inode = page->mapping->host;
820 sector_t block;
821 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
822 unsigned int blocksize, block_start, block_end;
823 int i, err, nr = 0, partial = 0;
824 BUG_ON(!PageLocked(page));
825 BUG_ON(PageWriteback(page));
826
827 if (PageUptodate(page))
828 return 0;
829
830 blocksize = 1 << inode->i_blkbits;
831 if (!page_has_buffers(page))
832 create_empty_buffers(page, blocksize, 0);
833
834 head = page_buffers(page);
835 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
836 for (bh = head, block_start = 0; bh != head || !block_start;
837 block++, block_start = block_end, bh = bh->b_this_page) {
838 block_end = block_start + blocksize;
839 if (block_end <= from || block_start >= to) {
840 if (!buffer_uptodate(bh))
841 partial = 1;
842 continue;
843 }
844 if (buffer_uptodate(bh))
845 continue;
846 if (!buffer_mapped(bh)) {
847 int err = 0;
848 err = ext4_get_block(inode, block, bh, 0);
849 if (err) {
850 SetPageError(page);
851 return err;
852 }
853 if (!buffer_mapped(bh)) {
854 zero_user(page, block_start, blocksize);
855 if (!err)
856 set_buffer_uptodate(bh);
857 continue;
858 }
859 }
860 BUG_ON(nr >= MAX_BUF_PER_PAGE);
861 arr[nr++] = bh;
862 }
863 /* No io required */
864 if (!nr)
865 goto out;
866
867 for (i = 0; i < nr; i++) {
868 bh = arr[i];
869 if (!bh_uptodate_or_lock(bh)) {
870 err = bh_submit_read(bh);
871 if (err)
872 return err;
873 }
874 }
875out:
876 if (!partial)
877 SetPageUptodate(page);
878 return 0;
879}
880
881/**
 773 * move_extent_per_page - Move extent data per page 882 * move_extent_per_page - Move extent data per page
 774 * 883 *
 775 * @o_filp: file structure of original file 884 * @o_filp: file structure of original file
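
mext_page_mkuptodate() above brings only the buffers overlapping [from, to) uptodate and marks the page fully uptodate only if nothing outside the range is stale. The classification loop, modeled on one 4-block page:

    #include <stdio.h>

    #define BLOCKS_PER_PAGE 4
    #define BLOCKSIZE 1024

    int main(void)
    {
        int uptodate[BLOCKS_PER_PAGE] = { 1, 0, 0, 1 };
        unsigned from = 1024, to = 3072;   /* byte range within the page */
        int partial = 0, nr = 0, b;

        for (b = 0; b < BLOCKS_PER_PAGE; b++) {
            unsigned start = b * BLOCKSIZE, end = start + BLOCKSIZE;

            if (end <= from || start >= to) {
                if (!uptodate[b])
                    partial = 1;           /* outside range, left stale */
                continue;
            }
            if (!uptodate[b])
                nr++;                      /* would submit a read here */
        }
        printf("reads to submit: %d, page fully uptodate after: %s\n",
               nr, partial ? "no" : "yes");
        return 0;
    }
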
@@ -791,26 +900,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
791 int block_len_in_page, int uninit, int *err) 900 int block_len_in_page, int uninit, int *err)
792{ 901{
793 struct inode *orig_inode = o_filp->f_dentry->d_inode; 902 struct inode *orig_inode = o_filp->f_dentry->d_inode;
794 struct address_space *mapping = orig_inode->i_mapping; 903 struct page *pagep[2] = {NULL, NULL};
795 struct buffer_head *bh;
796 struct page *page = NULL;
797 const struct address_space_operations *a_ops = mapping->a_ops;
798 handle_t *handle; 904 handle_t *handle;
799 ext4_lblk_t orig_blk_offset; 905 ext4_lblk_t orig_blk_offset;
800 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 906 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
801 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 907 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
802 unsigned int w_flags = 0; 908 unsigned int w_flags = 0;
803 unsigned int tmp_data_size, data_size, replaced_size; 909 unsigned int tmp_data_size, data_size, replaced_size;
804 void *fsdata; 910 int err2, jblocks, retries = 0;
805 int i, jblocks;
806 int err2 = 0;
807 int replaced_count = 0; 911 int replaced_count = 0;
912 int from = data_offset_in_page << orig_inode->i_blkbits;
808 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 913 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
809 914
810 /* 915 /*
811 * It needs twice the amount of ordinary journal buffers because 916 * It needs twice the amount of ordinary journal buffers because
812 * inode and donor_inode may change each different metadata blocks. 917 * inode and donor_inode may change each different metadata blocks.
813 */ 918 */
919again:
920 *err = 0;
814 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 921 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
815 handle = ext4_journal_start(orig_inode, jblocks); 922 handle = ext4_journal_start(orig_inode, jblocks);
816 if (IS_ERR(handle)) { 923 if (IS_ERR(handle)) {
@@ -824,19 +931,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
824 orig_blk_offset = orig_page_offset * blocks_per_page + 931 orig_blk_offset = orig_page_offset * blocks_per_page +
825 data_offset_in_page; 932 data_offset_in_page;
826 933
827 /*
828 * If orig extent is uninitialized one,
829 * it's not necessary force the page into memory
830 * and then force it to be written out again.
831 * Just swap data blocks between orig and donor.
832 */
833 if (uninit) {
834 replaced_count = mext_replace_branches(handle, orig_inode,
835 donor_inode, orig_blk_offset,
836 block_len_in_page, err);
837 goto out2;
838 }
839
840 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 934 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
841 935
842 /* Calculate data_size */ 936 /* Calculate data_size */
@@ -858,75 +952,120 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 952
859 replaced_size = data_size; 953 replaced_size = data_size;
860 954
861 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, 955 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
862 &page, &fsdata); 956 pagep);
863 if (unlikely(*err < 0)) 957 if (unlikely(*err < 0))
864 goto out; 958 goto stop_journal;
865
866 if (!PageUptodate(page)) {
867 mapping->a_ops->readpage(o_filp, page);
868 lock_page(page);
869 }
870
 871 /* 959 /*
 872 * try_to_release_page() doesn't call releasepage in writeback mode. 960 * If the orig extent was uninitialized it can become initialized
 873 * We should care about the order of writing to the same file 961 * at any time after i_data_sem was dropped; in order to
 874 * by multiple move extent processes. 962 * serialize with delalloc we have to recheck the extent while we
 875 * It needs to call wait_on_page_writeback() to wait for the 963 * hold the page's lock. If it is still uninitialized, no data copy
 876 * writeback of the page. 964 * is needed; just swap the data blocks between orig and donor.
877 */ 965 */
878 wait_on_page_writeback(page); 966 if (uninit) {
967 double_down_write_data_sem(orig_inode, donor_inode);
 968 /* If any of the extents in the range became initialized we have
 969 * to fall back to data copying */
970 uninit = mext_check_coverage(orig_inode, orig_blk_offset,
971 block_len_in_page, 1, err);
972 if (*err)
973 goto drop_data_sem;
879 974
880 /* Release old bh and drop refs */ 975 uninit &= mext_check_coverage(donor_inode, orig_blk_offset,
881 try_to_release_page(page, 0); 976 block_len_in_page, 1, err);
977 if (*err)
978 goto drop_data_sem;
979
980 if (!uninit) {
981 double_up_write_data_sem(orig_inode, donor_inode);
982 goto data_copy;
983 }
984 if ((page_has_private(pagep[0]) &&
985 !try_to_release_page(pagep[0], 0)) ||
986 (page_has_private(pagep[1]) &&
987 !try_to_release_page(pagep[1], 0))) {
988 *err = -EBUSY;
989 goto drop_data_sem;
990 }
991 replaced_count = mext_replace_branches(handle, orig_inode,
992 donor_inode, orig_blk_offset,
993 block_len_in_page, err);
994 drop_data_sem:
995 double_up_write_data_sem(orig_inode, donor_inode);
996 goto unlock_pages;
997 }
998data_copy:
999 *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
1000 if (*err)
1001 goto unlock_pages;
1002
 1003 /* At this point all buffers in range are uptodate; the old mapping
 1004 * layout is no longer required, so try to drop it now. */
1005 if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
1006 (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
1007 *err = -EBUSY;
1008 goto unlock_pages;
1009 }
882 1010
883 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 1011 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
884 orig_blk_offset, block_len_in_page, 1012 orig_blk_offset,
885 &err2); 1013 block_len_in_page, err);
886 if (err2) { 1014 if (*err) {
887 if (replaced_count) { 1015 if (replaced_count) {
888 block_len_in_page = replaced_count; 1016 block_len_in_page = replaced_count;
889 replaced_size = 1017 replaced_size =
890 block_len_in_page << orig_inode->i_blkbits; 1018 block_len_in_page << orig_inode->i_blkbits;
891 } else 1019 } else
892 goto out; 1020 goto unlock_pages;
893 } 1021 }
1022 /* Perform all necessary steps similar write_begin()/write_end()
1023 * but keeping in mind that i_size will not change */
1024 *err = __block_write_begin(pagep[0], from, from + replaced_size,
1025 ext4_get_block);
1026 if (!*err)
1027 *err = block_commit_write(pagep[0], from, from + replaced_size);
894 1028
895 if (!page_has_buffers(page)) 1029 if (unlikely(*err < 0))
896 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 1030 goto repair_branches;
897 1031
 898 bh = page_buffers(page); 1032 /* Even in case of data=writeback it is reasonable to pin
 899 for (i = 0; i < data_offset_in_page; i++) 1033 * the inode to the transaction, to prevent unexpected data loss */
900 bh = bh->b_this_page; 1034 *err = ext4_jbd2_file_inode(handle, orig_inode);
901 1035
902 for (i = 0; i < block_len_in_page; i++) { 1036unlock_pages:
903 *err = ext4_get_block(orig_inode, 1037 unlock_page(pagep[0]);
904 (sector_t)(orig_blk_offset + i), bh, 0); 1038 page_cache_release(pagep[0]);
905 if (*err < 0) 1039 unlock_page(pagep[1]);
906 goto out; 1040 page_cache_release(pagep[1]);
907 1041stop_journal:
908 if (bh->b_this_page != NULL)
909 bh = bh->b_this_page;
910 }
911
912 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
913 page, fsdata);
914 page = NULL;
915
916out:
917 if (unlikely(page)) {
918 if (PageLocked(page))
919 unlock_page(page);
920 page_cache_release(page);
921 ext4_journal_stop(handle);
922 }
923out2:
924 ext4_journal_stop(handle); 1042 ext4_journal_stop(handle);
925 1043 /* Buffer was busy because probably is pinned to journal transaction,
926 if (err2) 1044 * force transaction commit may help to free it. */
927 *err = err2; 1045 if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
928 1046 &retries))
1047 goto again;
929 return replaced_count; 1048 return replaced_count;
1049
1050repair_branches:
1051 /*
1052 * This should never ever happen!
1053 * Extents are swapped already, but we are not able to copy data.
 1054 * Try to swap extents back to their original places
1055 */
1056 double_down_write_data_sem(orig_inode, donor_inode);
1057 replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
1058 orig_blk_offset,
1059 block_len_in_page, &err2);
1060 double_up_write_data_sem(orig_inode, donor_inode);
1061 if (replaced_count != block_len_in_page) {
1062 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
1063 "Unable to copy data block,"
1064 " data will be lost.");
1065 *err = -EIO;
1066 }
1067 replaced_count = 0;
1068 goto unlock_pages;
930} 1069}
931 1070
932/** 1071/**
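
The tail of the reworked move_extent_per_page() retries when the block swap fails with -EBUSY because buffers are still pinned to the running transaction: ext4_should_retry_alloc() forces a journal commit before permitting the retry. The control-flow shape in a self-contained sketch with invented names:

    #include <errno.h>
    #include <stdio.h>

    static int try_swap(int attempt)
    {
        /* pretend the forced commit frees the buffers on attempt 2 */
        return attempt < 2 ? -EBUSY : 0;
    }

    int main(void)
    {
        int retries = 0, err;

    again:
        err = try_swap(retries);
        if (err == -EBUSY && retries++ < 3) {
            /* kernel: ext4_should_retry_alloc() forces a journal
             * commit here before allowing the retry */
            goto again;
        }
        printf("result: %d after %d retries\n", err, retries);
        return 0;
    }
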
@@ -969,14 +1108,6 @@ mext_check_arguments(struct inode *orig_inode,
969 return -EINVAL; 1108 return -EINVAL;
970 } 1109 }
971 1110
972 /* Files should be in the same ext4 FS */
973 if (orig_inode->i_sb != donor_inode->i_sb) {
974 ext4_debug("ext4 move extent: The argument files "
975 "should be in same FS [ino:orig %lu, donor %lu]\n",
976 orig_inode->i_ino, donor_inode->i_ino);
977 return -EINVAL;
978 }
979
980 /* Ext4 move extent supports only extent based file */ 1111 /* Ext4 move extent supports only extent based file */
981 if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { 1112 if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
982 ext4_debug("ext4 move extent: orig file is not extents " 1113 ext4_debug("ext4 move extent: orig file is not extents "
@@ -1002,7 +1133,6 @@ mext_check_arguments(struct inode *orig_inode,
1002 } 1133 }
1003 1134
1004 if ((orig_start >= EXT_MAX_BLOCKS) || 1135 if ((orig_start >= EXT_MAX_BLOCKS) ||
1005 (donor_start >= EXT_MAX_BLOCKS) ||
1006 (*len > EXT_MAX_BLOCKS) || 1136 (*len > EXT_MAX_BLOCKS) ||
1007 (orig_start + *len >= EXT_MAX_BLOCKS)) { 1137 (orig_start + *len >= EXT_MAX_BLOCKS)) {
1008 ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 1138 ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
@@ -1072,35 +1202,19 @@ mext_check_arguments(struct inode *orig_inode,
1072 * @inode1: the inode structure 1202 * @inode1: the inode structure
1073 * @inode2: the inode structure 1203 * @inode2: the inode structure
1074 * 1204 *
1075 * Lock two inodes' i_mutex by i_ino order. 1205 * Lock two inodes' i_mutex
1076 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1077 */ 1206 */
1078static int 1207static void
1079mext_inode_double_lock(struct inode *inode1, struct inode *inode2) 1208mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1080{ 1209{
1081 int ret = 0; 1210 BUG_ON(inode1 == inode2);
1082 1211 if (inode1 < inode2) {
1083 BUG_ON(inode1 == NULL && inode2 == NULL);
1084
1085 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1086 if (ret < 0)
1087 goto out;
1088
1089 if (inode1 == inode2) {
1090 mutex_lock(&inode1->i_mutex);
1091 goto out;
1092 }
1093
1094 if (inode1->i_ino < inode2->i_ino) {
1095 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); 1212 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1096 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); 1213 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1097 } else { 1214 } else {
1098 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); 1215 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1099 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); 1216 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1100 } 1217 }
1101
1102out:
1103 return ret;
1104} 1218}
1105 1219
1106/** 1220/**
@@ -1109,28 +1223,13 @@ out:
1109 * @inode1: the inode that is released first 1223 * @inode1: the inode that is released first
1110 * @inode2: the inode that is released second 1224 * @inode2: the inode that is released second
1111 * 1225 *
1112 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1113 */ 1226 */
1114 1227
1115static int 1228static void
1116mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) 1229mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1117{ 1230{
1118 int ret = 0; 1231 mutex_unlock(&inode1->i_mutex);
1119 1232 mutex_unlock(&inode2->i_mutex);
1120 BUG_ON(inode1 == NULL && inode2 == NULL);
1121
1122 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1123 if (ret < 0)
1124 goto out;
1125
1126 if (inode1)
1127 mutex_unlock(&inode1->i_mutex);
1128
1129 if (inode2 && inode2 != inode1)
1130 mutex_unlock(&inode2->i_mutex);
1131
1132out:
1133 return ret;
1134} 1233}
1135 1234
1136/** 1235/**
@@ -1187,16 +1286,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1187 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; 1286 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1188 ext4_lblk_t rest_blocks; 1287 ext4_lblk_t rest_blocks;
1189 pgoff_t orig_page_offset = 0, seq_end_page; 1288 pgoff_t orig_page_offset = 0, seq_end_page;
1190 int ret1, ret2, depth, last_extent = 0; 1289 int ret, depth, last_extent = 0;
1191 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 1290 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1192 int data_offset_in_page; 1291 int data_offset_in_page;
1193 int block_len_in_page; 1292 int block_len_in_page;
1194 int uninit; 1293 int uninit;
1195 1294
1196 /* orig and donor should be different file */ 1295 if (orig_inode->i_sb != donor_inode->i_sb) {
1197 if (orig_inode->i_ino == donor_inode->i_ino) { 1296 ext4_debug("ext4 move extent: The argument files "
1297 "should be in same FS [ino:orig %lu, donor %lu]\n",
1298 orig_inode->i_ino, donor_inode->i_ino);
1299 return -EINVAL;
1300 }
1301
1302 /* orig and donor should be different inodes */
1303 if (orig_inode == donor_inode) {
1198 ext4_debug("ext4 move extent: The argument files should not " 1304 ext4_debug("ext4 move extent: The argument files should not "
1199 "be same file [ino:orig %lu, donor %lu]\n", 1305 "be same inode [ino:orig %lu, donor %lu]\n",
1200 orig_inode->i_ino, donor_inode->i_ino); 1306 orig_inode->i_ino, donor_inode->i_ino);
1201 return -EINVAL; 1307 return -EINVAL;
1202 } 1308 }
@@ -1208,18 +1314,27 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1208 orig_inode->i_ino, donor_inode->i_ino); 1314 orig_inode->i_ino, donor_inode->i_ino);
1209 return -EINVAL; 1315 return -EINVAL;
1210 } 1316 }
 1211 1317 /* TODO: It is a non-obvious task to swap blocks for inodes with full
 1318 journaling enabled */
1319 if (ext4_should_journal_data(orig_inode) ||
1320 ext4_should_journal_data(donor_inode)) {
1321 return -EINVAL;
1322 }
1212 /* Protect orig and donor inodes against a truncate */ 1323 /* Protect orig and donor inodes against a truncate */
1213 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1324 mext_inode_double_lock(orig_inode, donor_inode);
1214 if (ret1 < 0) 1325
1215 return ret1; 1326 /* Wait for all existing dio workers */
1327 ext4_inode_block_unlocked_dio(orig_inode);
1328 ext4_inode_block_unlocked_dio(donor_inode);
1329 inode_dio_wait(orig_inode);
1330 inode_dio_wait(donor_inode);
1216 1331
1217 /* Protect extent tree against block allocations via delalloc */ 1332 /* Protect extent tree against block allocations via delalloc */
1218 double_down_write_data_sem(orig_inode, donor_inode); 1333 double_down_write_data_sem(orig_inode, donor_inode);
1219 /* Check the filesystem environment whether move_extent can be done */ 1334 /* Check the filesystem environment whether move_extent can be done */
1220 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1335 ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
1221 donor_start, &len); 1336 donor_start, &len);
1222 if (ret1) 1337 if (ret)
1223 goto out; 1338 goto out;
1224 1339
1225 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; 1340 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
@@ -1227,13 +1342,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1227 if (file_end < block_end) 1342 if (file_end < block_end)
1228 len -= block_end - file_end; 1343 len -= block_end - file_end;
1229 1344
1230 ret1 = get_ext_path(orig_inode, block_start, &orig_path); 1345 ret = get_ext_path(orig_inode, block_start, &orig_path);
1231 if (ret1) 1346 if (ret)
1232 goto out; 1347 goto out;
1233 1348
1234 /* Get path structure to check the hole */ 1349 /* Get path structure to check the hole */
1235 ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); 1350 ret = get_ext_path(orig_inode, block_start, &holecheck_path);
1236 if (ret1) 1351 if (ret)
1237 goto out; 1352 goto out;
1238 1353
1239 depth = ext_depth(orig_inode); 1354 depth = ext_depth(orig_inode);
@@ -1252,13 +1367,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1252 last_extent = mext_next_extent(orig_inode, 1367 last_extent = mext_next_extent(orig_inode,
1253 holecheck_path, &ext_cur); 1368 holecheck_path, &ext_cur);
1254 if (last_extent < 0) { 1369 if (last_extent < 0) {
1255 ret1 = last_extent; 1370 ret = last_extent;
1256 goto out; 1371 goto out;
1257 } 1372 }
1258 last_extent = mext_next_extent(orig_inode, orig_path, 1373 last_extent = mext_next_extent(orig_inode, orig_path,
1259 &ext_dummy); 1374 &ext_dummy);
1260 if (last_extent < 0) { 1375 if (last_extent < 0) {
1261 ret1 = last_extent; 1376 ret = last_extent;
1262 goto out; 1377 goto out;
1263 } 1378 }
1264 seq_start = le32_to_cpu(ext_cur->ee_block); 1379 seq_start = le32_to_cpu(ext_cur->ee_block);
@@ -1272,7 +1387,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1272 if (le32_to_cpu(ext_cur->ee_block) > block_end) { 1387 if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1273 ext4_debug("ext4 move extent: The specified range of file " 1388 ext4_debug("ext4 move extent: The specified range of file "
1274 "may be the hole\n"); 1389 "may be the hole\n");
1275 ret1 = -EINVAL; 1390 ret = -EINVAL;
1276 goto out; 1391 goto out;
1277 } 1392 }
1278 1393
@@ -1292,7 +1407,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1292 last_extent = mext_next_extent(orig_inode, holecheck_path, 1407 last_extent = mext_next_extent(orig_inode, holecheck_path,
1293 &ext_cur); 1408 &ext_cur);
1294 if (last_extent < 0) { 1409 if (last_extent < 0) {
1295 ret1 = last_extent; 1410 ret = last_extent;
1296 break; 1411 break;
1297 } 1412 }
1298 add_blocks = ext4_ext_get_actual_len(ext_cur); 1413 add_blocks = ext4_ext_get_actual_len(ext_cur);
@@ -1349,18 +1464,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1349 orig_page_offset, 1464 orig_page_offset,
1350 data_offset_in_page, 1465 data_offset_in_page,
1351 block_len_in_page, uninit, 1466 block_len_in_page, uninit,
1352 &ret1); 1467 &ret);
1353 1468
1354 /* Count how many blocks we have exchanged */ 1469 /* Count how many blocks we have exchanged */
1355 *moved_len += block_len_in_page; 1470 *moved_len += block_len_in_page;
1356 if (ret1 < 0) 1471 if (ret < 0)
1357 break; 1472 break;
1358 if (*moved_len > len) { 1473 if (*moved_len > len) {
1359 EXT4_ERROR_INODE(orig_inode, 1474 EXT4_ERROR_INODE(orig_inode,
1360 "We replaced blocks too much! " 1475 "We replaced blocks too much! "
1361 "sum of replaced: %llu requested: %llu", 1476 "sum of replaced: %llu requested: %llu",
1362 *moved_len, len); 1477 *moved_len, len);
1363 ret1 = -EIO; 1478 ret = -EIO;
1364 break; 1479 break;
1365 } 1480 }
1366 1481
@@ -1374,22 +1489,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1374 } 1489 }
1375 1490
1376 double_down_write_data_sem(orig_inode, donor_inode); 1491 double_down_write_data_sem(orig_inode, donor_inode);
1377 if (ret1 < 0) 1492 if (ret < 0)
1378 break; 1493 break;
1379 1494
1380 /* Decrease buffer counter */ 1495 /* Decrease buffer counter */
1381 if (holecheck_path) 1496 if (holecheck_path)
1382 ext4_ext_drop_refs(holecheck_path); 1497 ext4_ext_drop_refs(holecheck_path);
1383 ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); 1498 ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
1384 if (ret1) 1499 if (ret)
1385 break; 1500 break;
1386 depth = holecheck_path->p_depth; 1501 depth = holecheck_path->p_depth;
1387 1502
1388 /* Decrease buffer counter */ 1503 /* Decrease buffer counter */
1389 if (orig_path) 1504 if (orig_path)
1390 ext4_ext_drop_refs(orig_path); 1505 ext4_ext_drop_refs(orig_path);
1391 ret1 = get_ext_path(orig_inode, seq_start, &orig_path); 1506 ret = get_ext_path(orig_inode, seq_start, &orig_path);
1392 if (ret1) 1507 if (ret)
1393 break; 1508 break;
1394 1509
1395 ext_cur = holecheck_path[depth].p_ext; 1510 ext_cur = holecheck_path[depth].p_ext;
@@ -1412,12 +1527,9 @@ out:
1412 kfree(holecheck_path); 1527 kfree(holecheck_path);
1413 } 1528 }
1414 double_up_write_data_sem(orig_inode, donor_inode); 1529 double_up_write_data_sem(orig_inode, donor_inode);
1415 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1530 ext4_inode_resume_unlocked_dio(orig_inode);
1416 1531 ext4_inode_resume_unlocked_dio(donor_inode);
1417 if (ret1) 1532 mext_inode_double_unlock(orig_inode, donor_inode);
1418 return ret1;
1419 else if (ret2)
1420 return ret2;
1421 1533
1422 return 0; 1534 return ret;
1423} 1535}
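
For context, ext4_move_extents() is driven from userspace by e4defrag through EXT4_IOC_MOVE_EXT; the serialization ladder above (i_mutex pair, block unlocked DIO, drain DIO, i_data_sem pair) all happens under this one ioctl. A usage sketch; the struct layout is copied from the kernel's ext4.h of this era since it is not in the uapi headers, so double-check it against your tree:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/types.h>
    #include <unistd.h>

    struct move_extent {
        __u32 reserved;     /* must be zero */
        __u32 donor_fd;     /* fd of the donor file */
        __u64 orig_start;   /* logical start block in the orig file */
        __u64 donor_start;  /* logical start block in the donor file */
        __u64 len;          /* number of blocks to swap */
        __u64 moved_len;    /* out: blocks actually swapped */
    };
    #define EXT4_IOC_MOVE_EXT _IOWR('f', 15, struct move_extent)

    int main(int argc, char **argv)
    {
        struct move_extent me = { 0 };
        int orig, donor;

        if (argc < 3)
            return 1;
        orig = open(argv[1], O_RDWR);
        donor = open(argv[2], O_RDWR);
        if (orig < 0 || donor < 0)
            return 1;
        me.donor_fd = donor;
        me.len = 256;                    /* swap the first 256 blocks */
        if (ioctl(orig, EXT4_IOC_MOVE_EXT, &me) == 0)
            printf("moved %llu blocks\n",
                   (unsigned long long)me.moved_len);
        close(donor);
        close(orig);
        return 0;
    }

Both files must be on the same ext4 filesystem and extent-based, per the checks added above.
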
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 2a42cc04466f..6d600a69fc9d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -55,6 +55,13 @@ static struct buffer_head *ext4_append(handle_t *handle,
55{ 55{
56 struct buffer_head *bh; 56 struct buffer_head *bh;
57 57
58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 ((inode->i_size >> 10) >=
60 EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
61 *err = -ENOSPC;
62 return NULL;
63 }
64
58 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 65 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
59 66
60 bh = ext4_bread(handle, inode, *block, 1, err); 67 bh = ext4_bread(handle, inode, *block, 1, err);
@@ -67,6 +74,12 @@ static struct buffer_head *ext4_append(handle_t *handle,
67 bh = NULL; 74 bh = NULL;
68 } 75 }
69 } 76 }
77 if (!bh && !(*err)) {
78 *err = -EIO;
79 ext4_error(inode->i_sb,
80 "Directory hole detected on inode %lu\n",
81 inode->i_ino);
82 }
70 return bh; 83 return bh;
71} 84}
72 85
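
The check added to ext4_append() enforces the max_dir_size_kb mount option documented earlier: growth is refused with ENOSPC once the directory's size in KiB reaches the limit. The predicate in isolation, including the "limit unset" case:

    #include <stdio.h>

    static int would_refuse(unsigned long long i_size, unsigned long max_kb)
    {
        return max_kb && (i_size >> 10) >= max_kb;
    }

    int main(void)
    {
        printf("%d\n", would_refuse(512ULL << 10, 512)); /* 1: at the cap */
        printf("%d\n", would_refuse(511ULL << 10, 512)); /* 0: still room */
        printf("%d\n", would_refuse(1ULL << 40, 0));     /* 0: limit unset */
        return 0;
    }
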
@@ -594,8 +607,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
594 u32 hash; 607 u32 hash;
595 608
596 frame->bh = NULL; 609 frame->bh = NULL;
597 if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) 610 if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) {
611 if (*err == 0)
612 *err = ERR_BAD_DX_DIR;
598 goto fail; 613 goto fail;
614 }
599 root = (struct dx_root *) bh->b_data; 615 root = (struct dx_root *) bh->b_data;
600 if (root->info.hash_version != DX_HASH_TEA && 616 if (root->info.hash_version != DX_HASH_TEA &&
601 root->info.hash_version != DX_HASH_HALF_MD4 && 617 root->info.hash_version != DX_HASH_HALF_MD4 &&
@@ -696,8 +712,11 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
696 frame->entries = entries; 712 frame->entries = entries;
697 frame->at = at; 713 frame->at = at;
698 if (!indirect--) return frame; 714 if (!indirect--) return frame;
699 if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) 715 if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) {
716 if (!(*err))
717 *err = ERR_BAD_DX_DIR;
700 goto fail2; 718 goto fail2;
719 }
701 at = entries = ((struct dx_node *) bh->b_data)->entries; 720 at = entries = ((struct dx_node *) bh->b_data)->entries;
702 721
703 if (!buffer_verified(bh) && 722 if (!buffer_verified(bh) &&
@@ -807,8 +826,15 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
807 */ 826 */
808 while (num_frames--) { 827 while (num_frames--) {
809 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 828 if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
810 0, &err))) 829 0, &err))) {
830 if (!err) {
831 ext4_error(dir->i_sb,
832 "Directory hole detected on inode %lu\n",
833 dir->i_ino);
834 return -EIO;
835 }
811 return err; /* Failure */ 836 return err; /* Failure */
837 }
812 838
813 if (!buffer_verified(bh) && 839 if (!buffer_verified(bh) &&
814 !ext4_dx_csum_verify(dir, 840 !ext4_dx_csum_verify(dir,
@@ -839,12 +865,19 @@ static int htree_dirblock_to_tree(struct file *dir_file,
839{ 865{
840 struct buffer_head *bh; 866 struct buffer_head *bh;
841 struct ext4_dir_entry_2 *de, *top; 867 struct ext4_dir_entry_2 *de, *top;
842 int err, count = 0; 868 int err = 0, count = 0;
843 869
844 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n", 870 dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
845 (unsigned long)block)); 871 (unsigned long)block));
846 if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) 872 if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) {
873 if (!err) {
874 err = -EIO;
875 ext4_error(dir->i_sb,
876 "Directory hole detected on inode %lu\n",
877 dir->i_ino);
878 }
847 return err; 879 return err;
880 }
848 881
849 if (!buffer_verified(bh) && 882 if (!buffer_verified(bh) &&
850 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) 883 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
@@ -1267,8 +1300,15 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1267 return NULL; 1300 return NULL;
1268 do { 1301 do {
1269 block = dx_get_block(frame->at); 1302 block = dx_get_block(frame->at);
1270 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) 1303 if (!(bh = ext4_bread(NULL, dir, block, 0, err))) {
1304 if (!(*err)) {
1305 *err = -EIO;
1306 ext4_error(dir->i_sb,
1307 "Directory hole detected on inode %lu\n",
1308 dir->i_ino);
1309 }
1271 goto errout; 1310 goto errout;
1311 }
1272 1312
1273 if (!buffer_verified(bh) && 1313 if (!buffer_verified(bh) &&
1274 !ext4_dirent_csum_verify(dir, 1314 !ext4_dirent_csum_verify(dir,
@@ -1801,9 +1841,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1801 } 1841 }
1802 blocks = dir->i_size >> sb->s_blocksize_bits; 1842 blocks = dir->i_size >> sb->s_blocksize_bits;
1803 for (block = 0; block < blocks; block++) { 1843 for (block = 0; block < blocks; block++) {
1804 bh = ext4_bread(handle, dir, block, 0, &retval); 1844 if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) {
1805 if(!bh) 1845 if (!retval) {
1846 retval = -EIO;
1847 ext4_error(inode->i_sb,
1848 "Directory hole detected on inode %lu\n",
1849 inode->i_ino);
1850 }
1806 return retval; 1851 return retval;
1852 }
1807 if (!buffer_verified(bh) && 1853 if (!buffer_verified(bh) &&
1808 !ext4_dirent_csum_verify(dir, 1854 !ext4_dirent_csum_verify(dir,
1809 (struct ext4_dir_entry *)bh->b_data)) 1855 (struct ext4_dir_entry *)bh->b_data))
@@ -1860,8 +1906,15 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1860 entries = frame->entries; 1906 entries = frame->entries;
1861 at = frame->at; 1907 at = frame->at;
1862 1908
1863 if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) 1909 if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) {
1910 if (!err) {
1911 err = -EIO;
1912 ext4_error(dir->i_sb,
1913 "Directory hole detected on inode %lu\n",
1914 dir->i_ino);
1915 }
1864 goto cleanup; 1916 goto cleanup;
1917 }
1865 1918
1866 if (!buffer_verified(bh) && 1919 if (!buffer_verified(bh) &&
1867 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) 1920 !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
@@ -2149,9 +2202,7 @@ retry:
2149 err = PTR_ERR(inode); 2202 err = PTR_ERR(inode);
2150 if (!IS_ERR(inode)) { 2203 if (!IS_ERR(inode)) {
2151 init_special_inode(inode, inode->i_mode, rdev); 2204 init_special_inode(inode, inode->i_mode, rdev);
2152#ifdef CONFIG_EXT4_FS_XATTR
2153 inode->i_op = &ext4_special_inode_operations; 2205 inode->i_op = &ext4_special_inode_operations;
2154#endif
2155 err = ext4_add_nondir(handle, dentry, inode); 2206 err = ext4_add_nondir(handle, dentry, inode);
2156 } 2207 }
2157 ext4_journal_stop(handle); 2208 ext4_journal_stop(handle);
@@ -2199,9 +2250,15 @@ retry:
2199 inode->i_op = &ext4_dir_inode_operations; 2250 inode->i_op = &ext4_dir_inode_operations;
2200 inode->i_fop = &ext4_dir_operations; 2251 inode->i_fop = &ext4_dir_operations;
2201 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 2252 inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
2202 dir_block = ext4_bread(handle, inode, 0, 1, &err); 2253 if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
2203 if (!dir_block) 2254 if (!err) {
2255 err = -EIO;
2256 ext4_error(inode->i_sb,
2257 "Directory hole detected on inode %lu\n",
2258 inode->i_ino);
2259 }
2204 goto out_clear_inode; 2260 goto out_clear_inode;
2261 }
2205 BUFFER_TRACE(dir_block, "get_write_access"); 2262 BUFFER_TRACE(dir_block, "get_write_access");
2206 err = ext4_journal_get_write_access(handle, dir_block); 2263 err = ext4_journal_get_write_access(handle, dir_block);
2207 if (err) 2264 if (err)
@@ -2318,6 +2375,11 @@ static int empty_dir(struct inode *inode)
2318 EXT4_ERROR_INODE(inode, 2375 EXT4_ERROR_INODE(inode,
2319 "error %d reading directory " 2376 "error %d reading directory "
2320 "lblock %u", err, lblock); 2377 "lblock %u", err, lblock);
2378 else
2379 ext4_warning(inode->i_sb,
2380 "bad directory (dir #%lu) - no data block",
2381 inode->i_ino);
2382
2321 offset += sb->s_blocksize; 2383 offset += sb->s_blocksize;
2322 continue; 2384 continue;
2323 } 2385 }
@@ -2362,7 +2424,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2362 struct ext4_iloc iloc; 2424 struct ext4_iloc iloc;
2363 int err = 0, rc; 2425 int err = 0, rc;
2364 2426
2365 if (!ext4_handle_valid(handle)) 2427 if (!EXT4_SB(sb)->s_journal)
2366 return 0; 2428 return 0;
2367 2429
2368 mutex_lock(&EXT4_SB(sb)->s_orphan_lock); 2430 mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
@@ -2436,8 +2498,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2436 struct ext4_iloc iloc; 2498 struct ext4_iloc iloc;
2437 int err = 0; 2499 int err = 0;
2438 2500
2439 /* ext4_handle_valid() assumes a valid handle_t pointer */ 2501 if (!EXT4_SB(inode->i_sb)->s_journal)
2440 if (handle && !ext4_handle_valid(handle))
2441 return 0; 2502 return 0;
2442 2503
2443 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); 2504 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
@@ -2456,7 +2517,7 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2456 * transaction handle with which to update the orphan list on 2517 * transaction handle with which to update the orphan list on
2457 * disk, but we still need to remove the inode from the linked 2518 * disk, but we still need to remove the inode from the linked
2458 * list in memory. */ 2519 * list in memory. */
2459 if (sbi->s_journal && !handle) 2520 if (!handle)
2460 goto out; 2521 goto out;
2461 2522
2462 err = ext4_reserve_inode_write(handle, inode, &iloc); 2523 err = ext4_reserve_inode_write(handle, inode, &iloc);
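[Editor's note] The two orphan hunks change the guard from ext4_handle_valid(handle) to a direct check of the superblock's s_journal: whether the on-disk orphan list needs maintenance depends on the filesystem having a journal at all, not on the handle, and ext4_orphan_del() is also called with a NULL handle. A condensed decision table of the resulting behavior, as a sketch under those assumptions (orphan_del() here is illustrative, not kernel code):

#include <stdio.h>
#include <stdbool.h>

static void orphan_del(bool has_journal, bool has_handle)
{
    if (!has_journal) {
        printf("no journal: nothing on disk to maintain\n");
        return;
    }
    printf("unlink inode from the in-memory orphan list\n");
    if (!has_handle) {
        printf("no handle: skip the on-disk update\n");
        return;
    }
    printf("rewrite the on-disk orphan chain under the handle\n");
}

int main(void)
{
    orphan_del(false, false);
    orphan_del(true,  false);
    orphan_del(true,  true);
    return 0;
}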
@@ -2826,9 +2887,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2826 goto end_rename; 2887 goto end_rename;
2827 } 2888 }
2828 retval = -EIO; 2889 retval = -EIO;
2829 dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); 2890 if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) {
2830 if (!dir_bh) 2891 if (!retval) {
2892 retval = -EIO;
2893 ext4_error(old_inode->i_sb,
2894 "Directory hole detected on inode %lu\n",
2895 old_inode->i_ino);
2896 }
2831 goto end_rename; 2897 goto end_rename;
2898 }
2832 if (!buffer_verified(dir_bh) && 2899 if (!buffer_verified(dir_bh) &&
2833 !ext4_dirent_csum_verify(old_inode, 2900 !ext4_dirent_csum_verify(old_inode,
2834 (struct ext4_dir_entry *)dir_bh->b_data)) 2901 (struct ext4_dir_entry *)dir_bh->b_data))
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index dcdeef169a69..68e896e12a67 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -71,6 +71,9 @@ void ext4_free_io_end(ext4_io_end_t *io)
71 int i; 71 int i;
72 72
73 BUG_ON(!io); 73 BUG_ON(!io);
74 BUG_ON(!list_empty(&io->list));
75 BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN);
76
74 if (io->page) 77 if (io->page)
75 put_page(io->page); 78 put_page(io->page);
76 for (i = 0; i < io->num_io_pages; i++) 79 for (i = 0; i < io->num_io_pages; i++)
@@ -81,13 +84,8 @@ void ext4_free_io_end(ext4_io_end_t *io)
81 kmem_cache_free(io_end_cachep, io); 84 kmem_cache_free(io_end_cachep, io);
82} 85}
83 86
84/* 87/* check a range of space and convert unwritten extents to written. */
85 * check a range of space and convert unwritten extents to written. 88static int ext4_end_io(ext4_io_end_t *io)
86 *
87 * Called with inode->i_mutex; we depend on this when we manipulate
88 * io->flag, since we could otherwise race with ext4_flush_completed_IO()
89 */
90int ext4_end_io_nolock(ext4_io_end_t *io)
91{ 89{
92 struct inode *inode = io->inode; 90 struct inode *inode = io->inode;
93 loff_t offset = io->offset; 91 loff_t offset = io->offset;
@@ -106,63 +104,136 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
106 "(inode %lu, offset %llu, size %zd, error %d)", 104 "(inode %lu, offset %llu, size %zd, error %d)",
107 inode->i_ino, offset, size, ret); 105 inode->i_ino, offset, size, ret);
108 } 106 }
109
110 if (io->iocb) 107 if (io->iocb)
111 aio_complete(io->iocb, io->result, 0); 108 aio_complete(io->iocb, io->result, 0);
112 109
113 if (io->flag & EXT4_IO_END_DIRECT) 110 if (io->flag & EXT4_IO_END_DIRECT)
114 inode_dio_done(inode); 111 inode_dio_done(inode);
115 /* Wake up anyone waiting on unwritten extent conversion */ 112 /* Wake up anyone waiting on unwritten extent conversion */
116 if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten)) 113 if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
117 wake_up_all(ext4_ioend_wq(io->inode)); 114 wake_up_all(ext4_ioend_wq(io->inode));
118 return ret; 115 return ret;
119} 116}
120 117
121/* 118static void dump_completed_IO(struct inode *inode)
122 * work on completed aio dio IO, to convert unwritten extents to extents 119{
123 */ 120#ifdef EXT4FS_DEBUG
124static void ext4_end_io_work(struct work_struct *work) 121 struct list_head *cur, *before, *after;
122 ext4_io_end_t *io, *io0, *io1;
123 unsigned long flags;
124
125 if (list_empty(&EXT4_I(inode)->i_completed_io_list)) {
126 ext4_debug("inode %lu completed_io list is empty\n",
127 inode->i_ino);
128 return;
129 }
130
131 ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino);
132 list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) {
133 cur = &io->list;
134 before = cur->prev;
135 io0 = container_of(before, ext4_io_end_t, list);
136 after = cur->next;
137 io1 = container_of(after, ext4_io_end_t, list);
138
139 ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
140 io, inode->i_ino, io0, io1);
141 }
142#endif
143}
144
145/* Add the io_end to per-inode completed end_io list. */
146void ext4_add_complete_io(ext4_io_end_t *io_end)
125{ 147{
126 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work); 148 struct ext4_inode_info *ei = EXT4_I(io_end->inode);
127 struct inode *inode = io->inode; 149 struct workqueue_struct *wq;
128 struct ext4_inode_info *ei = EXT4_I(inode); 150 unsigned long flags;
129 unsigned long flags; 151
152 BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
153 wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
130 154
131 spin_lock_irqsave(&ei->i_completed_io_lock, flags); 155 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
132 if (io->flag & EXT4_IO_END_IN_FSYNC) 156 if (list_empty(&ei->i_completed_io_list)) {
133 goto requeue; 157 io_end->flag |= EXT4_IO_END_QUEUED;
134 if (list_empty(&io->list)) { 158 queue_work(wq, &io_end->work);
135 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
136 goto free;
137 } 159 }
160 list_add_tail(&io_end->list, &ei->i_completed_io_list);
161 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
162}
138 163
139 if (!mutex_trylock(&inode->i_mutex)) { 164static int ext4_do_flush_completed_IO(struct inode *inode,
140 bool was_queued; 165 ext4_io_end_t *work_io)
141requeue: 166{
142 was_queued = !!(io->flag & EXT4_IO_END_QUEUED); 167 ext4_io_end_t *io;
143 io->flag |= EXT4_IO_END_QUEUED; 168 struct list_head unwritten, complete, to_free;
144 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 169 unsigned long flags;
145 /* 170 struct ext4_inode_info *ei = EXT4_I(inode);
146 * Requeue the work instead of waiting so that the work 171 int err, ret = 0;
147 * items queued after this can be processed. 172
148 */ 173 INIT_LIST_HEAD(&complete);
149 queue_work(EXT4_SB(inode->i_sb)->dio_unwritten_wq, &io->work); 174 INIT_LIST_HEAD(&to_free);
150 /* 175
151 * To prevent the ext4-dio-unwritten thread from keeping 176 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
152 * requeueing end_io requests and occupying cpu for too long, 177 dump_completed_IO(inode);
153 * yield the cpu if it sees an end_io request that has already 178 list_replace_init(&ei->i_completed_io_list, &unwritten);
154 * been requeued. 179 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
155 */ 180
156 if (was_queued) 181 while (!list_empty(&unwritten)) {
157 yield(); 182 io = list_entry(unwritten.next, ext4_io_end_t, list);
158 return; 183 BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
184 list_del_init(&io->list);
185
186 err = ext4_end_io(io);
187 if (unlikely(!ret && err))
188 ret = err;
189
190 list_add_tail(&io->list, &complete);
191 }
192 spin_lock_irqsave(&ei->i_completed_io_lock, flags);
193 while (!list_empty(&complete)) {
194 io = list_entry(complete.next, ext4_io_end_t, list);
195 io->flag &= ~EXT4_IO_END_UNWRITTEN;
196 /* end_io context cannot be destroyed now because it is still
197 * used by the queued worker. The worker thread will destroy it later */
198 if (io->flag & EXT4_IO_END_QUEUED)
199 list_del_init(&io->list);
200 else
201 list_move(&io->list, &to_free);
202 }
203 /* If we are called from worker context, it is time to clear the queued
204 * flag, and destroy its end_io if it was already converted */
205 if (work_io) {
206 work_io->flag &= ~EXT4_IO_END_QUEUED;
207 if (!(work_io->flag & EXT4_IO_END_UNWRITTEN))
208 list_add_tail(&work_io->list, &to_free);
159 } 209 }
160 list_del_init(&io->list);
161 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); 210 spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
162 (void) ext4_end_io_nolock(io); 211
163 mutex_unlock(&inode->i_mutex); 212 while (!list_empty(&to_free)) {
164free: 213 io = list_entry(to_free.next, ext4_io_end_t, list);
165 ext4_free_io_end(io); 214 list_del_init(&io->list);
215 ext4_free_io_end(io);
216 }
217 return ret;
218}
219
220/*
221 * work on completed aio dio IO, to convert unwritten extents to extents
222 */
223static void ext4_end_io_work(struct work_struct *work)
224{
225 ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
226 ext4_do_flush_completed_IO(io->inode, io);
227}
228
229int ext4_flush_unwritten_io(struct inode *inode)
230{
231 int ret;
232 WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) &&
233 !(inode->i_state & I_FREEING));
234 ret = ext4_do_flush_completed_IO(inode, NULL);
235 ext4_unwritten_wait(inode);
236 return ret;
166} 237}
167 238
168ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) 239ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
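[Editor's note] Two ideas carry the rewrite above: ext4_add_complete_io() queues the workqueue item only on the empty-to-nonempty transition, so one worker drains many completions, and ext4_do_flush_completed_IO() detaches the whole i_completed_io_list in O(1) with list_replace_init() so the slow extent conversion runs with the spinlock dropped. A userspace model of both ideas, simplified (a mutex stands in for the spinlock, and the list is pushed at the head rather than the tail):

#include <pthread.h>
#include <stdio.h>

struct node { struct node *next; int id; };

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *pending;              /* i_completed_io_list stand-in */

static void add_complete_io(struct node *n)
{
    pthread_mutex_lock(&lock);
    int was_empty = (pending == NULL);
    n->next = pending;                    /* simplified: kernel adds at tail */
    pending = n;
    pthread_mutex_unlock(&lock);
    if (was_empty)
        printf("queue work for id %d (list was empty)\n", n->id);
}

static void flush_completed_io(void)
{
    pthread_mutex_lock(&lock);
    struct node *mine = pending;          /* detach the whole list in O(1), */
    pending = NULL;                       /* like list_replace_init()       */
    pthread_mutex_unlock(&lock);

    while (mine) {                        /* convert with the lock dropped */
        printf("end_io for id %d\n", mine->id);
        mine = mine->next;
    }
}

int main(void)
{
    struct node a = { .next = NULL, .id = 1 };
    struct node b = { .next = NULL, .id = 2 };

    add_complete_io(&a);
    add_complete_io(&b);
    flush_completed_io();
    return 0;
}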
@@ -195,9 +266,7 @@ static void buffer_io_error(struct buffer_head *bh)
195static void ext4_end_bio(struct bio *bio, int error) 266static void ext4_end_bio(struct bio *bio, int error)
196{ 267{
197 ext4_io_end_t *io_end = bio->bi_private; 268 ext4_io_end_t *io_end = bio->bi_private;
198 struct workqueue_struct *wq;
199 struct inode *inode; 269 struct inode *inode;
200 unsigned long flags;
201 int i; 270 int i;
202 sector_t bi_sector = bio->bi_sector; 271 sector_t bi_sector = bio->bi_sector;
203 272
@@ -255,14 +324,7 @@ static void ext4_end_bio(struct bio *bio, int error)
255 return; 324 return;
256 } 325 }
257 326
258 /* Add the io_end to per-inode completed io list*/ 327 ext4_add_complete_io(io_end);
259 spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
260 list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
261 spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
262
263 wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
264 /* queue the work to convert unwritten extents to written */
265 queue_work(wq, &io_end->work);
266} 328}
267 329
268void ext4_io_submit(struct ext4_io_submit *io) 330void ext4_io_submit(struct ext4_io_submit *io)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 41f6ef68e2e1..7a75e1086961 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -45,6 +45,28 @@ void ext4_resize_end(struct super_block *sb)
45 smp_mb__after_clear_bit(); 45 smp_mb__after_clear_bit();
46} 46}
47 47
48static ext4_group_t ext4_meta_bg_first_group(struct super_block *sb,
49 ext4_group_t group) {
50 return (group >> EXT4_DESC_PER_BLOCK_BITS(sb)) <<
51 EXT4_DESC_PER_BLOCK_BITS(sb);
52}
53
54static ext4_fsblk_t ext4_meta_bg_first_block_no(struct super_block *sb,
55 ext4_group_t group) {
56 group = ext4_meta_bg_first_group(sb, group);
57 return ext4_group_first_block_no(sb, group);
58}
59
60static ext4_grpblk_t ext4_group_overhead_blocks(struct super_block *sb,
61 ext4_group_t group) {
62 ext4_grpblk_t overhead;
63 overhead = ext4_bg_num_gdb(sb, group);
64 if (ext4_bg_has_super(sb, group))
65 overhead += 1 +
66 le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
67 return overhead;
68}
69
48#define outside(b, first, last) ((b) < (first) || (b) >= (last)) 70#define outside(b, first, last) ((b) < (first) || (b) >= (last))
49#define inside(b, first, last) ((b) >= (first) && (b) < (last)) 71#define inside(b, first, last) ((b) >= (first) && (b) < (last))
50 72
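[Editor's note] A quick standalone check of the arithmetic in the new helpers. Assuming 4K blocks with the 64bit feature's 64-byte group descriptors, a descriptor block holds 4096/64 = 64 descriptors, so ext4_meta_bg_first_group() rounds a group number down to a multiple of 64 by shifting right then left; ext4_group_overhead_blocks() then counts the group's descriptor blocks plus, for groups that carry a superblock backup, one block and the reserved GDT blocks. The constants below are sample assumptions, not values from the patch:

#include <stdio.h>

#define DESC_PER_BLOCK_BITS 6     /* 64 descriptors per 4K block (64bit fs) */

static unsigned meta_bg_first_group(unsigned group)
{
    return (group >> DESC_PER_BLOCK_BITS) << DESC_PER_BLOCK_BITS;
}

int main(void)
{
    unsigned g;
    for (g = 0; g < 200; g += 63)
        printf("group %3u -> meta-group starts at %3u\n",
               g, meta_bg_first_group(g));
    return 0;
}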
@@ -57,9 +79,7 @@ static int verify_group_input(struct super_block *sb,
57 ext4_fsblk_t end = start + input->blocks_count; 79 ext4_fsblk_t end = start + input->blocks_count;
58 ext4_group_t group = input->group; 80 ext4_group_t group = input->group;
59 ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; 81 ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group;
60 unsigned overhead = ext4_bg_has_super(sb, group) ? 82 unsigned overhead = ext4_group_overhead_blocks(sb, group);
61 (1 + ext4_bg_num_gdb(sb, group) +
62 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
63 ext4_fsblk_t metaend = start + overhead; 83 ext4_fsblk_t metaend = start + overhead;
64 struct buffer_head *bh = NULL; 84 struct buffer_head *bh = NULL;
65 ext4_grpblk_t free_blocks_count, offset; 85 ext4_grpblk_t free_blocks_count, offset;
@@ -200,13 +220,15 @@ static void free_flex_gd(struct ext4_new_flex_group_data *flex_gd)
200 * be only part of a flex group. 220
201 * 221 *
202 * @sb: super block of fs to which the groups belongs 222 * @sb: super block of fs to which the groups belongs
223 *
224 * Returns 0 on a successful allocation of the metadata blocks in the
225 * block group.
203 */ 226 */
204static void ext4_alloc_group_tables(struct super_block *sb, 227static int ext4_alloc_group_tables(struct super_block *sb,
205 struct ext4_new_flex_group_data *flex_gd, 228 struct ext4_new_flex_group_data *flex_gd,
206 int flexbg_size) 229 int flexbg_size)
207{ 230{
208 struct ext4_new_group_data *group_data = flex_gd->groups; 231 struct ext4_new_group_data *group_data = flex_gd->groups;
209 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
210 ext4_fsblk_t start_blk; 232 ext4_fsblk_t start_blk;
211 ext4_fsblk_t last_blk; 233 ext4_fsblk_t last_blk;
212 ext4_group_t src_group; 234 ext4_group_t src_group;
@@ -226,23 +248,24 @@ static void ext4_alloc_group_tables(struct super_block *sb,
226 (last_group & ~(flexbg_size - 1)))); 248 (last_group & ~(flexbg_size - 1))));
227next_group: 249next_group:
228 group = group_data[0].group; 250 group = group_data[0].group;
251 if (src_group >= group_data[0].group + flex_gd->count)
252 return -ENOSPC;
229 start_blk = ext4_group_first_block_no(sb, src_group); 253 start_blk = ext4_group_first_block_no(sb, src_group);
230 last_blk = start_blk + group_data[src_group - group].blocks_count; 254 last_blk = start_blk + group_data[src_group - group].blocks_count;
231 255
232 overhead = ext4_bg_has_super(sb, src_group) ? 256 overhead = ext4_group_overhead_blocks(sb, src_group);
233 (1 + ext4_bg_num_gdb(sb, src_group) +
234 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
235 257
236 start_blk += overhead; 258 start_blk += overhead;
237 259
238 BUG_ON(src_group >= group_data[0].group + flex_gd->count);
239 /* We collect contiguous blocks as much as possible. */ 260 /* We collect contiguous blocks as much as possible. */
240 src_group++; 261 src_group++;
241 for (; src_group <= last_group; src_group++) 262 for (; src_group <= last_group; src_group++) {
242 if (!ext4_bg_has_super(sb, src_group)) 263 overhead = ext4_group_overhead_blocks(sb, src_group);
264 if (overhead != 0)
243 last_blk += group_data[src_group - group].blocks_count; 265 last_blk += group_data[src_group - group].blocks_count;
244 else 266 else
245 break; 267 break;
268 }
246 269
247 /* Allocate block bitmaps */ 270 /* Allocate block bitmaps */
248 for (; bb_index < flex_gd->count; bb_index++) { 271 for (; bb_index < flex_gd->count; bb_index++) {
@@ -300,6 +323,7 @@ next_group:
300 group_data[i].free_blocks_count); 323 group_data[i].free_blocks_count);
301 } 324 }
302 } 325 }
326 return 0;
303} 327}
304 328
305static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, 329static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
@@ -433,11 +457,13 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
433 ext4_group_t group, count; 457 ext4_group_t group, count;
434 struct buffer_head *bh = NULL; 458 struct buffer_head *bh = NULL;
435 int reserved_gdb, i, j, err = 0, err2; 459 int reserved_gdb, i, j, err = 0, err2;
460 int meta_bg;
436 461
437 BUG_ON(!flex_gd->count || !group_data || 462 BUG_ON(!flex_gd->count || !group_data ||
438 group_data[0].group != sbi->s_groups_count); 463 group_data[0].group != sbi->s_groups_count);
439 464
440 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks); 465 reserved_gdb = le16_to_cpu(es->s_reserved_gdt_blocks);
466 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
441 467
442 /* This transaction may be extended/restarted along the way */ 468 /* This transaction may be extended/restarted along the way */
443 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); 469 handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA);
@@ -447,12 +473,25 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
447 group = group_data[0].group; 473 group = group_data[0].group;
448 for (i = 0; i < flex_gd->count; i++, group++) { 474 for (i = 0; i < flex_gd->count; i++, group++) {
449 unsigned long gdblocks; 475 unsigned long gdblocks;
476 ext4_grpblk_t overhead;
450 477
451 gdblocks = ext4_bg_num_gdb(sb, group); 478 gdblocks = ext4_bg_num_gdb(sb, group);
452 start = ext4_group_first_block_no(sb, group); 479 start = ext4_group_first_block_no(sb, group);
453 480
481 if (meta_bg == 0 && !ext4_bg_has_super(sb, group))
482 goto handle_itb;
483
484 if (meta_bg == 1) {
485 ext4_group_t first_group;
486 first_group = ext4_meta_bg_first_group(sb, group);
487 if (first_group != group + 1 &&
488 first_group != group + EXT4_DESC_PER_BLOCK(sb) - 1)
489 goto handle_itb;
490 }
491
492 block = start + ext4_bg_has_super(sb, group);
454 /* Copy all of the GDT blocks into the backup in this group */ 493 /* Copy all of the GDT blocks into the backup in this group */
455 for (j = 0, block = start + 1; j < gdblocks; j++, block++) { 494 for (j = 0; j < gdblocks; j++, block++) {
456 struct buffer_head *gdb; 495 struct buffer_head *gdb;
457 496
458 ext4_debug("update backup group %#04llx\n", block); 497 ext4_debug("update backup group %#04llx\n", block);
@@ -493,6 +532,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
493 goto out; 532 goto out;
494 } 533 }
495 534
535handle_itb:
496 /* Initialize group tables of the group @group */ 536
497 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED)) 537 if (!(bg_flags[i] & EXT4_BG_INODE_ZEROED))
498 goto handle_bb; 538 goto handle_bb;
@@ -521,11 +561,11 @@ handle_bb:
521 err = PTR_ERR(bh); 561 err = PTR_ERR(bh);
522 goto out; 562 goto out;
523 } 563 }
524 if (ext4_bg_has_super(sb, group)) { 564 overhead = ext4_group_overhead_blocks(sb, group);
565 if (overhead != 0) {
525 ext4_debug("mark backup superblock %#04llx (+0)\n", 566 ext4_debug("mark backup superblock %#04llx (+0)\n",
526 start); 567 start);
527 ext4_set_bits(bh->b_data, 0, gdblocks + reserved_gdb + 568 ext4_set_bits(bh->b_data, 0, overhead);
528 1);
529 } 569 }
530 ext4_mark_bitmap_end(group_data[i].blocks_count, 570 ext4_mark_bitmap_end(group_data[i].blocks_count,
531 sb->s_blocksize * 8, bh->b_data); 571 sb->s_blocksize * 8, bh->b_data);
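[Editor's note] Replacing the open-coded "gdblocks + reserved_gdb + 1" with ext4_group_overhead_blocks() lets the same bitmap marking serve meta_bg groups, whose overhead may be zero. What ext4_set_bits() does to the new group's block bitmap, in miniature; set_bits() and the sample overhead values are illustrative assumptions:

#include <stdio.h>

static void set_bits(unsigned char *bm, int start, int len)
{
    for (int i = start; i < start + len; i++)
        bm[i / 8] |= 1u << (i % 8);
}

int main(void)
{
    unsigned char bitmap[8] = { 0 };   /* first 64 blocks of the group */
    int overhead = 1 /* super */ + 2 /* gdt */ + 3 /* reserved gdt */;

    set_bits(bitmap, 0, overhead);     /* what ext4_set_bits() does */
    for (int i = 0; i < 16; i++)
        printf("%d", (bitmap[i / 8] >> (i % 8)) & 1);
    printf("  <- first 16 block bits, %d overhead blocks busy\n", overhead);
    return 0;
}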
@@ -822,6 +862,45 @@ exit_bh:
822} 862}
823 863
824/* 864/*
865 * add_new_gdb_meta_bg is the sister of add_new_gdb.
866 */
867static int add_new_gdb_meta_bg(struct super_block *sb,
868 handle_t *handle, ext4_group_t group) {
869 ext4_fsblk_t gdblock;
870 struct buffer_head *gdb_bh;
871 struct buffer_head **o_group_desc, **n_group_desc;
872 unsigned long gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
873 int err;
874
875 gdblock = ext4_meta_bg_first_block_no(sb, group) +
876 ext4_bg_has_super(sb, group);
877 gdb_bh = sb_bread(sb, gdblock);
878 if (!gdb_bh)
879 return -EIO;
880 n_group_desc = ext4_kvmalloc((gdb_num + 1) *
881 sizeof(struct buffer_head *),
882 GFP_NOFS);
883 if (!n_group_desc) {
884 err = -ENOMEM;
885 ext4_warning(sb, "not enough memory for %lu groups",
886 gdb_num + 1);
887 return err;
888 }
889
890 o_group_desc = EXT4_SB(sb)->s_group_desc;
891 memcpy(n_group_desc, o_group_desc,
892 EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *));
893 n_group_desc[gdb_num] = gdb_bh;
894 EXT4_SB(sb)->s_group_desc = n_group_desc;
895 EXT4_SB(sb)->s_gdb_count++;
896 ext4_kvfree(o_group_desc);
897 err = ext4_journal_get_write_access(handle, gdb_bh);
898 if (unlikely(err))
899 brelse(gdb_bh);
900 return err;
901}
902
903/*
825 * Called when we are adding a new group which has a backup copy of each of 904 * Called when we are adding a new group which has a backup copy of each of
826 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. 905 * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks.
827 * We need to add these reserved backup GDT blocks to the resize inode, so 906 * We need to add these reserved backup GDT blocks to the resize inode, so
@@ -949,16 +1028,16 @@ exit_free:
949 * do not copy the full number of backups at this time. The resize 1028 * do not copy the full number of backups at this time. The resize
950 * which changed s_groups_count will backup again. 1029 * which changed s_groups_count will backup again.
951 */ 1030 */
952static void update_backups(struct super_block *sb, 1031static void update_backups(struct super_block *sb, int blk_off, char *data,
953 int blk_off, char *data, int size) 1032 int size, int meta_bg)
954{ 1033{
955 struct ext4_sb_info *sbi = EXT4_SB(sb); 1034 struct ext4_sb_info *sbi = EXT4_SB(sb);
956 const ext4_group_t last = sbi->s_groups_count; 1035 ext4_group_t last;
957 const int bpg = EXT4_BLOCKS_PER_GROUP(sb); 1036 const int bpg = EXT4_BLOCKS_PER_GROUP(sb);
958 unsigned three = 1; 1037 unsigned three = 1;
959 unsigned five = 5; 1038 unsigned five = 5;
960 unsigned seven = 7; 1039 unsigned seven = 7;
961 ext4_group_t group; 1040 ext4_group_t group = 0;
962 int rest = sb->s_blocksize - size; 1041 int rest = sb->s_blocksize - size;
963 handle_t *handle; 1042 handle_t *handle;
964 int err = 0, err2; 1043 int err = 0, err2;
@@ -970,10 +1049,17 @@ static void update_backups(struct super_block *sb,
970 goto exit_err; 1049 goto exit_err;
971 } 1050 }
972 1051
973 ext4_superblock_csum_set(sb, (struct ext4_super_block *)data); 1052 if (meta_bg == 0) {
1053 group = ext4_list_backups(sb, &three, &five, &seven);
1054 last = sbi->s_groups_count;
1055 } else {
1056 group = ext4_meta_bg_first_group(sb, group) + 1;
1057 last = (ext4_group_t)(group + EXT4_DESC_PER_BLOCK(sb) - 2);
1058 }
974 1059
975 while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { 1060 while (group < sbi->s_groups_count) {
976 struct buffer_head *bh; 1061 struct buffer_head *bh;
1062 ext4_fsblk_t backup_block;
977 1063
978 /* Out of journal space, and can't get more - abort - so sad */ 1064 /* Out of journal space, and can't get more - abort - so sad */
979 if (ext4_handle_valid(handle) && 1065 if (ext4_handle_valid(handle) &&
@@ -982,13 +1068,20 @@ static void update_backups(struct super_block *sb,
982 (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) 1068 (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA)))
983 break; 1069 break;
984 1070
985 bh = sb_getblk(sb, group * bpg + blk_off); 1071 if (meta_bg == 0)
1072 backup_block = group * bpg + blk_off;
1073 else
1074 backup_block = (ext4_group_first_block_no(sb, group) +
1075 ext4_bg_has_super(sb, group));
1076
1077 bh = sb_getblk(sb, backup_block);
986 if (!bh) { 1078 if (!bh) {
987 err = -EIO; 1079 err = -EIO;
988 break; 1080 break;
989 } 1081 }
990 ext4_debug("update metadata backup %#04lx\n", 1082 ext4_debug("update metadata backup %llu(+%llu)\n",
991 (unsigned long)bh->b_blocknr); 1083 backup_block, backup_block -
1084 ext4_group_first_block_no(sb, group));
992 if ((err = ext4_journal_get_write_access(handle, bh))) 1085 if ((err = ext4_journal_get_write_access(handle, bh)))
993 break; 1086 break;
994 lock_buffer(bh); 1087 lock_buffer(bh);
@@ -1001,6 +1094,13 @@ static void update_backups(struct super_block *sb,
1001 if (unlikely(err)) 1094 if (unlikely(err))
1002 ext4_std_error(sb, err); 1095 ext4_std_error(sb, err);
1003 brelse(bh); 1096 brelse(bh);
1097
1098 if (meta_bg == 0)
1099 group = ext4_list_backups(sb, &three, &five, &seven);
1100 else if (group == last)
1101 break;
1102 else
1103 group = last;
1004 } 1104 }
1005 if ((err2 = ext4_journal_stop(handle)) && !err) 1105 if ((err2 = ext4_journal_stop(handle)) && !err)
1006 err = err2; 1106 err = err2;
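[Editor's note] update_backups() now places backups for two layouts. Without meta_bg, sparse_super keeps superblock/GDT copies in group 1 and in groups that are powers of 3, 5 and 7; with meta_bg, each meta-group of EXT4_DESC_PER_BLOCK(sb) groups keeps copies of its descriptor block in its second and last groups. The three/five/seven walk below mirrors the logic of ext4_list_backups() as a standalone rendition; next_backup() is an illustrative name:

#include <stdio.h>

static unsigned long next_backup(unsigned *three, unsigned *five,
                                 unsigned *seven)
{
    unsigned long ret;
    unsigned *min = three;

    /* pick the smallest of the three power sequences, then advance it */
    if (*five < *min)
        min = five;
    if (*seven < *min)
        min = seven;
    ret = *min;
    *min *= (min == three) ? 3 : (min == five) ? 5 : 7;
    return ret;
}

int main(void)
{
    unsigned three = 1, five = 5, seven = 7;
    unsigned long g;

    printf("sparse_super backup groups below 1000:");
    while ((g = next_backup(&three, &five, &seven)) < 1000)
        printf(" %lu", g);
    printf("\n");      /* 1 3 5 7 9 25 27 49 81 125 243 343 625 729 */
    return 0;
}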
@@ -1043,7 +1143,9 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1043 struct ext4_super_block *es = sbi->s_es; 1143 struct ext4_super_block *es = sbi->s_es;
1044 struct buffer_head *gdb_bh; 1144 struct buffer_head *gdb_bh;
1045 int i, gdb_off, gdb_num, err = 0; 1145 int i, gdb_off, gdb_num, err = 0;
1146 int meta_bg;
1046 1147
1148 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
1047 for (i = 0; i < count; i++, group++) { 1149 for (i = 0; i < count; i++, group++) {
1048 int reserved_gdb = ext4_bg_has_super(sb, group) ? 1150 int reserved_gdb = ext4_bg_has_super(sb, group) ?
1049 le16_to_cpu(es->s_reserved_gdt_blocks) : 0; 1151 le16_to_cpu(es->s_reserved_gdt_blocks) : 0;
@@ -1063,8 +1165,11 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1063 1165
1064 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) 1166 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
1065 err = reserve_backup_gdb(handle, resize_inode, group); 1167 err = reserve_backup_gdb(handle, resize_inode, group);
1066 } else 1168 } else if (meta_bg != 0) {
1169 err = add_new_gdb_meta_bg(sb, handle, group);
1170 } else {
1067 err = add_new_gdb(handle, resize_inode, group); 1171 err = add_new_gdb(handle, resize_inode, group);
1172 }
1068 if (err) 1173 if (err)
1069 break; 1174 break;
1070 } 1175 }
@@ -1076,17 +1181,12 @@ static struct buffer_head *ext4_get_bitmap(struct super_block *sb, __u64 block)
1076 struct buffer_head *bh = sb_getblk(sb, block); 1181 struct buffer_head *bh = sb_getblk(sb, block);
1077 if (!bh) 1182 if (!bh)
1078 return NULL; 1183 return NULL;
1079 1184 if (!bh_uptodate_or_lock(bh)) {
1080 if (bitmap_uptodate(bh)) 1185 if (bh_submit_read(bh) < 0) {
1081 return bh; 1186 brelse(bh);
1082 1187 return NULL;
1083 lock_buffer(bh); 1188 }
1084 if (bh_submit_read(bh) < 0) {
1085 unlock_buffer(bh);
1086 brelse(bh);
1087 return NULL;
1088 } 1189 }
1089 unlock_buffer(bh);
1090 1190
1091 return bh; 1191 return bh;
1092} 1192}
@@ -1161,6 +1261,9 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
1161 ext4_free_group_clusters_set(sb, gdp, 1261 ext4_free_group_clusters_set(sb, gdp,
1162 EXT4_B2C(sbi, group_data->free_blocks_count)); 1262 EXT4_B2C(sbi, group_data->free_blocks_count));
1163 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); 1263 ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
1264 if (ext4_has_group_desc_csum(sb))
1265 ext4_itable_unused_set(sb, gdp,
1266 EXT4_INODES_PER_GROUP(sb));
1164 gdp->bg_flags = cpu_to_le16(*bg_flags); 1267 gdp->bg_flags = cpu_to_le16(*bg_flags);
1165 ext4_group_desc_csum_set(sb, group, gdp); 1268 ext4_group_desc_csum_set(sb, group, gdp);
1166 1269
@@ -1216,7 +1319,7 @@ static void ext4_update_super(struct super_block *sb,
1216 } 1319 }
1217 1320
1218 reserved_blocks = ext4_r_blocks_count(es) * 100; 1321 reserved_blocks = ext4_r_blocks_count(es) * 100;
1219 do_div(reserved_blocks, ext4_blocks_count(es)); 1322 reserved_blocks = div64_u64(reserved_blocks, ext4_blocks_count(es));
1220 reserved_blocks *= blocks_count; 1323 reserved_blocks *= blocks_count;
1221 do_div(reserved_blocks, 100); 1324 do_div(reserved_blocks, 100);
1222 1325
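[Editor's note] The reserved-block count is scaled so the resized filesystem keeps the same reserved percentage. On filesystems whose block count no longer fits in 32 bits, the first division must be a full 64/64 divide, hence div64_u64() instead of do_div(), whose divisor is only 32-bit. A standalone rendition of the same arithmetic with assumed sample sizes:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint64_t old_blocks = 100ULL << 30;      /* block count well past 2^32 */
    uint64_t r_blocks   = old_blocks / 20;   /* 5% reserved */
    uint64_t added      = 1ULL << 30;        /* blocks being added */

    uint64_t pct = r_blocks * 100 / old_blocks;   /* needs a 64/64 divide */
    uint64_t new_reserved = pct * added / 100;

    printf("keeping %llu%% reserved -> %llu extra reserved blocks\n",
           (unsigned long long)pct, (unsigned long long)new_reserved);
    return 0;
}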
@@ -1227,6 +1330,7 @@ static void ext4_update_super(struct super_block *sb,
1227 le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) * 1330 le32_add_cpu(&es->s_free_inodes_count, EXT4_INODES_PER_GROUP(sb) *
1228 flex_gd->count); 1331 flex_gd->count);
1229 1332
1333 ext4_debug("free blocks count %llu", ext4_free_blocks_count(es));
1230 /* 1334 /*
1231 * We need to protect s_groups_count against other CPUs seeing 1335 * We need to protect s_groups_count against other CPUs seeing
1232 * inconsistent state in the superblock. 1336 * inconsistent state in the superblock.
@@ -1261,6 +1365,8 @@ static void ext4_update_super(struct super_block *sb,
1261 percpu_counter_add(&sbi->s_freeinodes_counter, 1365 percpu_counter_add(&sbi->s_freeinodes_counter,
1262 EXT4_INODES_PER_GROUP(sb) * flex_gd->count); 1366 EXT4_INODES_PER_GROUP(sb) * flex_gd->count);
1263 1367
1368 ext4_debug("free blocks count %llu",
1369 percpu_counter_read(&sbi->s_freeclusters_counter));
1264 if (EXT4_HAS_INCOMPAT_FEATURE(sb, 1370 if (EXT4_HAS_INCOMPAT_FEATURE(sb,
1265 EXT4_FEATURE_INCOMPAT_FLEX_BG) && 1371 EXT4_FEATURE_INCOMPAT_FLEX_BG) &&
1266 sbi->s_log_groups_per_flex) { 1372 sbi->s_log_groups_per_flex) {
@@ -1349,16 +1455,24 @@ exit_journal:
1349 err = err2; 1455 err = err2;
1350 1456
1351 if (!err) { 1457 if (!err) {
1352 int i; 1458 int gdb_num = group / EXT4_DESC_PER_BLOCK(sb);
1459 int gdb_num_end = ((group + flex_gd->count - 1) /
1460 EXT4_DESC_PER_BLOCK(sb));
1461 int meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb,
1462 EXT4_FEATURE_INCOMPAT_META_BG);
1463 sector_t old_gdb = 0;
1464
1353 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, 1465 update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es,
1354 sizeof(struct ext4_super_block)); 1466 sizeof(struct ext4_super_block), 0);
1355 for (i = 0; i < flex_gd->count; i++, group++) { 1467 for (; gdb_num <= gdb_num_end; gdb_num++) {
1356 struct buffer_head *gdb_bh; 1468 struct buffer_head *gdb_bh;
1357 int gdb_num; 1469
1358 gdb_num = group / EXT4_BLOCKS_PER_GROUP(sb);
1359 gdb_bh = sbi->s_group_desc[gdb_num]; 1470 gdb_bh = sbi->s_group_desc[gdb_num];
1471 if (old_gdb == gdb_bh->b_blocknr)
1472 continue;
1360 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data, 1473 update_backups(sb, gdb_bh->b_blocknr, gdb_bh->b_data,
1361 gdb_bh->b_size); 1474 gdb_bh->b_size, meta_bg);
1475 old_gdb = gdb_bh->b_blocknr;
1362 } 1476 }
1363 } 1477 }
1364exit: 1478exit:
@@ -1402,9 +1516,7 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
1402 1516
1403 group_data[i].group = group + i; 1517 group_data[i].group = group + i;
1404 group_data[i].blocks_count = blocks_per_group; 1518 group_data[i].blocks_count = blocks_per_group;
1405 overhead = ext4_bg_has_super(sb, group + i) ? 1519 overhead = ext4_group_overhead_blocks(sb, group + i);
1406 (1 + ext4_bg_num_gdb(sb, group + i) +
1407 le16_to_cpu(es->s_reserved_gdt_blocks)) : 0;
1408 group_data[i].free_blocks_count = blocks_per_group - overhead; 1520 group_data[i].free_blocks_count = blocks_per_group - overhead;
1409 if (ext4_has_group_desc_csum(sb)) 1521 if (ext4_has_group_desc_csum(sb))
1410 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | 1522 flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
@@ -1492,6 +1604,14 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
1492 if (err) 1604 if (err)
1493 goto out; 1605 goto out;
1494 1606
1607 err = ext4_alloc_flex_bg_array(sb, input->group + 1);
1608 if (err)
1609 return err;
1610
1611 err = ext4_mb_alloc_groupinfo(sb, input->group + 1);
1612 if (err)
1613 goto out;
1614
1495 flex_gd.count = 1; 1615 flex_gd.count = 1;
1496 flex_gd.groups = input; 1616 flex_gd.groups = input;
1497 flex_gd.bg_flags = &bg_flags; 1617 flex_gd.bg_flags = &bg_flags;
@@ -1544,11 +1664,13 @@ errout:
1544 err = err2; 1664 err = err2;
1545 1665
1546 if (!err) { 1666 if (!err) {
1667 ext4_fsblk_t first_block;
1668 first_block = ext4_group_first_block_no(sb, 0);
1547 if (test_opt(sb, DEBUG)) 1669 if (test_opt(sb, DEBUG))
1548 printk(KERN_DEBUG "EXT4-fs: extended group to %llu " 1670 printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
1549 "blocks\n", ext4_blocks_count(es)); 1671 "blocks\n", ext4_blocks_count(es));
1550 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, 1672 update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block,
1551 sizeof(struct ext4_super_block)); 1673 (char *)es, sizeof(struct ext4_super_block), 0);
1552 } 1674 }
1553 return err; 1675 return err;
1554} 1676}
@@ -1631,6 +1753,94 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
1631 return err; 1753 return err;
1632} /* ext4_group_extend */ 1754} /* ext4_group_extend */
1633 1755
1756
1757static int num_desc_blocks(struct super_block *sb, ext4_group_t groups)
1758{
1759 return (groups + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb);
1760}
1761
1762/*
1763 * Release the resize inode and drop the resize_inode feature if there
1764 * are no more reserved gdt blocks, and then convert the file system
1765 * to enable meta_bg
1766 */
1767static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
1768{
1769 handle_t *handle;
1770 struct ext4_sb_info *sbi = EXT4_SB(sb);
1771 struct ext4_super_block *es = sbi->s_es;
1772 struct ext4_inode_info *ei = EXT4_I(inode);
1773 ext4_fsblk_t nr;
1774 int i, ret, err = 0;
1775 int credits = 1;
1776
1777 ext4_msg(sb, KERN_INFO, "Converting file system to meta_bg");
1778 if (inode) {
1779 if (es->s_reserved_gdt_blocks) {
1780 ext4_error(sb, "Unexpected non-zero "
1781 "s_reserved_gdt_blocks");
1782 return -EPERM;
1783 }
1784
1785 /* Do a quick sanity check of the resize inode */
1786 if (inode->i_blocks != 1 << (inode->i_blkbits - 9))
1787 goto invalid_resize_inode;
1788 for (i = 0; i < EXT4_N_BLOCKS; i++) {
1789 if (i == EXT4_DIND_BLOCK) {
1790 if (ei->i_data[i])
1791 continue;
1792 else
1793 goto invalid_resize_inode;
1794 }
1795 if (ei->i_data[i])
1796 goto invalid_resize_inode;
1797 }
1798 credits += 3; /* block bitmap, bg descriptor, resize inode */
1799 }
1800
1801 handle = ext4_journal_start_sb(sb, credits);
1802 if (IS_ERR(handle))
1803 return PTR_ERR(handle);
1804
1805 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
1806 if (err)
1807 goto errout;
1808
1809 EXT4_CLEAR_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE);
1810 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
1811 sbi->s_es->s_first_meta_bg =
1812 cpu_to_le32(num_desc_blocks(sb, sbi->s_groups_count));
1813
1814 err = ext4_handle_dirty_super(handle, sb);
1815 if (err) {
1816 ext4_std_error(sb, err);
1817 goto errout;
1818 }
1819
1820 if (inode) {
1821 nr = le32_to_cpu(ei->i_data[EXT4_DIND_BLOCK]);
1822 ext4_free_blocks(handle, inode, NULL, nr, 1,
1823 EXT4_FREE_BLOCKS_METADATA |
1824 EXT4_FREE_BLOCKS_FORGET);
1825 ei->i_data[EXT4_DIND_BLOCK] = 0;
1826 inode->i_blocks = 0;
1827
1828 err = ext4_mark_inode_dirty(handle, inode);
1829 if (err)
1830 ext4_std_error(sb, err);
1831 }
1832
1833errout:
1834 ret = ext4_journal_stop(handle);
1835 if (!err)
1836 err = ret;
1837 return ret;
1838
1839invalid_resize_inode:
1840 ext4_error(sb, "corrupted/inconsistent resize inode");
1841 return -EINVAL;
1842}
1843
1634/* 1844/*
1635 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count 1845 * ext4_resize_fs() resizes a fs to new size specified by @n_blocks_count
1636 * 1846 *
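[Editor's note] ext4_convert_meta_bg() freezes the current descriptor area by setting s_first_meta_bg to the number of descriptor blocks covering the existing groups, so only groups added beyond that point use meta_bg placement; num_desc_blocks() is the usual round-up division. Checking the rounding with sample numbers, assuming 64 descriptors per block (4K blocks, 64-byte descriptors):

#include <stdio.h>

#define DESC_PER_BLOCK 64

static unsigned num_desc_blocks(unsigned long long groups)
{
    return (groups + DESC_PER_BLOCK - 1) / DESC_PER_BLOCK;  /* ceiling */
}

int main(void)
{
    unsigned long long g[] = { 1, 64, 65, 1000 };
    for (int i = 0; i < 4; i++)
        printf("%4llu groups -> %u descriptor blocks\n",
               g[i], num_desc_blocks(g[i]));
    return 0;
}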
@@ -1643,21 +1853,31 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1643 struct ext4_sb_info *sbi = EXT4_SB(sb); 1853 struct ext4_sb_info *sbi = EXT4_SB(sb);
1644 struct ext4_super_block *es = sbi->s_es; 1854 struct ext4_super_block *es = sbi->s_es;
1645 struct buffer_head *bh; 1855 struct buffer_head *bh;
1646 struct inode *resize_inode; 1856 struct inode *resize_inode = NULL;
1647 ext4_fsblk_t o_blocks_count; 1857 ext4_grpblk_t add, offset;
1648 ext4_group_t o_group;
1649 ext4_group_t n_group;
1650 ext4_grpblk_t offset, add;
1651 unsigned long n_desc_blocks; 1858 unsigned long n_desc_blocks;
1652 unsigned long o_desc_blocks; 1859 unsigned long o_desc_blocks;
1653 unsigned long desc_blocks; 1860 ext4_group_t o_group;
1654 int err = 0, flexbg_size = 1; 1861 ext4_group_t n_group;
1862 ext4_fsblk_t o_blocks_count;
1863 ext4_fsblk_t n_blocks_count_retry = 0;
1864 unsigned long last_update_time = 0;
1865 int err = 0, flexbg_size = 1 << sbi->s_log_groups_per_flex;
1866 int meta_bg;
1655 1867
1868 /* See if the device is actually as big as what was requested */
1869 bh = sb_bread(sb, n_blocks_count - 1);
1870 if (!bh) {
1871 ext4_warning(sb, "can't read last block, resize aborted");
1872 return -ENOSPC;
1873 }
1874 brelse(bh);
1875
1876retry:
1656 o_blocks_count = ext4_blocks_count(es); 1877 o_blocks_count = ext4_blocks_count(es);
1657 1878
1658 if (test_opt(sb, DEBUG)) 1879 ext4_msg(sb, KERN_INFO, "resizing filesystem from %llu "
1659 ext4_msg(sb, KERN_DEBUG, "resizing filesystem from %llu " 1880 "to %llu blocks", o_blocks_count, n_blocks_count);
1660 "to %llu blocks", o_blocks_count, n_blocks_count);
1661 1881
1662 if (n_blocks_count < o_blocks_count) { 1882 if (n_blocks_count < o_blocks_count) {
1663 /* On-line shrinking not supported */ 1883 /* On-line shrinking not supported */
@@ -1672,32 +1892,49 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1672 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset); 1892 ext4_get_group_no_and_offset(sb, n_blocks_count - 1, &n_group, &offset);
1673 ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset); 1893 ext4_get_group_no_and_offset(sb, o_blocks_count - 1, &o_group, &offset);
1674 1894
1675 n_desc_blocks = (n_group + EXT4_DESC_PER_BLOCK(sb)) / 1895 n_desc_blocks = num_desc_blocks(sb, n_group + 1);
1676 EXT4_DESC_PER_BLOCK(sb); 1896 o_desc_blocks = num_desc_blocks(sb, sbi->s_groups_count);
1677 o_desc_blocks = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
1678 EXT4_DESC_PER_BLOCK(sb);
1679 desc_blocks = n_desc_blocks - o_desc_blocks;
1680 1897
1681 if (desc_blocks && 1898 meta_bg = EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG);
1682 (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE) ||
1683 le16_to_cpu(es->s_reserved_gdt_blocks) < desc_blocks)) {
1684 ext4_warning(sb, "No reserved GDT blocks, can't resize");
1685 return -EPERM;
1686 }
1687 1899
1688 resize_inode = ext4_iget(sb, EXT4_RESIZE_INO); 1900 if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_RESIZE_INODE)) {
1689 if (IS_ERR(resize_inode)) { 1901 if (meta_bg) {
1690 ext4_warning(sb, "Error opening resize inode"); 1902 ext4_error(sb, "resize_inode and meta_bg enabled "
1691 return PTR_ERR(resize_inode); 1903 "simultaneously");
1904 return -EINVAL;
1905 }
1906 if (n_desc_blocks > o_desc_blocks +
1907 le16_to_cpu(es->s_reserved_gdt_blocks)) {
1908 n_blocks_count_retry = n_blocks_count;
1909 n_desc_blocks = o_desc_blocks +
1910 le16_to_cpu(es->s_reserved_gdt_blocks);
1911 n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
1912 n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb);
1913 n_group--; /* set to last group number */
1914 }
1915
1916 if (!resize_inode)
1917 resize_inode = ext4_iget(sb, EXT4_RESIZE_INO);
1918 if (IS_ERR(resize_inode)) {
1919 ext4_warning(sb, "Error opening resize inode");
1920 return PTR_ERR(resize_inode);
1921 }
1692 } 1922 }
1693 1923
1694 /* See if the device is actually as big as what was requested */ 1924 if ((!resize_inode && !meta_bg) || n_blocks_count == o_blocks_count) {
1695 bh = sb_bread(sb, n_blocks_count - 1); 1925 err = ext4_convert_meta_bg(sb, resize_inode);
1696 if (!bh) { 1926 if (err)
1697 ext4_warning(sb, "can't read last block, resize aborted"); 1927 goto out;
1698 return -ENOSPC; 1928 if (resize_inode) {
1929 iput(resize_inode);
1930 resize_inode = NULL;
1931 }
1932 if (n_blocks_count_retry) {
1933 n_blocks_count = n_blocks_count_retry;
1934 n_blocks_count_retry = 0;
1935 goto retry;
1936 }
1699 } 1937 }
1700 brelse(bh);
1701 1938
1702 /* extend the last group */ 1939 /* extend the last group */
1703 if (n_group == o_group) 1940 if (n_group == o_group)
@@ -1710,12 +1947,15 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1710 goto out; 1947 goto out;
1711 } 1948 }
1712 1949
1713 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG) && 1950 if (ext4_blocks_count(es) == n_blocks_count)
1714 es->s_log_groups_per_flex) 1951 goto out;
1715 flexbg_size = 1 << es->s_log_groups_per_flex;
1716 1952
1717 o_blocks_count = ext4_blocks_count(es); 1953 err = ext4_alloc_flex_bg_array(sb, n_group + 1);
1718 if (o_blocks_count == n_blocks_count) 1954 if (err)
1955 return err;
1956
1957 err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
1958 if (err)
1719 goto out; 1959 goto out;
1720 1960
1721 flex_gd = alloc_flex_gd(flexbg_size); 1961 flex_gd = alloc_flex_gd(flexbg_size);
@@ -1729,19 +1969,33 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
1729 */ 1969 */
1730 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count, 1970 while (ext4_setup_next_flex_gd(sb, flex_gd, n_blocks_count,
1731 flexbg_size)) { 1971 flexbg_size)) {
1732 ext4_alloc_group_tables(sb, flex_gd, flexbg_size); 1972 if (jiffies - last_update_time > HZ * 10) {
1973 if (last_update_time)
1974 ext4_msg(sb, KERN_INFO,
1975 "resized to %llu blocks",
1976 ext4_blocks_count(es));
1977 last_update_time = jiffies;
1978 }
1979 if (ext4_alloc_group_tables(sb, flex_gd, flexbg_size) != 0)
1980 break;
1733 err = ext4_flex_group_add(sb, resize_inode, flex_gd); 1981 err = ext4_flex_group_add(sb, resize_inode, flex_gd);
1734 if (unlikely(err)) 1982 if (unlikely(err))
1735 break; 1983 break;
1736 } 1984 }
1737 1985
1986 if (!err && n_blocks_count_retry) {
1987 n_blocks_count = n_blocks_count_retry;
1988 n_blocks_count_retry = 0;
1989 free_flex_gd(flex_gd);
1990 flex_gd = NULL;
1991 goto retry;
1992 }
1993
1738out: 1994out:
1739 if (flex_gd) 1995 if (flex_gd)
1740 free_flex_gd(flex_gd); 1996 free_flex_gd(flex_gd);
1741 1997 if (resize_inode != NULL)
1742 iput(resize_inode); 1998 iput(resize_inode);
1743 if (test_opt(sb, DEBUG)) 1999 ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count);
1744 ext4_msg(sb, KERN_DEBUG, "resized filesystem from %llu "
1745 "upto %llu blocks", o_blocks_count, n_blocks_count);
1746 return err; 2000 return err;
1747} 2001}
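[Editor's note] The resize entry point now loops: grow as far as the reserved GDT blocks allow, convert the filesystem to meta_bg once that headroom is exhausted (or when there is no resize_inode), then retry with the original target via n_blocks_count_retry. Below is a much-reduced model of that control flow; all names and numbers are illustrative, and the clamp in the real code works in descriptor blocks rather than raw block counts:

#include <stdio.h>

int main(void)
{
    unsigned long long cur = 100, target = 1000, limit = 400;
    unsigned long long retry = 0;
    int meta_bg = 0;

    do {
        unsigned long long want = retry ? retry : target;
        retry = 0;
        if (!meta_bg && want > limit) {   /* clamp to resize_inode limit */
            retry = want;
            want = limit;
        }
        if (cur == want && !meta_bg) {    /* out of headroom: convert */
            printf("converting to meta_bg at %llu blocks\n", cur);
            meta_bg = 1;
            continue;
        }
        printf("growing %llu -> %llu blocks\n", cur, want);
        cur = want;
    } while (retry || cur != target);

    printf("resized to %llu blocks\n", cur);
    return 0;
}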
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 69c55d4e4626..7265a0367476 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -420,7 +420,7 @@ static void __save_error_info(struct super_block *sb, const char *func,
420 */ 420 */
421 if (!es->s_error_count) 421 if (!es->s_error_count)
422 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); 422 mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
423 es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1); 423 le32_add_cpu(&es->s_error_count, 1);
424} 424}
425 425
426static void save_error_info(struct super_block *sb, const char *func, 426static void save_error_info(struct super_block *sb, const char *func,
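[Editor's note] le32_add_cpu(&x, n) is shorthand for the open-coded x = cpu_to_le32(le32_to_cpu(x) + n) that the hunk above replaces; the on-disk counter stays little-endian regardless of host byte order. A portable standalone version of the same idea (le32_load/le32_store are local helpers, not kernel APIs):

#include <stdio.h>
#include <stdint.h>

static uint32_t le32_load(const uint8_t *p)          /* le32_to_cpu */
{
    return (uint32_t)p[0] | (uint32_t)p[1] << 8 |
           (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

static void le32_store(uint8_t *p, uint32_t v)       /* cpu_to_le32 */
{
    p[0] = v; p[1] = v >> 8; p[2] = v >> 16; p[3] = v >> 24;
}

static void le32_add_cpu(uint8_t *p, uint32_t n)
{
    le32_store(p, le32_load(p) + n);
}

int main(void)
{
    uint8_t s_error_count[4] = { 0xff, 0x00, 0x00, 0x00 };  /* 255 in LE */
    le32_add_cpu(s_error_count, 1);
    printf("error count now %u\n", le32_load(s_error_count)); /* 256 */
    return 0;
}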
@@ -850,7 +850,6 @@ static void ext4_put_super(struct super_block *sb)
850 flush_workqueue(sbi->dio_unwritten_wq); 850 flush_workqueue(sbi->dio_unwritten_wq);
851 destroy_workqueue(sbi->dio_unwritten_wq); 851 destroy_workqueue(sbi->dio_unwritten_wq);
852 852
853 lock_super(sb);
854 if (sbi->s_journal) { 853 if (sbi->s_journal) {
855 err = jbd2_journal_destroy(sbi->s_journal); 854 err = jbd2_journal_destroy(sbi->s_journal);
856 sbi->s_journal = NULL; 855 sbi->s_journal = NULL;
@@ -917,7 +916,6 @@ static void ext4_put_super(struct super_block *sb)
917 * Now that we are completely done shutting down the 916 * Now that we are completely done shutting down the
918 * superblock, we need to actually destroy the kobject. 917 * superblock, we need to actually destroy the kobject.
919 */ 918 */
920 unlock_super(sb);
921 kobject_put(&sbi->s_kobj); 919 kobject_put(&sbi->s_kobj);
922 wait_for_completion(&sbi->s_kobj_unregister); 920 wait_for_completion(&sbi->s_kobj_unregister);
923 if (sbi->s_chksum_driver) 921 if (sbi->s_chksum_driver)
@@ -956,11 +954,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
956 ei->jinode = NULL; 954 ei->jinode = NULL;
957 INIT_LIST_HEAD(&ei->i_completed_io_list); 955 INIT_LIST_HEAD(&ei->i_completed_io_list);
958 spin_lock_init(&ei->i_completed_io_lock); 956 spin_lock_init(&ei->i_completed_io_lock);
959 ei->cur_aio_dio = NULL;
960 ei->i_sync_tid = 0; 957 ei->i_sync_tid = 0;
961 ei->i_datasync_tid = 0; 958 ei->i_datasync_tid = 0;
962 atomic_set(&ei->i_ioend_count, 0); 959 atomic_set(&ei->i_ioend_count, 0);
963 atomic_set(&ei->i_aiodio_unwritten, 0); 960 atomic_set(&ei->i_unwritten, 0);
964 961
965 return &ei->vfs_inode; 962 return &ei->vfs_inode;
966} 963}
@@ -1224,6 +1221,7 @@ enum {
1224 Opt_inode_readahead_blks, Opt_journal_ioprio, 1221 Opt_inode_readahead_blks, Opt_journal_ioprio,
1225 Opt_dioread_nolock, Opt_dioread_lock, 1222 Opt_dioread_nolock, Opt_dioread_lock,
1226 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable, 1223 Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
1224 Opt_max_dir_size_kb,
1227}; 1225};
1228 1226
1229static const match_table_t tokens = { 1227static const match_table_t tokens = {
@@ -1297,6 +1295,7 @@ static const match_table_t tokens = {
1297 {Opt_init_itable, "init_itable=%u"}, 1295 {Opt_init_itable, "init_itable=%u"},
1298 {Opt_init_itable, "init_itable"}, 1296 {Opt_init_itable, "init_itable"},
1299 {Opt_noinit_itable, "noinit_itable"}, 1297 {Opt_noinit_itable, "noinit_itable"},
1298 {Opt_max_dir_size_kb, "max_dir_size_kb=%u"},
1300 {Opt_removed, "check=none"}, /* mount option from ext2/3 */ 1299 {Opt_removed, "check=none"}, /* mount option from ext2/3 */
1301 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */ 1300 {Opt_removed, "nocheck"}, /* mount option from ext2/3 */
1302 {Opt_removed, "reservation"}, /* mount option from ext2/3 */ 1301 {Opt_removed, "reservation"}, /* mount option from ext2/3 */
@@ -1477,6 +1476,7 @@ static const struct mount_opts {
1477 {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT}, 1476 {Opt_jqfmt_vfsold, QFMT_VFS_OLD, MOPT_QFMT},
1478 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT}, 1477 {Opt_jqfmt_vfsv0, QFMT_VFS_V0, MOPT_QFMT},
1479 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT}, 1478 {Opt_jqfmt_vfsv1, QFMT_VFS_V1, MOPT_QFMT},
1479 {Opt_max_dir_size_kb, 0, MOPT_GTE0},
1480 {Opt_err, 0, 0} 1480 {Opt_err, 0, 0}
1481}; 1481};
1482 1482
@@ -1592,6 +1592,8 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
1592 if (!args->from) 1592 if (!args->from)
1593 arg = EXT4_DEF_LI_WAIT_MULT; 1593 arg = EXT4_DEF_LI_WAIT_MULT;
1594 sbi->s_li_wait_mult = arg; 1594 sbi->s_li_wait_mult = arg;
1595 } else if (token == Opt_max_dir_size_kb) {
1596 sbi->s_max_dir_size_kb = arg;
1595 } else if (token == Opt_stripe) { 1597 } else if (token == Opt_stripe) {
1596 sbi->s_stripe = arg; 1598 sbi->s_stripe = arg;
1597 } else if (m->flags & MOPT_DATAJ) { 1599 } else if (m->flags & MOPT_DATAJ) {
@@ -1664,7 +1666,7 @@ static int parse_options(char *options, struct super_block *sb,
1664 * Initialize args struct so we know whether arg was 1666 * Initialize args struct so we know whether arg was
1665 * found; some options take optional arguments. 1667 * found; some options take optional arguments.
1666 */ 1668 */
1667 args[0].to = args[0].from = 0; 1669 args[0].to = args[0].from = NULL;
1668 token = match_token(p, tokens, args); 1670 token = match_token(p, tokens, args);
1669 if (handle_mount_opt(sb, p, token, args, journal_devnum, 1671 if (handle_mount_opt(sb, p, token, args, journal_devnum,
1670 journal_ioprio, is_remount) < 0) 1672 journal_ioprio, is_remount) < 0)
@@ -1740,7 +1742,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
1740 1742
1741static const char *token2str(int token) 1743static const char *token2str(int token)
1742{ 1744{
1743 static const struct match_token *t; 1745 const struct match_token *t;
1744 1746
1745 for (t = tokens; t->token != Opt_err; t++) 1747 for (t = tokens; t->token != Opt_err; t++)
1746 if (t->token == token && !strchr(t->pattern, '=')) 1748 if (t->token == token && !strchr(t->pattern, '='))
@@ -1823,6 +1825,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
1823 if (nodefs || (test_opt(sb, INIT_INODE_TABLE) && 1825 if (nodefs || (test_opt(sb, INIT_INODE_TABLE) &&
1824 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT))) 1826 (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
1825 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult); 1827 SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
1828 if (nodefs || sbi->s_max_dir_size_kb)
1829 SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
1826 1830
1827 ext4_show_quota_options(seq, sb); 1831 ext4_show_quota_options(seq, sb);
1828 return 0; 1832 return 0;
@@ -1914,15 +1918,45 @@ done:
1914 return res; 1918 return res;
1915} 1919}
1916 1920
1921int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
1922{
1923 struct ext4_sb_info *sbi = EXT4_SB(sb);
1924 struct flex_groups *new_groups;
1925 int size;
1926
1927 if (!sbi->s_log_groups_per_flex)
1928 return 0;
1929
1930 size = ext4_flex_group(sbi, ngroup - 1) + 1;
1931 if (size <= sbi->s_flex_groups_allocated)
1932 return 0;
1933
1934 size = roundup_pow_of_two(size * sizeof(struct flex_groups));
1935 new_groups = ext4_kvzalloc(size, GFP_KERNEL);
1936 if (!new_groups) {
1937 ext4_msg(sb, KERN_ERR, "not enough memory for %d flex groups",
1938 size / (int) sizeof(struct flex_groups));
1939 return -ENOMEM;
1940 }
1941
1942 if (sbi->s_flex_groups) {
1943 memcpy(new_groups, sbi->s_flex_groups,
1944 (sbi->s_flex_groups_allocated *
1945 sizeof(struct flex_groups)));
1946 ext4_kvfree(sbi->s_flex_groups);
1947 }
1948 sbi->s_flex_groups = new_groups;
1949 sbi->s_flex_groups_allocated = size / sizeof(struct flex_groups);
1950 return 0;
1951}
1952
1917static int ext4_fill_flex_info(struct super_block *sb) 1953static int ext4_fill_flex_info(struct super_block *sb)
1918{ 1954{
1919 struct ext4_sb_info *sbi = EXT4_SB(sb); 1955 struct ext4_sb_info *sbi = EXT4_SB(sb);
1920 struct ext4_group_desc *gdp = NULL; 1956 struct ext4_group_desc *gdp = NULL;
1921 ext4_group_t flex_group_count;
1922 ext4_group_t flex_group; 1957 ext4_group_t flex_group;
1923 unsigned int groups_per_flex = 0; 1958 unsigned int groups_per_flex = 0;
1924 size_t size; 1959 int i, err;
1925 int i;
1926 1960
1927 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; 1961 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1928 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) { 1962 if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
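[Editor's note] ext4_alloc_flex_bg_array() replaces the one-shot sizing in ext4_fill_flex_info() with grow-on-demand: round the byte size up to the next power of two so repeated resizes do not reallocate every time, copy the old contents, then swap the pointers. A sketch of the same pattern, with plain calloc/free standing in for ext4_kvzalloc/ext4_kvfree and the struct layout assumed:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct flex_groups { long free_clusters, free_inodes, used_dirs; };

static struct flex_groups *groups;
static size_t allocated;                   /* entries, not bytes */

static size_t roundup_pow_of_two(size_t n)
{
    size_t p = 1;
    while (p < n)
        p <<= 1;
    return p;
}

static int alloc_flex_bg_array(size_t needed)
{
    if (needed <= allocated)
        return 0;
    size_t bytes = roundup_pow_of_two(needed * sizeof(struct flex_groups));
    struct flex_groups *grown = calloc(1, bytes);
    if (!grown)
        return -1;
    if (groups) {
        memcpy(grown, groups, allocated * sizeof(struct flex_groups));
        free(groups);
    }
    groups = grown;
    allocated = bytes / sizeof(struct flex_groups);
    return 0;
}

int main(void)
{
    alloc_flex_bg_array(3);
    printf("asked for 3, allocated %zu entries\n", allocated);
    alloc_flex_bg_array(70);
    printf("asked for 70, allocated %zu entries\n", allocated);
    free(groups);
    return 0;
}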
@@ -1931,17 +1965,9 @@ static int ext4_fill_flex_info(struct super_block *sb)
1931 } 1965 }
1932 groups_per_flex = 1 << sbi->s_log_groups_per_flex; 1966 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1933 1967
1934 /* We allocate both existing and potentially added groups */ 1968 err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
1935 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1969 if (err)
1936 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1937 EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1938 size = flex_group_count * sizeof(struct flex_groups);
1939 sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL);
1940 if (sbi->s_flex_groups == NULL) {
1941 ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups",
1942 flex_group_count);
1943 goto failed; 1970 goto failed;
1944 }
1945 1971
1946 for (i = 0; i < sbi->s_groups_count; i++) { 1972 for (i = 0; i < sbi->s_groups_count; i++) {
1947 gdp = ext4_get_group_desc(sb, i, NULL); 1973 gdp = ext4_get_group_desc(sb, i, NULL);
@@ -2144,10 +2170,12 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2144 } 2170 }
2145 2171
2146 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2172 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2147 if (es->s_last_orphan) 2173 /* don't clear list on RO mount w/ errors */
2174 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2148 jbd_debug(1, "Errors on filesystem, " 2175 jbd_debug(1, "Errors on filesystem, "
2149 "clearing orphan list.\n"); 2176 "clearing orphan list.\n");
2150 es->s_last_orphan = 0; 2177 es->s_last_orphan = 0;
2178 }
2151 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); 2179 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
2152 return; 2180 return;
2153 } 2181 }
@@ -2528,6 +2556,7 @@ EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs);
2528EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); 2556EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request);
2529EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); 2557EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc);
2530EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); 2558EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump);
2559EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb);
2531EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); 2560EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error);
2532 2561
2533static struct attribute *ext4_attrs[] = { 2562static struct attribute *ext4_attrs[] = {
@@ -2543,6 +2572,7 @@ static struct attribute *ext4_attrs[] = {
2543 ATTR_LIST(mb_stream_req), 2572 ATTR_LIST(mb_stream_req),
2544 ATTR_LIST(mb_group_prealloc), 2573 ATTR_LIST(mb_group_prealloc),
2545 ATTR_LIST(max_writeback_mb_bump), 2574 ATTR_LIST(max_writeback_mb_bump),
2575 ATTR_LIST(extent_max_zeroout_kb),
2546 ATTR_LIST(trigger_fs_error), 2576 ATTR_LIST(trigger_fs_error),
2547 NULL, 2577 NULL,
2548}; 2578};
@@ -2550,10 +2580,12 @@ static struct attribute *ext4_attrs[] = {
2550/* Features this copy of ext4 supports */ 2580/* Features this copy of ext4 supports */
2551EXT4_INFO_ATTR(lazy_itable_init); 2581EXT4_INFO_ATTR(lazy_itable_init);
2552EXT4_INFO_ATTR(batched_discard); 2582EXT4_INFO_ATTR(batched_discard);
2583EXT4_INFO_ATTR(meta_bg_resize);
2553 2584
2554static struct attribute *ext4_feat_attrs[] = { 2585static struct attribute *ext4_feat_attrs[] = {
2555 ATTR_LIST(lazy_itable_init), 2586 ATTR_LIST(lazy_itable_init),
2556 ATTR_LIST(batched_discard), 2587 ATTR_LIST(batched_discard),
2588 ATTR_LIST(meta_bg_resize),
2557 NULL, 2589 NULL,
2558}; 2590};
2559 2591
@@ -3374,7 +3406,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3374 * enable delayed allocation by default 3406 * enable delayed allocation by default
3375 * Use -o nodelalloc to turn it off 3407 * Use -o nodelalloc to turn it off
3376 */ 3408 */
3377 if (!IS_EXT3_SB(sb) && 3409 if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
3378 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) 3410 ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
3379 set_opt(sb, DELALLOC); 3411 set_opt(sb, DELALLOC);
3380 3412
@@ -3743,6 +3775,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3743 3775
3744 sbi->s_stripe = ext4_get_stripe_size(sbi); 3776 sbi->s_stripe = ext4_get_stripe_size(sbi);
3745 sbi->s_max_writeback_mb_bump = 128; 3777 sbi->s_max_writeback_mb_bump = 128;
3778 sbi->s_extent_max_zeroout_kb = 32;
3746 3779
3747 /* 3780 /*
3748 * set up enough so that it can read an inode 3781 * set up enough so that it can read an inode
@@ -4519,11 +4552,9 @@ static int ext4_unfreeze(struct super_block *sb)
4519 if (sb->s_flags & MS_RDONLY) 4552 if (sb->s_flags & MS_RDONLY)
4520 return 0; 4553 return 0;
4521 4554
4522 lock_super(sb);
4523 /* Reset the needs_recovery flag before the fs is unlocked. */ 4555 /* Reset the needs_recovery flag before the fs is unlocked. */
4524 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4556 EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4525 ext4_commit_super(sb, 1); 4557 ext4_commit_super(sb, 1);
4526 unlock_super(sb);
4527 return 0; 4558 return 0;
4528} 4559}
4529 4560
@@ -4559,7 +4590,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4559 char *orig_data = kstrdup(data, GFP_KERNEL); 4590 char *orig_data = kstrdup(data, GFP_KERNEL);
4560 4591
4561 /* Store the original options */ 4592 /* Store the original options */
4562 lock_super(sb);
4563 old_sb_flags = sb->s_flags; 4593 old_sb_flags = sb->s_flags;
4564 old_opts.s_mount_opt = sbi->s_mount_opt; 4594 old_opts.s_mount_opt = sbi->s_mount_opt;
4565 old_opts.s_mount_opt2 = sbi->s_mount_opt2; 4595 old_opts.s_mount_opt2 = sbi->s_mount_opt2;
@@ -4701,7 +4731,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4701 if (sbi->s_journal == NULL) 4731 if (sbi->s_journal == NULL)
4702 ext4_commit_super(sb, 1); 4732 ext4_commit_super(sb, 1);
4703 4733
4704 unlock_super(sb);
4705#ifdef CONFIG_QUOTA 4734#ifdef CONFIG_QUOTA
4706 /* Release old quota file names */ 4735 /* Release old quota file names */
4707 for (i = 0; i < MAXQUOTAS; i++) 4736 for (i = 0; i < MAXQUOTAS; i++)
@@ -4714,10 +4743,8 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4714 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 4743 else if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
4715 EXT4_FEATURE_RO_COMPAT_QUOTA)) { 4744 EXT4_FEATURE_RO_COMPAT_QUOTA)) {
4716 err = ext4_enable_quotas(sb); 4745 err = ext4_enable_quotas(sb);
4717 if (err) { 4746 if (err)
4718 lock_super(sb);
4719 goto restore_opts; 4747 goto restore_opts;
4720 }
4721 } 4748 }
4722 } 4749 }
4723#endif 4750#endif
@@ -4744,7 +4771,6 @@ restore_opts:
4744 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 4771 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
4745 } 4772 }
4746#endif 4773#endif
4747 unlock_super(sb);
4748 kfree(orig_data); 4774 kfree(orig_data);
4749 return err; 4775 return err;
4750} 4776}
@@ -5269,8 +5295,10 @@ static int __init ext4_init_fs(void)
5269 if (err) 5295 if (err)
5270 goto out6; 5296 goto out6;
5271 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj); 5297 ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
5272 if (!ext4_kset) 5298 if (!ext4_kset) {
5299 err = -ENOMEM;
5273 goto out5; 5300 goto out5;
5301 }
5274 ext4_proc_root = proc_mkdir("fs/ext4", NULL); 5302 ext4_proc_root = proc_mkdir("fs/ext4", NULL);
5275 5303
5276 err = ext4_init_feat_adverts(); 5304 err = ext4_init_feat_adverts();
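
The one-line fix above closes a classic goto-unwind hole: kset_create_and_add() reports failure through a NULL return rather than an errno, so the branch must load err itself before jumping. A minimal standalone illustration of the bug class (step_one() and step_two_alloc() are hypothetical):

static int init_example(void)
{
        int err;

        err = step_one();               /* returns 0 or -errno */
        if (err)
                goto out;
        if (!step_two_alloc()) {        /* NULL-on-failure style API */
                err = -ENOMEM;          /* without this line, err is
                                         * still 0 and the caller sees
                                         * a bogus success */
                goto out;
        }
        return 0;
out:
        return err;
}
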
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6d46c0d78338..8e1d7b9e4a33 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -63,6 +63,7 @@ int writeback_in_progress(struct backing_dev_info *bdi)
63{ 63{
64 return test_bit(BDI_writeback_running, &bdi->state); 64 return test_bit(BDI_writeback_running, &bdi->state);
65} 65}
66EXPORT_SYMBOL(writeback_in_progress);
66 67
67static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) 68static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
68{ 69{
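
Exporting the symbol lets modular code link against it; ext4 itself can be built as a module. A hedged sketch of the sort of caller this enables (the wrapper is illustrative, writeback_in_progress() is the function exported above):

/* Illustrative caller: skip queueing flush work when the flusher
 * thread is already running against this backing device. */
static int flusher_already_busy(struct backing_dev_info *bdi)
{
        return writeback_in_progress(bdi);
}
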
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index af5280fb579b..3091d42992f0 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1014,17 +1014,35 @@ restart_loop:
1014 * there's no point in keeping a checkpoint record for 1014 * there's no point in keeping a checkpoint record for
1015 * it. */ 1015 * it. */
1016 1016
1017 /* A buffer which has been freed while still being 1017 /*
1018 * journaled by a previous transaction may end up still 1018 * A buffer which has been freed while still being journaled by
1019 * being dirty here, but we want to avoid writing back 1019 * a previous transaction.
1020 * that buffer in the future after the "add to orphan" 1020 */
1021 * operation been committed, That's not only a performance 1021 if (buffer_freed(bh)) {
1022 * gain, it also stops aliasing problems if the buffer is 1022 /*
1023 * left behind for writeback and gets reallocated for another 1023 * If the running transaction is the one containing
1024 * use in a different page. */ 1024 * "add to orphan" operation (b_next_transaction !=
1025 if (buffer_freed(bh) && !jh->b_next_transaction) { 1025 * NULL), we have to wait for that transaction to
1026 clear_buffer_freed(bh); 1026 * commit before we can really get rid of the buffer.
1027 clear_buffer_jbddirty(bh); 1027 * So just clear b_modified to not confuse transaction
1028 * credit accounting and refile the buffer to
1029 * BJ_Forget of the running transaction. If the just
1030 * committed transaction contains "add to orphan"
1031 * operation, we can completely invalidate the buffer
1032 * now. We are rather thorough in that since the
1033 * buffer may still be accessible when blocksize <
1034 * pagesize and it is attached to the last partial
1035 * page.
1036 */
1037 jh->b_modified = 0;
1038 if (!jh->b_next_transaction) {
1039 clear_buffer_freed(bh);
1040 clear_buffer_jbddirty(bh);
1041 clear_buffer_mapped(bh);
1042 clear_buffer_new(bh);
1043 clear_buffer_req(bh);
1044 bh->b_bdev = NULL;
1045 }
1028 } 1046 }
1029 1047
1030 if (buffer_jbddirty(bh)) { 1048 if (buffer_jbddirty(bh)) {
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e149b99a7ffb..484b8d1c6cb6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1354,6 +1354,11 @@ static void jbd2_mark_journal_empty(journal_t *journal)
1354 1354
1355 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1355 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1356 read_lock(&journal->j_state_lock); 1356 read_lock(&journal->j_state_lock);
1357 /* Is it already empty? */
1358 if (sb->s_start == 0) {
1359 read_unlock(&journal->j_state_lock);
1360 return;
1361 }
1357 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", 1362 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
1358 journal->j_tail_sequence); 1363 journal->j_tail_sequence);
1359 1364
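
The guard makes the operation idempotent: s_start == 0 is jbd2's on-disk encoding of an already-empty journal, so a repeated call now returns without rewriting the journal superblock. A reduced sketch (endianness conversion and the actual superblock write-out are omitted):

static void mark_empty_sketch(journal_superblock_t *sb)
{
        if (sb->s_start == 0)   /* already empty: nothing to do */
                return;
        sb->s_start = 0;        /* the real code also updates the
                                 * sequence and writes the block out */
}
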
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 0131e4362534..626846bac32f 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -289,8 +289,11 @@ int jbd2_journal_recover(journal_t *journal)
289 if (!err) 289 if (!err)
290 err = err2; 290 err = err2;
291 /* Make sure all replayed data is on permanent storage */ 291 /* Make sure all replayed data is on permanent storage */
292 if (journal->j_flags & JBD2_BARRIER) 292 if (journal->j_flags & JBD2_BARRIER) {
293 blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); 293 err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
294 if (!err)
295 err = err2;
296 }
294 return err; 297 return err;
295} 298}
296 299
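
Besides issuing the flush, the fix propagates its failure with the usual first-error-wins idiom: err keeps the earlier recovery error if there was one, otherwise it picks up the flush result. In isolation:

/* First-error-wins accumulation, as used above: a prior failure is
 * never overwritten by a later one. */
static int combine_errors(int err, int err2)
{
        if (!err)
                err = err2;
        return err;
}
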
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index fb1ab9533b67..a74ba4659549 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1841,15 +1841,16 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
1841 * We're outside-transaction here. Either or both of j_running_transaction 1841 * We're outside-transaction here. Either or both of j_running_transaction
1842 * and j_committing_transaction may be NULL. 1842 * and j_committing_transaction may be NULL.
1843 */ 1843 */
1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) 1844static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh,
1845 int partial_page)
1845{ 1846{
1846 transaction_t *transaction; 1847 transaction_t *transaction;
1847 struct journal_head *jh; 1848 struct journal_head *jh;
1848 int may_free = 1; 1849 int may_free = 1;
1849 int ret;
1850 1850
1851 BUFFER_TRACE(bh, "entry"); 1851 BUFFER_TRACE(bh, "entry");
1852 1852
1853retry:
1853 /* 1854 /*
1854 * It is safe to proceed here without the j_list_lock because the 1855 * It is safe to proceed here without the j_list_lock because the
1855 * buffers cannot be stolen by try_to_free_buffers as long as we are 1856 * buffers cannot be stolen by try_to_free_buffers as long as we are
@@ -1878,10 +1879,18 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1878 * clear the buffer dirty bit at latest at the moment when the 1879 * clear the buffer dirty bit at latest at the moment when the
1879 * transaction marking the buffer as freed in the filesystem 1880 * transaction marking the buffer as freed in the filesystem
1880 * structures is committed because from that moment on the 1881 * structures is committed because from that moment on the
1881 * buffer can be reallocated and used by a different page. 1882 * block can be reallocated and used by a different page.
1882 * Since the block hasn't been freed yet but the inode has 1883 * Since the block hasn't been freed yet but the inode has
1883 * already been added to orphan list, it is safe for us to add 1884 * already been added to orphan list, it is safe for us to add
1884 * the buffer to BJ_Forget list of the newest transaction. 1885 * the buffer to BJ_Forget list of the newest transaction.
1886 *
1887 * Also we have to clear buffer_mapped flag of a truncated buffer
1888 * because the buffer_head may be attached to the page straddling
1889 * i_size (can happen only when blocksize < pagesize) and thus the
1890 * buffer_head can be reused when the file is extended again. So we end
1891 * up keeping around invalidated buffers attached to transactions'
1892 * BJ_Forget list just to stop checkpointing code from cleaning up
1893 * the transaction this buffer was modified in.
1885 */ 1894 */
1886 transaction = jh->b_transaction; 1895 transaction = jh->b_transaction;
1887 if (transaction == NULL) { 1896 if (transaction == NULL) {
@@ -1908,13 +1917,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1908 * committed, the buffer won't be needed any 1917 * committed, the buffer won't be needed any
1909 * longer. */ 1918 * longer. */
1910 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); 1919 JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
1911 ret = __dispose_buffer(jh, 1920 may_free = __dispose_buffer(jh,
1912 journal->j_running_transaction); 1921 journal->j_running_transaction);
1913 jbd2_journal_put_journal_head(jh); 1922 goto zap_buffer;
1914 spin_unlock(&journal->j_list_lock);
1915 jbd_unlock_bh_state(bh);
1916 write_unlock(&journal->j_state_lock);
1917 return ret;
1918 } else { 1923 } else {
1919 /* There is no currently-running transaction. So the 1924 /* There is no currently-running transaction. So the
1920 * orphan record which we wrote for this file must have 1925 * orphan record which we wrote for this file must have
@@ -1922,13 +1927,9 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1922 * the committing transaction, if it exists. */ 1927 * the committing transaction, if it exists. */
1923 if (journal->j_committing_transaction) { 1928 if (journal->j_committing_transaction) {
1924 JBUFFER_TRACE(jh, "give to committing trans"); 1929 JBUFFER_TRACE(jh, "give to committing trans");
1925 ret = __dispose_buffer(jh, 1930 may_free = __dispose_buffer(jh,
1926 journal->j_committing_transaction); 1931 journal->j_committing_transaction);
1927 jbd2_journal_put_journal_head(jh); 1932 goto zap_buffer;
1928 spin_unlock(&journal->j_list_lock);
1929 jbd_unlock_bh_state(bh);
1930 write_unlock(&journal->j_state_lock);
1931 return ret;
1932 } else { 1933 } else {
1933 /* The orphan record's transaction has 1934 /* The orphan record's transaction has
1934 * committed. We can cleanse this buffer */ 1935 * committed. We can cleanse this buffer */
@@ -1940,10 +1941,24 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1940 JBUFFER_TRACE(jh, "on committing transaction"); 1941 JBUFFER_TRACE(jh, "on committing transaction");
1941 /* 1942 /*
1942 * The buffer is committing, we simply cannot touch 1943 * The buffer is committing, we simply cannot touch
1943 * it. So we just set j_next_transaction to the 1944 * it. If the page is straddling i_size we have to wait
1944 * running transaction (if there is one) and mark 1945 * for commit and try again.
1945 * buffer as freed so that commit code knows it should 1946 */
1946 * clear dirty bits when it is done with the buffer. 1947 if (partial_page) {
1948 tid_t tid = journal->j_committing_transaction->t_tid;
1949
1950 jbd2_journal_put_journal_head(jh);
1951 spin_unlock(&journal->j_list_lock);
1952 jbd_unlock_bh_state(bh);
1953 write_unlock(&journal->j_state_lock);
1954 jbd2_log_wait_commit(journal, tid);
1955 goto retry;
1956 }
1957 /*
1958 * OK, buffer won't be reachable after truncate. We just set
1959 * j_next_transaction to the running transaction (if there is
1960 * one) and mark buffer as freed so that commit code knows it
1961 * should clear dirty bits when it is done with the buffer.
1947 */ 1962 */
1948 set_buffer_freed(bh); 1963 set_buffer_freed(bh);
1949 if (journal->j_running_transaction && buffer_jbddirty(bh)) 1964 if (journal->j_running_transaction && buffer_jbddirty(bh))
@@ -1966,6 +1981,15 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
1966 } 1981 }
1967 1982
1968zap_buffer: 1983zap_buffer:
1984 /*
1985 * This is tricky. Although the buffer is truncated, it may be reused
1986 * if blocksize < pagesize and it is attached to the page straddling
1987 * EOF. Since the buffer might have been added to BJ_Forget list of the
1988 * running transaction, journal_get_write_access() won't clear
1989 * b_modified and credit accounting gets confused. So clear b_modified
1990 * here.
1991 */
1992 jh->b_modified = 0;
1969 jbd2_journal_put_journal_head(jh); 1993 jbd2_journal_put_journal_head(jh);
1970zap_buffer_no_jh: 1994zap_buffer_no_jh:
1971 spin_unlock(&journal->j_list_lock); 1995 spin_unlock(&journal->j_list_lock);
@@ -2017,7 +2041,8 @@ void jbd2_journal_invalidatepage(journal_t *journal,
2017 if (offset <= curr_off) { 2041 if (offset <= curr_off) {
2018 /* This block is wholly outside the truncation point */ 2042 /* This block is wholly outside the truncation point */
2019 lock_buffer(bh); 2043 lock_buffer(bh);
2020 may_free &= journal_unmap_buffer(journal, bh); 2044 may_free &= journal_unmap_buffer(journal, bh,
2045 offset > 0);
2021 unlock_buffer(bh); 2046 unlock_buffer(bh);
2022 } 2047 }
2023 curr_off = next_off; 2048 curr_off = next_off;
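
A worked instance of the new partial_page argument, assuming 1k blocks on 4k pages: truncating a file to 5k invalidates the page covering bytes 4096..8191 starting at byte 1024 within it, so offset > 0 and every buffer on that page is unmapped with partial_page set, taking the wait-for-commit path above.

#include <stdio.h>

int main(void)
{
        unsigned long new_size = 5 * 1024;      /* truncate target */
        unsigned long page_start = 4096;        /* page straddling EOF */
        unsigned long offset = new_size - page_start;   /* 1024 */
        int partial_page = offset > 0;                  /* 1 */

        printf("offset %lu partial_page %d\n", offset, partial_page);
        return 0;
}
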
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index a4d56ac02e6c..5b387a4c293e 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -116,6 +116,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
116 if (unlikely(ret)) 116 if (unlikely(ret))
117 goto out; 117 goto out;
118 118
119 file_update_time(vma->vm_file);
119 ret = __block_page_mkwrite(vma, vmf, nilfs_get_block); 120 ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
120 if (ret) { 121 if (ret) {
121 nilfs_transaction_abort(inode->i_sb); 122 nilfs_transaction_abort(inode->i_sb);
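
Pattern of the nilfs2 fix, as a hedged sketch: the ->page_mkwrite handler now stamps the file times itself before handing off to the block-layer helper, so the timestamp update happens before the page lock is taken. Only file_update_time(), __block_page_mkwrite() and nilfs_get_block appear in the hunk; the surrounding function is illustrative, and the transaction bracketing nilfs2 performs around this is elided:

static int page_mkwrite_sketch(struct vm_area_struct *vma,
                               struct vm_fault *vmf)
{
        file_update_time(vma->vm_file); /* stamp mtime/ctime first */
        return __block_page_mkwrite(vma, vmf, nilfs_get_block);
}
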
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index 73e0b628e058..d39b824a780c 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -3,6 +3,7 @@
3 3
4#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ 4#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
5#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */ 5#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
6#define FALLOC_FL_NO_HIDE_STALE 0x04 /* reserved codepoint */
6 7
7#ifdef __KERNEL__ 8#ifdef __KERNEL__
8 9
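
With 0x04 now reserved, the flag combinations available to portable userspace are unchanged. A hedged example using the two defined flags (hole punching requires KEEP_SIZE to be set alongside PUNCH_HOLE):

#define _GNU_SOURCE
#include <fcntl.h>

/* Deallocate len bytes at offset without changing the file size. */
static int punch_hole(int fd, off_t offset, off_t len)
{
        return fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                         offset, len);
}
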
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 69d8a69ea831..d49b285385e8 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -26,19 +26,19 @@ TRACE_EVENT(ext4_free_inode,
26 TP_STRUCT__entry( 26 TP_STRUCT__entry(
27 __field( dev_t, dev ) 27 __field( dev_t, dev )
28 __field( ino_t, ino ) 28 __field( ino_t, ino )
29 __field( __u16, mode )
30 __field( uid_t, uid ) 29 __field( uid_t, uid )
31 __field( gid_t, gid ) 30 __field( gid_t, gid )
32 __field( __u64, blocks ) 31 __field( __u64, blocks )
32 __field( __u16, mode )
33 ), 33 ),
34 34
35 TP_fast_assign( 35 TP_fast_assign(
36 __entry->dev = inode->i_sb->s_dev; 36 __entry->dev = inode->i_sb->s_dev;
37 __entry->ino = inode->i_ino; 37 __entry->ino = inode->i_ino;
38 __entry->mode = inode->i_mode;
39 __entry->uid = i_uid_read(inode); 38 __entry->uid = i_uid_read(inode);
40 __entry->gid = i_gid_read(inode); 39 __entry->gid = i_gid_read(inode);
41 __entry->blocks = inode->i_blocks; 40 __entry->blocks = inode->i_blocks;
41 __entry->mode = inode->i_mode;
42 ), 42 ),
43 43
44 TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu", 44 TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
@@ -300,10 +300,10 @@ TRACE_EVENT(ext4_da_writepages,
300 __field( long, pages_skipped ) 300 __field( long, pages_skipped )
301 __field( loff_t, range_start ) 301 __field( loff_t, range_start )
302 __field( loff_t, range_end ) 302 __field( loff_t, range_end )
303 __field( pgoff_t, writeback_index )
303 __field( int, sync_mode ) 304 __field( int, sync_mode )
304 __field( char, for_kupdate ) 305 __field( char, for_kupdate )
305 __field( char, range_cyclic ) 306 __field( char, range_cyclic )
306 __field( pgoff_t, writeback_index )
307 ), 307 ),
308 308
309 TP_fast_assign( 309 TP_fast_assign(
@@ -313,14 +313,14 @@ TRACE_EVENT(ext4_da_writepages,
313 __entry->pages_skipped = wbc->pages_skipped; 313 __entry->pages_skipped = wbc->pages_skipped;
314 __entry->range_start = wbc->range_start; 314 __entry->range_start = wbc->range_start;
315 __entry->range_end = wbc->range_end; 315 __entry->range_end = wbc->range_end;
316 __entry->writeback_index = inode->i_mapping->writeback_index;
316 __entry->sync_mode = wbc->sync_mode; 317 __entry->sync_mode = wbc->sync_mode;
317 __entry->for_kupdate = wbc->for_kupdate; 318 __entry->for_kupdate = wbc->for_kupdate;
318 __entry->range_cyclic = wbc->range_cyclic; 319 __entry->range_cyclic = wbc->range_cyclic;
319 __entry->writeback_index = inode->i_mapping->writeback_index;
320 ), 320 ),
321 321
322 TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld " 322 TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
323 "range_start %lld range_end %lld sync_mode %d" 323 "range_start %lld range_end %lld sync_mode %d "
324 "for_kupdate %d range_cyclic %d writeback_index %lu", 324 "for_kupdate %d range_cyclic %d writeback_index %lu",
325 MAJOR(__entry->dev), MINOR(__entry->dev), 325 MAJOR(__entry->dev), MINOR(__entry->dev),
326 (unsigned long) __entry->ino, __entry->nr_to_write, 326 (unsigned long) __entry->ino, __entry->nr_to_write,
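
This hunk and the remaining tracepoint changes below all follow one pattern: dev and ino lead every event, and the narrower fields (__u16, __u8, char, small flags) migrate toward the tail. A plausible reading is consistency plus packing: declaring the wide, naturally aligned members first means any padding falls at the end of the ring-buffer record rather than between fields. A standalone illustration with plain structs (not the TRACE_EVENT machinery):

#include <stdio.h>

struct mixed {                  /* short first forces a 6-byte hole */
        unsigned short mode;
        unsigned long long blocks;
        unsigned int uid;
};

struct sorted {                 /* wide-to-narrow: tail padding only */
        unsigned long long blocks;
        unsigned int uid;
        unsigned short mode;
};

int main(void)
{
        printf("mixed %zu bytes, sorted %zu bytes\n",
               sizeof(struct mixed), sizeof(struct sorted));
        /* typically "mixed 24 bytes, sorted 16 bytes" on x86-64 */
        return 0;
}
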
@@ -382,8 +382,8 @@ TRACE_EVENT(ext4_da_writepages_result,
382 __field( int, ret ) 382 __field( int, ret )
383 __field( int, pages_written ) 383 __field( int, pages_written )
384 __field( long, pages_skipped ) 384 __field( long, pages_skipped )
385 __field( int, sync_mode )
386 __field( pgoff_t, writeback_index ) 385 __field( pgoff_t, writeback_index )
386 __field( int, sync_mode )
387 ), 387 ),
388 388
389 TP_fast_assign( 389 TP_fast_assign(
@@ -392,8 +392,8 @@ TRACE_EVENT(ext4_da_writepages_result,
392 __entry->ret = ret; 392 __entry->ret = ret;
393 __entry->pages_written = pages_written; 393 __entry->pages_written = pages_written;
394 __entry->pages_skipped = wbc->pages_skipped; 394 __entry->pages_skipped = wbc->pages_skipped;
395 __entry->sync_mode = wbc->sync_mode;
396 __entry->writeback_index = inode->i_mapping->writeback_index; 395 __entry->writeback_index = inode->i_mapping->writeback_index;
396 __entry->sync_mode = wbc->sync_mode;
397 ), 397 ),
398 398
399 TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld " 399 TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld "
@@ -411,16 +411,16 @@ DECLARE_EVENT_CLASS(ext4__page_op,
411 TP_ARGS(page), 411 TP_ARGS(page),
412 412
413 TP_STRUCT__entry( 413 TP_STRUCT__entry(
414 __field( pgoff_t, index )
415 __field( ino_t, ino )
416 __field( dev_t, dev ) 414 __field( dev_t, dev )
415 __field( ino_t, ino )
416 __field( pgoff_t, index )
417 417
418 ), 418 ),
419 419
420 TP_fast_assign( 420 TP_fast_assign(
421 __entry->index = page->index;
422 __entry->ino = page->mapping->host->i_ino;
423 __entry->dev = page->mapping->host->i_sb->s_dev; 421 __entry->dev = page->mapping->host->i_sb->s_dev;
422 __entry->ino = page->mapping->host->i_ino;
423 __entry->index = page->index;
424 ), 424 ),
425 425
426 TP_printk("dev %d,%d ino %lu page_index %lu", 426 TP_printk("dev %d,%d ino %lu page_index %lu",
@@ -456,18 +456,18 @@ TRACE_EVENT(ext4_invalidatepage,
456 TP_ARGS(page, offset), 456 TP_ARGS(page, offset),
457 457
458 TP_STRUCT__entry( 458 TP_STRUCT__entry(
459 __field( dev_t, dev )
460 __field( ino_t, ino )
459 __field( pgoff_t, index ) 461 __field( pgoff_t, index )
460 __field( unsigned long, offset ) 462 __field( unsigned long, offset )
461 __field( ino_t, ino )
462 __field( dev_t, dev )
463 463
464 ), 464 ),
465 465
466 TP_fast_assign( 466 TP_fast_assign(
467 __entry->dev = page->mapping->host->i_sb->s_dev;
468 __entry->ino = page->mapping->host->i_ino;
467 __entry->index = page->index; 469 __entry->index = page->index;
468 __entry->offset = offset; 470 __entry->offset = offset;
469 __entry->ino = page->mapping->host->i_ino;
470 __entry->dev = page->mapping->host->i_sb->s_dev;
471 ), 471 ),
472 472
473 TP_printk("dev %d,%d ino %lu page_index %lu offset %lu", 473 TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
@@ -510,8 +510,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
510 __field( dev_t, dev ) 510 __field( dev_t, dev )
511 __field( ino_t, ino ) 511 __field( ino_t, ino )
512 __field( __u64, pa_pstart ) 512 __field( __u64, pa_pstart )
513 __field( __u32, pa_len )
514 __field( __u64, pa_lstart ) 513 __field( __u64, pa_lstart )
514 __field( __u32, pa_len )
515 515
516 ), 516 ),
517 517
@@ -519,8 +519,8 @@ DECLARE_EVENT_CLASS(ext4__mb_new_pa,
519 __entry->dev = ac->ac_sb->s_dev; 519 __entry->dev = ac->ac_sb->s_dev;
520 __entry->ino = ac->ac_inode->i_ino; 520 __entry->ino = ac->ac_inode->i_ino;
521 __entry->pa_pstart = pa->pa_pstart; 521 __entry->pa_pstart = pa->pa_pstart;
522 __entry->pa_len = pa->pa_len;
523 __entry->pa_lstart = pa->pa_lstart; 522 __entry->pa_lstart = pa->pa_lstart;
523 __entry->pa_len = pa->pa_len;
524 ), 524 ),
525 525
526 TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu", 526 TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
@@ -645,7 +645,6 @@ TRACE_EVENT(ext4_request_blocks,
645 TP_STRUCT__entry( 645 TP_STRUCT__entry(
646 __field( dev_t, dev ) 646 __field( dev_t, dev )
647 __field( ino_t, ino ) 647 __field( ino_t, ino )
648 __field( unsigned int, flags )
649 __field( unsigned int, len ) 648 __field( unsigned int, len )
650 __field( __u32, logical ) 649 __field( __u32, logical )
651 __field( __u32, lleft ) 650 __field( __u32, lleft )
@@ -653,12 +652,12 @@ TRACE_EVENT(ext4_request_blocks,
653 __field( __u64, goal ) 652 __field( __u64, goal )
654 __field( __u64, pleft ) 653 __field( __u64, pleft )
655 __field( __u64, pright ) 654 __field( __u64, pright )
655 __field( unsigned int, flags )
656 ), 656 ),
657 657
658 TP_fast_assign( 658 TP_fast_assign(
659 __entry->dev = ar->inode->i_sb->s_dev; 659 __entry->dev = ar->inode->i_sb->s_dev;
660 __entry->ino = ar->inode->i_ino; 660 __entry->ino = ar->inode->i_ino;
661 __entry->flags = ar->flags;
662 __entry->len = ar->len; 661 __entry->len = ar->len;
663 __entry->logical = ar->logical; 662 __entry->logical = ar->logical;
664 __entry->goal = ar->goal; 663 __entry->goal = ar->goal;
@@ -666,6 +665,7 @@ TRACE_EVENT(ext4_request_blocks,
666 __entry->lright = ar->lright; 665 __entry->lright = ar->lright;
667 __entry->pleft = ar->pleft; 666 __entry->pleft = ar->pleft;
668 __entry->pright = ar->pright; 667 __entry->pright = ar->pright;
668 __entry->flags = ar->flags;
669 ), 669 ),
670 670
671 TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu " 671 TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu "
@@ -686,7 +686,6 @@ TRACE_EVENT(ext4_allocate_blocks,
686 __field( dev_t, dev ) 686 __field( dev_t, dev )
687 __field( ino_t, ino ) 687 __field( ino_t, ino )
688 __field( __u64, block ) 688 __field( __u64, block )
689 __field( unsigned int, flags )
690 __field( unsigned int, len ) 689 __field( unsigned int, len )
691 __field( __u32, logical ) 690 __field( __u32, logical )
692 __field( __u32, lleft ) 691 __field( __u32, lleft )
@@ -694,13 +693,13 @@ TRACE_EVENT(ext4_allocate_blocks,
694 __field( __u64, goal ) 693 __field( __u64, goal )
695 __field( __u64, pleft ) 694 __field( __u64, pleft )
696 __field( __u64, pright ) 695 __field( __u64, pright )
696 __field( unsigned int, flags )
697 ), 697 ),
698 698
699 TP_fast_assign( 699 TP_fast_assign(
700 __entry->dev = ar->inode->i_sb->s_dev; 700 __entry->dev = ar->inode->i_sb->s_dev;
701 __entry->ino = ar->inode->i_ino; 701 __entry->ino = ar->inode->i_ino;
702 __entry->block = block; 702 __entry->block = block;
703 __entry->flags = ar->flags;
704 __entry->len = ar->len; 703 __entry->len = ar->len;
705 __entry->logical = ar->logical; 704 __entry->logical = ar->logical;
706 __entry->goal = ar->goal; 705 __entry->goal = ar->goal;
@@ -708,6 +707,7 @@ TRACE_EVENT(ext4_allocate_blocks,
708 __entry->lright = ar->lright; 707 __entry->lright = ar->lright;
709 __entry->pleft = ar->pleft; 708 __entry->pleft = ar->pleft;
710 __entry->pright = ar->pright; 709 __entry->pright = ar->pright;
710 __entry->flags = ar->flags;
711 ), 711 ),
712 712
713 TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u " 713 TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u "
@@ -728,19 +728,19 @@ TRACE_EVENT(ext4_free_blocks,
728 TP_STRUCT__entry( 728 TP_STRUCT__entry(
729 __field( dev_t, dev ) 729 __field( dev_t, dev )
730 __field( ino_t, ino ) 730 __field( ino_t, ino )
731 __field( __u16, mode )
732 __field( __u64, block ) 731 __field( __u64, block )
733 __field( unsigned long, count ) 732 __field( unsigned long, count )
734 __field( int, flags ) 733 __field( int, flags )
734 __field( __u16, mode )
735 ), 735 ),
736 736
737 TP_fast_assign( 737 TP_fast_assign(
738 __entry->dev = inode->i_sb->s_dev; 738 __entry->dev = inode->i_sb->s_dev;
739 __entry->ino = inode->i_ino; 739 __entry->ino = inode->i_ino;
740 __entry->mode = inode->i_mode;
741 __entry->block = block; 740 __entry->block = block;
742 __entry->count = count; 741 __entry->count = count;
743 __entry->flags = flags; 742 __entry->flags = flags;
743 __entry->mode = inode->i_mode;
744 ), 744 ),
745 745
746 TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d", 746 TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
@@ -783,15 +783,15 @@ TRACE_EVENT(ext4_sync_file_exit,
783 TP_ARGS(inode, ret), 783 TP_ARGS(inode, ret),
784 784
785 TP_STRUCT__entry( 785 TP_STRUCT__entry(
786 __field( int, ret )
787 __field( ino_t, ino )
788 __field( dev_t, dev ) 786 __field( dev_t, dev )
787 __field( ino_t, ino )
788 __field( int, ret )
789 ), 789 ),
790 790
791 TP_fast_assign( 791 TP_fast_assign(
792 __entry->ret = ret;
793 __entry->ino = inode->i_ino;
794 __entry->dev = inode->i_sb->s_dev; 792 __entry->dev = inode->i_sb->s_dev;
793 __entry->ino = inode->i_ino;
794 __entry->ret = ret;
795 ), 795 ),
796 796
797 TP_printk("dev %d,%d ino %lu ret %d", 797 TP_printk("dev %d,%d ino %lu ret %d",
@@ -854,12 +854,6 @@ TRACE_EVENT(ext4_mballoc_alloc,
854 TP_STRUCT__entry( 854 TP_STRUCT__entry(
855 __field( dev_t, dev ) 855 __field( dev_t, dev )
856 __field( ino_t, ino ) 856 __field( ino_t, ino )
857 __field( __u16, found )
858 __field( __u16, groups )
859 __field( __u16, buddy )
860 __field( __u16, flags )
861 __field( __u16, tail )
862 __field( __u8, cr )
863 __field( __u32, orig_logical ) 857 __field( __u32, orig_logical )
864 __field( int, orig_start ) 858 __field( int, orig_start )
865 __field( __u32, orig_group ) 859 __field( __u32, orig_group )
@@ -872,17 +866,17 @@ TRACE_EVENT(ext4_mballoc_alloc,
872 __field( int, result_start ) 866 __field( int, result_start )
873 __field( __u32, result_group ) 867 __field( __u32, result_group )
874 __field( int, result_len ) 868 __field( int, result_len )
869 __field( __u16, found )
870 __field( __u16, groups )
871 __field( __u16, buddy )
872 __field( __u16, flags )
873 __field( __u16, tail )
874 __field( __u8, cr )
875 ), 875 ),
876 876
877 TP_fast_assign( 877 TP_fast_assign(
878 __entry->dev = ac->ac_inode->i_sb->s_dev; 878 __entry->dev = ac->ac_inode->i_sb->s_dev;
879 __entry->ino = ac->ac_inode->i_ino; 879 __entry->ino = ac->ac_inode->i_ino;
880 __entry->found = ac->ac_found;
881 __entry->flags = ac->ac_flags;
882 __entry->groups = ac->ac_groups_scanned;
883 __entry->buddy = ac->ac_buddy;
884 __entry->tail = ac->ac_tail;
885 __entry->cr = ac->ac_criteria;
886 __entry->orig_logical = ac->ac_o_ex.fe_logical; 880 __entry->orig_logical = ac->ac_o_ex.fe_logical;
887 __entry->orig_start = ac->ac_o_ex.fe_start; 881 __entry->orig_start = ac->ac_o_ex.fe_start;
888 __entry->orig_group = ac->ac_o_ex.fe_group; 882 __entry->orig_group = ac->ac_o_ex.fe_group;
@@ -895,6 +889,12 @@ TRACE_EVENT(ext4_mballoc_alloc,
895 __entry->result_start = ac->ac_f_ex.fe_start; 889 __entry->result_start = ac->ac_f_ex.fe_start;
896 __entry->result_group = ac->ac_f_ex.fe_group; 890 __entry->result_group = ac->ac_f_ex.fe_group;
897 __entry->result_len = ac->ac_f_ex.fe_len; 891 __entry->result_len = ac->ac_f_ex.fe_len;
892 __entry->found = ac->ac_found;
893 __entry->flags = ac->ac_flags;
894 __entry->groups = ac->ac_groups_scanned;
895 __entry->buddy = ac->ac_buddy;
896 __entry->tail = ac->ac_tail;
897 __entry->cr = ac->ac_criteria;
898 ), 898 ),
899 899
900 TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u " 900 TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
@@ -1015,17 +1015,17 @@ TRACE_EVENT(ext4_forget,
1015 TP_STRUCT__entry( 1015 TP_STRUCT__entry(
1016 __field( dev_t, dev ) 1016 __field( dev_t, dev )
1017 __field( ino_t, ino ) 1017 __field( ino_t, ino )
1018 __field( __u16, mode )
1019 __field( int, is_metadata )
1020 __field( __u64, block ) 1018 __field( __u64, block )
1019 __field( int, is_metadata )
1020 __field( __u16, mode )
1021 ), 1021 ),
1022 1022
1023 TP_fast_assign( 1023 TP_fast_assign(
1024 __entry->dev = inode->i_sb->s_dev; 1024 __entry->dev = inode->i_sb->s_dev;
1025 __entry->ino = inode->i_ino; 1025 __entry->ino = inode->i_ino;
1026 __entry->mode = inode->i_mode;
1027 __entry->is_metadata = is_metadata;
1028 __entry->block = block; 1026 __entry->block = block;
1027 __entry->is_metadata = is_metadata;
1028 __entry->mode = inode->i_mode;
1029 ), 1029 ),
1030 1030
1031 TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu", 1031 TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
@@ -1042,19 +1042,18 @@ TRACE_EVENT(ext4_da_update_reserve_space,
1042 TP_STRUCT__entry( 1042 TP_STRUCT__entry(
1043 __field( dev_t, dev ) 1043 __field( dev_t, dev )
1044 __field( ino_t, ino ) 1044 __field( ino_t, ino )
1045 __field( __u16, mode )
1046 __field( __u64, i_blocks ) 1045 __field( __u64, i_blocks )
1047 __field( int, used_blocks ) 1046 __field( int, used_blocks )
1048 __field( int, reserved_data_blocks ) 1047 __field( int, reserved_data_blocks )
1049 __field( int, reserved_meta_blocks ) 1048 __field( int, reserved_meta_blocks )
1050 __field( int, allocated_meta_blocks ) 1049 __field( int, allocated_meta_blocks )
1051 __field( int, quota_claim ) 1050 __field( int, quota_claim )
1051 __field( __u16, mode )
1052 ), 1052 ),
1053 1053
1054 TP_fast_assign( 1054 TP_fast_assign(
1055 __entry->dev = inode->i_sb->s_dev; 1055 __entry->dev = inode->i_sb->s_dev;
1056 __entry->ino = inode->i_ino; 1056 __entry->ino = inode->i_ino;
1057 __entry->mode = inode->i_mode;
1058 __entry->i_blocks = inode->i_blocks; 1057 __entry->i_blocks = inode->i_blocks;
1059 __entry->used_blocks = used_blocks; 1058 __entry->used_blocks = used_blocks;
1060 __entry->reserved_data_blocks = 1059 __entry->reserved_data_blocks =
@@ -1064,6 +1063,7 @@ TRACE_EVENT(ext4_da_update_reserve_space,
1064 __entry->allocated_meta_blocks = 1063 __entry->allocated_meta_blocks =
1065 EXT4_I(inode)->i_allocated_meta_blocks; 1064 EXT4_I(inode)->i_allocated_meta_blocks;
1066 __entry->quota_claim = quota_claim; 1065 __entry->quota_claim = quota_claim;
1066 __entry->mode = inode->i_mode;
1067 ), 1067 ),
1068 1068
1069 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d " 1069 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d "
@@ -1085,21 +1085,21 @@ TRACE_EVENT(ext4_da_reserve_space,
1085 TP_STRUCT__entry( 1085 TP_STRUCT__entry(
1086 __field( dev_t, dev ) 1086 __field( dev_t, dev )
1087 __field( ino_t, ino ) 1087 __field( ino_t, ino )
1088 __field( __u16, mode )
1089 __field( __u64, i_blocks ) 1088 __field( __u64, i_blocks )
1090 __field( int, md_needed ) 1089 __field( int, md_needed )
1091 __field( int, reserved_data_blocks ) 1090 __field( int, reserved_data_blocks )
1092 __field( int, reserved_meta_blocks ) 1091 __field( int, reserved_meta_blocks )
1092 __field( __u16, mode )
1093 ), 1093 ),
1094 1094
1095 TP_fast_assign( 1095 TP_fast_assign(
1096 __entry->dev = inode->i_sb->s_dev; 1096 __entry->dev = inode->i_sb->s_dev;
1097 __entry->ino = inode->i_ino; 1097 __entry->ino = inode->i_ino;
1098 __entry->mode = inode->i_mode;
1099 __entry->i_blocks = inode->i_blocks; 1098 __entry->i_blocks = inode->i_blocks;
1100 __entry->md_needed = md_needed; 1099 __entry->md_needed = md_needed;
1101 __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; 1100 __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
1102 __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; 1101 __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
1102 __entry->mode = inode->i_mode;
1103 ), 1103 ),
1104 1104
1105 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d " 1105 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d "
@@ -1119,23 +1119,23 @@ TRACE_EVENT(ext4_da_release_space,
1119 TP_STRUCT__entry( 1119 TP_STRUCT__entry(
1120 __field( dev_t, dev ) 1120 __field( dev_t, dev )
1121 __field( ino_t, ino ) 1121 __field( ino_t, ino )
1122 __field( __u16, mode )
1123 __field( __u64, i_blocks ) 1122 __field( __u64, i_blocks )
1124 __field( int, freed_blocks ) 1123 __field( int, freed_blocks )
1125 __field( int, reserved_data_blocks ) 1124 __field( int, reserved_data_blocks )
1126 __field( int, reserved_meta_blocks ) 1125 __field( int, reserved_meta_blocks )
1127 __field( int, allocated_meta_blocks ) 1126 __field( int, allocated_meta_blocks )
1127 __field( __u16, mode )
1128 ), 1128 ),
1129 1129
1130 TP_fast_assign( 1130 TP_fast_assign(
1131 __entry->dev = inode->i_sb->s_dev; 1131 __entry->dev = inode->i_sb->s_dev;
1132 __entry->ino = inode->i_ino; 1132 __entry->ino = inode->i_ino;
1133 __entry->mode = inode->i_mode;
1134 __entry->i_blocks = inode->i_blocks; 1133 __entry->i_blocks = inode->i_blocks;
1135 __entry->freed_blocks = freed_blocks; 1134 __entry->freed_blocks = freed_blocks;
1136 __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks; 1135 __entry->reserved_data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
1137 __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks; 1136 __entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
1138 __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks; 1137 __entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
1138 __entry->mode = inode->i_mode;
1139 ), 1139 ),
1140 1140
1141 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d " 1141 TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d "
@@ -1203,16 +1203,16 @@ TRACE_EVENT(ext4_direct_IO_enter,
1203 TP_ARGS(inode, offset, len, rw), 1203 TP_ARGS(inode, offset, len, rw),
1204 1204
1205 TP_STRUCT__entry( 1205 TP_STRUCT__entry(
1206 __field( ino_t, ino )
1207 __field( dev_t, dev ) 1206 __field( dev_t, dev )
1207 __field( ino_t, ino )
1208 __field( loff_t, pos ) 1208 __field( loff_t, pos )
1209 __field( unsigned long, len ) 1209 __field( unsigned long, len )
1210 __field( int, rw ) 1210 __field( int, rw )
1211 ), 1211 ),
1212 1212
1213 TP_fast_assign( 1213 TP_fast_assign(
1214 __entry->ino = inode->i_ino;
1215 __entry->dev = inode->i_sb->s_dev; 1214 __entry->dev = inode->i_sb->s_dev;
1215 __entry->ino = inode->i_ino;
1216 __entry->pos = offset; 1216 __entry->pos = offset;
1217 __entry->len = len; 1217 __entry->len = len;
1218 __entry->rw = rw; 1218 __entry->rw = rw;
@@ -1231,8 +1231,8 @@ TRACE_EVENT(ext4_direct_IO_exit,
1231 TP_ARGS(inode, offset, len, rw, ret), 1231 TP_ARGS(inode, offset, len, rw, ret),
1232 1232
1233 TP_STRUCT__entry( 1233 TP_STRUCT__entry(
1234 __field( ino_t, ino )
1235 __field( dev_t, dev ) 1234 __field( dev_t, dev )
1235 __field( ino_t, ino )
1236 __field( loff_t, pos ) 1236 __field( loff_t, pos )
1237 __field( unsigned long, len ) 1237 __field( unsigned long, len )
1238 __field( int, rw ) 1238 __field( int, rw )
@@ -1240,8 +1240,8 @@ TRACE_EVENT(ext4_direct_IO_exit,
1240 ), 1240 ),
1241 1241
1242 TP_fast_assign( 1242 TP_fast_assign(
1243 __entry->ino = inode->i_ino;
1244 __entry->dev = inode->i_sb->s_dev; 1243 __entry->dev = inode->i_sb->s_dev;
1244 __entry->ino = inode->i_ino;
1245 __entry->pos = offset; 1245 __entry->pos = offset;
1246 __entry->len = len; 1246 __entry->len = len;
1247 __entry->rw = rw; 1247 __entry->rw = rw;
@@ -1261,16 +1261,16 @@ TRACE_EVENT(ext4_fallocate_enter,
1261 TP_ARGS(inode, offset, len, mode), 1261 TP_ARGS(inode, offset, len, mode),
1262 1262
1263 TP_STRUCT__entry( 1263 TP_STRUCT__entry(
1264 __field( ino_t, ino )
1265 __field( dev_t, dev ) 1264 __field( dev_t, dev )
1265 __field( ino_t, ino )
1266 __field( loff_t, pos ) 1266 __field( loff_t, pos )
1267 __field( loff_t, len ) 1267 __field( loff_t, len )
1268 __field( int, mode ) 1268 __field( int, mode )
1269 ), 1269 ),
1270 1270
1271 TP_fast_assign( 1271 TP_fast_assign(
1272 __entry->ino = inode->i_ino;
1273 __entry->dev = inode->i_sb->s_dev; 1272 __entry->dev = inode->i_sb->s_dev;
1273 __entry->ino = inode->i_ino;
1274 __entry->pos = offset; 1274 __entry->pos = offset;
1275 __entry->len = len; 1275 __entry->len = len;
1276 __entry->mode = mode; 1276 __entry->mode = mode;
@@ -1289,16 +1289,16 @@ TRACE_EVENT(ext4_fallocate_exit,
1289 TP_ARGS(inode, offset, max_blocks, ret), 1289 TP_ARGS(inode, offset, max_blocks, ret),
1290 1290
1291 TP_STRUCT__entry( 1291 TP_STRUCT__entry(
1292 __field( ino_t, ino )
1293 __field( dev_t, dev ) 1292 __field( dev_t, dev )
1293 __field( ino_t, ino )
1294 __field( loff_t, pos ) 1294 __field( loff_t, pos )
1295 __field( unsigned int, blocks ) 1295 __field( unsigned int, blocks )
1296 __field( int, ret ) 1296 __field( int, ret )
1297 ), 1297 ),
1298 1298
1299 TP_fast_assign( 1299 TP_fast_assign(
1300 __entry->ino = inode->i_ino;
1301 __entry->dev = inode->i_sb->s_dev; 1300 __entry->dev = inode->i_sb->s_dev;
1301 __entry->ino = inode->i_ino;
1302 __entry->pos = offset; 1302 __entry->pos = offset;
1303 __entry->blocks = max_blocks; 1303 __entry->blocks = max_blocks;
1304 __entry->ret = ret; 1304 __entry->ret = ret;
@@ -1317,17 +1317,17 @@ TRACE_EVENT(ext4_unlink_enter,
1317 TP_ARGS(parent, dentry), 1317 TP_ARGS(parent, dentry),
1318 1318
1319 TP_STRUCT__entry( 1319 TP_STRUCT__entry(
1320 __field( ino_t, parent ) 1320 __field( dev_t, dev )
1321 __field( ino_t, ino ) 1321 __field( ino_t, ino )
1322 __field( ino_t, parent )
1322 __field( loff_t, size ) 1323 __field( loff_t, size )
1323 __field( dev_t, dev )
1324 ), 1324 ),
1325 1325
1326 TP_fast_assign( 1326 TP_fast_assign(
1327 __entry->parent = parent->i_ino; 1327 __entry->dev = dentry->d_inode->i_sb->s_dev;
1328 __entry->ino = dentry->d_inode->i_ino; 1328 __entry->ino = dentry->d_inode->i_ino;
1329 __entry->parent = parent->i_ino;
1329 __entry->size = dentry->d_inode->i_size; 1330 __entry->size = dentry->d_inode->i_size;
1330 __entry->dev = dentry->d_inode->i_sb->s_dev;
1331 ), 1331 ),
1332 1332
1333 TP_printk("dev %d,%d ino %lu size %lld parent %lu", 1333 TP_printk("dev %d,%d ino %lu size %lld parent %lu",
@@ -1342,14 +1342,14 @@ TRACE_EVENT(ext4_unlink_exit,
1342 TP_ARGS(dentry, ret), 1342 TP_ARGS(dentry, ret),
1343 1343
1344 TP_STRUCT__entry( 1344 TP_STRUCT__entry(
1345 __field( ino_t, ino )
1346 __field( dev_t, dev ) 1345 __field( dev_t, dev )
1346 __field( ino_t, ino )
1347 __field( int, ret ) 1347 __field( int, ret )
1348 ), 1348 ),
1349 1349
1350 TP_fast_assign( 1350 TP_fast_assign(
1351 __entry->ino = dentry->d_inode->i_ino;
1352 __entry->dev = dentry->d_inode->i_sb->s_dev; 1351 __entry->dev = dentry->d_inode->i_sb->s_dev;
1352 __entry->ino = dentry->d_inode->i_ino;
1353 __entry->ret = ret; 1353 __entry->ret = ret;
1354 ), 1354 ),
1355 1355
@@ -1365,14 +1365,14 @@ DECLARE_EVENT_CLASS(ext4__truncate,
1365 TP_ARGS(inode), 1365 TP_ARGS(inode),
1366 1366
1367 TP_STRUCT__entry( 1367 TP_STRUCT__entry(
1368 __field( ino_t, ino ) 1368 __field( dev_t, dev )
1369 __field( dev_t, dev ) 1369 __field( ino_t, ino )
1370 __field( __u64, blocks ) 1370 __field( __u64, blocks )
1371 ), 1371 ),
1372 1372
1373 TP_fast_assign( 1373 TP_fast_assign(
1374 __entry->ino = inode->i_ino;
1375 __entry->dev = inode->i_sb->s_dev; 1374 __entry->dev = inode->i_sb->s_dev;
1375 __entry->ino = inode->i_ino;
1376 __entry->blocks = inode->i_blocks; 1376 __entry->blocks = inode->i_blocks;
1377 ), 1377 ),
1378 1378
@@ -1403,8 +1403,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
1403 TP_ARGS(inode, map, ux), 1403 TP_ARGS(inode, map, ux),
1404 1404
1405 TP_STRUCT__entry( 1405 TP_STRUCT__entry(
1406 __field( ino_t, ino )
1407 __field( dev_t, dev ) 1406 __field( dev_t, dev )
1407 __field( ino_t, ino )
1408 __field( ext4_lblk_t, m_lblk ) 1408 __field( ext4_lblk_t, m_lblk )
1409 __field( unsigned, m_len ) 1409 __field( unsigned, m_len )
1410 __field( ext4_lblk_t, u_lblk ) 1410 __field( ext4_lblk_t, u_lblk )
@@ -1413,8 +1413,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_enter,
1413 ), 1413 ),
1414 1414
1415 TP_fast_assign( 1415 TP_fast_assign(
1416 __entry->ino = inode->i_ino;
1417 __entry->dev = inode->i_sb->s_dev; 1416 __entry->dev = inode->i_sb->s_dev;
1417 __entry->ino = inode->i_ino;
1418 __entry->m_lblk = map->m_lblk; 1418 __entry->m_lblk = map->m_lblk;
1419 __entry->m_len = map->m_len; 1419 __entry->m_len = map->m_len;
1420 __entry->u_lblk = le32_to_cpu(ux->ee_block); 1420 __entry->u_lblk = le32_to_cpu(ux->ee_block);
@@ -1441,8 +1441,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
1441 TP_ARGS(inode, map, ux, ix), 1441 TP_ARGS(inode, map, ux, ix),
1442 1442
1443 TP_STRUCT__entry( 1443 TP_STRUCT__entry(
1444 __field( ino_t, ino )
1445 __field( dev_t, dev ) 1444 __field( dev_t, dev )
1445 __field( ino_t, ino )
1446 __field( ext4_lblk_t, m_lblk ) 1446 __field( ext4_lblk_t, m_lblk )
1447 __field( unsigned, m_len ) 1447 __field( unsigned, m_len )
1448 __field( ext4_lblk_t, u_lblk ) 1448 __field( ext4_lblk_t, u_lblk )
@@ -1454,8 +1454,8 @@ TRACE_EVENT(ext4_ext_convert_to_initialized_fastpath,
1454 ), 1454 ),
1455 1455
1456 TP_fast_assign( 1456 TP_fast_assign(
1457 __entry->ino = inode->i_ino;
1458 __entry->dev = inode->i_sb->s_dev; 1457 __entry->dev = inode->i_sb->s_dev;
1458 __entry->ino = inode->i_ino;
1459 __entry->m_lblk = map->m_lblk; 1459 __entry->m_lblk = map->m_lblk;
1460 __entry->m_len = map->m_len; 1460 __entry->m_len = map->m_len;
1461 __entry->u_lblk = le32_to_cpu(ux->ee_block); 1461 __entry->u_lblk = le32_to_cpu(ux->ee_block);
@@ -1483,16 +1483,16 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
1483 TP_ARGS(inode, lblk, len, flags), 1483 TP_ARGS(inode, lblk, len, flags),
1484 1484
1485 TP_STRUCT__entry( 1485 TP_STRUCT__entry(
1486 __field( ino_t, ino ) 1486 __field( dev_t, dev )
1487 __field( dev_t, dev ) 1487 __field( ino_t, ino )
1488 __field( ext4_lblk_t, lblk ) 1488 __field( ext4_lblk_t, lblk )
1489 __field( unsigned int, len ) 1489 __field( unsigned int, len )
1490 __field( unsigned int, flags ) 1490 __field( unsigned int, flags )
1491 ), 1491 ),
1492 1492
1493 TP_fast_assign( 1493 TP_fast_assign(
1494 __entry->ino = inode->i_ino;
1495 __entry->dev = inode->i_sb->s_dev; 1494 __entry->dev = inode->i_sb->s_dev;
1495 __entry->ino = inode->i_ino;
1496 __entry->lblk = lblk; 1496 __entry->lblk = lblk;
1497 __entry->len = len; 1497 __entry->len = len;
1498 __entry->flags = flags; 1498 __entry->flags = flags;
@@ -1525,19 +1525,19 @@ DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
1525 TP_ARGS(inode, lblk, pblk, len, ret), 1525 TP_ARGS(inode, lblk, pblk, len, ret),
1526 1526
1527 TP_STRUCT__entry( 1527 TP_STRUCT__entry(
1528 __field( ino_t, ino )
1529 __field( dev_t, dev ) 1528 __field( dev_t, dev )
1530 __field( ext4_lblk_t, lblk ) 1529 __field( ino_t, ino )
1531 __field( ext4_fsblk_t, pblk ) 1530 __field( ext4_fsblk_t, pblk )
1531 __field( ext4_lblk_t, lblk )
1532 __field( unsigned int, len ) 1532 __field( unsigned int, len )
1533 __field( int, ret ) 1533 __field( int, ret )
1534 ), 1534 ),
1535 1535
1536 TP_fast_assign( 1536 TP_fast_assign(
1537 __entry->ino = inode->i_ino;
1538 __entry->dev = inode->i_sb->s_dev; 1537 __entry->dev = inode->i_sb->s_dev;
1539 __entry->lblk = lblk; 1538 __entry->ino = inode->i_ino;
1540 __entry->pblk = pblk; 1539 __entry->pblk = pblk;
1540 __entry->lblk = lblk;
1541 __entry->len = len; 1541 __entry->len = len;
1542 __entry->ret = ret; 1542 __entry->ret = ret;
1543 ), 1543 ),
@@ -1569,17 +1569,17 @@ TRACE_EVENT(ext4_ext_load_extent,
1569 TP_ARGS(inode, lblk, pblk), 1569 TP_ARGS(inode, lblk, pblk),
1570 1570
1571 TP_STRUCT__entry( 1571 TP_STRUCT__entry(
1572 __field( ino_t, ino )
1573 __field( dev_t, dev ) 1572 __field( dev_t, dev )
1574 __field( ext4_lblk_t, lblk ) 1573 __field( ino_t, ino )
1575 __field( ext4_fsblk_t, pblk ) 1574 __field( ext4_fsblk_t, pblk )
1575 __field( ext4_lblk_t, lblk )
1576 ), 1576 ),
1577 1577
1578 TP_fast_assign( 1578 TP_fast_assign(
1579 __entry->ino = inode->i_ino;
1580 __entry->dev = inode->i_sb->s_dev; 1579 __entry->dev = inode->i_sb->s_dev;
1581 __entry->lblk = lblk; 1580 __entry->ino = inode->i_ino;
1582 __entry->pblk = pblk; 1581 __entry->pblk = pblk;
1582 __entry->lblk = lblk;
1583 ), 1583 ),
1584 1584
1585 TP_printk("dev %d,%d ino %lu lblk %u pblk %llu", 1585 TP_printk("dev %d,%d ino %lu lblk %u pblk %llu",
@@ -1594,13 +1594,13 @@ TRACE_EVENT(ext4_load_inode,
1594 TP_ARGS(inode), 1594 TP_ARGS(inode),
1595 1595
1596 TP_STRUCT__entry( 1596 TP_STRUCT__entry(
1597 __field( ino_t, ino )
1598 __field( dev_t, dev ) 1597 __field( dev_t, dev )
1598 __field( ino_t, ino )
1599 ), 1599 ),
1600 1600
1601 TP_fast_assign( 1601 TP_fast_assign(
1602 __entry->ino = inode->i_ino;
1603 __entry->dev = inode->i_sb->s_dev; 1602 __entry->dev = inode->i_sb->s_dev;
1603 __entry->ino = inode->i_ino;
1604 ), 1604 ),
1605 1605
1606 TP_printk("dev %d,%d ino %ld", 1606 TP_printk("dev %d,%d ino %ld",
@@ -1615,14 +1615,14 @@ TRACE_EVENT(ext4_journal_start,
1615 1615
1616 TP_STRUCT__entry( 1616 TP_STRUCT__entry(
1617 __field( dev_t, dev ) 1617 __field( dev_t, dev )
1618 __field( int, nblocks )
1619 __field(unsigned long, ip ) 1618 __field(unsigned long, ip )
1619 __field( int, nblocks )
1620 ), 1620 ),
1621 1621
1622 TP_fast_assign( 1622 TP_fast_assign(
1623 __entry->dev = sb->s_dev; 1623 __entry->dev = sb->s_dev;
1624 __entry->nblocks = nblocks;
1625 __entry->ip = IP; 1624 __entry->ip = IP;
1625 __entry->nblocks = nblocks;
1626 ), 1626 ),
1627 1627
1628 TP_printk("dev %d,%d nblocks %d caller %pF", 1628 TP_printk("dev %d,%d nblocks %d caller %pF",
@@ -1686,23 +1686,23 @@ TRACE_EVENT(ext4_ext_handle_uninitialized_extents,
1686 TP_ARGS(inode, map, allocated, newblock), 1686 TP_ARGS(inode, map, allocated, newblock),
1687 1687
1688 TP_STRUCT__entry( 1688 TP_STRUCT__entry(
1689 __field( ino_t, ino )
1690 __field( dev_t, dev ) 1689 __field( dev_t, dev )
1690 __field( ino_t, ino )
1691 __field( int, flags )
1691 __field( ext4_lblk_t, lblk ) 1692 __field( ext4_lblk_t, lblk )
1692 __field( ext4_fsblk_t, pblk ) 1693 __field( ext4_fsblk_t, pblk )
1693 __field( unsigned int, len ) 1694 __field( unsigned int, len )
1694 __field( int, flags )
1695 __field( unsigned int, allocated ) 1695 __field( unsigned int, allocated )
1696 __field( ext4_fsblk_t, newblk ) 1696 __field( ext4_fsblk_t, newblk )
1697 ), 1697 ),
1698 1698
1699 TP_fast_assign( 1699 TP_fast_assign(
1700 __entry->ino = inode->i_ino;
1701 __entry->dev = inode->i_sb->s_dev; 1700 __entry->dev = inode->i_sb->s_dev;
1701 __entry->ino = inode->i_ino;
1702 __entry->flags = map->m_flags;
1702 __entry->lblk = map->m_lblk; 1703 __entry->lblk = map->m_lblk;
1703 __entry->pblk = map->m_pblk; 1704 __entry->pblk = map->m_pblk;
1704 __entry->len = map->m_len; 1705 __entry->len = map->m_len;
1705 __entry->flags = map->m_flags;
1706 __entry->allocated = allocated; 1706 __entry->allocated = allocated;
1707 __entry->newblk = newblock; 1707 __entry->newblk = newblock;
1708 ), 1708 ),
@@ -1724,19 +1724,19 @@ TRACE_EVENT(ext4_get_implied_cluster_alloc_exit,
1724 1724
1725 TP_STRUCT__entry( 1725 TP_STRUCT__entry(
1726 __field( dev_t, dev ) 1726 __field( dev_t, dev )
1727 __field( unsigned int, flags )
1727 __field( ext4_lblk_t, lblk ) 1728 __field( ext4_lblk_t, lblk )
1728 __field( ext4_fsblk_t, pblk ) 1729 __field( ext4_fsblk_t, pblk )
1729 __field( unsigned int, len ) 1730 __field( unsigned int, len )
1730 __field( unsigned int, flags )
1731 __field( int, ret ) 1731 __field( int, ret )
1732 ), 1732 ),
1733 1733
1734 TP_fast_assign( 1734 TP_fast_assign(
1735 __entry->dev = sb->s_dev; 1735 __entry->dev = sb->s_dev;
1736 __entry->flags = map->m_flags;
1736 __entry->lblk = map->m_lblk; 1737 __entry->lblk = map->m_lblk;
1737 __entry->pblk = map->m_pblk; 1738 __entry->pblk = map->m_pblk;
1738 __entry->len = map->m_len; 1739 __entry->len = map->m_len;
1739 __entry->flags = map->m_flags;
1740 __entry->ret = ret; 1740 __entry->ret = ret;
1741 ), 1741 ),
1742 1742
@@ -1753,16 +1753,16 @@ TRACE_EVENT(ext4_ext_put_in_cache,
1753 TP_ARGS(inode, lblk, len, start), 1753 TP_ARGS(inode, lblk, len, start),
1754 1754
1755 TP_STRUCT__entry( 1755 TP_STRUCT__entry(
1756 __field( ino_t, ino )
1757 __field( dev_t, dev ) 1756 __field( dev_t, dev )
1757 __field( ino_t, ino )
1758 __field( ext4_lblk_t, lblk ) 1758 __field( ext4_lblk_t, lblk )
1759 __field( unsigned int, len ) 1759 __field( unsigned int, len )
1760 __field( ext4_fsblk_t, start ) 1760 __field( ext4_fsblk_t, start )
1761 ), 1761 ),
1762 1762
1763 TP_fast_assign( 1763 TP_fast_assign(
1764 __entry->ino = inode->i_ino;
1765 __entry->dev = inode->i_sb->s_dev; 1764 __entry->dev = inode->i_sb->s_dev;
1765 __entry->ino = inode->i_ino;
1766 __entry->lblk = lblk; 1766 __entry->lblk = lblk;
1767 __entry->len = len; 1767 __entry->len = len;
1768 __entry->start = start; 1768 __entry->start = start;
@@ -1782,15 +1782,15 @@ TRACE_EVENT(ext4_ext_in_cache,
1782 TP_ARGS(inode, lblk, ret), 1782 TP_ARGS(inode, lblk, ret),
1783 1783
1784 TP_STRUCT__entry( 1784 TP_STRUCT__entry(
1785 __field( ino_t, ino )
1786 __field( dev_t, dev ) 1785 __field( dev_t, dev )
1786 __field( ino_t, ino )
1787 __field( ext4_lblk_t, lblk ) 1787 __field( ext4_lblk_t, lblk )
1788 __field( int, ret ) 1788 __field( int, ret )
1789 ), 1789 ),
1790 1790
1791 TP_fast_assign( 1791 TP_fast_assign(
1792 __entry->ino = inode->i_ino;
1793 __entry->dev = inode->i_sb->s_dev; 1792 __entry->dev = inode->i_sb->s_dev;
1793 __entry->ino = inode->i_ino;
1794 __entry->lblk = lblk; 1794 __entry->lblk = lblk;
1795 __entry->ret = ret; 1795 __entry->ret = ret;
1796 ), 1796 ),
@@ -1810,8 +1810,8 @@ TRACE_EVENT(ext4_find_delalloc_range,
1810 TP_ARGS(inode, from, to, reverse, found, found_blk), 1810 TP_ARGS(inode, from, to, reverse, found, found_blk),
1811 1811
1812 TP_STRUCT__entry( 1812 TP_STRUCT__entry(
1813 __field( ino_t, ino )
1814 __field( dev_t, dev ) 1813 __field( dev_t, dev )
1814 __field( ino_t, ino )
1815 __field( ext4_lblk_t, from ) 1815 __field( ext4_lblk_t, from )
1816 __field( ext4_lblk_t, to ) 1816 __field( ext4_lblk_t, to )
1817 __field( int, reverse ) 1817 __field( int, reverse )
@@ -1820,8 +1820,8 @@ TRACE_EVENT(ext4_find_delalloc_range,
1820 ), 1820 ),
1821 1821
1822 TP_fast_assign( 1822 TP_fast_assign(
1823 __entry->ino = inode->i_ino;
1824 __entry->dev = inode->i_sb->s_dev; 1823 __entry->dev = inode->i_sb->s_dev;
1824 __entry->ino = inode->i_ino;
1825 __entry->from = from; 1825 __entry->from = from;
1826 __entry->to = to; 1826 __entry->to = to;
1827 __entry->reverse = reverse; 1827 __entry->reverse = reverse;
@@ -1844,15 +1844,15 @@ TRACE_EVENT(ext4_get_reserved_cluster_alloc,
1844 TP_ARGS(inode, lblk, len), 1844 TP_ARGS(inode, lblk, len),
1845 1845
1846 TP_STRUCT__entry( 1846 TP_STRUCT__entry(
1847 __field( ino_t, ino )
1848 __field( dev_t, dev ) 1847 __field( dev_t, dev )
1848 __field( ino_t, ino )
1849 __field( ext4_lblk_t, lblk ) 1849 __field( ext4_lblk_t, lblk )
1850 __field( unsigned int, len ) 1850 __field( unsigned int, len )
1851 ), 1851 ),
1852 1852
1853 TP_fast_assign( 1853 TP_fast_assign(
1854 __entry->ino = inode->i_ino;
1855 __entry->dev = inode->i_sb->s_dev; 1854 __entry->dev = inode->i_sb->s_dev;
1855 __entry->ino = inode->i_ino;
1856 __entry->lblk = lblk; 1856 __entry->lblk = lblk;
1857 __entry->len = len; 1857 __entry->len = len;
1858 ), 1858 ),
@@ -1871,18 +1871,18 @@ TRACE_EVENT(ext4_ext_show_extent,
1871 TP_ARGS(inode, lblk, pblk, len), 1871 TP_ARGS(inode, lblk, pblk, len),
1872 1872
1873 TP_STRUCT__entry( 1873 TP_STRUCT__entry(
1874 __field( ino_t, ino )
1875 __field( dev_t, dev ) 1874 __field( dev_t, dev )
1876 __field( ext4_lblk_t, lblk ) 1875 __field( ino_t, ino )
1877 __field( ext4_fsblk_t, pblk ) 1876 __field( ext4_fsblk_t, pblk )
1877 __field( ext4_lblk_t, lblk )
1878 __field( unsigned short, len ) 1878 __field( unsigned short, len )
1879 ), 1879 ),
1880 1880
1881 TP_fast_assign( 1881 TP_fast_assign(
1882 __entry->ino = inode->i_ino;
1883 __entry->dev = inode->i_sb->s_dev; 1882 __entry->dev = inode->i_sb->s_dev;
1884 __entry->lblk = lblk; 1883 __entry->ino = inode->i_ino;
1885 __entry->pblk = pblk; 1884 __entry->pblk = pblk;
1885 __entry->lblk = lblk;
1886 __entry->len = len; 1886 __entry->len = len;
1887 ), 1887 ),
1888 1888
@@ -1902,25 +1902,25 @@ TRACE_EVENT(ext4_remove_blocks,
1902 TP_ARGS(inode, ex, from, to, partial_cluster), 1902 TP_ARGS(inode, ex, from, to, partial_cluster),
1903 1903
1904 TP_STRUCT__entry( 1904 TP_STRUCT__entry(
1905 __field( ino_t, ino )
1906 __field( dev_t, dev ) 1905 __field( dev_t, dev )
1907 __field( ext4_lblk_t, ee_lblk ) 1906 __field( ino_t, ino )
1908 __field( ext4_fsblk_t, ee_pblk )
1909 __field( unsigned short, ee_len )
1910 __field( ext4_lblk_t, from ) 1907 __field( ext4_lblk_t, from )
1911 __field( ext4_lblk_t, to ) 1908 __field( ext4_lblk_t, to )
1912 __field( ext4_fsblk_t, partial ) 1909 __field( ext4_fsblk_t, partial )
1910 __field( ext4_fsblk_t, ee_pblk )
1911 __field( ext4_lblk_t, ee_lblk )
1912 __field( unsigned short, ee_len )
1913 ), 1913 ),
1914 1914
1915 TP_fast_assign( 1915 TP_fast_assign(
1916 __entry->ino = inode->i_ino;
1917 __entry->dev = inode->i_sb->s_dev; 1916 __entry->dev = inode->i_sb->s_dev;
1918 __entry->ee_lblk = cpu_to_le32(ex->ee_block); 1917 __entry->ino = inode->i_ino;
1919 __entry->ee_pblk = ext4_ext_pblock(ex);
1920 __entry->ee_len = ext4_ext_get_actual_len(ex);
1921 __entry->from = from; 1918 __entry->from = from;
1922 __entry->to = to; 1919 __entry->to = to;
1923 __entry->partial = partial_cluster; 1920 __entry->partial = partial_cluster;
1921 __entry->ee_pblk = ext4_ext_pblock(ex);
1922 __entry->ee_lblk = cpu_to_le32(ex->ee_block);
1923 __entry->ee_len = ext4_ext_get_actual_len(ex);
1924 ), 1924 ),
1925 1925
1926 TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]" 1926 TP_printk("dev %d,%d ino %lu extent [%u(%llu), %u]"
@@ -1942,23 +1942,23 @@ TRACE_EVENT(ext4_ext_rm_leaf,
1942 TP_ARGS(inode, start, ex, partial_cluster), 1942 TP_ARGS(inode, start, ex, partial_cluster),
1943 1943
1944 TP_STRUCT__entry( 1944 TP_STRUCT__entry(
1945 __field( ino_t, ino )
1946 __field( dev_t, dev ) 1945 __field( dev_t, dev )
1946 __field( ino_t, ino )
1947 __field( ext4_fsblk_t, partial )
1947 __field( ext4_lblk_t, start ) 1948 __field( ext4_lblk_t, start )
1948 __field( ext4_lblk_t, ee_lblk ) 1949 __field( ext4_lblk_t, ee_lblk )
1949 __field( ext4_fsblk_t, ee_pblk ) 1950 __field( ext4_fsblk_t, ee_pblk )
1950 __field( short, ee_len ) 1951 __field( short, ee_len )
1951 __field( ext4_fsblk_t, partial )
1952 ), 1952 ),
1953 1953
1954 TP_fast_assign( 1954 TP_fast_assign(
1955 __entry->ino = inode->i_ino;
1956 __entry->dev = inode->i_sb->s_dev; 1955 __entry->dev = inode->i_sb->s_dev;
1956 __entry->ino = inode->i_ino;
1957 __entry->partial = partial_cluster;
1957 __entry->start = start; 1958 __entry->start = start;
1958 __entry->ee_lblk = le32_to_cpu(ex->ee_block); 1959 __entry->ee_lblk = le32_to_cpu(ex->ee_block);
1959 __entry->ee_pblk = ext4_ext_pblock(ex); 1960 __entry->ee_pblk = ext4_ext_pblock(ex);
1960 __entry->ee_len = ext4_ext_get_actual_len(ex); 1961 __entry->ee_len = ext4_ext_get_actual_len(ex);
1961 __entry->partial = partial_cluster;
1962 ), 1962 ),
1963 1963
1964 TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]" 1964 TP_printk("dev %d,%d ino %lu start_lblk %u last_extent [%u(%llu), %u]"
@@ -1978,14 +1978,14 @@ TRACE_EVENT(ext4_ext_rm_idx,
1978 TP_ARGS(inode, pblk), 1978 TP_ARGS(inode, pblk),
1979 1979
1980 TP_STRUCT__entry( 1980 TP_STRUCT__entry(
1981 __field( ino_t, ino )
1982 __field( dev_t, dev ) 1981 __field( dev_t, dev )
1982 __field( ino_t, ino )
1983 __field( ext4_fsblk_t, pblk ) 1983 __field( ext4_fsblk_t, pblk )
1984 ), 1984 ),
1985 1985
1986 TP_fast_assign( 1986 TP_fast_assign(
1987 __entry->ino = inode->i_ino;
1988 __entry->dev = inode->i_sb->s_dev; 1987 __entry->dev = inode->i_sb->s_dev;
1988 __entry->ino = inode->i_ino;
1989 __entry->pblk = pblk; 1989 __entry->pblk = pblk;
1990 ), 1990 ),
1991 1991
@@ -2001,15 +2001,15 @@ TRACE_EVENT(ext4_ext_remove_space,
2001 TP_ARGS(inode, start, depth), 2001 TP_ARGS(inode, start, depth),
2002 2002
2003 TP_STRUCT__entry( 2003 TP_STRUCT__entry(
2004 __field( ino_t, ino )
2005 __field( dev_t, dev ) 2004 __field( dev_t, dev )
2005 __field( ino_t, ino )
2006 __field( ext4_lblk_t, start ) 2006 __field( ext4_lblk_t, start )
2007 __field( int, depth ) 2007 __field( int, depth )
2008 ), 2008 ),
2009 2009
2010 TP_fast_assign( 2010 TP_fast_assign(
2011 __entry->ino = inode->i_ino;
2012 __entry->dev = inode->i_sb->s_dev; 2011 __entry->dev = inode->i_sb->s_dev;
2012 __entry->ino = inode->i_ino;
2013 __entry->start = start; 2013 __entry->start = start;
2014 __entry->depth = depth; 2014 __entry->depth = depth;
2015 ), 2015 ),
@@ -2028,8 +2028,8 @@ TRACE_EVENT(ext4_ext_remove_space_done,
2028 TP_ARGS(inode, start, depth, partial, eh_entries), 2028 TP_ARGS(inode, start, depth, partial, eh_entries),
2029 2029
2030 TP_STRUCT__entry( 2030 TP_STRUCT__entry(
2031 __field( ino_t, ino )
2032 __field( dev_t, dev ) 2031 __field( dev_t, dev )
2032 __field( ino_t, ino )
2033 __field( ext4_lblk_t, start ) 2033 __field( ext4_lblk_t, start )
2034 __field( int, depth ) 2034 __field( int, depth )
2035 __field( ext4_lblk_t, partial ) 2035 __field( ext4_lblk_t, partial )
@@ -2037,8 +2037,8 @@ TRACE_EVENT(ext4_ext_remove_space_done,
2037 ), 2037 ),
2038 2038
2039 TP_fast_assign( 2039 TP_fast_assign(
2040 __entry->ino = inode->i_ino;
2041 __entry->dev = inode->i_sb->s_dev; 2040 __entry->dev = inode->i_sb->s_dev;
2041 __entry->ino = inode->i_ino;
2042 __entry->start = start; 2042 __entry->start = start;
2043 __entry->depth = depth; 2043 __entry->depth = depth;
2044 __entry->partial = partial; 2044 __entry->partial = partial;