summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2019-03-12 18:03:21 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2019-03-12 18:03:21 -0400
commit a5adcfcad55d5f034b33f79f1a873229d1e77b24 (patch)
tree e9548efcccb8f5ed3e120b0ca36ad04de116cdb7
parent 2b0a80b0d0bb0a3db74588279bf851b28c6c4705 (diff)
parent 0df6f46995a9fc92a6b9e591428e77527dd9609a (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
 "A large number of bug fixes and cleanups.
  One new feature to allow users to more easily find the jbd2 journal
  thread for a particular ext4 file system"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  jbd2: jbd2_get_transaction does not need to return a value
  jbd2: fix invalid descriptor block checksum
  ext4: fix bigalloc cluster freeing when hole punching under load
  ext4: add sysfs attr /sys/fs/ext4/<disk>/journal_task
  ext4: Change debugging support help prefix from EXT4 to Ext4
  ext4: fix compile error when using BUFFER_TRACE
  jbd2: fix compile warning when using JBUFFER_TRACE
  ext4: fix some error pointer dereferences
  ext4: annotate more implicit fall throughs
  ext4: annotate implicit fall throughs
  ext4: don't update s_rev_level if not required
  jbd2: fold jbd2_superblock_csum_{verify,set} into their callers
  jbd2: fix race when writing superblock
  ext4: fix crash during online resizing
  ext4: disallow files with EXT4_JOURNAL_DATA_FL from EXT4_IOC_SWAP_BOOT
  ext4: add mask of ext4 flags to swap
  ext4: update quota information while swapping boot loader inode
  ext4: cleanup pagecache before swap i_data
  ext4: fix check of inode in swap_inode_boot_loader
  ext4: unlock unused_pages timely when doing writeback
  ...
-rw-r--r-- Documentation/ABI/testing/sysfs-fs-ext4 | 7
-rw-r--r-- fs/ext4/Kconfig | 2
-rw-r--r-- fs/ext4/ext4.h | 9
-rw-r--r-- fs/ext4/extents.c | 29
-rw-r--r-- fs/ext4/hash.c | 2
-rw-r--r-- fs/ext4/indirect.c | 6
-rw-r--r-- fs/ext4/inode.c | 21
-rw-r--r-- fs/ext4/ioctl.c | 101
-rw-r--r-- fs/ext4/mballoc.c | 7
-rw-r--r-- fs/ext4/page-io.c | 4
-rw-r--r-- fs/ext4/resize.c | 3
-rw-r--r-- fs/ext4/super.c | 1
-rw-r--r-- fs/ext4/sysfs.c | 13
-rw-r--r-- fs/ext4/xattr.c | 3
-rw-r--r-- fs/jbd2/checkpoint.c | 17
-rw-r--r-- fs/jbd2/commit.c | 6
-rw-r--r-- fs/jbd2/journal.c | 90
-rw-r--r-- fs/jbd2/transaction.c | 83
18 files changed, 257 insertions, 147 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index c631253cf85c..78604db56279 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -109,3 +109,10 @@ Description:
109 write operation (since a 4k random write might turn 109 write operation (since a 4k random write might turn
110 into a much larger write due to the zeroout 110 into a much larger write due to the zeroout
111 operation). 111 operation).
112
113What: /sys/fs/ext4/<disk>/journal_task
114Date: February 2019
115Contact: "Theodore Ts'o" <tytso@mit.edu>
116Description:
117 This file is read-only and shows the pid of journal thread in
118 current pid-namespace or 0 if task is unreachable.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 031e5a82d556..06f77ca7f36e 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -97,7 +97,7 @@ config EXT4_FS_SECURITY
97 extended attributes for file security labels, say N. 97 extended attributes for file security labels, say N.
98 98
99config EXT4_DEBUG 99config EXT4_DEBUG
100 bool "EXT4 debugging support" 100 bool "Ext4 debugging support"
101 depends on EXT4_FS 101 depends on EXT4_FS
102 help 102 help
103 Enables run-time debugging support for the ext4 filesystem. 103 Enables run-time debugging support for the ext4 filesystem.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5012ddb6daf9..82ffdacdc7fa 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -425,6 +425,9 @@ struct flex_groups {
425/* Flags that are appropriate for non-directories/regular files. */ 425/* Flags that are appropriate for non-directories/regular files. */
426#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL) 426#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
427 427
428/* The only flags that should be swapped */
429#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
430
428/* Mask out flags that are inappropriate for the given type of inode. */ 431/* Mask out flags that are inappropriate for the given type of inode. */
429static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) 432static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
430{ 433{
@@ -1661,6 +1664,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
1661#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */ 1664#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
1662#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000 1665#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
1663 1666
1667extern void ext4_update_dynamic_rev(struct super_block *sb);
1668
1664#define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \ 1669#define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \
1665static inline bool ext4_has_feature_##name(struct super_block *sb) \ 1670static inline bool ext4_has_feature_##name(struct super_block *sb) \
1666{ \ 1671{ \
@@ -1669,6 +1674,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
1669} \ 1674} \
1670static inline void ext4_set_feature_##name(struct super_block *sb) \ 1675static inline void ext4_set_feature_##name(struct super_block *sb) \
1671{ \ 1676{ \
1677 ext4_update_dynamic_rev(sb); \
1672 EXT4_SB(sb)->s_es->s_feature_compat |= \ 1678 EXT4_SB(sb)->s_es->s_feature_compat |= \
1673 cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \ 1679 cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \
1674} \ 1680} \
@@ -1686,6 +1692,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
1686} \ 1692} \
1687static inline void ext4_set_feature_##name(struct super_block *sb) \ 1693static inline void ext4_set_feature_##name(struct super_block *sb) \
1688{ \ 1694{ \
1695 ext4_update_dynamic_rev(sb); \
1689 EXT4_SB(sb)->s_es->s_feature_ro_compat |= \ 1696 EXT4_SB(sb)->s_es->s_feature_ro_compat |= \
1690 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \ 1697 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \
1691} \ 1698} \
@@ -1703,6 +1710,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
1703} \ 1710} \
1704static inline void ext4_set_feature_##name(struct super_block *sb) \ 1711static inline void ext4_set_feature_##name(struct super_block *sb) \
1705{ \ 1712{ \
1713 ext4_update_dynamic_rev(sb); \
1706 EXT4_SB(sb)->s_es->s_feature_incompat |= \ 1714 EXT4_SB(sb)->s_es->s_feature_incompat |= \
1707 cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \ 1715 cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \
1708} \ 1716} \
@@ -2666,7 +2674,6 @@ do { \
2666 2674
2667#endif 2675#endif
2668 2676
2669extern void ext4_update_dynamic_rev(struct super_block *sb);
2670extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, 2677extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
2671 __u32 compat); 2678 __u32 compat);
2672extern int ext4_update_rocompat_feature(handle_t *handle, 2679extern int ext4_update_rocompat_feature(handle_t *handle,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 79d986dbf5af..0f89f5190cd7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2956,14 +2956,17 @@ again:
2956 if (err < 0) 2956 if (err < 0)
2957 goto out; 2957 goto out;
2958 2958
2959 } else if (sbi->s_cluster_ratio > 1 && end >= ex_end) { 2959 } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
2960 partial.state == initial) {
2960 /* 2961 /*
2961 * If there's an extent to the right its first cluster 2962 * If we're punching, there's an extent to the right.
2962 * contains the immediate right boundary of the 2963 * If the partial cluster hasn't been set, set it to
2963 * truncated/punched region. Set partial_cluster to 2964 * that extent's first cluster and its state to nofree
2964 * its negative value so it won't be freed if shared 2965 * so it won't be freed should it contain blocks to be
2965 * with the current extent. The end < ee_block case 2966 * removed. If it's already set (tofree/nofree), we're
2966 * is handled in ext4_ext_rm_leaf(). 2967 * retrying and keep the original partial cluster info
2968 * so a cluster marked tofree as a result of earlier
2969 * extent removal is not lost.
2967 */ 2970 */
2968 lblk = ex_end + 1; 2971 lblk = ex_end + 1;
2969 err = ext4_ext_search_right(inode, path, &lblk, &pblk, 2972 err = ext4_ext_search_right(inode, path, &lblk, &pblk,
@@ -4048,18 +4051,8 @@ out:
4048 } else 4051 } else
4049 allocated = ret; 4052 allocated = ret;
4050 map->m_flags |= EXT4_MAP_NEW; 4053 map->m_flags |= EXT4_MAP_NEW;
4051 /* 4054 if (allocated > map->m_len)
4052 * if we allocated more blocks than requested
4053 * we need to make sure we unmap the extra block
4054 * allocated. The actual needed block will get
4055 * unmapped later when we find the buffer_head marked
4056 * new.
4057 */
4058 if (allocated > map->m_len) {
4059 clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
4060 allocated - map->m_len);
4061 allocated = map->m_len; 4055 allocated = map->m_len;
4062 }
4063 map->m_len = allocated; 4056 map->m_len = allocated;
4064 4057
4065map_out: 4058map_out:
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index e22dcfab308b..46b24da33a28 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -231,6 +231,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
231 break; 231 break;
232 case DX_HASH_HALF_MD4_UNSIGNED: 232 case DX_HASH_HALF_MD4_UNSIGNED:
233 str2hashbuf = str2hashbuf_unsigned; 233 str2hashbuf = str2hashbuf_unsigned;
234 /* fall through */
234 case DX_HASH_HALF_MD4: 235 case DX_HASH_HALF_MD4:
235 p = name; 236 p = name;
236 while (len > 0) { 237 while (len > 0) {
@@ -244,6 +245,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
244 break; 245 break;
245 case DX_HASH_TEA_UNSIGNED: 246 case DX_HASH_TEA_UNSIGNED:
246 str2hashbuf = str2hashbuf_unsigned; 247 str2hashbuf = str2hashbuf_unsigned;
248 /* fall through */
247 case DX_HASH_TEA: 249 case DX_HASH_TEA:
248 p = name; 250 p = name;
249 while (len > 0) { 251 while (len > 0) {
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bf7fa1507e81..c2225f0d31b5 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1183,18 +1183,21 @@ do_indirects:
1183 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); 1183 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1184 i_data[EXT4_IND_BLOCK] = 0; 1184 i_data[EXT4_IND_BLOCK] = 0;
1185 } 1185 }
1186 /* fall through */
1186 case EXT4_IND_BLOCK: 1187 case EXT4_IND_BLOCK:
1187 nr = i_data[EXT4_DIND_BLOCK]; 1188 nr = i_data[EXT4_DIND_BLOCK];
1188 if (nr) { 1189 if (nr) {
1189 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); 1190 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1190 i_data[EXT4_DIND_BLOCK] = 0; 1191 i_data[EXT4_DIND_BLOCK] = 0;
1191 } 1192 }
1193 /* fall through */
1192 case EXT4_DIND_BLOCK: 1194 case EXT4_DIND_BLOCK:
1193 nr = i_data[EXT4_TIND_BLOCK]; 1195 nr = i_data[EXT4_TIND_BLOCK];
1194 if (nr) { 1196 if (nr) {
1195 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); 1197 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1196 i_data[EXT4_TIND_BLOCK] = 0; 1198 i_data[EXT4_TIND_BLOCK] = 0;
1197 } 1199 }
1200 /* fall through */
1198 case EXT4_TIND_BLOCK: 1201 case EXT4_TIND_BLOCK:
1199 ; 1202 ;
1200 } 1203 }
@@ -1433,6 +1436,7 @@ do_indirects:
1433 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); 1436 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
1434 i_data[EXT4_IND_BLOCK] = 0; 1437 i_data[EXT4_IND_BLOCK] = 0;
1435 } 1438 }
1439 /* fall through */
1436 case EXT4_IND_BLOCK: 1440 case EXT4_IND_BLOCK:
1437 if (++n >= n2) 1441 if (++n >= n2)
1438 return 0; 1442 return 0;
@@ -1441,6 +1445,7 @@ do_indirects:
1441 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); 1445 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
1442 i_data[EXT4_DIND_BLOCK] = 0; 1446 i_data[EXT4_DIND_BLOCK] = 0;
1443 } 1447 }
1448 /* fall through */
1444 case EXT4_DIND_BLOCK: 1449 case EXT4_DIND_BLOCK:
1445 if (++n >= n2) 1450 if (++n >= n2)
1446 return 0; 1451 return 0;
@@ -1449,6 +1454,7 @@ do_indirects:
1449 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); 1454 ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
1450 i_data[EXT4_TIND_BLOCK] = 0; 1455 i_data[EXT4_TIND_BLOCK] = 0;
1451 } 1456 }
1457 /* fall through */
1452 case EXT4_TIND_BLOCK: 1458 case EXT4_TIND_BLOCK:
1453 ; 1459 ;
1454 } 1460 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4356ef6d728e..b54b261ded36 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -391,7 +391,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
391 * inode's preallocations. 391 * inode's preallocations.
392 */ 392 */
393 if ((ei->i_reserved_data_blocks == 0) && 393 if ((ei->i_reserved_data_blocks == 0) &&
394 (atomic_read(&inode->i_writecount) == 0)) 394 !inode_is_open_for_write(inode))
395 ext4_discard_preallocations(inode); 395 ext4_discard_preallocations(inode);
396} 396}
397 397
@@ -678,8 +678,6 @@ found:
678 if (flags & EXT4_GET_BLOCKS_ZERO && 678 if (flags & EXT4_GET_BLOCKS_ZERO &&
679 map->m_flags & EXT4_MAP_MAPPED && 679 map->m_flags & EXT4_MAP_MAPPED &&
680 map->m_flags & EXT4_MAP_NEW) { 680 map->m_flags & EXT4_MAP_NEW) {
681 clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
682 map->m_len);
683 ret = ext4_issue_zeroout(inode, map->m_lblk, 681 ret = ext4_issue_zeroout(inode, map->m_lblk,
684 map->m_pblk, map->m_len); 682 map->m_pblk, map->m_len);
685 if (ret) { 683 if (ret) {
@@ -1194,7 +1192,6 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
1194 if (err) 1192 if (err)
1195 break; 1193 break;
1196 if (buffer_new(bh)) { 1194 if (buffer_new(bh)) {
1197 clean_bdev_bh_alias(bh);
1198 if (PageUptodate(page)) { 1195 if (PageUptodate(page)) {
1199 clear_buffer_new(bh); 1196 clear_buffer_new(bh);
1200 set_buffer_uptodate(bh); 1197 set_buffer_uptodate(bh);
@@ -2489,10 +2486,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2489 } 2486 }
2490 2487
2491 BUG_ON(map->m_len == 0); 2488 BUG_ON(map->m_len == 0);
2492 if (map->m_flags & EXT4_MAP_NEW) {
2493 clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
2494 map->m_len);
2495 }
2496 return 0; 2489 return 0;
2497} 2490}
2498 2491
@@ -2835,12 +2828,12 @@ retry:
2835 goto unplug; 2828 goto unplug;
2836 } 2829 }
2837 ret = mpage_prepare_extent_to_map(&mpd); 2830 ret = mpage_prepare_extent_to_map(&mpd);
2831 /* Unlock pages we didn't use */
2832 mpage_release_unused_pages(&mpd, false);
2838 /* Submit prepared bio */ 2833 /* Submit prepared bio */
2839 ext4_io_submit(&mpd.io_submit); 2834 ext4_io_submit(&mpd.io_submit);
2840 ext4_put_io_end_defer(mpd.io_submit.io_end); 2835 ext4_put_io_end_defer(mpd.io_submit.io_end);
2841 mpd.io_submit.io_end = NULL; 2836 mpd.io_submit.io_end = NULL;
2842 /* Unlock pages we didn't use */
2843 mpage_release_unused_pages(&mpd, false);
2844 if (ret < 0) 2837 if (ret < 0)
2845 goto unplug; 2838 goto unplug;
2846 2839
@@ -2908,10 +2901,11 @@ retry:
2908 handle = NULL; 2901 handle = NULL;
2909 mpd.do_map = 0; 2902 mpd.do_map = 0;
2910 } 2903 }
2911 /* Submit prepared bio */
2912 ext4_io_submit(&mpd.io_submit);
2913 /* Unlock pages we didn't use */ 2904 /* Unlock pages we didn't use */
2914 mpage_release_unused_pages(&mpd, give_up_on_write); 2905 mpage_release_unused_pages(&mpd, give_up_on_write);
2906 /* Submit prepared bio */
2907 ext4_io_submit(&mpd.io_submit);
2908
2915 /* 2909 /*
2916 * Drop our io_end reference we got from init. We have 2910 * Drop our io_end reference we got from init. We have
2917 * to be careful and use deferred io_end finishing if 2911 * to be careful and use deferred io_end finishing if
@@ -5349,7 +5343,6 @@ static int ext4_do_update_inode(handle_t *handle,
5349 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 5343 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
5350 if (err) 5344 if (err)
5351 goto out_brelse; 5345 goto out_brelse;
5352 ext4_update_dynamic_rev(sb);
5353 ext4_set_feature_large_file(sb); 5346 ext4_set_feature_large_file(sb);
5354 ext4_handle_sync(handle); 5347 ext4_handle_sync(handle);
5355 err = ext4_handle_dirty_super(handle, sb); 5348 err = ext4_handle_dirty_super(handle, sb);
@@ -6000,7 +5993,7 @@ int ext4_expand_extra_isize(struct inode *inode,
6000 5993
6001 ext4_write_lock_xattr(inode, &no_expand); 5994 ext4_write_lock_xattr(inode, &no_expand);
6002 5995
6003 BUFFER_TRACE(iloc.bh, "get_write_access"); 5996 BUFFER_TRACE(iloc->bh, "get_write_access");
6004 error = ext4_journal_get_write_access(handle, iloc->bh); 5997 error = ext4_journal_get_write_access(handle, iloc->bh);
6005 if (error) { 5998 if (error) {
6006 brelse(iloc->bh); 5999 brelse(iloc->bh);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d26bcac291bb..3c4f8bb59f8a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -63,18 +63,20 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
63 loff_t isize; 63 loff_t isize;
64 struct ext4_inode_info *ei1; 64 struct ext4_inode_info *ei1;
65 struct ext4_inode_info *ei2; 65 struct ext4_inode_info *ei2;
66 unsigned long tmp;
66 67
67 ei1 = EXT4_I(inode1); 68 ei1 = EXT4_I(inode1);
68 ei2 = EXT4_I(inode2); 69 ei2 = EXT4_I(inode2);
69 70
70 swap(inode1->i_version, inode2->i_version); 71 swap(inode1->i_version, inode2->i_version);
71 swap(inode1->i_blocks, inode2->i_blocks);
72 swap(inode1->i_bytes, inode2->i_bytes);
73 swap(inode1->i_atime, inode2->i_atime); 72 swap(inode1->i_atime, inode2->i_atime);
74 swap(inode1->i_mtime, inode2->i_mtime); 73 swap(inode1->i_mtime, inode2->i_mtime);
75 74
76 memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); 75 memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
77 swap(ei1->i_flags, ei2->i_flags); 76 tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP;
77 ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) |
78 (ei1->i_flags & ~EXT4_FL_SHOULD_SWAP);
79 ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP);
78 swap(ei1->i_disksize, ei2->i_disksize); 80 swap(ei1->i_disksize, ei2->i_disksize);
79 ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); 81 ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
80 ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); 82 ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
@@ -115,28 +117,42 @@ static long swap_inode_boot_loader(struct super_block *sb,
115 int err; 117 int err;
116 struct inode *inode_bl; 118 struct inode *inode_bl;
117 struct ext4_inode_info *ei_bl; 119 struct ext4_inode_info *ei_bl;
118 120 qsize_t size, size_bl, diff;
119 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) || 121 blkcnt_t blocks;
120 IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) || 122 unsigned short bytes;
121 ext4_has_inline_data(inode))
122 return -EINVAL;
123
124 if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
125 !inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
126 return -EPERM;
127 123
128 inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL); 124 inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
129 if (IS_ERR(inode_bl)) 125 if (IS_ERR(inode_bl))
130 return PTR_ERR(inode_bl); 126 return PTR_ERR(inode_bl);
131 ei_bl = EXT4_I(inode_bl); 127 ei_bl = EXT4_I(inode_bl);
132 128
133 filemap_flush(inode->i_mapping);
134 filemap_flush(inode_bl->i_mapping);
135
136 /* Protect orig inodes against a truncate and make sure, 129 /* Protect orig inodes against a truncate and make sure,
137 * that only 1 swap_inode_boot_loader is running. */ 130 * that only 1 swap_inode_boot_loader is running. */
138 lock_two_nondirectories(inode, inode_bl); 131 lock_two_nondirectories(inode, inode_bl);
139 132
133 if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) ||
134 IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) ||
135 (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) ||
136 ext4_has_inline_data(inode)) {
137 err = -EINVAL;
138 goto journal_err_out;
139 }
140
141 if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
142 !inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
143 err = -EPERM;
144 goto journal_err_out;
145 }
146
147 down_write(&EXT4_I(inode)->i_mmap_sem);
148 err = filemap_write_and_wait(inode->i_mapping);
149 if (err)
150 goto err_out;
151
152 err = filemap_write_and_wait(inode_bl->i_mapping);
153 if (err)
154 goto err_out;
155
140 /* Wait for all existing dio workers */ 156 /* Wait for all existing dio workers */
141 inode_dio_wait(inode); 157 inode_dio_wait(inode);
142 inode_dio_wait(inode_bl); 158 inode_dio_wait(inode_bl);
@@ -147,7 +163,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
147 handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2); 163 handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
148 if (IS_ERR(handle)) { 164 if (IS_ERR(handle)) {
149 err = -EINVAL; 165 err = -EINVAL;
150 goto journal_err_out; 166 goto err_out;
151 } 167 }
152 168
153 /* Protect extent tree against block allocations via delalloc */ 169 /* Protect extent tree against block allocations via delalloc */
@@ -170,6 +186,13 @@ static long swap_inode_boot_loader(struct super_block *sb,
170 memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data)); 186 memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
171 } 187 }
172 188
189 err = dquot_initialize(inode);
190 if (err)
191 goto err_out1;
192
193 size = (qsize_t)(inode->i_blocks) * (1 << 9) + inode->i_bytes;
194 size_bl = (qsize_t)(inode_bl->i_blocks) * (1 << 9) + inode_bl->i_bytes;
195 diff = size - size_bl;
173 swap_inode_data(inode, inode_bl); 196 swap_inode_data(inode, inode_bl);
174 197
175 inode->i_ctime = inode_bl->i_ctime = current_time(inode); 198 inode->i_ctime = inode_bl->i_ctime = current_time(inode);
@@ -183,27 +206,51 @@ static long swap_inode_boot_loader(struct super_block *sb,
183 206
184 err = ext4_mark_inode_dirty(handle, inode); 207 err = ext4_mark_inode_dirty(handle, inode);
185 if (err < 0) { 208 if (err < 0) {
209 /* No need to update quota information. */
186 ext4_warning(inode->i_sb, 210 ext4_warning(inode->i_sb,
187 "couldn't mark inode #%lu dirty (err %d)", 211 "couldn't mark inode #%lu dirty (err %d)",
188 inode->i_ino, err); 212 inode->i_ino, err);
189 /* Revert all changes: */ 213 /* Revert all changes: */
190 swap_inode_data(inode, inode_bl); 214 swap_inode_data(inode, inode_bl);
191 ext4_mark_inode_dirty(handle, inode); 215 ext4_mark_inode_dirty(handle, inode);
192 } else { 216 goto err_out1;
193 err = ext4_mark_inode_dirty(handle, inode_bl); 217 }
194 if (err < 0) { 218
195 ext4_warning(inode_bl->i_sb, 219 blocks = inode_bl->i_blocks;
196 "couldn't mark inode #%lu dirty (err %d)", 220 bytes = inode_bl->i_bytes;
197 inode_bl->i_ino, err); 221 inode_bl->i_blocks = inode->i_blocks;
198 /* Revert all changes: */ 222 inode_bl->i_bytes = inode->i_bytes;
199 swap_inode_data(inode, inode_bl); 223 err = ext4_mark_inode_dirty(handle, inode_bl);
200 ext4_mark_inode_dirty(handle, inode); 224 if (err < 0) {
201 ext4_mark_inode_dirty(handle, inode_bl); 225 /* No need to update quota information. */
202 } 226 ext4_warning(inode_bl->i_sb,
227 "couldn't mark inode #%lu dirty (err %d)",
228 inode_bl->i_ino, err);
229 goto revert;
230 }
231
232 /* Bootloader inode should not be counted into quota information. */
233 if (diff > 0)
234 dquot_free_space(inode, diff);
235 else
236 err = dquot_alloc_space(inode, -1 * diff);
237
238 if (err < 0) {
239revert:
240 /* Revert all changes: */
241 inode_bl->i_blocks = blocks;
242 inode_bl->i_bytes = bytes;
243 swap_inode_data(inode, inode_bl);
244 ext4_mark_inode_dirty(handle, inode);
245 ext4_mark_inode_dirty(handle, inode_bl);
203 } 246 }
247
248err_out1:
204 ext4_journal_stop(handle); 249 ext4_journal_stop(handle);
205 ext4_double_up_write_data_sem(inode, inode_bl); 250 ext4_double_up_write_data_sem(inode, inode_bl);
206 251
252err_out:
253 up_write(&EXT4_I(inode)->i_mmap_sem);
207journal_err_out: 254journal_err_out:
208 unlock_two_nondirectories(inode, inode_bl); 255 unlock_two_nondirectories(inode, inode_bl);
209 iput(inode_bl); 256 iput(inode_bl);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e2248083cdca..6fb76d408093 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4176,9 +4176,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
4176 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1) 4176 isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
4177 >> bsbits; 4177 >> bsbits;
4178 4178
4179 if ((size == isize) && 4179 if ((size == isize) && !ext4_fs_is_busy(sbi) &&
4180 !ext4_fs_is_busy(sbi) && 4180 !inode_is_open_for_write(ac->ac_inode)) {
4181 (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
4182 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC; 4181 ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
4183 return; 4182 return;
4184 } 4183 }
@@ -4258,7 +4257,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
4258 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order, 4257 (unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
4259 (unsigned) ar->lleft, (unsigned) ar->pleft, 4258 (unsigned) ar->lleft, (unsigned) ar->pleft,
4260 (unsigned) ar->lright, (unsigned) ar->pright, 4259 (unsigned) ar->lright, (unsigned) ar->pright,
4261 atomic_read(&ar->inode->i_writecount) ? "" : "non-"); 4260 inode_is_open_for_write(ar->inode) ? "" : "non-");
4262 return 0; 4261 return 0;
4263 4262
4264} 4263}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 6f5305e9a6ac..3e9298e6a705 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -468,10 +468,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
468 ext4_io_submit(io); 468 ext4_io_submit(io);
469 continue; 469 continue;
470 } 470 }
471 if (buffer_new(bh)) { 471 if (buffer_new(bh))
472 clear_buffer_new(bh); 472 clear_buffer_new(bh);
473 clean_bdev_bh_alias(bh);
474 }
475 set_buffer_async_write(bh); 473 set_buffer_async_write(bh);
476 nr_to_submit++; 474 nr_to_submit++;
477 } while ((bh = bh->b_this_page) != head); 475 } while ((bh = bh->b_this_page) != head);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 48421de803b7..3d9b18505c0c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1960,7 +1960,8 @@ retry:
1960 le16_to_cpu(es->s_reserved_gdt_blocks); 1960 le16_to_cpu(es->s_reserved_gdt_blocks);
1961 n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb); 1961 n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
1962 n_blocks_count = (ext4_fsblk_t)n_group * 1962 n_blocks_count = (ext4_fsblk_t)n_group *
1963 EXT4_BLOCKS_PER_GROUP(sb); 1963 EXT4_BLOCKS_PER_GROUP(sb) +
1964 le32_to_cpu(es->s_first_data_block);
1964 n_group--; /* set to last group number */ 1965 n_group--; /* set to last group number */
1965 } 1966 }
1966 1967
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 60da0a6e4d86..f5b828bf1299 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2249,7 +2249,6 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
2249 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); 2249 es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
2250 le16_add_cpu(&es->s_mnt_count, 1); 2250 le16_add_cpu(&es->s_mnt_count, 1);
2251 ext4_update_tstamp(es, s_mtime); 2251 ext4_update_tstamp(es, s_mtime);
2252 ext4_update_dynamic_rev(sb);
2253 if (sbi->s_journal) 2252 if (sbi->s_journal)
2254 ext4_set_feature_journal_needs_recovery(sb); 2253 ext4_set_feature_journal_needs_recovery(sb);
2255 2254
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 5e4e78fc0b3a..616c075da062 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -30,6 +30,7 @@ typedef enum {
30 attr_feature, 30 attr_feature,
31 attr_pointer_ui, 31 attr_pointer_ui,
32 attr_pointer_atomic, 32 attr_pointer_atomic,
33 attr_journal_task,
33} attr_id_t; 34} attr_id_t;
34 35
35typedef enum { 36typedef enum {
@@ -125,6 +126,14 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
125 return count; 126 return count;
126} 127}
127 128
129static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
130{
131 if (!sbi->s_journal)
132 return snprintf(buf, PAGE_SIZE, "<none>\n");
133 return snprintf(buf, PAGE_SIZE, "%d\n",
134 task_pid_vnr(sbi->s_journal->j_task));
135}
136
128#define EXT4_ATTR(_name,_mode,_id) \ 137#define EXT4_ATTR(_name,_mode,_id) \
129static struct ext4_attr ext4_attr_##_name = { \ 138static struct ext4_attr ext4_attr_##_name = { \
130 .attr = {.name = __stringify(_name), .mode = _mode }, \ 139 .attr = {.name = __stringify(_name), .mode = _mode }, \
@@ -188,6 +197,7 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
188EXT4_RO_ATTR_ES_UI(errors_count, s_error_count); 197EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
189EXT4_ATTR(first_error_time, 0444, first_error_time); 198EXT4_ATTR(first_error_time, 0444, first_error_time);
190EXT4_ATTR(last_error_time, 0444, last_error_time); 199EXT4_ATTR(last_error_time, 0444, last_error_time);
200EXT4_ATTR(journal_task, 0444, journal_task);
191 201
192static unsigned int old_bump_val = 128; 202static unsigned int old_bump_val = 128;
193EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val); 203EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -217,6 +227,7 @@ static struct attribute *ext4_attrs[] = {
217 ATTR_LIST(errors_count), 227 ATTR_LIST(errors_count),
218 ATTR_LIST(first_error_time), 228 ATTR_LIST(first_error_time),
219 ATTR_LIST(last_error_time), 229 ATTR_LIST(last_error_time),
230 ATTR_LIST(journal_task),
220 NULL, 231 NULL,
221}; 232};
222 233
@@ -304,6 +315,8 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
304 return print_tstamp(buf, sbi->s_es, s_first_error_time); 315 return print_tstamp(buf, sbi->s_es, s_first_error_time);
305 case attr_last_error_time: 316 case attr_last_error_time:
306 return print_tstamp(buf, sbi->s_es, s_last_error_time); 317 return print_tstamp(buf, sbi->s_es, s_last_error_time);
318 case attr_journal_task:
319 return journal_task_show(sbi, buf);
307 } 320 }
308 321
309 return 0; 322 return 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 86ed9c686249..dc82e7757f67 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -829,6 +829,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
829 bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO); 829 bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
830 if (IS_ERR(bh)) { 830 if (IS_ERR(bh)) {
831 ret = PTR_ERR(bh); 831 ret = PTR_ERR(bh);
832 bh = NULL;
832 goto out; 833 goto out;
833 } 834 }
834 835
@@ -2903,6 +2904,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
2903 if (error == -EIO) 2904 if (error == -EIO)
2904 EXT4_ERROR_INODE(inode, "block %llu read error", 2905 EXT4_ERROR_INODE(inode, "block %llu read error",
2905 EXT4_I(inode)->i_file_acl); 2906 EXT4_I(inode)->i_file_acl);
2907 bh = NULL;
2906 goto cleanup; 2908 goto cleanup;
2907 } 2909 }
2908 error = ext4_xattr_check_block(inode, bh); 2910 error = ext4_xattr_check_block(inode, bh);
@@ -3059,6 +3061,7 @@ ext4_xattr_block_cache_find(struct inode *inode,
3059 if (IS_ERR(bh)) { 3061 if (IS_ERR(bh)) {
3060 if (PTR_ERR(bh) == -ENOMEM) 3062 if (PTR_ERR(bh) == -ENOMEM)
3061 return NULL; 3063 return NULL;
3064 bh = NULL;
3062 EXT4_ERROR_INODE(inode, "block %lu read error", 3065 EXT4_ERROR_INODE(inode, "block %lu read error",
3063 (unsigned long)ce->e_value); 3066 (unsigned long)ce->e_value);
3064 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { 3067 } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 26f8d7e46462..02e0b79753e7 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -113,7 +113,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
113 nblocks = jbd2_space_needed(journal); 113 nblocks = jbd2_space_needed(journal);
114 while (jbd2_log_space_left(journal) < nblocks) { 114 while (jbd2_log_space_left(journal) < nblocks) {
115 write_unlock(&journal->j_state_lock); 115 write_unlock(&journal->j_state_lock);
116 mutex_lock(&journal->j_checkpoint_mutex); 116 mutex_lock_io(&journal->j_checkpoint_mutex);
117 117
118 /* 118 /*
119 * Test again, another process may have checkpointed while we 119 * Test again, another process may have checkpointed while we
@@ -276,9 +276,22 @@ restart:
276 "JBD2: %s: Waiting for Godot: block %llu\n", 276 "JBD2: %s: Waiting for Godot: block %llu\n",
277 journal->j_devname, (unsigned long long) bh->b_blocknr); 277 journal->j_devname, (unsigned long long) bh->b_blocknr);
278 278
279 if (batch_count)
280 __flush_batch(journal, &batch_count);
279 jbd2_log_start_commit(journal, tid); 281 jbd2_log_start_commit(journal, tid);
282 /*
283 * jbd2_journal_commit_transaction() may want
284 * to take the checkpoint_mutex if JBD2_FLUSHED
285 * is set, jbd2_update_log_tail() called by
286 * jbd2_journal_commit_transaction() may also take
287 * checkpoint_mutex. So we need to temporarily
288 * drop it.
289 */
290 mutex_unlock(&journal->j_checkpoint_mutex);
280 jbd2_log_wait_commit(journal, tid); 291 jbd2_log_wait_commit(journal, tid);
281 goto retry; 292 mutex_lock_io(&journal->j_checkpoint_mutex);
293 spin_lock(&journal->j_list_lock);
294 goto restart;
282 } 295 }
283 if (!buffer_dirty(bh)) { 296 if (!buffer_dirty(bh)) {
284 if (unlikely(buffer_write_io_error(bh)) && !result) 297 if (unlikely(buffer_write_io_error(bh)) && !result)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 2eb55c3361a8..efd0ce9489ae 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -694,9 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
694 the last tag we set up. */ 694 the last tag we set up. */
695 695
696 tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG); 696 tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
697
698 jbd2_descriptor_block_csum_set(journal, descriptor);
699start_journal_io: 697start_journal_io:
698 if (descriptor)
699 jbd2_descriptor_block_csum_set(journal,
700 descriptor);
701
700 for (i = 0; i < bufs; i++) { 702 for (i = 0; i < bufs; i++) {
701 struct buffer_head *bh = wbuf[i]; 703 struct buffer_head *bh = wbuf[i];
702 /* 704 /*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8ef6b6daaa7a..382c030cc78b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -142,22 +142,6 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
142 return cpu_to_be32(csum); 142 return cpu_to_be32(csum);
143} 143}
144 144
145static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
146{
147 if (!jbd2_journal_has_csum_v2or3(j))
148 return 1;
149
150 return sb->s_checksum == jbd2_superblock_csum(j, sb);
151}
152
153static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
154{
155 if (!jbd2_journal_has_csum_v2or3(j))
156 return;
157
158 sb->s_checksum = jbd2_superblock_csum(j, sb);
159}
160
161/* 145/*
162 * Helper function used to manage commit timeouts 146 * Helper function used to manage commit timeouts
163 */ 147 */
@@ -1356,6 +1340,10 @@ static int journal_reset(journal_t *journal)
1356 return jbd2_journal_start_thread(journal); 1340 return jbd2_journal_start_thread(journal);
1357} 1341}
1358 1342
1343/*
1344 * This function expects that the caller will have locked the journal
1345 * buffer head, and will return with it unlocked
1346 */
1359static int jbd2_write_superblock(journal_t *journal, int write_flags) 1347static int jbd2_write_superblock(journal_t *journal, int write_flags)
1360{ 1348{
1361 struct buffer_head *bh = journal->j_sb_buffer; 1349 struct buffer_head *bh = journal->j_sb_buffer;
@@ -1365,7 +1353,6 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
1365 trace_jbd2_write_superblock(journal, write_flags); 1353 trace_jbd2_write_superblock(journal, write_flags);
1366 if (!(journal->j_flags & JBD2_BARRIER)) 1354 if (!(journal->j_flags & JBD2_BARRIER))
1367 write_flags &= ~(REQ_FUA | REQ_PREFLUSH); 1355 write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
1368 lock_buffer(bh);
1369 if (buffer_write_io_error(bh)) { 1356 if (buffer_write_io_error(bh)) {
1370 /* 1357 /*
1371 * Oh, dear. A previous attempt to write the journal 1358 * Oh, dear. A previous attempt to write the journal
@@ -1381,7 +1368,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
1381 clear_buffer_write_io_error(bh); 1368 clear_buffer_write_io_error(bh);
1382 set_buffer_uptodate(bh); 1369 set_buffer_uptodate(bh);
1383 } 1370 }
1384 jbd2_superblock_csum_set(journal, sb); 1371 if (jbd2_journal_has_csum_v2or3(journal))
1372 sb->s_checksum = jbd2_superblock_csum(journal, sb);
1385 get_bh(bh); 1373 get_bh(bh);
1386 bh->b_end_io = end_buffer_write_sync; 1374 bh->b_end_io = end_buffer_write_sync;
1387 ret = submit_bh(REQ_OP_WRITE, write_flags, bh); 1375 ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -1424,6 +1412,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
1424 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", 1412 jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
1425 tail_block, tail_tid); 1413 tail_block, tail_tid);
1426 1414
1415 lock_buffer(journal->j_sb_buffer);
1427 sb->s_sequence = cpu_to_be32(tail_tid); 1416 sb->s_sequence = cpu_to_be32(tail_tid);
1428 sb->s_start = cpu_to_be32(tail_block); 1417 sb->s_start = cpu_to_be32(tail_block);
1429 1418
@@ -1454,18 +1443,17 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
1454 journal_superblock_t *sb = journal->j_superblock; 1443 journal_superblock_t *sb = journal->j_superblock;
1455 1444
1456 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); 1445 BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
1457 read_lock(&journal->j_state_lock); 1446 lock_buffer(journal->j_sb_buffer);
1458 /* Is it already empty? */ 1447 if (sb->s_start == 0) { /* Is it already empty? */
1459 if (sb->s_start == 0) { 1448 unlock_buffer(journal->j_sb_buffer);
1460 read_unlock(&journal->j_state_lock);
1461 return; 1449 return;
1462 } 1450 }
1451
1463 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n", 1452 jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
1464 journal->j_tail_sequence); 1453 journal->j_tail_sequence);
1465 1454
1466 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); 1455 sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
1467 sb->s_start = cpu_to_be32(0); 1456 sb->s_start = cpu_to_be32(0);
1468 read_unlock(&journal->j_state_lock);
1469 1457
1470 jbd2_write_superblock(journal, write_op); 1458 jbd2_write_superblock(journal, write_op);
1471 1459
@@ -1488,9 +1476,8 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
1488 journal_superblock_t *sb = journal->j_superblock; 1476 journal_superblock_t *sb = journal->j_superblock;
1489 int errcode; 1477 int errcode;
1490 1478
1491 read_lock(&journal->j_state_lock); 1479 lock_buffer(journal->j_sb_buffer);
1492 errcode = journal->j_errno; 1480 errcode = journal->j_errno;
1493 read_unlock(&journal->j_state_lock);
1494 if (errcode == -ESHUTDOWN) 1481 if (errcode == -ESHUTDOWN)
1495 errcode = 0; 1482 errcode = 0;
1496 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode); 1483 jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
@@ -1595,17 +1582,18 @@ static int journal_get_superblock(journal_t *journal)
1595 } 1582 }
1596 } 1583 }
1597 1584
1598 /* Check superblock checksum */ 1585 if (jbd2_journal_has_csum_v2or3(journal)) {
1599 if (!jbd2_superblock_csum_verify(journal, sb)) { 1586 /* Check superblock checksum */
1600 printk(KERN_ERR "JBD2: journal checksum error\n"); 1587 if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
1601 err = -EFSBADCRC; 1588 printk(KERN_ERR "JBD2: journal checksum error\n");
1602 goto out; 1589 err = -EFSBADCRC;
1603 } 1590 goto out;
1591 }
1604 1592
1605 /* Precompute checksum seed for all metadata */ 1593 /* Precompute checksum seed for all metadata */
1606 if (jbd2_journal_has_csum_v2or3(journal))
1607 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1594 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
1608 sizeof(sb->s_uuid)); 1595 sizeof(sb->s_uuid));
1596 }
1609 1597
1610 set_buffer_verified(bh); 1598 set_buffer_verified(bh);
1611 1599
@@ -1894,28 +1882,27 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1894 1882
1895 sb = journal->j_superblock; 1883 sb = journal->j_superblock;
1896 1884
1885 /* Load the checksum driver if necessary */
1886 if ((journal->j_chksum_driver == NULL) &&
1887 INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1888 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
1889 if (IS_ERR(journal->j_chksum_driver)) {
1890 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
1891 journal->j_chksum_driver = NULL;
1892 return 0;
1893 }
1894 /* Precompute checksum seed for all metadata */
1895 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
1896 sizeof(sb->s_uuid));
1897 }
1898
1899 lock_buffer(journal->j_sb_buffer);
1900
1897 /* If enabling v3 checksums, update superblock */ 1901 /* If enabling v3 checksums, update superblock */
1898 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { 1902 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1899 sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1903 sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
1900 sb->s_feature_compat &= 1904 sb->s_feature_compat &=
1901 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 1905 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
1902
1903 /* Load the checksum driver */
1904 if (journal->j_chksum_driver == NULL) {
1905 journal->j_chksum_driver = crypto_alloc_shash("crc32c",
1906 0, 0);
1907 if (IS_ERR(journal->j_chksum_driver)) {
1908 printk(KERN_ERR "JBD2: Cannot load crc32c "
1909 "driver.\n");
1910 journal->j_chksum_driver = NULL;
1911 return 0;
1912 }
1913
1914 /* Precompute checksum seed for all metadata */
1915 journal->j_csum_seed = jbd2_chksum(journal, ~0,
1916 sb->s_uuid,
1917 sizeof(sb->s_uuid));
1918 }
1919 } 1906 }
1920 1907
1921 /* If enabling v1 checksums, downgrade superblock */ 1908 /* If enabling v1 checksums, downgrade superblock */
@@ -1927,6 +1914,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1927 sb->s_feature_compat |= cpu_to_be32(compat); 1914 sb->s_feature_compat |= cpu_to_be32(compat);
1928 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1915 sb->s_feature_ro_compat |= cpu_to_be32(ro);
1929 sb->s_feature_incompat |= cpu_to_be32(incompat); 1916 sb->s_feature_incompat |= cpu_to_be32(incompat);
1917 unlock_buffer(journal->j_sb_buffer);
1930 1918
1931 return 1; 1919 return 1;
1932#undef COMPAT_FEATURE_ON 1920#undef COMPAT_FEATURE_ON
@@ -2067,7 +2055,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
2067 err = jbd2_journal_skip_recovery(journal); 2055 err = jbd2_journal_skip_recovery(journal);
2068 if (write) { 2056 if (write) {
2069 /* Lock to make assertions happy... */ 2057 /* Lock to make assertions happy... */
2070 mutex_lock(&journal->j_checkpoint_mutex); 2058 mutex_lock_io(&journal->j_checkpoint_mutex);
2071 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA); 2059 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
2072 mutex_unlock(&journal->j_checkpoint_mutex); 2060 mutex_unlock(&journal->j_checkpoint_mutex);
2073 } 2061 }
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index cc35537232f2..f940d31c2adc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -63,7 +63,7 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
63/* 63/*
64 * jbd2_get_transaction: obtain a new transaction_t object. 64 * jbd2_get_transaction: obtain a new transaction_t object.
65 * 65 *
66 * Simply allocate and initialise a new transaction. Create it in 66 * Simply initialise a new transaction. Initialize it in
67 * RUNNING state and add it to the current journal (which should not 67 * RUNNING state and add it to the current journal (which should not
68 * have an existing running transaction: we only make a new transaction 68 * have an existing running transaction: we only make a new transaction
69 * once we have started to commit the old one). 69 * once we have started to commit the old one).
@@ -75,8 +75,8 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
75 * 75 *
76 */ 76 */
77 77
78static transaction_t * 78static void jbd2_get_transaction(journal_t *journal,
79jbd2_get_transaction(journal_t *journal, transaction_t *transaction) 79 transaction_t *transaction)
80{ 80{
81 transaction->t_journal = journal; 81 transaction->t_journal = journal;
82 transaction->t_state = T_RUNNING; 82 transaction->t_state = T_RUNNING;
@@ -100,8 +100,6 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
100 transaction->t_max_wait = 0; 100 transaction->t_max_wait = 0;
101 transaction->t_start = jiffies; 101 transaction->t_start = jiffies;
102 transaction->t_requested = 0; 102 transaction->t_requested = 0;
103
104 return transaction;
105} 103}
106 104
107/* 105/*
@@ -1252,11 +1250,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
1252 struct journal_head *jh; 1250 struct journal_head *jh;
1253 char *committed_data = NULL; 1251 char *committed_data = NULL;
1254 1252
1255 JBUFFER_TRACE(jh, "entry");
1256 if (jbd2_write_access_granted(handle, bh, true)) 1253 if (jbd2_write_access_granted(handle, bh, true))
1257 return 0; 1254 return 0;
1258 1255
1259 jh = jbd2_journal_add_journal_head(bh); 1256 jh = jbd2_journal_add_journal_head(bh);
1257 JBUFFER_TRACE(jh, "entry");
1258
1260 /* 1259 /*
1261 * Do this first --- it can drop the journal lock, so we want to 1260 * Do this first --- it can drop the journal lock, so we want to
1262 * make sure that obtaining the committed_data is done 1261 * make sure that obtaining the committed_data is done
@@ -1367,15 +1366,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1367 1366
1368 if (is_handle_aborted(handle)) 1367 if (is_handle_aborted(handle))
1369 return -EROFS; 1368 return -EROFS;
1370 if (!buffer_jbd(bh)) { 1369 if (!buffer_jbd(bh))
1371 ret = -EUCLEAN; 1370 return -EUCLEAN;
1372 goto out; 1371
1373 }
1374 /* 1372 /*
1375 * We don't grab jh reference here since the buffer must be part 1373 * We don't grab jh reference here since the buffer must be part
1376 * of the running transaction. 1374 * of the running transaction.
1377 */ 1375 */
1378 jh = bh2jh(bh); 1376 jh = bh2jh(bh);
1377 jbd_debug(5, "journal_head %p\n", jh);
1378 JBUFFER_TRACE(jh, "entry");
1379
1379 /* 1380 /*
1380 * This and the following assertions are unreliable since we may see jh 1381 * This and the following assertions are unreliable since we may see jh
1381 * in inconsistent state unless we grab bh_state lock. But this is 1382 * in inconsistent state unless we grab bh_state lock. But this is
@@ -1409,9 +1410,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
1409 } 1410 }
1410 1411
1411 journal = transaction->t_journal; 1412 journal = transaction->t_journal;
1412 jbd_debug(5, "journal_head %p\n", jh);
1413 JBUFFER_TRACE(jh, "entry");
1414
1415 jbd_lock_bh_state(bh); 1413 jbd_lock_bh_state(bh);
1416 1414
1417 if (jh->b_modified == 0) { 1415 if (jh->b_modified == 0) {
@@ -1597,9 +1595,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1597 __jbd2_journal_unfile_buffer(jh); 1595 __jbd2_journal_unfile_buffer(jh);
1598 if (!buffer_jbd(bh)) { 1596 if (!buffer_jbd(bh)) {
1599 spin_unlock(&journal->j_list_lock); 1597 spin_unlock(&journal->j_list_lock);
1600 jbd_unlock_bh_state(bh); 1598 goto not_jbd;
1601 __bforget(bh);
1602 goto drop;
1603 } 1599 }
1604 } 1600 }
1605 spin_unlock(&journal->j_list_lock); 1601 spin_unlock(&journal->j_list_lock);
@@ -1609,14 +1605,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1609 /* However, if the buffer is still owned by a prior 1605 /* However, if the buffer is still owned by a prior
1610 * (committing) transaction, we can't drop it yet... */ 1606 * (committing) transaction, we can't drop it yet... */
1611 JBUFFER_TRACE(jh, "belongs to older transaction"); 1607 JBUFFER_TRACE(jh, "belongs to older transaction");
1612 /* ... but we CAN drop it from the new transaction if we 1608 /* ... but we CAN drop it from the new transaction through
1613 * have also modified it since the original commit. */ 1609 * marking the buffer as freed and setting b_next_transaction to
1610 * the new transaction, so that not only the commit code
1611 * knows it should clear dirty bits when it is done with the
1612 * buffer, but also the buffer can be checkpointed only
1613 * after the new transaction commits. */
1614 1614
1615 if (jh->b_next_transaction) { 1615 set_buffer_freed(bh);
1616 J_ASSERT(jh->b_next_transaction == transaction); 1616
1617 if (!jh->b_next_transaction) {
1617 spin_lock(&journal->j_list_lock); 1618 spin_lock(&journal->j_list_lock);
1618 jh->b_next_transaction = NULL; 1619 jh->b_next_transaction = transaction;
1619 spin_unlock(&journal->j_list_lock); 1620 spin_unlock(&journal->j_list_lock);
1621 } else {
1622 J_ASSERT(jh->b_next_transaction == transaction);
1620 1623
1621 /* 1624 /*
1622 * only drop a reference if this transaction modified 1625 * only drop a reference if this transaction modified
@@ -1625,9 +1628,40 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
1625 if (was_modified) 1628 if (was_modified)
1626 drop_reserve = 1; 1629 drop_reserve = 1;
1627 } 1630 }
1631 } else {
1632 /*
1633 * Finally, if the buffer does not belong to any
1634 * transaction, we can just drop it now if it has no
1635 * checkpoint.
1636 */
1637 spin_lock(&journal->j_list_lock);
1638 if (!jh->b_cp_transaction) {
1639 JBUFFER_TRACE(jh, "belongs to none transaction");
1640 spin_unlock(&journal->j_list_lock);
1641 goto not_jbd;
1642 }
1643
1644 /*
1645 * Otherwise, if the buffer has been written to disk,
1646 * it is safe to remove the checkpoint and drop it.
1647 */
1648 if (!buffer_dirty(bh)) {
1649 __jbd2_journal_remove_checkpoint(jh);
1650 spin_unlock(&journal->j_list_lock);
1651 goto not_jbd;
1652 }
1653
1654 /*
1655 * The buffer is still not written to disk, we should
1656 * attach this buffer to current transaction so that the
1657 * buffer can be checkpointed only after the current
1658 * transaction commits.
1659 */
1660 clear_buffer_dirty(bh);
1661 __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
1662 spin_unlock(&journal->j_list_lock);
1628 } 1663 }
1629 1664
1630not_jbd:
1631 jbd_unlock_bh_state(bh); 1665 jbd_unlock_bh_state(bh);
1632 __brelse(bh); 1666 __brelse(bh);
1633drop: 1667drop:
@@ -1636,6 +1670,11 @@ drop:
1636 handle->h_buffer_credits++; 1670 handle->h_buffer_credits++;
1637 } 1671 }
1638 return err; 1672 return err;
1673
1674not_jbd:
1675 jbd_unlock_bh_state(bh);
1676 __bforget(bh);
1677 goto drop;
1639} 1678}
1640 1679
1641/** 1680/**