aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-08-29 14:52:46 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-08-29 14:52:46 -0400
commitd4f03186c8986ffde34d06fe74a99aab08f7ee0b (patch)
tree9887bc2405a4f9e17185aa1338dec3ae68246b16 /fs
parentef13c8afa67518e1d173a6f3b95dd02559879421 (diff)
parentd80d448c6c5bdd32605b78a60fe8081d82d4da0f (diff)
Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 bugfixes from Ted Ts'o: "Ext4 bug fixes for 3.17, to provide better handling of memory allocation failures, and to fix some journaling bugs involving journal checksums and FALLOC_FL_ZERO_RANGE" * tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: ext4: fix same-dir rename when inline data directory overflows jbd2: fix descriptor block size handling errors with journal_csum jbd2: fix infinite loop when recovering corrupt journal blocks ext4: update i_disksize coherently with block allocation on error path ext4: fix transaction issues for ext4_fallocate and ext_zero_range ext4: fix incorect journal credits reservation in ext4_zero_range ext4: move i_size,i_disksize update routines to helper function ext4: fix BUG_ON in mb_free_blocks() ext4: propagate errors up to ext4_find_entry()'s callers
Diffstat (limited to 'fs')
-rw-r--r--fs/ext4/ext4.h18
-rw-r--r--fs/ext4/extents.c88
-rw-r--r--fs/ext4/inode.c44
-rw-r--r--fs/ext4/mballoc.c5
-rw-r--r--fs/ext4/namei.c56
-rw-r--r--fs/ext4/super.c5
-rw-r--r--fs/jbd2/commit.c21
-rw-r--r--fs/jbd2/journal.c56
-rw-r--r--fs/jbd2/recovery.c33
-rw-r--r--fs/jbd2/revoke.c6
10 files changed, 208 insertions, 124 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5b19760b1de5..b0c225cdb52c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1825,7 +1825,7 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
1825/* 1825/*
1826 * Special error return code only used by dx_probe() and its callers. 1826 * Special error return code only used by dx_probe() and its callers.
1827 */ 1827 */
1828#define ERR_BAD_DX_DIR -75000 1828#define ERR_BAD_DX_DIR (-(MAX_ERRNO - 1))
1829 1829
1830/* 1830/*
1831 * Timeout and state flag for lazy initialization inode thread. 1831 * Timeout and state flag for lazy initialization inode thread.
@@ -2454,6 +2454,22 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2454 up_write(&EXT4_I(inode)->i_data_sem); 2454 up_write(&EXT4_I(inode)->i_data_sem);
2455} 2455}
2456 2456
2457/* Update i_size, i_disksize. Requires i_mutex to avoid races with truncate */
2458static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
2459{
2460 int changed = 0;
2461
2462 if (newsize > inode->i_size) {
2463 i_size_write(inode, newsize);
2464 changed = 1;
2465 }
2466 if (newsize > EXT4_I(inode)->i_disksize) {
2467 ext4_update_i_disksize(inode, newsize);
2468 changed |= 2;
2469 }
2470 return changed;
2471}
2472
2457struct ext4_group_info { 2473struct ext4_group_info {
2458 unsigned long bb_state; 2474 unsigned long bb_state;
2459 struct rb_root bb_free_root; 2475 struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 76c2df382b7d..74292a71b384 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4665,7 +4665,8 @@ retry:
4665} 4665}
4666 4666
4667static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, 4667static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4668 ext4_lblk_t len, int flags, int mode) 4668 ext4_lblk_t len, loff_t new_size,
4669 int flags, int mode)
4669{ 4670{
4670 struct inode *inode = file_inode(file); 4671 struct inode *inode = file_inode(file);
4671 handle_t *handle; 4672 handle_t *handle;
@@ -4674,8 +4675,10 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4674 int retries = 0; 4675 int retries = 0;
4675 struct ext4_map_blocks map; 4676 struct ext4_map_blocks map;
4676 unsigned int credits; 4677 unsigned int credits;
4678 loff_t epos;
4677 4679
4678 map.m_lblk = offset; 4680 map.m_lblk = offset;
4681 map.m_len = len;
4679 /* 4682 /*
4680 * Don't normalize the request if it can fit in one extent so 4683 * Don't normalize the request if it can fit in one extent so
4681 * that it doesn't get unnecessarily split into multiple 4684 * that it doesn't get unnecessarily split into multiple
@@ -4690,9 +4693,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4690 credits = ext4_chunk_trans_blocks(inode, len); 4693 credits = ext4_chunk_trans_blocks(inode, len);
4691 4694
4692retry: 4695retry:
4693 while (ret >= 0 && ret < len) { 4696 while (ret >= 0 && len) {
4694 map.m_lblk = map.m_lblk + ret;
4695 map.m_len = len = len - ret;
4696 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, 4697 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
4697 credits); 4698 credits);
4698 if (IS_ERR(handle)) { 4699 if (IS_ERR(handle)) {
@@ -4709,6 +4710,21 @@ retry:
4709 ret2 = ext4_journal_stop(handle); 4710 ret2 = ext4_journal_stop(handle);
4710 break; 4711 break;
4711 } 4712 }
4713 map.m_lblk += ret;
4714 map.m_len = len = len - ret;
4715 epos = (loff_t)map.m_lblk << inode->i_blkbits;
4716 inode->i_ctime = ext4_current_time(inode);
4717 if (new_size) {
4718 if (epos > new_size)
4719 epos = new_size;
4720 if (ext4_update_inode_size(inode, epos) & 0x1)
4721 inode->i_mtime = inode->i_ctime;
4722 } else {
4723 if (epos > inode->i_size)
4724 ext4_set_inode_flag(inode,
4725 EXT4_INODE_EOFBLOCKS);
4726 }
4727 ext4_mark_inode_dirty(handle, inode);
4712 ret2 = ext4_journal_stop(handle); 4728 ret2 = ext4_journal_stop(handle);
4713 if (ret2) 4729 if (ret2)
4714 break; 4730 break;
@@ -4731,7 +4747,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4731 loff_t new_size = 0; 4747 loff_t new_size = 0;
4732 int ret = 0; 4748 int ret = 0;
4733 int flags; 4749 int flags;
4734 int partial; 4750 int credits;
4751 int partial_begin, partial_end;
4735 loff_t start, end; 4752 loff_t start, end;
4736 ext4_lblk_t lblk; 4753 ext4_lblk_t lblk;
4737 struct address_space *mapping = inode->i_mapping; 4754 struct address_space *mapping = inode->i_mapping;
@@ -4771,7 +4788,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4771 4788
4772 if (start < offset || end > offset + len) 4789 if (start < offset || end > offset + len)
4773 return -EINVAL; 4790 return -EINVAL;
4774 partial = (offset + len) & ((1 << blkbits) - 1); 4791 partial_begin = offset & ((1 << blkbits) - 1);
4792 partial_end = (offset + len) & ((1 << blkbits) - 1);
4775 4793
4776 lblk = start >> blkbits; 4794 lblk = start >> blkbits;
4777 max_blocks = (end >> blkbits); 4795 max_blocks = (end >> blkbits);
@@ -4805,7 +4823,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4805 * If we have a partial block after EOF we have to allocate 4823 * If we have a partial block after EOF we have to allocate
4806 * the entire block. 4824 * the entire block.
4807 */ 4825 */
4808 if (partial) 4826 if (partial_end)
4809 max_blocks += 1; 4827 max_blocks += 1;
4810 } 4828 }
4811 4829
@@ -4813,6 +4831,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4813 4831
4814 /* Now release the pages and zero block aligned part of pages*/ 4832 /* Now release the pages and zero block aligned part of pages*/
4815 truncate_pagecache_range(inode, start, end - 1); 4833 truncate_pagecache_range(inode, start, end - 1);
4834 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4816 4835
4817 /* Wait all existing dio workers, newcomers will block on i_mutex */ 4836 /* Wait all existing dio workers, newcomers will block on i_mutex */
4818 ext4_inode_block_unlocked_dio(inode); 4837 ext4_inode_block_unlocked_dio(inode);
@@ -4825,13 +4844,22 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4825 if (ret) 4844 if (ret)
4826 goto out_dio; 4845 goto out_dio;
4827 4846
4828 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, 4847 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4829 mode); 4848 flags, mode);
4830 if (ret) 4849 if (ret)
4831 goto out_dio; 4850 goto out_dio;
4832 } 4851 }
4852 if (!partial_begin && !partial_end)
4853 goto out_dio;
4833 4854
4834 handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); 4855 /*
4856 * In worst case we have to writeout two nonadjacent unwritten
4857 * blocks and update the inode
4858 */
4859 credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
4860 if (ext4_should_journal_data(inode))
4861 credits += 2;
4862 handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
4835 if (IS_ERR(handle)) { 4863 if (IS_ERR(handle)) {
4836 ret = PTR_ERR(handle); 4864 ret = PTR_ERR(handle);
4837 ext4_std_error(inode->i_sb, ret); 4865 ext4_std_error(inode->i_sb, ret);
@@ -4839,12 +4867,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4839 } 4867 }
4840 4868
4841 inode->i_mtime = inode->i_ctime = ext4_current_time(inode); 4869 inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
4842
4843 if (new_size) { 4870 if (new_size) {
4844 if (new_size > i_size_read(inode)) 4871 ext4_update_inode_size(inode, new_size);
4845 i_size_write(inode, new_size);
4846 if (new_size > EXT4_I(inode)->i_disksize)
4847 ext4_update_i_disksize(inode, new_size);
4848 } else { 4872 } else {
4849 /* 4873 /*
4850 * Mark that we allocate beyond EOF so the subsequent truncate 4874 * Mark that we allocate beyond EOF so the subsequent truncate
@@ -4853,7 +4877,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4853 if ((offset + len) > i_size_read(inode)) 4877 if ((offset + len) > i_size_read(inode))
4854 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); 4878 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4855 } 4879 }
4856
4857 ext4_mark_inode_dirty(handle, inode); 4880 ext4_mark_inode_dirty(handle, inode);
4858 4881
4859 /* Zero out partial block at the edges of the range */ 4882 /* Zero out partial block at the edges of the range */
@@ -4880,13 +4903,11 @@ out_mutex:
4880long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) 4903long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4881{ 4904{
4882 struct inode *inode = file_inode(file); 4905 struct inode *inode = file_inode(file);
4883 handle_t *handle;
4884 loff_t new_size = 0; 4906 loff_t new_size = 0;
4885 unsigned int max_blocks; 4907 unsigned int max_blocks;
4886 int ret = 0; 4908 int ret = 0;
4887 int flags; 4909 int flags;
4888 ext4_lblk_t lblk; 4910 ext4_lblk_t lblk;
4889 struct timespec tv;
4890 unsigned int blkbits = inode->i_blkbits; 4911 unsigned int blkbits = inode->i_blkbits;
4891 4912
4892 /* Return error if mode is not supported */ 4913 /* Return error if mode is not supported */
@@ -4937,36 +4958,15 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4937 goto out; 4958 goto out;
4938 } 4959 }
4939 4960
4940 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); 4961 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4962 flags, mode);
4941 if (ret) 4963 if (ret)
4942 goto out; 4964 goto out;
4943 4965
4944 handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); 4966 if (file->f_flags & O_SYNC && EXT4_SB(inode->i_sb)->s_journal) {
4945 if (IS_ERR(handle)) 4967 ret = jbd2_complete_transaction(EXT4_SB(inode->i_sb)->s_journal,
4946 goto out; 4968 EXT4_I(inode)->i_sync_tid);
4947
4948 tv = inode->i_ctime = ext4_current_time(inode);
4949
4950 if (new_size) {
4951 if (new_size > i_size_read(inode)) {
4952 i_size_write(inode, new_size);
4953 inode->i_mtime = tv;
4954 }
4955 if (new_size > EXT4_I(inode)->i_disksize)
4956 ext4_update_i_disksize(inode, new_size);
4957 } else {
4958 /*
4959 * Mark that we allocate beyond EOF so the subsequent truncate
4960 * can proceed even if the new size is the same as i_size.
4961 */
4962 if ((offset + len) > i_size_read(inode))
4963 ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
4964 } 4969 }
4965 ext4_mark_inode_dirty(handle, inode);
4966 if (file->f_flags & O_SYNC)
4967 ext4_handle_sync(handle);
4968
4969 ext4_journal_stop(handle);
4970out: 4970out:
4971 mutex_unlock(&inode->i_mutex); 4971 mutex_unlock(&inode->i_mutex);
4972 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); 4972 trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 367a60c07cf0..3aa26e9117c4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1055,27 +1055,11 @@ static int ext4_write_end(struct file *file,
1055 } else 1055 } else
1056 copied = block_write_end(file, mapping, pos, 1056 copied = block_write_end(file, mapping, pos,
1057 len, copied, page, fsdata); 1057 len, copied, page, fsdata);
1058
1059 /* 1058 /*
1060 * No need to use i_size_read() here, the i_size 1059 * it's important to update i_size while still holding page lock:
1061 * cannot change under us because we hole i_mutex.
1062 *
1063 * But it's important to update i_size while still holding page lock:
1064 * page writeout could otherwise come in and zero beyond i_size. 1060 * page writeout could otherwise come in and zero beyond i_size.
1065 */ 1061 */
1066 if (pos + copied > inode->i_size) { 1062 i_size_changed = ext4_update_inode_size(inode, pos + copied);
1067 i_size_write(inode, pos + copied);
1068 i_size_changed = 1;
1069 }
1070
1071 if (pos + copied > EXT4_I(inode)->i_disksize) {
1072 /* We need to mark inode dirty even if
1073 * new_i_size is less that inode->i_size
1074 * but greater than i_disksize. (hint delalloc)
1075 */
1076 ext4_update_i_disksize(inode, (pos + copied));
1077 i_size_changed = 1;
1078 }
1079 unlock_page(page); 1063 unlock_page(page);
1080 page_cache_release(page); 1064 page_cache_release(page);
1081 1065
@@ -1123,7 +1107,7 @@ static int ext4_journalled_write_end(struct file *file,
1123 int ret = 0, ret2; 1107 int ret = 0, ret2;
1124 int partial = 0; 1108 int partial = 0;
1125 unsigned from, to; 1109 unsigned from, to;
1126 loff_t new_i_size; 1110 int size_changed = 0;
1127 1111
1128 trace_ext4_journalled_write_end(inode, pos, len, copied); 1112 trace_ext4_journalled_write_end(inode, pos, len, copied);
1129 from = pos & (PAGE_CACHE_SIZE - 1); 1113 from = pos & (PAGE_CACHE_SIZE - 1);
@@ -1146,20 +1130,18 @@ static int ext4_journalled_write_end(struct file *file,
1146 if (!partial) 1130 if (!partial)
1147 SetPageUptodate(page); 1131 SetPageUptodate(page);
1148 } 1132 }
1149 new_i_size = pos + copied; 1133 size_changed = ext4_update_inode_size(inode, pos + copied);
1150 if (new_i_size > inode->i_size)
1151 i_size_write(inode, pos+copied);
1152 ext4_set_inode_state(inode, EXT4_STATE_JDATA); 1134 ext4_set_inode_state(inode, EXT4_STATE_JDATA);
1153 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid; 1135 EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
1154 if (new_i_size > EXT4_I(inode)->i_disksize) { 1136 unlock_page(page);
1155 ext4_update_i_disksize(inode, new_i_size); 1137 page_cache_release(page);
1138
1139 if (size_changed) {
1156 ret2 = ext4_mark_inode_dirty(handle, inode); 1140 ret2 = ext4_mark_inode_dirty(handle, inode);
1157 if (!ret) 1141 if (!ret)
1158 ret = ret2; 1142 ret = ret2;
1159 } 1143 }
1160 1144
1161 unlock_page(page);
1162 page_cache_release(page);
1163 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 1145 if (pos + len > inode->i_size && ext4_can_truncate(inode))
1164 /* if we have allocated more blocks and copied 1146 /* if we have allocated more blocks and copied
1165 * less. We will have blocks allocated outside 1147 * less. We will have blocks allocated outside
@@ -2095,6 +2077,7 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2095 struct ext4_map_blocks *map = &mpd->map; 2077 struct ext4_map_blocks *map = &mpd->map;
2096 int err; 2078 int err;
2097 loff_t disksize; 2079 loff_t disksize;
2080 int progress = 0;
2098 2081
2099 mpd->io_submit.io_end->offset = 2082 mpd->io_submit.io_end->offset =
2100 ((loff_t)map->m_lblk) << inode->i_blkbits; 2083 ((loff_t)map->m_lblk) << inode->i_blkbits;
@@ -2111,8 +2094,11 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2111 * is non-zero, a commit should free up blocks. 2094 * is non-zero, a commit should free up blocks.
2112 */ 2095 */
2113 if ((err == -ENOMEM) || 2096 if ((err == -ENOMEM) ||
2114 (err == -ENOSPC && ext4_count_free_clusters(sb))) 2097 (err == -ENOSPC && ext4_count_free_clusters(sb))) {
2098 if (progress)
2099 goto update_disksize;
2115 return err; 2100 return err;
2101 }
2116 ext4_msg(sb, KERN_CRIT, 2102 ext4_msg(sb, KERN_CRIT,
2117 "Delayed block allocation failed for " 2103 "Delayed block allocation failed for "
2118 "inode %lu at logical offset %llu with" 2104 "inode %lu at logical offset %llu with"
@@ -2129,15 +2115,17 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2129 *give_up_on_write = true; 2115 *give_up_on_write = true;
2130 return err; 2116 return err;
2131 } 2117 }
2118 progress = 1;
2132 /* 2119 /*
2133 * Update buffer state, submit mapped pages, and get us new 2120 * Update buffer state, submit mapped pages, and get us new
2134 * extent to map 2121 * extent to map
2135 */ 2122 */
2136 err = mpage_map_and_submit_buffers(mpd); 2123 err = mpage_map_and_submit_buffers(mpd);
2137 if (err < 0) 2124 if (err < 0)
2138 return err; 2125 goto update_disksize;
2139 } while (map->m_len); 2126 } while (map->m_len);
2140 2127
2128update_disksize:
2141 /* 2129 /*
2142 * Update on-disk size after IO is submitted. Races with 2130 * Update on-disk size after IO is submitted. Races with
2143 * truncate are avoided by checking i_size under i_data_sem. 2131 * truncate are avoided by checking i_size under i_data_sem.
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 956027711faf..8b0f9ef517d6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1412,6 +1412,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
1412 int last = first + count - 1; 1412 int last = first + count - 1;
1413 struct super_block *sb = e4b->bd_sb; 1413 struct super_block *sb = e4b->bd_sb;
1414 1414
1415 if (WARN_ON(count == 0))
1416 return;
1415 BUG_ON(last >= (sb->s_blocksize << 3)); 1417 BUG_ON(last >= (sb->s_blocksize << 3));
1416 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group)); 1418 assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
1417 /* Don't bother if the block group is corrupt. */ 1419 /* Don't bother if the block group is corrupt. */
@@ -3221,6 +3223,8 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3221 int err; 3223 int err;
3222 3224
3223 if (pa == NULL) { 3225 if (pa == NULL) {
3226 if (ac->ac_f_ex.fe_len == 0)
3227 return;
3224 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b); 3228 err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
3225 if (err) { 3229 if (err) {
3226 /* 3230 /*
@@ -3235,6 +3239,7 @@ static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3235 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start, 3239 mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
3236 ac->ac_f_ex.fe_len); 3240 ac->ac_f_ex.fe_len);
3237 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group); 3241 ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
3242 ext4_mb_unload_buddy(&e4b);
3238 return; 3243 return;
3239 } 3244 }
3240 if (pa->pa_type == MB_INODE_PA) 3245 if (pa->pa_type == MB_INODE_PA)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index b147a67baa0d..90a3cdca3f88 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1227,7 +1227,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1227 buffer */ 1227 buffer */
1228 int num = 0; 1228 int num = 0;
1229 ext4_lblk_t nblocks; 1229 ext4_lblk_t nblocks;
1230 int i, err; 1230 int i, err = 0;
1231 int namelen; 1231 int namelen;
1232 1232
1233 *res_dir = NULL; 1233 *res_dir = NULL;
@@ -1264,7 +1264,11 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1264 * return. Otherwise, fall back to doing a search the 1264 * return. Otherwise, fall back to doing a search the
1265 * old fashioned way. 1265 * old fashioned way.
1266 */ 1266 */
1267 if (bh || (err != ERR_BAD_DX_DIR)) 1267 if (err == -ENOENT)
1268 return NULL;
1269 if (err && err != ERR_BAD_DX_DIR)
1270 return ERR_PTR(err);
1271 if (bh)
1268 return bh; 1272 return bh;
1269 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " 1273 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1270 "falling back\n")); 1274 "falling back\n"));
@@ -1295,6 +1299,11 @@ restart:
1295 } 1299 }
1296 num++; 1300 num++;
1297 bh = ext4_getblk(NULL, dir, b++, 0, &err); 1301 bh = ext4_getblk(NULL, dir, b++, 0, &err);
1302 if (unlikely(err)) {
1303 if (ra_max == 0)
1304 return ERR_PTR(err);
1305 break;
1306 }
1298 bh_use[ra_max] = bh; 1307 bh_use[ra_max] = bh;
1299 if (bh) 1308 if (bh)
1300 ll_rw_block(READ | REQ_META | REQ_PRIO, 1309 ll_rw_block(READ | REQ_META | REQ_PRIO,
@@ -1417,6 +1426,8 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1417 return ERR_PTR(-ENAMETOOLONG); 1426 return ERR_PTR(-ENAMETOOLONG);
1418 1427
1419 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 1428 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
1429 if (IS_ERR(bh))
1430 return (struct dentry *) bh;
1420 inode = NULL; 1431 inode = NULL;
1421 if (bh) { 1432 if (bh) {
1422 __u32 ino = le32_to_cpu(de->inode); 1433 __u32 ino = le32_to_cpu(de->inode);
@@ -1450,6 +1461,8 @@ struct dentry *ext4_get_parent(struct dentry *child)
1450 struct buffer_head *bh; 1461 struct buffer_head *bh;
1451 1462
1452 bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL); 1463 bh = ext4_find_entry(child->d_inode, &dotdot, &de, NULL);
1464 if (IS_ERR(bh))
1465 return (struct dentry *) bh;
1453 if (!bh) 1466 if (!bh)
1454 return ERR_PTR(-ENOENT); 1467 return ERR_PTR(-ENOENT);
1455 ino = le32_to_cpu(de->inode); 1468 ino = le32_to_cpu(de->inode);
@@ -2727,6 +2740,8 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2727 2740
2728 retval = -ENOENT; 2741 retval = -ENOENT;
2729 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2742 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2743 if (IS_ERR(bh))
2744 return PTR_ERR(bh);
2730 if (!bh) 2745 if (!bh)
2731 goto end_rmdir; 2746 goto end_rmdir;
2732 2747
@@ -2794,6 +2809,8 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2794 2809
2795 retval = -ENOENT; 2810 retval = -ENOENT;
2796 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL); 2811 bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
2812 if (IS_ERR(bh))
2813 return PTR_ERR(bh);
2797 if (!bh) 2814 if (!bh)
2798 goto end_unlink; 2815 goto end_unlink;
2799 2816
@@ -3121,6 +3138,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3121 struct ext4_dir_entry_2 *de; 3138 struct ext4_dir_entry_2 *de;
3122 3139
3123 bh = ext4_find_entry(dir, d_name, &de, NULL); 3140 bh = ext4_find_entry(dir, d_name, &de, NULL);
3141 if (IS_ERR(bh))
3142 return PTR_ERR(bh);
3124 if (bh) { 3143 if (bh) {
3125 retval = ext4_delete_entry(handle, dir, de, bh); 3144 retval = ext4_delete_entry(handle, dir, de, bh);
3126 brelse(bh); 3145 brelse(bh);
@@ -3128,7 +3147,8 @@ static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3128 return retval; 3147 return retval;
3129} 3148}
3130 3149
3131static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) 3150static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3151 int force_reread)
3132{ 3152{
3133 int retval; 3153 int retval;
3134 /* 3154 /*
@@ -3140,7 +3160,8 @@ static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent)
3140 if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || 3160 if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3141 ent->de->name_len != ent->dentry->d_name.len || 3161 ent->de->name_len != ent->dentry->d_name.len ||
3142 strncmp(ent->de->name, ent->dentry->d_name.name, 3162 strncmp(ent->de->name, ent->dentry->d_name.name,
3143 ent->de->name_len)) { 3163 ent->de->name_len) ||
3164 force_reread) {
3144 retval = ext4_find_delete_entry(handle, ent->dir, 3165 retval = ext4_find_delete_entry(handle, ent->dir,
3145 &ent->dentry->d_name); 3166 &ent->dentry->d_name);
3146 } else { 3167 } else {
@@ -3191,6 +3212,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3191 .dentry = new_dentry, 3212 .dentry = new_dentry,
3192 .inode = new_dentry->d_inode, 3213 .inode = new_dentry->d_inode,
3193 }; 3214 };
3215 int force_reread;
3194 int retval; 3216 int retval;
3195 3217
3196 dquot_initialize(old.dir); 3218 dquot_initialize(old.dir);
@@ -3202,6 +3224,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3202 dquot_initialize(new.inode); 3224 dquot_initialize(new.inode);
3203 3225
3204 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); 3226 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3227 if (IS_ERR(old.bh))
3228 return PTR_ERR(old.bh);
3205 /* 3229 /*
3206 * Check for inode number is _not_ due to possible IO errors. 3230 * Check for inode number is _not_ due to possible IO errors.
3207 * We might rmdir the source, keep it as pwd of some process 3231 * We might rmdir the source, keep it as pwd of some process
@@ -3214,6 +3238,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3214 3238
3215 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3239 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3216 &new.de, &new.inlined); 3240 &new.de, &new.inlined);
3241 if (IS_ERR(new.bh)) {
3242 retval = PTR_ERR(new.bh);
3243 goto end_rename;
3244 }
3217 if (new.bh) { 3245 if (new.bh) {
3218 if (!new.inode) { 3246 if (!new.inode) {
3219 brelse(new.bh); 3247 brelse(new.bh);
@@ -3246,6 +3274,15 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3246 if (retval) 3274 if (retval)
3247 goto end_rename; 3275 goto end_rename;
3248 } 3276 }
3277 /*
3278 * If we're renaming a file within an inline_data dir and adding or
3279 * setting the new dirent causes a conversion from inline_data to
3280 * extents/blockmap, we need to force the dirent delete code to
3281 * re-read the directory, or else we end up trying to delete a dirent
3282 * from what is now the extent tree root (or a block map).
3283 */
3284 force_reread = (new.dir->i_ino == old.dir->i_ino &&
3285 ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3249 if (!new.bh) { 3286 if (!new.bh) {
3250 retval = ext4_add_entry(handle, new.dentry, old.inode); 3287 retval = ext4_add_entry(handle, new.dentry, old.inode);
3251 if (retval) 3288 if (retval)
@@ -3256,6 +3293,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3256 if (retval) 3293 if (retval)
3257 goto end_rename; 3294 goto end_rename;
3258 } 3295 }
3296 if (force_reread)
3297 force_reread = !ext4_test_inode_flag(new.dir,
3298 EXT4_INODE_INLINE_DATA);
3259 3299
3260 /* 3300 /*
3261 * Like most other Unix systems, set the ctime for inodes on a 3301 * Like most other Unix systems, set the ctime for inodes on a
@@ -3267,7 +3307,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3267 /* 3307 /*
3268 * ok, that's it 3308 * ok, that's it
3269 */ 3309 */
3270 ext4_rename_delete(handle, &old); 3310 ext4_rename_delete(handle, &old, force_reread);
3271 3311
3272 if (new.inode) { 3312 if (new.inode) {
3273 ext4_dec_count(handle, new.inode); 3313 ext4_dec_count(handle, new.inode);
@@ -3330,6 +3370,8 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3330 3370
3331 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, 3371 old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
3332 &old.de, &old.inlined); 3372 &old.de, &old.inlined);
3373 if (IS_ERR(old.bh))
3374 return PTR_ERR(old.bh);
3333 /* 3375 /*
3334 * Check for inode number is _not_ due to possible IO errors. 3376 * Check for inode number is _not_ due to possible IO errors.
3335 * We might rmdir the source, keep it as pwd of some process 3377 * We might rmdir the source, keep it as pwd of some process
@@ -3342,6 +3384,10 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
3342 3384
3343 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, 3385 new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3344 &new.de, &new.inlined); 3386 &new.de, &new.inlined);
3387 if (IS_ERR(new.bh)) {
3388 retval = PTR_ERR(new.bh);
3389 goto end_rename;
3390 }
3345 3391
3346 /* RENAME_EXCHANGE case: old *and* new must both exist */ 3392 /* RENAME_EXCHANGE case: old *and* new must both exist */
3347 if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) 3393 if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 32b43ad154b9..0b28b36e7915 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3181,9 +3181,9 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3181 3181
3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) { 3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3184 /* journal checksum v2 */ 3184 /* journal checksum v3 */
3185 compat = 0; 3185 compat = 0;
3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V2; 3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
3187 } else { 3187 } else {
3188 /* journal checksum v1 */ 3188 /* journal checksum v1 */
3189 compat = JBD2_FEATURE_COMPAT_CHECKSUM; 3189 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
@@ -3205,6 +3205,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3205 jbd2_journal_clear_features(sbi->s_journal, 3205 jbd2_journal_clear_features(sbi->s_journal,
3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | 3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V2); 3209 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3209 } 3210 }
3210 3211
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6fac74349856..b73e0215baa7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -97,7 +97,7 @@ static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh)
97 struct commit_header *h; 97 struct commit_header *h;
98 __u32 csum; 98 __u32 csum;
99 99
100 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 100 if (!jbd2_journal_has_csum_v2or3(j))
101 return; 101 return;
102 102
103 h = (struct commit_header *)(bh->b_data); 103 h = (struct commit_header *)(bh->b_data);
@@ -313,11 +313,11 @@ static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh)
313 return checksum; 313 return checksum;
314} 314}
315 315
316static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 316static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
317 unsigned long long block) 317 unsigned long long block)
318{ 318{
319 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 319 tag->t_blocknr = cpu_to_be32(block & (u32)~0);
320 if (tag_bytes > JBD2_TAG_SIZE32) 320 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_64BIT))
321 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); 321 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
322} 322}
323 323
@@ -327,7 +327,7 @@ static void jbd2_descr_block_csum_set(journal_t *j,
327 struct jbd2_journal_block_tail *tail; 327 struct jbd2_journal_block_tail *tail;
328 __u32 csum; 328 __u32 csum;
329 329
330 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 330 if (!jbd2_journal_has_csum_v2or3(j))
331 return; 331 return;
332 332
333 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - 333 tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
@@ -340,12 +340,13 @@ static void jbd2_descr_block_csum_set(journal_t *j,
340static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, 340static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
341 struct buffer_head *bh, __u32 sequence) 341 struct buffer_head *bh, __u32 sequence)
342{ 342{
343 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
343 struct page *page = bh->b_page; 344 struct page *page = bh->b_page;
344 __u8 *addr; 345 __u8 *addr;
345 __u32 csum32; 346 __u32 csum32;
346 __be32 seq; 347 __be32 seq;
347 348
348 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 349 if (!jbd2_journal_has_csum_v2or3(j))
349 return; 350 return;
350 351
351 seq = cpu_to_be32(sequence); 352 seq = cpu_to_be32(sequence);
@@ -355,8 +356,10 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
355 bh->b_size); 356 bh->b_size);
356 kunmap_atomic(addr); 357 kunmap_atomic(addr);
357 358
358 /* We only have space to store the lower 16 bits of the crc32c. */ 359 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
359 tag->t_checksum = cpu_to_be16(csum32); 360 tag3->t_checksum = cpu_to_be32(csum32);
361 else
362 tag->t_checksum = cpu_to_be16(csum32);
360} 363}
361/* 364/*
362 * jbd2_journal_commit_transaction 365 * jbd2_journal_commit_transaction
@@ -396,7 +399,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
396 LIST_HEAD(io_bufs); 399 LIST_HEAD(io_bufs);
397 LIST_HEAD(log_bufs); 400 LIST_HEAD(log_bufs);
398 401
399 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 402 if (jbd2_journal_has_csum_v2or3(journal))
400 csum_size = sizeof(struct jbd2_journal_block_tail); 403 csum_size = sizeof(struct jbd2_journal_block_tail);
401 404
402 /* 405 /*
@@ -690,7 +693,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
690 tag_flag |= JBD2_FLAG_SAME_UUID; 693 tag_flag |= JBD2_FLAG_SAME_UUID;
691 694
692 tag = (journal_block_tag_t *) tagp; 695 tag = (journal_block_tag_t *) tagp;
693 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); 696 write_tag_block(journal, tag, jh2bh(jh)->b_blocknr);
694 tag->t_flags = cpu_to_be16(tag_flag); 697 tag->t_flags = cpu_to_be16(tag_flag);
695 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], 698 jbd2_block_tag_csum_set(journal, tag, wbuf[bufs],
696 commit_transaction->t_tid); 699 commit_transaction->t_tid);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 67b8e303946c..19d74d86d99c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -124,7 +124,7 @@ EXPORT_SYMBOL(__jbd2_debug);
124/* Checksumming functions */ 124/* Checksumming functions */
125static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) 125static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
126{ 126{
127 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 127 if (!jbd2_journal_has_csum_v2or3(j))
128 return 1; 128 return 1;
129 129
130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; 130 return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
@@ -145,7 +145,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
145 145
146static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) 146static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
147{ 147{
148 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 148 if (!jbd2_journal_has_csum_v2or3(j))
149 return 1; 149 return 1;
150 150
151 return sb->s_checksum == jbd2_superblock_csum(j, sb); 151 return sb->s_checksum == jbd2_superblock_csum(j, sb);
@@ -153,7 +153,7 @@ static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
153 153
154static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) 154static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
155{ 155{
156 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 156 if (!jbd2_journal_has_csum_v2or3(j))
157 return; 157 return;
158 158
159 sb->s_checksum = jbd2_superblock_csum(j, sb); 159 sb->s_checksum = jbd2_superblock_csum(j, sb);
@@ -1522,21 +1522,29 @@ static int journal_get_superblock(journal_t *journal)
1522 goto out; 1522 goto out;
1523 } 1523 }
1524 1524
1525 if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && 1525 if (jbd2_journal_has_csum_v2or3(journal) &&
1526 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1526 JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM)) {
1527 /* Can't have checksum v1 and v2 on at the same time! */ 1527 /* Can't have checksum v1 and v2 on at the same time! */
1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " 1528 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 "
1529 "at the same time!\n"); 1529 "at the same time!\n");
1530 goto out; 1530 goto out;
1531 } 1531 }
1532 1532
1533 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) &&
1534 JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1535 /* Can't have checksum v2 and v3 at the same time! */
1536 printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
1537 "at the same time!\n");
1538 goto out;
1539 }
1540
1533 if (!jbd2_verify_csum_type(journal, sb)) { 1541 if (!jbd2_verify_csum_type(journal, sb)) {
1534 printk(KERN_ERR "JBD2: Unknown checksum type\n"); 1542 printk(KERN_ERR "JBD2: Unknown checksum type\n");
1535 goto out; 1543 goto out;
1536 } 1544 }
1537 1545
1538 /* Load the checksum driver */ 1546 /* Load the checksum driver */
1539 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1547 if (jbd2_journal_has_csum_v2or3(journal)) {
1540 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); 1548 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
1541 if (IS_ERR(journal->j_chksum_driver)) { 1549 if (IS_ERR(journal->j_chksum_driver)) {
1542 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); 1550 printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
@@ -1553,7 +1561,7 @@ static int journal_get_superblock(journal_t *journal)
1553 } 1561 }
1554 1562
1555 /* Precompute checksum seed for all metadata */ 1563 /* Precompute checksum seed for all metadata */
1556 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 1564 if (jbd2_journal_has_csum_v2or3(journal))
1557 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid, 1565 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
1558 sizeof(sb->s_uuid)); 1566 sizeof(sb->s_uuid));
1559 1567
@@ -1813,8 +1821,14 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1813 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) 1821 if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
1814 return 0; 1822 return 0;
1815 1823
1816 /* Asking for checksumming v2 and v1? Only give them v2. */ 1824 /* If enabling v2 checksums, turn on v3 instead */
1817 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2 && 1825 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
1826 incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
1827 incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
1828 }
1829
1830 /* Asking for checksumming v3 and v1? Only give them v3. */
1831 if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
1818 compat & JBD2_FEATURE_COMPAT_CHECKSUM) 1832 compat & JBD2_FEATURE_COMPAT_CHECKSUM)
1819 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM; 1833 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
1820 1834
@@ -1823,8 +1837,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1823 1837
1824 sb = journal->j_superblock; 1838 sb = journal->j_superblock;
1825 1839
1826 /* If enabling v2 checksums, update superblock */ 1840 /* If enabling v3 checksums, update superblock */
1827 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V2)) { 1841 if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
1828 sb->s_checksum_type = JBD2_CRC32C_CHKSUM; 1842 sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
1829 sb->s_feature_compat &= 1843 sb->s_feature_compat &=
1830 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM); 1844 ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
@@ -1842,8 +1856,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1842 } 1856 }
1843 1857
1844 /* Precompute checksum seed for all metadata */ 1858 /* Precompute checksum seed for all metadata */
1845 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 1859 if (jbd2_journal_has_csum_v2or3(journal))
1846 JBD2_FEATURE_INCOMPAT_CSUM_V2))
1847 journal->j_csum_seed = jbd2_chksum(journal, ~0, 1860 journal->j_csum_seed = jbd2_chksum(journal, ~0,
1848 sb->s_uuid, 1861 sb->s_uuid,
1849 sizeof(sb->s_uuid)); 1862 sizeof(sb->s_uuid));
@@ -1852,7 +1865,8 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
1852 /* If enabling v1 checksums, downgrade superblock */ 1865 /* If enabling v1 checksums, downgrade superblock */
1853 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM)) 1866 if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
1854 sb->s_feature_incompat &= 1867 sb->s_feature_incompat &=
1855 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2); 1868 ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
1869 JBD2_FEATURE_INCOMPAT_CSUM_V3);
1856 1870
1857 sb->s_feature_compat |= cpu_to_be32(compat); 1871 sb->s_feature_compat |= cpu_to_be32(compat);
1858 sb->s_feature_ro_compat |= cpu_to_be32(ro); 1872 sb->s_feature_ro_compat |= cpu_to_be32(ro);
@@ -2165,16 +2179,20 @@ int jbd2_journal_blocks_per_page(struct inode *inode)
2165 */ 2179 */
2166size_t journal_tag_bytes(journal_t *journal) 2180size_t journal_tag_bytes(journal_t *journal)
2167{ 2181{
2168 journal_block_tag_t tag; 2182 size_t sz;
2169 size_t x = 0; 2183
2184 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3))
2185 return sizeof(journal_block_tag3_t);
2186
2187 sz = sizeof(journal_block_tag_t);
2170 2188
2171 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 2189 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
2172 x += sizeof(tag.t_checksum); 2190 sz += sizeof(__u16);
2173 2191
2174 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) 2192 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
2175 return x + JBD2_TAG_SIZE64; 2193 return sz;
2176 else 2194 else
2177 return x + JBD2_TAG_SIZE32; 2195 return sz - sizeof(__u32);
2178} 2196}
2179 2197
2180/* 2198/*
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 3b6bb19d60b1..9b329b55ffe3 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j,
181 __be32 provided; 181 __be32 provided;
182 __u32 calculated; 182 __u32 calculated;
183 183
184 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 184 if (!jbd2_journal_has_csum_v2or3(j))
185 return 1; 185 return 1;
186 186
187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - 187 tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
@@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
205 int nr = 0, size = journal->j_blocksize; 205 int nr = 0, size = journal->j_blocksize;
206 int tag_bytes = journal_tag_bytes(journal); 206 int tag_bytes = journal_tag_bytes(journal);
207 207
208 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 208 if (jbd2_journal_has_csum_v2or3(journal))
209 size -= sizeof(struct jbd2_journal_block_tail); 209 size -= sizeof(struct jbd2_journal_block_tail);
210 210
211 tagp = &bh->b_data[sizeof(journal_header_t)]; 211 tagp = &bh->b_data[sizeof(journal_header_t)];
@@ -338,10 +338,11 @@ int jbd2_journal_skip_recovery(journal_t *journal)
338 return err; 338 return err;
339} 339}
340 340
341static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) 341static inline unsigned long long read_tag_block(journal_t *journal,
342 journal_block_tag_t *tag)
342{ 343{
343 unsigned long long block = be32_to_cpu(tag->t_blocknr); 344 unsigned long long block = be32_to_cpu(tag->t_blocknr);
344 if (tag_bytes > JBD2_TAG_SIZE32) 345 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
345 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 346 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
346 return block; 347 return block;
347} 348}
@@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
384 __be32 provided; 385 __be32 provided;
385 __u32 calculated; 386 __u32 calculated;
386 387
387 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 388 if (!jbd2_journal_has_csum_v2or3(j))
388 return 1; 389 return 1;
389 390
390 h = buf; 391 h = buf;
@@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
399static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 400static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
400 void *buf, __u32 sequence) 401 void *buf, __u32 sequence)
401{ 402{
403 journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag;
402 __u32 csum32; 404 __u32 csum32;
403 __be32 seq; 405 __be32 seq;
404 406
405 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 407 if (!jbd2_journal_has_csum_v2or3(j))
406 return 1; 408 return 1;
407 409
408 seq = cpu_to_be32(sequence); 410 seq = cpu_to_be32(sequence);
409 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 411 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
410 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 412 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
411 413
412 return tag->t_checksum == cpu_to_be16(csum32); 414 if (JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3))
415 return tag3->t_checksum == cpu_to_be32(csum32);
416 else
417 return tag->t_checksum == cpu_to_be16(csum32);
413} 418}
414 419
415static int do_one_pass(journal_t *journal, 420static int do_one_pass(journal_t *journal,
@@ -426,6 +431,7 @@ static int do_one_pass(journal_t *journal,
426 int tag_bytes = journal_tag_bytes(journal); 431 int tag_bytes = journal_tag_bytes(journal);
427 __u32 crc32_sum = ~0; /* Transactional Checksums */ 432 __u32 crc32_sum = ~0; /* Transactional Checksums */
428 int descr_csum_size = 0; 433 int descr_csum_size = 0;
434 int block_error = 0;
429 435
430 /* 436 /*
431 * First thing is to establish what we expect to find in the log 437 * First thing is to establish what we expect to find in the log
@@ -512,8 +518,7 @@ static int do_one_pass(journal_t *journal,
512 switch(blocktype) { 518 switch(blocktype) {
513 case JBD2_DESCRIPTOR_BLOCK: 519 case JBD2_DESCRIPTOR_BLOCK:
514 /* Verify checksum first */ 520 /* Verify checksum first */
515 if (JBD2_HAS_INCOMPAT_FEATURE(journal, 521 if (jbd2_journal_has_csum_v2or3(journal))
516 JBD2_FEATURE_INCOMPAT_CSUM_V2))
517 descr_csum_size = 522 descr_csum_size =
518 sizeof(struct jbd2_journal_block_tail); 523 sizeof(struct jbd2_journal_block_tail);
519 if (descr_csum_size > 0 && 524 if (descr_csum_size > 0 &&
@@ -574,7 +579,7 @@ static int do_one_pass(journal_t *journal,
574 unsigned long long blocknr; 579 unsigned long long blocknr;
575 580
576 J_ASSERT(obh != NULL); 581 J_ASSERT(obh != NULL);
577 blocknr = read_tag_block(tag_bytes, 582 blocknr = read_tag_block(journal,
578 tag); 583 tag);
579 584
580 /* If the block has been 585 /* If the block has been
@@ -598,7 +603,8 @@ static int do_one_pass(journal_t *journal,
598 "checksum recovering " 603 "checksum recovering "
599 "block %llu in log\n", 604 "block %llu in log\n",
600 blocknr); 605 blocknr);
601 continue; 606 block_error = 1;
607 goto skip_write;
602 } 608 }
603 609
604 /* Find a buffer for the new 610 /* Find a buffer for the new
@@ -797,7 +803,8 @@ static int do_one_pass(journal_t *journal,
797 success = -EIO; 803 success = -EIO;
798 } 804 }
799 } 805 }
800 806 if (block_error && success == 0)
807 success = -EIO;
801 return success; 808 return success;
802 809
803 failed: 810 failed:
@@ -811,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j,
811 __be32 provided; 818 __be32 provided;
812 __u32 calculated; 819 __u32 calculated;
813 820
814 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 821 if (!jbd2_journal_has_csum_v2or3(j))
815 return 1; 822 return 1;
816 823
817 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize - 824 tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 198c9c10276d..d5e95a175c92 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -91,8 +91,8 @@
91#include <linux/list.h> 91#include <linux/list.h>
92#include <linux/init.h> 92#include <linux/init.h>
93#include <linux/bio.h> 93#include <linux/bio.h>
94#endif
95#include <linux/log2.h> 94#include <linux/log2.h>
95#endif
96 96
97static struct kmem_cache *jbd2_revoke_record_cache; 97static struct kmem_cache *jbd2_revoke_record_cache;
98static struct kmem_cache *jbd2_revoke_table_cache; 98static struct kmem_cache *jbd2_revoke_table_cache;
@@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal,
597 offset = *offsetp; 597 offset = *offsetp;
598 598
599 /* Do we need to leave space at the end for a checksum? */ 599 /* Do we need to leave space at the end for a checksum? */
600 if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 600 if (jbd2_journal_has_csum_v2or3(journal))
601 csum_size = sizeof(struct jbd2_journal_revoke_tail); 601 csum_size = sizeof(struct jbd2_journal_revoke_tail);
602 602
603 /* Make sure we have a descriptor with space left for the record */ 603 /* Make sure we have a descriptor with space left for the record */
@@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
644 struct jbd2_journal_revoke_tail *tail; 644 struct jbd2_journal_revoke_tail *tail;
645 __u32 csum; 645 __u32 csum;
646 646
647 if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) 647 if (!jbd2_journal_has_csum_v2or3(j))
648 return; 648 return;
649 649
650 tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - 650 tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -