diff options
-rw-r--r-- | Documentation/filesystems/ext4.txt | 10 | ||||
-rw-r--r-- | fs/Kconfig | 1 | ||||
-rw-r--r-- | fs/ext4/super.c | 25 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 198 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 26 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 151 | ||||
-rw-r--r-- | include/linux/ext4_fs.h | 3 | ||||
-rw-r--r-- | include/linux/jbd2.h | 36 |
8 files changed, 388 insertions, 62 deletions
diff --git a/Documentation/filesystems/ext4.txt b/Documentation/filesystems/ext4.txt index 6a4adcae9f9..4f329afe20e 100644 --- a/Documentation/filesystems/ext4.txt +++ b/Documentation/filesystems/ext4.txt | |||
@@ -89,6 +89,16 @@ When mounting an ext4 filesystem, the following option are accepted: | |||
89 | extents ext4 will use extents to address file data. The | 89 | extents ext4 will use extents to address file data. The |
90 | file system will no longer be mountable by ext3. | 90 | file system will no longer be mountable by ext3. |
91 | 91 | ||
92 | journal_checksum Enable checksumming of the journal transactions. | ||
93 | This will allow the recovery code in e2fsck and the | ||
94 | kernel to detect corruption in the journal. It is a | ||
95 | compatible change and will be ignored by older kernels. | ||
96 | |||
97 | journal_async_commit Commit block can be written to disk without waiting | ||
98 | for descriptor blocks. If enabled, older kernels cannot | ||
99 | mount the device. This will enable 'journal_checksum' | ||
100 | internally. | ||
101 | |||
92 | journal=update Update the ext4 file system's journal to the current | 102 | journal=update Update the ext4 file system's journal to the current |
93 | format. | 103 | format. |
94 | 104 | ||
diff --git a/fs/Kconfig b/fs/Kconfig index 9656139d2e9..219ec06a8c7 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -236,6 +236,7 @@ config JBD_DEBUG | |||
236 | 236 | ||
237 | config JBD2 | 237 | config JBD2 |
238 | tristate | 238 | tristate |
239 | select CRC32 | ||
239 | help | 240 | help |
240 | This is a generic journaling layer for block devices that support | 241 | This is a generic journaling layer for block devices that support |
241 | both 32-bit and 64-bit block numbers. It is currently used by | 242 | both 32-bit and 64-bit block numbers. It is currently used by |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c7305443e10..f7479d30735 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -869,6 +869,7 @@ enum { | |||
869 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 869 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
870 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 870 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
871 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 871 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
872 | Opt_journal_checksum, Opt_journal_async_commit, | ||
872 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 873 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
873 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 874 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
874 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 875 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
@@ -908,6 +909,8 @@ static match_table_t tokens = { | |||
908 | {Opt_journal_update, "journal=update"}, | 909 | {Opt_journal_update, "journal=update"}, |
909 | {Opt_journal_inum, "journal=%u"}, | 910 | {Opt_journal_inum, "journal=%u"}, |
910 | {Opt_journal_dev, "journal_dev=%u"}, | 911 | {Opt_journal_dev, "journal_dev=%u"}, |
912 | {Opt_journal_checksum, "journal_checksum"}, | ||
913 | {Opt_journal_async_commit, "journal_async_commit"}, | ||
911 | {Opt_abort, "abort"}, | 914 | {Opt_abort, "abort"}, |
912 | {Opt_data_journal, "data=journal"}, | 915 | {Opt_data_journal, "data=journal"}, |
913 | {Opt_data_ordered, "data=ordered"}, | 916 | {Opt_data_ordered, "data=ordered"}, |
@@ -1095,6 +1098,13 @@ static int parse_options (char *options, struct super_block *sb, | |||
1095 | return 0; | 1098 | return 0; |
1096 | *journal_devnum = option; | 1099 | *journal_devnum = option; |
1097 | break; | 1100 | break; |
1101 | case Opt_journal_checksum: | ||
1102 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
1103 | break; | ||
1104 | case Opt_journal_async_commit: | ||
1105 | set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); | ||
1106 | set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM); | ||
1107 | break; | ||
1098 | case Opt_noload: | 1108 | case Opt_noload: |
1099 | set_opt (sbi->s_mount_opt, NOLOAD); | 1109 | set_opt (sbi->s_mount_opt, NOLOAD); |
1100 | break; | 1110 | break; |
@@ -2114,6 +2124,21 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) | |||
2114 | goto failed_mount4; | 2124 | goto failed_mount4; |
2115 | } | 2125 | } |
2116 | 2126 | ||
2127 | if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { | ||
2128 | jbd2_journal_set_features(sbi->s_journal, | ||
2129 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2130 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2131 | } else if (test_opt(sb, JOURNAL_CHECKSUM)) { | ||
2132 | jbd2_journal_set_features(sbi->s_journal, | ||
2133 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); | ||
2134 | jbd2_journal_clear_features(sbi->s_journal, 0, 0, | ||
2135 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2136 | } else { | ||
2137 | jbd2_journal_clear_features(sbi->s_journal, | ||
2138 | JBD2_FEATURE_COMPAT_CHECKSUM, 0, | ||
2139 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); | ||
2140 | } | ||
2141 | |||
2117 | /* We have now updated the journal if required, so we can | 2142 | /* We have now updated the journal if required, so we can |
2118 | * validate the data journaling mode. */ | 2143 | * validate the data journaling mode. */ |
2119 | switch (test_opt(sb, DATA_FLAGS)) { | 2144 | switch (test_opt(sb, DATA_FLAGS)) { |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8749a86f417..da8d0eb3b7b 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/jiffies.h> | 23 | #include <linux/jiffies.h> |
24 | #include <linux/crc32.h> | ||
24 | 25 | ||
25 | /* | 26 | /* |
26 | * Default IO end handler for temporary BJ_IO buffer_heads. | 27 | * Default IO end handler for temporary BJ_IO buffer_heads. |
@@ -93,19 +94,23 @@ static int inverted_lock(journal_t *journal, struct buffer_head *bh) | |||
93 | return 1; | 94 | return 1; |
94 | } | 95 | } |
95 | 96 | ||
96 | /* Done it all: now write the commit record. We should have | 97 | /* |
98 | * Done it all: now submit the commit record. We should have | ||
97 | * cleaned up our previous buffers by now, so if we are in abort | 99 | * cleaned up our previous buffers by now, so if we are in abort |
98 | * mode we can now just skip the rest of the journal write | 100 | * mode we can now just skip the rest of the journal write |
99 | * entirely. | 101 | * entirely. |
100 | * | 102 | * |
101 | * Returns 1 if the journal needs to be aborted or 0 on success | 103 | * Returns 1 if the journal needs to be aborted or 0 on success |
102 | */ | 104 | */ |
103 | static int journal_write_commit_record(journal_t *journal, | 105 | static int journal_submit_commit_record(journal_t *journal, |
104 | transaction_t *commit_transaction) | 106 | transaction_t *commit_transaction, |
107 | struct buffer_head **cbh, | ||
108 | __u32 crc32_sum) | ||
105 | { | 109 | { |
106 | struct journal_head *descriptor; | 110 | struct journal_head *descriptor; |
111 | struct commit_header *tmp; | ||
107 | struct buffer_head *bh; | 112 | struct buffer_head *bh; |
108 | int i, ret; | 113 | int ret; |
109 | int barrier_done = 0; | 114 | int barrier_done = 0; |
110 | 115 | ||
111 | if (is_journal_aborted(journal)) | 116 | if (is_journal_aborted(journal)) |
@@ -117,21 +122,33 @@ static int journal_write_commit_record(journal_t *journal, | |||
117 | 122 | ||
118 | bh = jh2bh(descriptor); | 123 | bh = jh2bh(descriptor); |
119 | 124 | ||
120 | /* AKPM: buglet - add `i' to tmp! */ | 125 | tmp = (struct commit_header *)bh->b_data; |
121 | for (i = 0; i < bh->b_size; i += 512) { | 126 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
122 | journal_header_t *tmp = (journal_header_t*)bh->b_data; | 127 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
123 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 128 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
124 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 129 | |
125 | tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 130 | if (JBD2_HAS_COMPAT_FEATURE(journal, |
131 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
132 | tmp->h_chksum_type = JBD2_CRC32_CHKSUM; | ||
133 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; | ||
134 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); | ||
126 | } | 135 | } |
127 | 136 | ||
128 | JBUFFER_TRACE(descriptor, "write commit block"); | 137 | JBUFFER_TRACE(descriptor, "submit commit block"); |
138 | lock_buffer(bh); | ||
139 | |||
129 | set_buffer_dirty(bh); | 140 | set_buffer_dirty(bh); |
130 | if (journal->j_flags & JBD2_BARRIER) { | 141 | set_buffer_uptodate(bh); |
142 | bh->b_end_io = journal_end_buffer_io_sync; | ||
143 | |||
144 | if (journal->j_flags & JBD2_BARRIER && | ||
145 | !JBD2_HAS_COMPAT_FEATURE(journal, | ||
146 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | ||
131 | set_buffer_ordered(bh); | 147 | set_buffer_ordered(bh); |
132 | barrier_done = 1; | 148 | barrier_done = 1; |
133 | } | 149 | } |
134 | ret = sync_dirty_buffer(bh); | 150 | ret = submit_bh(WRITE, bh); |
151 | |||
135 | /* is it possible for another commit to fail at roughly | 152 | /* is it possible for another commit to fail at roughly |
136 | * the same time as this one? If so, we don't want to | 153 | * the same time as this one? If so, we don't want to |
137 | * trust the barrier flag in the super, but instead want | 154 | * trust the barrier flag in the super, but instead want |
@@ -152,14 +169,72 @@ static int journal_write_commit_record(journal_t *journal, | |||
152 | clear_buffer_ordered(bh); | 169 | clear_buffer_ordered(bh); |
153 | set_buffer_uptodate(bh); | 170 | set_buffer_uptodate(bh); |
154 | set_buffer_dirty(bh); | 171 | set_buffer_dirty(bh); |
155 | ret = sync_dirty_buffer(bh); | 172 | ret = submit_bh(WRITE, bh); |
156 | } | 173 | } |
157 | put_bh(bh); /* One for getblk() */ | 174 | *cbh = bh; |
158 | jbd2_journal_put_journal_head(descriptor); | 175 | return ret; |
176 | } | ||
177 | |||
178 | /* | ||
179 | * Second half of the split commit-record write: wait for the commit block that journal_submit_commit_record() submitted, then drop the buffer and journal-head references. | ||
180 | * Returns 0 if the buffer is up to date after the wait, -EIO otherwise. NOTE(review): bh is dereferenced unconditionally -- confirm the caller can never pass a NULL commit buffer. | ||
181 | */ | ||
182 | static int journal_wait_on_commit_record(struct buffer_head *bh) | ||
183 | { | ||
184 | int ret = 0; | ||
185 | |||
186 | clear_buffer_dirty(bh); | ||
187 | wait_on_buffer(bh); /* block until the submitted write has finished */ | ||
159 | 188 | ||
160 | return (ret == -EIO); | 189 | if (unlikely(!buffer_uptodate(bh))) |
190 | ret = -EIO; | ||
191 | put_bh(bh); /* One for getblk() */ | ||
192 | jbd2_journal_put_journal_head(bh2jh(bh)); | ||
193 | |||
194 | return ret; | ||
161 | } | 195 | } |
162 | 196 | ||
197 | /* | ||
198 | * Drain commit_transaction->t_locked_list: wait for each buffer's IO to finish, then unfile buffers that are still on BJ_Locked. Called with journal->j_list_lock held (it is dropped around the wait) and returns with it held. Returns 0, or -EIO if any buffer was not up to date after its wait. | ||
199 | */ | ||
200 | static int journal_wait_on_locked_list(journal_t *journal, | ||
201 | transaction_t *commit_transaction) | ||
202 | { | ||
203 | int ret = 0; | ||
204 | struct journal_head *jh; | ||
205 | |||
206 | while (commit_transaction->t_locked_list) { | ||
207 | struct buffer_head *bh; | ||
208 | |||
209 | jh = commit_transaction->t_locked_list->b_tprev; | ||
210 | bh = jh2bh(jh); | ||
211 | get_bh(bh); /* hold bh across the window where j_list_lock is dropped */ | ||
212 | if (buffer_locked(bh)) { | ||
213 | spin_unlock(&journal->j_list_lock); | ||
214 | wait_on_buffer(bh); | ||
215 | if (unlikely(!buffer_uptodate(bh))) | ||
216 | ret = -EIO; /* remember the failure but keep draining the list */ | ||
217 | spin_lock(&journal->j_list_lock); | ||
218 | } | ||
219 | if (!inverted_lock(journal, bh)) { /* presumably drops j_list_lock on failure, hence the re-lock below -- confirm against inverted_lock() */ | ||
220 | put_bh(bh); | ||
221 | spin_lock(&journal->j_list_lock); | ||
222 | continue; | ||
223 | } | ||
224 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
225 | __jbd2_journal_unfile_buffer(jh); | ||
226 | jbd_unlock_bh_state(bh); | ||
227 | jbd2_journal_remove_journal_head(bh); | ||
228 | put_bh(bh); | ||
229 | } else { | ||
230 | jbd_unlock_bh_state(bh); | ||
231 | } | ||
232 | put_bh(bh); /* pairs with the get_bh() at the top of the loop */ | ||
233 | cond_resched_lock(&journal->j_list_lock); | ||
234 | } | ||
235 | return ret; | ||
236 | } | ||
237 | |||
163 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) | 238 | static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) |
164 | { | 239 | { |
165 | int i; | 240 | int i; |
@@ -275,7 +350,21 @@ write_out_data: | |||
275 | journal_do_submit_data(wbuf, bufs); | 350 | journal_do_submit_data(wbuf, bufs); |
276 | } | 351 | } |
277 | 352 | ||
278 | static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | 353 | static __u32 jbd2_checksum_data(__u32 crc32_sum, struct buffer_head *bh) |
354 | { /* Fold the bh->b_size bytes of bh's data into the running CRC32 crc32_sum (crc32_be) and return the updated value. */ | ||
355 | struct page *page = bh->b_page; | ||
356 | char *addr; | ||
357 | __u32 checksum; | ||
358 | |||
359 | addr = kmap_atomic(page, KM_USER0); /* temporarily map the buffer's page so its bytes can be read */ | ||
360 | checksum = crc32_be(crc32_sum, | ||
361 | (void *)(addr + offset_in_page(bh->b_data)), bh->b_size); /* the buffer's data starts at its offset within the mapped page */ | ||
362 | kunmap_atomic(addr, KM_USER0); | ||
363 | |||
364 | return checksum; | ||
365 | } | ||
366 | |||
367 | static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | ||
279 | unsigned long long block) | 368 | unsigned long long block) |
280 | { | 369 | { |
281 | tag->t_blocknr = cpu_to_be32(block & (u32)~0); | 370 | tag->t_blocknr = cpu_to_be32(block & (u32)~0); |
@@ -307,6 +396,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
307 | int tag_flag; | 396 | int tag_flag; |
308 | int i; | 397 | int i; |
309 | int tag_bytes = journal_tag_bytes(journal); | 398 | int tag_bytes = journal_tag_bytes(journal); |
399 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | ||
400 | __u32 crc32_sum = ~0; | ||
310 | 401 | ||
311 | /* | 402 | /* |
312 | * First job: lock down the current transaction and wait for | 403 | * First job: lock down the current transaction and wait for |
@@ -451,38 +542,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
451 | journal_submit_data_buffers(journal, commit_transaction); | 542 | journal_submit_data_buffers(journal, commit_transaction); |
452 | 543 | ||
453 | /* | 544 | /* |
454 | * Wait for all previously submitted IO to complete. | 545 | * Wait for all previously submitted IO to complete if commit |
546 | * record is to be written synchronously. | ||
455 | */ | 547 | */ |
456 | spin_lock(&journal->j_list_lock); | 548 | spin_lock(&journal->j_list_lock); |
457 | while (commit_transaction->t_locked_list) { | 549 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
458 | struct buffer_head *bh; | 550 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) |
551 | err = journal_wait_on_locked_list(journal, | ||
552 | commit_transaction); | ||
459 | 553 | ||
460 | jh = commit_transaction->t_locked_list->b_tprev; | ||
461 | bh = jh2bh(jh); | ||
462 | get_bh(bh); | ||
463 | if (buffer_locked(bh)) { | ||
464 | spin_unlock(&journal->j_list_lock); | ||
465 | wait_on_buffer(bh); | ||
466 | if (unlikely(!buffer_uptodate(bh))) | ||
467 | err = -EIO; | ||
468 | spin_lock(&journal->j_list_lock); | ||
469 | } | ||
470 | if (!inverted_lock(journal, bh)) { | ||
471 | put_bh(bh); | ||
472 | spin_lock(&journal->j_list_lock); | ||
473 | continue; | ||
474 | } | ||
475 | if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { | ||
476 | __jbd2_journal_unfile_buffer(jh); | ||
477 | jbd_unlock_bh_state(bh); | ||
478 | jbd2_journal_remove_journal_head(bh); | ||
479 | put_bh(bh); | ||
480 | } else { | ||
481 | jbd_unlock_bh_state(bh); | ||
482 | } | ||
483 | put_bh(bh); | ||
484 | cond_resched_lock(&journal->j_list_lock); | ||
485 | } | ||
486 | spin_unlock(&journal->j_list_lock); | 554 | spin_unlock(&journal->j_list_lock); |
487 | 555 | ||
488 | if (err) | 556 | if (err) |
@@ -656,6 +724,15 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
656 | start_journal_io: | 724 | start_journal_io: |
657 | for (i = 0; i < bufs; i++) { | 725 | for (i = 0; i < bufs; i++) { |
658 | struct buffer_head *bh = wbuf[i]; | 726 | struct buffer_head *bh = wbuf[i]; |
727 | /* | ||
728 | * Compute checksum. | ||
729 | */ | ||
730 | if (JBD2_HAS_COMPAT_FEATURE(journal, | ||
731 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
732 | crc32_sum = | ||
733 | jbd2_checksum_data(crc32_sum, bh); | ||
734 | } | ||
735 | |||
659 | lock_buffer(bh); | 736 | lock_buffer(bh); |
660 | clear_buffer_dirty(bh); | 737 | clear_buffer_dirty(bh); |
661 | set_buffer_uptodate(bh); | 738 | set_buffer_uptodate(bh); |
@@ -672,6 +749,23 @@ start_journal_io: | |||
672 | } | 749 | } |
673 | } | 750 | } |
674 | 751 | ||
752 | /* Done it all: now write the commit record asynchronously. */ | ||
753 | |||
754 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
755 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | ||
756 | err = journal_submit_commit_record(journal, commit_transaction, | ||
757 | &cbh, crc32_sum); | ||
758 | if (err) | ||
759 | __jbd2_journal_abort_hard(journal); | ||
760 | |||
761 | spin_lock(&journal->j_list_lock); | ||
762 | err = journal_wait_on_locked_list(journal, | ||
763 | commit_transaction); | ||
764 | spin_unlock(&journal->j_list_lock); | ||
765 | if (err) | ||
766 | __jbd2_journal_abort_hard(journal); | ||
767 | } | ||
768 | |||
675 | /* Lo and behold: we have just managed to send a transaction to | 769 | /* Lo and behold: we have just managed to send a transaction to |
676 | the log. Before we can commit it, wait for the IO so far to | 770 | the log. Before we can commit it, wait for the IO so far to |
677 | complete. Control buffers being written are on the | 771 | complete. Control buffers being written are on the |
@@ -771,8 +865,14 @@ wait_for_iobuf: | |||
771 | 865 | ||
772 | jbd_debug(3, "JBD: commit phase 6\n"); | 866 | jbd_debug(3, "JBD: commit phase 6\n"); |
773 | 867 | ||
774 | if (journal_write_commit_record(journal, commit_transaction)) | 868 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
775 | err = -EIO; | 869 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { |
870 | err = journal_submit_commit_record(journal, commit_transaction, | ||
871 | &cbh, crc32_sum); | ||
872 | if (err) | ||
873 | __jbd2_journal_abort_hard(journal); | ||
874 | } | ||
875 | err = journal_wait_on_commit_record(cbh); | ||
776 | 876 | ||
777 | if (err) | 877 | if (err) |
778 | jbd2_journal_abort(journal, err); | 878 | jbd2_journal_abort(journal, err); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 3667c91bc78..59ba2494dca 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1578,6 +1578,32 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, | |||
1578 | return 1; | 1578 | return 1; |
1579 | } | 1579 | } |
1580 | 1580 | ||
1581 | /** | ||
1582 | * jbd2_journal_clear_features () - Clear the given feature bits in the | ||
1583 | * journal superblock | ||
1584 | * @journal: Journal to act on. | ||
1585 | * @compat: bitmask of compatible features | ||
1586 | * @ro: bitmask of features that force read-only mount | ||
1587 | * @incompat: bitmask of incompatible features | ||
1588 | * | ||
1589 | * Masks the given bits out of the three feature words of journal->j_superblock (masks are converted with cpu_to_be32 first). | ||
1590 | * Only the superblock fields are updated here; nothing in this function writes the superblock out. | ||
1591 | */ | ||
1592 | void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, | ||
1593 | unsigned long ro, unsigned long incompat) | ||
1594 | { | ||
1595 | journal_superblock_t *sb; | ||
1596 | |||
1597 | jbd_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n", | ||
1598 | compat, ro, incompat); | ||
1599 | |||
1600 | sb = journal->j_superblock; | ||
1601 | |||
1602 | sb->s_feature_compat &= ~cpu_to_be32(compat); | ||
1603 | sb->s_feature_ro_compat &= ~cpu_to_be32(ro); | ||
1604 | sb->s_feature_incompat &= ~cpu_to_be32(incompat); | ||
1605 | } | ||
1606 | EXPORT_SYMBOL(jbd2_journal_clear_features); | ||
1581 | 1607 | ||
1582 | /** | 1608 | /** |
1583 | * int jbd2_journal_update_format () - Update on-disk journal structure. | 1609 | * int jbd2_journal_update_format () - Update on-disk journal structure. |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index d0ce627539e..921680663fa 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/jbd2.h> | 21 | #include <linux/jbd2.h> |
22 | #include <linux/errno.h> | 22 | #include <linux/errno.h> |
23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
24 | #include <linux/crc32.h> | ||
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* | 27 | /* |
@@ -316,6 +317,37 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag | |||
316 | return block; | 317 | return block; |
317 | } | 318 | } |
318 | 319 | ||
320 | /* | ||
321 | * calc_chksums folds the descriptor block bh and each of the count_tags() log blocks that follow it into the running CRC32 at *crc32_sum, advancing *next_log_block (with wrap) past every block it reads. | ||
322 | * Returns 0 on success, or 1 if one of the blocks could not be read. Each buffer obtained from jread() is released once it has been checksummed. | ||
323 | */ | ||
324 | static int calc_chksums(journal_t *journal, struct buffer_head *bh, | ||
325 | unsigned long *next_log_block, __u32 *crc32_sum) | ||
326 | { | ||
327 | int i, num_blks, err; | ||
328 | unsigned long io_block; | ||
329 | struct buffer_head *obh; | ||
330 | |||
331 | num_blks = count_tags(journal, bh); | ||
332 | /* Calculate checksum of the descriptor block. */ | ||
333 | *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); | ||
334 | |||
335 | for (i = 0; i < num_blks; i++) { | ||
336 | io_block = (*next_log_block)++; | ||
337 | wrap(journal, *next_log_block); | ||
338 | err = jread(&obh, journal, io_block); | ||
339 | if (err) { | ||
340 | printk(KERN_ERR "JBD: IO error %d recovering block " | ||
341 | "%lu in log\n", err, io_block); | ||
342 | return 1; | ||
343 | } | ||
344 | *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, | ||
345 | obh->b_size); | ||
346 | put_bh(obh); /* drop the reference on the buffer jread() returned; it was previously leaked for every block */ | ||
347 | } | ||
348 | return 0; | ||
349 | } | ||
350 | |||
319 | static int do_one_pass(journal_t *journal, | 351 | static int do_one_pass(journal_t *journal, |
320 | struct recovery_info *info, enum passtype pass) | 352 | struct recovery_info *info, enum passtype pass) |
321 | { | 353 | { |
@@ -328,6 +360,7 @@ static int do_one_pass(journal_t *journal, | |||
328 | unsigned int sequence; | 360 | unsigned int sequence; |
329 | int blocktype; | 361 | int blocktype; |
330 | int tag_bytes = journal_tag_bytes(journal); | 362 | int tag_bytes = journal_tag_bytes(journal); |
363 | __u32 crc32_sum = ~0; /* Transactional Checksums */ | ||
331 | 364 | ||
332 | /* Precompute the maximum metadata descriptors in a descriptor block */ | 365 | /* Precompute the maximum metadata descriptors in a descriptor block */ |
333 | int MAX_BLOCKS_PER_DESC; | 366 | int MAX_BLOCKS_PER_DESC; |
@@ -419,12 +452,26 @@ static int do_one_pass(journal_t *journal, | |||
419 | switch(blocktype) { | 452 | switch(blocktype) { |
420 | case JBD2_DESCRIPTOR_BLOCK: | 453 | case JBD2_DESCRIPTOR_BLOCK: |
421 | /* If it is a valid descriptor block, replay it | 454 | /* If it is a valid descriptor block, replay it |
422 | * in pass REPLAY; otherwise, just skip over the | 455 | * in pass REPLAY; if journal_checksums enabled, then |
423 | * blocks it describes. */ | 456 | * calculate checksums in PASS_SCAN, otherwise, |
457 | * just skip over the blocks it describes. */ | ||
424 | if (pass != PASS_REPLAY) { | 458 | if (pass != PASS_REPLAY) { |
459 | if (pass == PASS_SCAN && | ||
460 | JBD2_HAS_COMPAT_FEATURE(journal, | ||
461 | JBD2_FEATURE_COMPAT_CHECKSUM) && | ||
462 | !info->end_transaction) { | ||
463 | if (calc_chksums(journal, bh, | ||
464 | &next_log_block, | ||
465 | &crc32_sum)) { | ||
466 | put_bh(bh); | ||
467 | break; | ||
468 | } | ||
469 | put_bh(bh); | ||
470 | continue; | ||
471 | } | ||
425 | next_log_block += count_tags(journal, bh); | 472 | next_log_block += count_tags(journal, bh); |
426 | wrap(journal, next_log_block); | 473 | wrap(journal, next_log_block); |
427 | brelse(bh); | 474 | put_bh(bh); |
428 | continue; | 475 | continue; |
429 | } | 476 | } |
430 | 477 | ||
@@ -516,9 +563,96 @@ static int do_one_pass(journal_t *journal, | |||
516 | continue; | 563 | continue; |
517 | 564 | ||
518 | case JBD2_COMMIT_BLOCK: | 565 | case JBD2_COMMIT_BLOCK: |
519 | /* Found an expected commit block: not much to | 566 | /* How to differentiate between interrupted commit |
520 | * do other than move on to the next sequence | 567 | * and journal corruption ? |
568 | * | ||
569 | * {nth transaction} | ||
570 | * Checksum Verification Failed | ||
571 | * | | ||
572 | * ____________________ | ||
573 | * | | | ||
574 | * async_commit sync_commit | ||
575 | * | | | ||
576 | * | GO TO NEXT "Journal Corruption" | ||
577 | * | TRANSACTION | ||
578 | * | | ||
579 | * {(n+1)th transaction} | ||
580 | * | | ||
581 | * _______|______________ | ||
582 | * | | | ||
583 | * Commit block found Commit block not found | ||
584 | * | | | ||
585 | * "Journal Corruption" | | ||
586 | * _____________|_________ | ||
587 | * | | | ||
588 | * nth trans corrupt OR nth trans | ||
589 | * and (n+1)th interrupted interrupted | ||
590 | * before commit block | ||
591 | * could reach the disk. | ||
592 | * (Cannot find the difference in above | ||
593 | * mentioned conditions. Hence assume | ||
594 | * "Interrupted Commit".) | ||
595 | */ | ||
596 | |||
597 | /* Found an expected commit block: if checksums | ||
598 | * are present verify them in PASS_SCAN; else not | ||
599 | * much to do other than move on to the next sequence | ||
521 | * number. */ | 600 | * number. */ |
601 | if (pass == PASS_SCAN && | ||
602 | JBD2_HAS_COMPAT_FEATURE(journal, | ||
603 | JBD2_FEATURE_COMPAT_CHECKSUM)) { | ||
604 | int chksum_err, chksum_seen; | ||
605 | struct commit_header *cbh = | ||
606 | (struct commit_header *)bh->b_data; | ||
607 | unsigned found_chksum = | ||
608 | be32_to_cpu(cbh->h_chksum[0]); | ||
609 | |||
610 | chksum_err = chksum_seen = 0; | ||
611 | |||
612 | if (info->end_transaction) { | ||
613 | printk(KERN_ERR "JBD: Transaction %u " | ||
614 | "found to be corrupt.\n", | ||
615 | next_commit_ID - 1); | ||
616 | brelse(bh); | ||
617 | break; | ||
618 | } | ||
619 | |||
620 | if (crc32_sum == found_chksum && | ||
621 | cbh->h_chksum_type == JBD2_CRC32_CHKSUM && | ||
622 | cbh->h_chksum_size == | ||
623 | JBD2_CRC32_CHKSUM_SIZE) | ||
624 | chksum_seen = 1; | ||
625 | else if (!(cbh->h_chksum_type == 0 && | ||
626 | cbh->h_chksum_size == 0 && | ||
627 | found_chksum == 0 && | ||
628 | !chksum_seen)) | ||
629 | /* | ||
630 | * If fs is mounted using an old kernel and then | ||
631 | * kernel with journal_chksum is used then we | ||
632 | * get a situation where the journal flag has | ||
633 | * checksum flag set but checksums are not | ||
634 | * present i.e chksum = 0, in the individual | ||
635 | * commit blocks. | ||
636 | * Hence to avoid checksum failures, in this | ||
637 | * situation, this extra check is added. | ||
638 | */ | ||
639 | chksum_err = 1; | ||
640 | |||
641 | if (chksum_err) { | ||
642 | info->end_transaction = next_commit_ID; | ||
643 | |||
644 | if (!JBD2_HAS_COMPAT_FEATURE(journal, | ||
645 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){ | ||
646 | printk(KERN_ERR | ||
647 | "JBD: Transaction %u " | ||
648 | "found to be corrupt.\n", | ||
649 | next_commit_ID); | ||
650 | brelse(bh); | ||
651 | break; | ||
652 | } | ||
653 | } | ||
654 | crc32_sum = ~0; | ||
655 | } | ||
522 | brelse(bh); | 656 | brelse(bh); |
523 | next_commit_ID++; | 657 | next_commit_ID++; |
524 | continue; | 658 | continue; |
@@ -554,9 +688,10 @@ static int do_one_pass(journal_t *journal, | |||
554 | * transaction marks the end of the valid log. | 688 | * transaction marks the end of the valid log. |
555 | */ | 689 | */ |
556 | 690 | ||
557 | if (pass == PASS_SCAN) | 691 | if (pass == PASS_SCAN) { |
558 | info->end_transaction = next_commit_ID; | 692 | if (!info->end_transaction) |
559 | else { | 693 | info->end_transaction = next_commit_ID; |
694 | } else { | ||
560 | /* It's really bad news if different passes end up at | 695 | /* It's really bad news if different passes end up at |
561 | * different places (but possible due to IO errors). */ | 696 | * different places (but possible due to IO errors). */ |
562 | if (info->end_transaction != next_commit_ID) { | 697 | if (info->end_transaction != next_commit_ID) { |
diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 300cc5a5adb..cd406dba0e6 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h | |||
@@ -467,7 +467,8 @@ do { \ | |||
467 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ | 467 | #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ |
468 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ | 468 | #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ |
469 | #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ | 469 | #define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ |
470 | 470 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | |
471 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | ||
471 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 472 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
472 | #ifndef _LINUX_EXT2_FS_H | 473 | #ifndef _LINUX_EXT2_FS_H |
473 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 474 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 685640036e8..98a2bc5d3e3 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h | |||
@@ -149,6 +149,28 @@ typedef struct journal_header_s | |||
149 | __be32 h_sequence; | 149 | __be32 h_sequence; |
150 | } journal_header_t; | 150 | } journal_header_t; |
151 | 151 | ||
152 | /* | ||
153 | * Checksum types. | ||
154 | */ | ||
155 | #define JBD2_CRC32_CHKSUM 1 | ||
156 | #define JBD2_MD5_CHKSUM 2 | ||
157 | #define JBD2_SHA1_CHKSUM 3 | ||
158 | |||
159 | #define JBD2_CRC32_CHKSUM_SIZE 4 | ||
160 | |||
161 | #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32)) /* despite the name, this is a count of u32 words: 32 bytes of checksum space */ | ||
162 | /* | ||
163 | * Commit block header for storing transactional checksums. It starts with the h_magic/h_blocktype/h_sequence fields the commit code fills in for every commit block, followed by the checksum description and value. | ||
164 | */ | ||
165 | struct commit_header { | ||
166 | __be32 h_magic; | ||
167 | __be32 h_blocktype; | ||
168 | __be32 h_sequence; | ||
169 | unsigned char h_chksum_type; /* one of the JBD2_*_CHKSUM type codes */ | ||
170 | unsigned char h_chksum_size; /* set to JBD2_CRC32_CHKSUM_SIZE when the type is JBD2_CRC32_CHKSUM */ | ||
171 | unsigned char h_padding[2]; | ||
172 | __be32 h_chksum[JBD2_CHECKSUM_BYTES]; /* the CRC32 is stored big-endian in h_chksum[0] */ | ||
173 | }; | ||
152 | 174 | ||
153 | /* | 175 | /* |
154 | * The block tag: used to describe a single buffer in the journal. | 176 | * The block tag: used to describe a single buffer in the journal. |
@@ -242,14 +264,18 @@ typedef struct journal_superblock_s | |||
242 | ((j)->j_format_version >= 2 && \ | 264 | ((j)->j_format_version >= 2 && \ |
243 | ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) | 265 | ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) |
244 | 266 | ||
245 | #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 | 267 | #define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 |
246 | #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 | 268 | |
269 | #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 | ||
270 | #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 | ||
271 | #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 | ||
247 | 272 | ||
248 | /* Features known to this kernel version: */ | 273 | /* Features known to this kernel version: */ |
249 | #define JBD2_KNOWN_COMPAT_FEATURES 0 | 274 | #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM |
250 | #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 | 275 | #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 |
251 | #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ | 276 | #define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ |
252 | JBD2_FEATURE_INCOMPAT_64BIT) | 277 | JBD2_FEATURE_INCOMPAT_64BIT | \ |
278 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) | ||
253 | 279 | ||
254 | #ifdef __KERNEL__ | 280 | #ifdef __KERNEL__ |
255 | 281 | ||
@@ -997,6 +1023,8 @@ extern int jbd2_journal_check_available_features | |||
997 | (journal_t *, unsigned long, unsigned long, unsigned long); | 1023 | (journal_t *, unsigned long, unsigned long, unsigned long); |
998 | extern int jbd2_journal_set_features | 1024 | extern int jbd2_journal_set_features |
999 | (journal_t *, unsigned long, unsigned long, unsigned long); | 1025 | (journal_t *, unsigned long, unsigned long, unsigned long); |
1026 | extern void jbd2_journal_clear_features | ||
1027 | (journal_t *, unsigned long, unsigned long, unsigned long); | ||
1000 | extern int jbd2_journal_create (journal_t *); | 1028 | extern int jbd2_journal_create (journal_t *); |
1001 | extern int jbd2_journal_load (journal_t *journal); | 1029 | extern int jbd2_journal_load (journal_t *journal); |
1002 | extern void jbd2_journal_destroy (journal_t *); | 1030 | extern void jbd2_journal_destroy (journal_t *); |